PyPI - agentic-threat-hunting-framework - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentic-threat-hunting-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

agentic_threat_hunting_framework-0.1.0.dist-info/METADATA +339 -0
agentic_threat_hunting_framework-0.1.0.dist-info/RECORD +17 -0
agentic_threat_hunting_framework-0.1.0.dist-info/WHEEL +5 -0
agentic_threat_hunting_framework-0.1.0.dist-info/entry_points.txt +2 -0
agentic_threat_hunting_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
agentic_threat_hunting_framework-0.1.0.dist-info/top_level.txt +1 -0
athf/__init__.py +9 -0
athf/__version__.py +3 -0
athf/cli.py +127 -0
athf/commands/__init__.py +1 -0
athf/commands/hunt.py +596 -0
athf/commands/init.py +411 -0
athf/core/__init__.py +1 -0
athf/core/hunt_manager.py +245 -0
athf/core/hunt_parser.py +169 -0
athf/core/template_engine.py +224 -0
athf/utils/__init__.py +1 -0

athf/commands/init.py ADDED Viewed

@@ -0,0 +1,411 @@
+"""Initialize ATHF directory structure."""
+from pathlib import Path
+import click
+import yaml
+from rich.console import Console
+from rich.prompt import Confirm, Prompt
+console = Console()
+@click.command()
+@click.option("--path", default=".", help="Directory to initialize ATHF in")
+@click.option("--non-interactive", is_flag=True, help="Skip interactive prompts")
+def init(path: str, non_interactive: bool) -> None:
+    """Initialize a new ATHF threat hunting workspace.
+    \b
+    Creates directory structure:
+      config/         Configuration files
+      hunts/          Hunt hypothesis cards
+      queries/        Reusable query library
+      runs/           Hunt execution results
+      templates/      Hunt templates (LOCK pattern)
+      knowledge/      Domain expertise and frameworks
+      prompts/        AI workflow prompts
+      integrations/   Tool integration configs
+      docs/           Documentation
+    \b
+    Generates files:
+      • config/.athfconfig.yaml (workspace configuration)
+      • AGENTS.md (AI assistant context)
+      • templates/HUNT_LOCK.md (hunt template)
+    \b
+    Examples:
+      # Interactive setup (recommended for first time)
+      athf init
+      # Non-interactive with defaults
+      athf init --non-interactive
+      # Initialize in specific directory
+      athf init --path /path/to/workspace
+    \b
+    Interactive setup will ask you:
+      1. Workspace name (default: directory name)
+      2. SIEM platform (Splunk, Sentinel, Elastic, etc.)
+      3. EDR platform (CrowdStrike, SentinelOne, etc.)
+      4. Hunt ID prefix (default: H-)
+      5. Hunt retention period (default: 365 days)
+    \b
+    After initialization:
+      1. Customize AGENTS.md with your environment details
+      2. Add data sources to config/.athfconfig.yaml
+      3. Create your first hunt: athf hunt new
+    """
+    base_path = Path(path).resolve()
+    # Check if already initialized (check both old and new locations)
+    old_config_path = base_path / ".athfconfig.yaml"
+    new_config_path = base_path / "config" / ".athfconfig.yaml"
+    if (old_config_path.exists() or new_config_path.exists()) and not Confirm.ask(
+        f"ATHF already initialized in {base_path}. Reinitialize?", default=False
+    ):
+        console.print("[yellow]Initialization cancelled.[/yellow]")
+        return
+    config_path = new_config_path
+    console.print("\n[bold cyan]🎯 Initializing Agentic Threat Hunting Framework[/bold cyan]\n")
+    # Gather configuration
+    if non_interactive:
+        config = _default_config(base_path)
+    else:
+        config = _interactive_config(base_path)
+    # Create directory structure
+    directories = ["config", "hunts", "queries", "runs", "templates", "knowledge", "prompts", "integrations", "docs"]
+    console.print("\n[bold]Creating directory structure...[/bold]")
+    for dir_name in directories:
+        dir_path = base_path / dir_name
+        dir_path.mkdir(exist_ok=True)
+        console.print(f"  ✓ Created [cyan]{dir_name}/[/cyan]")
+    # Save configuration
+    with open(config_path, "w", encoding="utf-8") as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+    console.print("  ✓ Created [cyan]config/.athfconfig.yaml[/cyan]")
+    # Create AGENTS.md if it doesn't exist
+    agents_path = base_path / "AGENTS.md"
+    if not agents_path.exists():
+        _create_agents_file(agents_path, config)
+        console.print("  ✓ Created [cyan]AGENTS.md[/cyan]")
+    # Copy templates if they don't exist
+    templates_path = base_path / "templates"
+    if not (templates_path / "HUNT_LOCK.md").exists():
+        _create_hunt_template(templates_path / "HUNT_LOCK.md")
+        console.print("  ✓ Created [cyan]templates/HUNT_LOCK.md[/cyan]")
+    console.print("\n[bold green]✅ ATHF initialized successfully![/bold green]")
+    console.print("\n[bold]Next steps:[/bold]")
+    console.print("  1. Customize [cyan]AGENTS.md[/cyan] with your environment details")
+    console.print("  2. Create your first hunt: [cyan]athf hunt new[/cyan]")
+    console.print("  3. Check out the docs at [cyan]docs/getting-started.md[/cyan]")
+def _default_config(base_path: Path) -> dict:
+    """Return default configuration."""
+    return {
+        "workspace_name": base_path.name,
+        "hunt_prefix": "H-",
+        "siem": "Splunk",
+        "edr": "CrowdStrike",
+        "query_language": "SPL",
+        "hunt_retention_days": 365,
+    }
+def _interactive_config(base_path: Path) -> dict:
+    """Gather configuration interactively."""
+    console.print("[bold]📋 Quick setup questions:[/bold]")
+    config: dict = {}
+    # Workspace name
+    workspace_name = Prompt.ask(
+        "1. Workspace name (e.g., 'Production Hunts', 'Client-Acme', 'SOC Team')", default=base_path.name
+    )
+    config["workspace_name"] = workspace_name
+    # SIEM
+    siem = Prompt.ask(
+        "2. What SIEM do you use?", choices=["Splunk", "Sentinel", "Elastic", "Chronicle", "Other"], default="Splunk"
+    )
+    config["siem"] = siem
+    # Query language mapping
+    query_lang_map = {"Splunk": "SPL", "Sentinel": "KQL", "Elastic": "Lucene", "Chronicle": "YARA-L", "Other": "Custom"}
+    config["query_language"] = query_lang_map.get(siem, "SPL")
+    # EDR
+    edr = Prompt.ask(
+        "3. What's your primary EDR?",
+        choices=["CrowdStrike", "SentinelOne", "Defender", "Carbon Black", "Other"],
+        default="CrowdStrike",
+    )
+    config["edr"] = edr
+    # Hunt prefix
+    hunt_prefix = Prompt.ask("4. Hunt ID prefix (e.g., H-, HUNT-)", default="H-")
+    config["hunt_prefix"] = hunt_prefix
+    # Retention
+    retention = Prompt.ask("5. Hunt retention (days)", default="365")
+    config["hunt_retention_days"] = int(retention) if isinstance(retention, str) else retention
+    return config
+def _create_agents_file(path: Path, config: dict) -> None:
+    """Create AGENTS.md file with configuration."""
+    content = f"""# ATHF Agent Context
+**Workspace:** {config['workspace_name']}
+This file provides context to AI assistants about your threat hunting environment.
+## Data Sources
+### SIEM / Log Aggregation
+- **Platform:** {config['siem']}
+- **Query Language:** {config['query_language']}
+- **Indexes:** [Add your indexes here]
+- **Retention:** 90 days
+- **Access:** [Add access method]
+### EDR / Endpoint Security
+- **Platform:** {config['edr']}
+- **Telemetry:** Process execution, network connections, file modifications
+- **Query Access:** [Add query method]
+### Other Data Sources
+[Add additional data sources]
+## Technology Stack
+### Security Tools
+- SIEM: {config['siem']}
+- EDR: {config['edr']}
+- [Add more tools]
+### Cloud Platforms
+[Add cloud platforms if applicable]
+## Known Visibility Gaps
+Document what you can't see:
+- [Add visibility gaps]
+## Hunt Numbering Convention
+- **Prefix:** {config['hunt_prefix']}
+- **Format:** {config['hunt_prefix']}XXXX (e.g., {config['hunt_prefix']}0001)
+- **Retention:** {config['hunt_retention_days']} days
+## Team Context
+[Add information about your team, shift coverage, escalation procedures]
+"""
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(content)
+def _create_hunt_template(path: Path) -> None:
+    """Create hunt template file."""
+    content = """---
+hunt_id: H-XXXX
+title: [Hunt Title]
+status: planning
+date: YYYY-MM-DD
+hunter: [Your Name]
+platform: [Windows/Linux/macOS/Cloud]
+tactics: [persistence, credential-access, etc.]
+techniques: [T1003.001, T1005, etc.]
+data_sources: [SIEM, EDR, etc.]
+related_hunts: []
+findings_count: 0
+true_positives: 0
+false_positives: 0
+customer_deliverables: []
+tags: []
+---
+# H-XXXX: [Hunt Title]
+**Hunt Metadata**
+- **Date:** YYYY-MM-DD
+- **Hunter:** [Your Name]
+- **Status:** Planning
+- **MITRE ATT&CK:** [Primary Technique]
+---
+## LEARN: Prepare the Hunt
+### Hypothesis Statement
+[What behavior are you looking for? What will you observe if the hypothesis is true?]
+### Threat Context
+[What threat actor/malware/TTP motivates this hunt?]
+### ABLE Scoping
+| **Field**   | **Your Input** |
+|-------------|----------------|
+| **Actor** *(Optional)* | [Threat actor or malware family] |
+| **Behavior** | [TTP or behavior pattern] |
+| **Location** | [Systems, networks, or environments to hunt] |
+| **Evidence** | [Data sources and key fields to examine] |
+### Threat Intel & Research
+- **MITRE ATT&CK Techniques:** [List relevant techniques]
+- **CTI Sources & References:** [Links to reports, blogs, etc.]
+### Related Tickets
+| **Team** | **Ticket/Details** |
+|----------|-------------------|
+| **SOC/IR** | [Ticket numbers or N/A] |
+---
+## OBSERVE: Expected Behaviors
+### What Normal Looks Like
+[Describe legitimate activity that should not trigger alerts]
+### What Suspicious Looks Like
+[Describe adversary behavior patterns to hunt for]
+### Expected Observables
+- **Processes:** [Process names, command lines]
+- **Network:** [Connections, protocols, domains]
+- **Files:** [File paths, extensions, sizes]
+- **Registry:** [Registry keys if applicable]
+- **Authentication:** [Login patterns if applicable]
+---
+## CHECK: Execute & Analyze
+### Data Source Information
+- **Index/Data Source:** [SIEM index or data source]
+- **Time Range:** [Date range for hunt]
+- **Events Analyzed:** [Approximate count]
+- **Data Quality:** [Assessment of data completeness]
+### Hunting Queries
+#### Initial Query
+```
+[Your initial query]
+```
+**Query Notes:**
+- [What did this query return?]
+- [What worked? What didn't?]
+#### Refined Query
+```
+[Your refined query after iterations]
+```
+**Refinement Rationale:**
+- [Why did you change the query?]
+- [What improvements were made?]
+### Visualization & Analytics
+[Describe any visualizations, timelines, or statistical analysis]
+### Query Performance
+**What Worked Well:**
+- [Effective filters or techniques]
+**What Didn't Work:**
+- [Challenges or limitations]
+**Iterations Made:**
+- [Document query evolution]
+---
+## KEEP: Findings & Response
+### Executive Summary
+[Concise summary of hunt results and key findings]
+### Findings
+| **Finding** | **Ticket** | **Description** |
+|-------------|-----------|-----------------|
+| True Positive | [Ticket] | [Description] |
+| False Positive | N/A | [Description] |
+**True Positives:** [Count]
+**False Positives:** [Count]
+### Detection Logic
+**Automation Opportunity:**
+[Can this hunt become an automated detection rule?]
+**Proposed Detection:**
+```
+[Detection rule if applicable]
+```
+### Lessons Learned
+**What Worked Well:**
+- [Successes]
+**What Could Be Improved:**
+- [Areas for improvement]
+**Telemetry Gaps Identified:**
+- [Missing data sources or visibility gaps]
+### Follow-up Actions
+- [ ] [Action item 1]
+- [ ] [Action item 2]
+### Follow-up Hunts
+- [Related hunt ideas for future investigation]
+---
+**Hunt Completed:** YYYY-MM-DD
+**Next Review:** [Date for recurring hunt if applicable]
+"""
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(content)

athf/core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Core ATHF functionality."""

athf/core/hunt_manager.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""Manage hunt files and operations."""
+import re
+from pathlib import Path
+from typing import Dict, List, Optional
+from athf.core.hunt_parser import parse_hunt_file
+class HuntManager:
+    """Manage hunt files and operations."""
+    def __init__(self, hunts_dir: Optional[Path] = None):
+        """Initialize hunt manager.
+        Args:
+            hunts_dir: Directory containing hunt files (default: ./hunts)
+        """
+        self.hunts_dir = Path(hunts_dir) if hunts_dir else Path.cwd() / "hunts"
+        if not self.hunts_dir.exists():
+            self.hunts_dir.mkdir(parents=True, exist_ok=True)
+    def list_hunts(
+        self,
+        status: Optional[str] = None,
+        tactic: Optional[str] = None,
+        technique: Optional[str] = None,
+        platform: Optional[str] = None,
+    ) -> List[Dict]:
+        """List all hunts with optional filters.
+        Args:
+            status: Filter by status (planning, active, completed, etc.)
+            tactic: Filter by MITRE tactic
+            technique: Filter by MITRE technique (e.g., T1003.001)
+            platform: Filter by platform (Windows, Linux, macOS, Cloud)
+        Returns:
+            List of hunt metadata dicts
+        """
+        hunts = []
+        # Find all hunt files
+        hunt_files = sorted(self.hunts_dir.glob("*.md"))
+        for hunt_file in hunt_files:
+            try:
+                hunt_data = parse_hunt_file(hunt_file)
+                frontmatter = hunt_data.get("frontmatter", {})
+                # Apply filters
+                if status and frontmatter.get("status") != status:
+                    continue
+                if tactic and tactic not in frontmatter.get("tactics", []):
+                    continue
+                if technique and technique not in frontmatter.get("techniques", []):
+                    continue
+                if platform and platform not in frontmatter.get("platform", []):
+                    continue
+                # Extract summary info
+                date_val = frontmatter.get("date")
+                # Convert date objects to strings for JSON serialization
+                if hasattr(date_val, "isoformat"):
+                    date_str = date_val.isoformat()
+                else:
+                    date_str = str(date_val) if date_val else None
+                hunts.append(
+                    {
+                        "hunt_id": frontmatter.get("hunt_id"),
+                        "title": frontmatter.get("title"),
+                        "status": frontmatter.get("status"),
+                        "date": date_str,
+                        "platform": frontmatter.get("platform", []),
+                        "tactics": frontmatter.get("tactics", []),
+                        "techniques": frontmatter.get("techniques", []),
+                        "findings_count": frontmatter.get("findings_count", 0),
+                        "true_positives": frontmatter.get("true_positives", 0),
+                        "false_positives": frontmatter.get("false_positives", 0),
+                        "file_path": str(hunt_file),
+                    }
+                )
+            except Exception:
+                # Skip files that can't be parsed
+                continue
+        return hunts
+    def get_hunt(self, hunt_id: str) -> Optional[Dict]:
+        """Get a specific hunt by ID.
+        Args:
+            hunt_id: Hunt ID (e.g., H-0001)
+        Returns:
+            Hunt data dict or None if not found
+        """
+        hunt_file = self.hunts_dir / f"{hunt_id}.md"
+        if not hunt_file.exists():
+            return None
+        return parse_hunt_file(hunt_file)
+    def get_next_hunt_id(self, prefix: str = "H-") -> str:
+        """Calculate the next available hunt ID.
+        Args:
+            prefix: Hunt ID prefix (default: H-)
+        Returns:
+            Next hunt ID (e.g., H-0023)
+        """
+        hunts = self.list_hunts()
+        if not hunts:
+            return f"{prefix}0001"
+        # Extract numbers from hunt IDs with matching prefix
+        numbers = []
+        pattern = re.compile(rf"^{re.escape(prefix)}(\d+)$")
+        for hunt in hunts:
+            hunt_id = hunt.get("hunt_id")
+            if not hunt_id or not isinstance(hunt_id, str):
+                continue
+            match = pattern.match(hunt_id)
+            if match:
+                numbers.append(int(match.group(1)))
+        if not numbers:
+            return f"{prefix}0001"
+        # Next number with zero-padding
+        next_num = max(numbers) + 1
+        return f"{prefix}{next_num:04d}"
+    def search_hunts(self, query: str) -> List[Dict]:
+        """Full-text search across all hunt files.
+        Args:
+            query: Search query string
+        Returns:
+            List of matching hunts
+        """
+        results = []
+        query_lower = query.lower()
+        for hunt_file in self.hunts_dir.glob("*.md"):
+            try:
+                with open(hunt_file, "r", encoding="utf-8") as f:
+                    content = f.read()
+                # Check if query appears in file
+                if query_lower in content.lower():
+                    hunt_data = parse_hunt_file(hunt_file)
+                    frontmatter = hunt_data.get("frontmatter", {})
+                    results.append(
+                        {
+                            "hunt_id": frontmatter.get("hunt_id"),
+                            "title": frontmatter.get("title"),
+                            "status": frontmatter.get("status"),
+                            "file_path": str(hunt_file),
+                        }
+                    )
+            except Exception:
+                continue
+        return results
+    def calculate_stats(self) -> Dict:
+        """Calculate hunt program statistics.
+        Returns:
+            Dict with success rates, TP/FP ratios, coverage metrics
+        """
+        hunts = self.list_hunts()
+        if not hunts:
+            return {
+                "total_hunts": 0,
+                "completed_hunts": 0,
+                "total_findings": 0,
+                "true_positives": 0,
+                "false_positives": 0,
+                "success_rate": 0.0,
+                "tp_fp_ratio": 0.0,
+            }
+        total_hunts = len(hunts)
+        completed_hunts = len([h for h in hunts if h.get("status") == "completed"])
+        total_findings = sum(h.get("findings_count", 0) for h in hunts)
+        total_tp = sum(h.get("true_positives", 0) for h in hunts)
+        total_fp = sum(h.get("false_positives", 0) for h in hunts)
+        # Calculate success rate (hunts with TP / completed hunts)
+        hunts_with_tp = len([h for h in hunts if h.get("true_positives", 0) > 0])
+        success_rate = (hunts_with_tp / completed_hunts * 100) if completed_hunts > 0 else 0.0
+        # Calculate TP/FP ratio
+        tp_fp_ratio = (total_tp / total_fp) if total_fp > 0 else float("inf")
+        return {
+            "total_hunts": total_hunts,
+            "completed_hunts": completed_hunts,
+            "total_findings": total_findings,
+            "true_positives": total_tp,
+            "false_positives": total_fp,
+            "success_rate": round(success_rate, 1),
+            "tp_fp_ratio": round(tp_fp_ratio, 2) if tp_fp_ratio != float("inf") else "∞",
+        }
+    def calculate_attack_coverage(self) -> Dict[str, List[str]]:
+        """Calculate MITRE ATT&CK technique coverage.
+        Returns:
+            Dict mapping tactics to lists of covered techniques
+        """
+        hunts = self.list_hunts()
+        coverage: Dict = {}
+        for hunt in hunts:
+            tactics = hunt.get("tactics", [])
+            techniques = hunt.get("techniques", [])
+            for tactic in tactics:
+                if tactic not in coverage:
+                    coverage[tactic] = set()
+                for technique in techniques:
+                    coverage[tactic].add(technique)
+        # Convert sets to sorted lists
+        return {tactic: sorted(list(techniques)) for tactic, techniques in coverage.items()}