PyPI - agentic-threat-hunting-framework - Versions diffs - 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

agentic-threat-hunting-framework 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

{agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/METADATA +38 -40
agentic_threat_hunting_framework-0.3.0.dist-info/RECORD +51 -0
athf/__version__.py +1 -1
athf/cli.py +7 -2
athf/commands/__init__.py +4 -0
athf/commands/agent.py +452 -0
athf/commands/context.py +6 -9
athf/commands/env.py +2 -2
athf/commands/hunt.py +3 -3
athf/commands/init.py +45 -0
athf/commands/research.py +530 -0
athf/commands/similar.py +5 -5
athf/core/research_manager.py +419 -0
athf/core/web_search.py +340 -0
athf/data/__init__.py +19 -0
athf/data/docs/CHANGELOG.md +147 -0
athf/data/docs/CLI_REFERENCE.md +1797 -0
athf/data/docs/INSTALL.md +594 -0
athf/data/docs/README.md +31 -0
athf/data/docs/environment.md +256 -0
athf/data/docs/getting-started.md +419 -0
athf/data/docs/level4-agentic-workflows.md +480 -0
athf/data/docs/lock-pattern.md +149 -0
athf/data/docs/maturity-model.md +400 -0
athf/data/docs/why-athf.md +44 -0
athf/data/hunts/FORMAT_GUIDELINES.md +507 -0
athf/data/hunts/H-0001.md +453 -0
athf/data/hunts/H-0002.md +436 -0
athf/data/hunts/H-0003.md +546 -0
athf/data/hunts/README.md +231 -0
athf/data/integrations/MCP_CATALOG.md +45 -0
athf/data/integrations/README.md +129 -0
athf/data/integrations/quickstart/splunk.md +162 -0
athf/data/knowledge/hunting-knowledge.md +2375 -0
athf/data/prompts/README.md +172 -0
athf/data/prompts/ai-workflow.md +581 -0
athf/data/prompts/basic-prompts.md +316 -0
athf/data/templates/HUNT_LOCK.md +228 -0
agentic_threat_hunting_framework-0.2.3.dist-info/RECORD +0 -23
{agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/WHEEL +0 -0
{agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/entry_points.txt +0 -0
{agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/licenses/LICENSE +0 -0
{agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/top_level.txt +0 -0

athf/core/web_search.py ADDED Viewed

@@ -0,0 +1,340 @@
+"""Web search integration for threat research."""
+import os
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+@dataclass
+class SearchResult:
+    """One hit returned by a web search.
+    Attributes:
+        title: Title of the result
+        url: URL of the result
+        content: Snippet or full content
+        score: Relevance score (0-1)
+    """
+    title: str
+    url: str
+    content: str
+    score: float
+@dataclass
+class SearchResponse:
+    """Outcome of one web search request.
+    Attributes:
+        query: Query string the search was run with
+        results: Parsed search results
+        answer: AI-generated answer summary, if any
+        response_time_ms: Elapsed request time in milliseconds
+        search_depth: Search depth the request used
+        images: Image entries attached to the response
+    """
+    query: str
+    results: List[SearchResult]
+    answer: Optional[str] = None
+    response_time_ms: int = 0
+    search_depth: str = "basic"
+    # default_factory gives every instance its own list.
+    images: List[Dict[str, str]] = field(default_factory=list)
+class TavilySearchClient:
+    """Tavily Search API client for threat research.
+    Tavily is designed for AI/LLM integration and provides:
+    - Basic and advanced search depth
+    - AI-generated answer summaries
+    - Domain filtering
+    - Structured results for LLM consumption
+    Features of this wrapper:
+    - Security-focused domain filtering via the ``*_DOMAINS`` lists
+    - Configurable search depth (basic=fast, advanced=thorough)
+    - Lazy import of the ``tavily`` SDK on first use
+    - Per-request timing reported in ``SearchResponse.response_time_ms``
+    Errors raised by the Tavily SDK are not caught here; they propagate
+    to the caller.
+    Environment:
+        TAVILY_API_KEY: API key from https://tavily.com
+    """
+    # Sources used by search_threat_intel().
+    SECURITY_DOMAINS = [
+        "attack.mitre.org",
+        "github.com",
+        "elastic.co",
+        "microsoft.com",
+        "crowdstrike.com",
+        "mandiant.com",
+        "redcanary.com",
+        "thehackernews.com",
+        "bleepingcomputer.com",
+        "unit42.paloaltonetworks.com",
+        "blog.talosintelligence.com",
+        "securelist.com",
+        "thedfirreport.com",
+        "atomicredteam.io",
+        "lolbas-project.github.io",
+        "gtfobins.github.io",
+    ]
+    # Technical documentation sources used by search_system_internals().
+    TECHNICAL_DOMAINS = [
+        "microsoft.com",
+        "learn.microsoft.com",
+        "docs.microsoft.com",
+        "developer.apple.com",
+        "man7.org",
+        "linux.die.net",
+        "kernel.org",
+        "aws.amazon.com",
+        "docs.aws.amazon.com",
+        "cloud.google.com",
+        "en.wikipedia.org",
+    ]
+    # Threat intelligence sources used by search_adversary_tradecraft().
+    THREAT_INTEL_DOMAINS = [
+        "attack.mitre.org",
+        "thedfirreport.com",
+        "mandiant.com",
+        "crowdstrike.com",
+        "unit42.paloaltonetworks.com",
+        "blog.talosintelligence.com",
+        "securelist.com",
+        "redcanary.com",
+        "elastic.co",
+        "atomicredteam.io",
+        "lolbas-project.github.io",
+        "gtfobins.github.io",
+    ]
+    # Detection and SIEM sources used by search_detection_methods().
+    DETECTION_DOMAINS = [
+        "github.com",
+        "elastic.co",
+        "splunk.com",
+        "microsoft.com",
+        "redcanary.com",
+        "sigma-hq.github.io",
+        "detection.fyi",
+    ]
+    def __init__(self, api_key: Optional[str] = None) -> None:
+        """Initialize client with API key.
+        Args:
+            api_key: Tavily API key (defaults to TAVILY_API_KEY env var)
+        Raises:
+            ValueError: If no API key is provided or found
+        """
+        self.api_key = api_key or os.getenv("TAVILY_API_KEY")
+        if not self.api_key:
+            raise ValueError("TAVILY_API_KEY not set. Get your API key from https://tavily.com")
+        # The SDK client is created lazily by _get_client().
+        self._client: Optional[Any] = None
+    def _get_client(self) -> Any:
+        """Get or create Tavily client instance.
+        The ``tavily`` package is imported here rather than at module level,
+        so it is only required once a client is actually needed.
+        Returns:
+            The cached ``tavily.TavilyClient`` instance
+        Raises:
+            ImportError: If the tavily-python package is not installed
+        """
+        if self._client is None:
+            # Only the import is guarded, so an error raised while
+            # constructing the client is not misreported as a missing package.
+            try:
+                from tavily import TavilyClient
+            except ImportError as exc:
+                raise ImportError("tavily-python package not installed. Run: pip install tavily-python") from exc
+            self._client = TavilyClient(api_key=self.api_key)
+        return self._client
+    def search(
+        self,
+        query: str,
+        search_depth: str = "basic",
+        max_results: int = 10,
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        include_answer: bool = True,
+        include_raw_content: bool = False,
+    ) -> SearchResponse:
+        """Execute search query.
+        Args:
+            query: Search query string
+            search_depth: "basic" (fast, ~5 results) or "advanced" (thorough, ~10 results)
+            max_results: Maximum number of results (1-20); passed to the API unvalidated
+            include_domains: Limit search to these domains
+            exclude_domains: Exclude these domains from search
+            include_answer: Include AI-generated answer summary
+            include_raw_content: Include full page content (increases response size)
+        Returns:
+            SearchResponse with results
+        Raises:
+            ImportError: If the tavily-python package is not installed
+            Exception: Any error raised by the Tavily SDK call propagates unchanged
+        """
+        client = self._get_client()
+        # Build search parameters
+        search_params: Dict[str, Any] = {
+            "query": query,
+            "search_depth": search_depth,
+            "max_results": max_results,
+            "include_answer": include_answer,
+            "include_raw_content": include_raw_content,
+        }
+        # Domain filters are only sent when non-empty. They are copied so that
+        # downstream code cannot mutate the caller's list or one of the shared
+        # class-level *_DOMAINS constants.
+        if include_domains:
+            search_params["include_domains"] = list(include_domains)
+        if exclude_domains:
+            search_params["exclude_domains"] = list(exclude_domains)
+        # Execute search. perf_counter() is monotonic, so the measured duration
+        # cannot go negative or jump if the system clock is adjusted.
+        start_time = time.perf_counter()
+        response = client.search(**search_params)
+        response_time_ms = int((time.perf_counter() - start_time) * 1000)
+        # Parse results; "or []" also covers a key that is present but None.
+        results = [
+            SearchResult(
+                title=result.get("title", ""),
+                url=result.get("url", ""),
+                content=result.get("content", ""),
+                score=result.get("score", 0.0),
+            )
+            for result in response.get("results") or []
+        ]
+        return SearchResponse(
+            query=query,
+            results=results,
+            answer=response.get("answer"),
+            response_time_ms=response_time_ms,
+            search_depth=search_depth,
+            images=response.get("images") or [],
+        )
+    def search_threat_intel(
+        self,
+        topic: str,
+        technique: Optional[str] = None,
+        search_depth: str = "advanced",
+    ) -> SearchResponse:
+        """Search with security-focused parameters.
+        Optimized for threat hunting research with:
+        - Security-focused domain filtering (SECURITY_DOMAINS)
+        - Advanced search depth by default
+        - AI-generated answer summary
+        Args:
+            topic: Research topic (e.g., "LSASS memory dumping")
+            technique: Optional MITRE ATT&CK technique (e.g., "T1003.001")
+            search_depth: Search depth ("basic" or "advanced")
+        Returns:
+            SearchResponse with security-focused results
+        """
+        # Build security-focused query
+        query = f"{topic} threat hunting detection"
+        if technique:
+            query += f" MITRE ATT&CK {technique}"
+        return self.search(
+            query=query,
+            search_depth=search_depth,
+            include_domains=self.SECURITY_DOMAINS,
+            include_answer=True,
+        )
+    def search_system_internals(
+        self,
+        topic: str,
+        search_depth: str = "advanced",
+    ) -> SearchResponse:
+        """Search for system/technology internals.
+        Focused on understanding how systems work normally,
+        useful for the "System Research" skill.
+        Args:
+            topic: Technology/system topic (e.g., "LSASS", "Windows Authentication")
+            search_depth: Search depth ("basic" or "advanced")
+        Returns:
+            SearchResponse with technical documentation
+        """
+        query = f"{topic} how it works internals documentation"
+        # Focus on technical documentation sources
+        return self.search(
+            query=query,
+            search_depth=search_depth,
+            include_domains=self.TECHNICAL_DOMAINS,
+            include_answer=True,
+        )
+    def search_adversary_tradecraft(
+        self,
+        topic: str,
+        technique: Optional[str] = None,
+        search_depth: str = "advanced",
+    ) -> SearchResponse:
+        """Search for adversary tradecraft and attack techniques.
+        Focused on how adversaries abuse systems,
+        useful for the "Adversary Tradecraft" skill.
+        Args:
+            topic: Attack topic (e.g., "credential dumping", "lateral movement")
+            technique: Optional MITRE ATT&CK technique
+            search_depth: Search depth ("basic" or "advanced")
+        Returns:
+            SearchResponse with adversary technique information
+        """
+        query = f"{topic} adversary technique attack method"
+        if technique:
+            query += f" {technique}"
+        # Focus on threat intelligence sources
+        return self.search(
+            query=query,
+            search_depth=search_depth,
+            include_domains=self.THREAT_INTEL_DOMAINS,
+            include_answer=True,
+        )
+    def search_detection_methods(
+        self,
+        topic: str,
+        technique: Optional[str] = None,
+        search_depth: str = "advanced",
+    ) -> SearchResponse:
+        """Search for detection methods and analytics.
+        Focused on how to detect specific behaviors,
+        useful for detection engineering.
+        Args:
+            topic: Detection topic (e.g., "LSASS access detection")
+            technique: Optional MITRE ATT&CK technique
+            search_depth: Search depth ("basic" or "advanced")
+        Returns:
+            SearchResponse with detection method information
+        """
+        query = f"{topic} detection rule query sigma"
+        if technique:
+            query += f" {technique}"
+        # Focus on detection and SIEM sources
+        return self.search(
+            query=query,
+            search_depth=search_depth,
+            include_domains=self.DETECTION_DOMAINS,
+            include_answer=True,
+        )
+def create_search_client(api_key: Optional[str] = None) -> Optional[TavilySearchClient]:
+    """Build a Tavily search client when an API key can be resolved.
+    Args:
+        api_key: Explicit API key; when falsy, the TAVILY_API_KEY env var is used
+    Returns:
+        A TavilySearchClient for the resolved key, or None when no key is found
+    """
+    resolved_key = api_key if api_key else os.getenv("TAVILY_API_KEY")
+    return TavilySearchClient(api_key=resolved_key) if resolved_key else None

athf/data/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""ATHF reference data and templates."""
+import sys
+from pathlib import Path
+if sys.version_info >= (3, 9):
+    from importlib.resources import files
+else:
+    from importlib_resources import files  # type: ignore[import-not-found,no-redef]
+def get_data_path() -> Path:
+    """Locate the ATHF data directory.
+    Returns:
+        Path to the athf/data directory containing templates, knowledge,
+        prompts, hunts, docs, and integrations.
+    """
+    # Resolve the "athf.data" package resource root, then convert it to a Path.
+    data_root = files("athf.data")
+    return Path(str(data_root))

athf/data/docs/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,147 @@
+# Changelog
+All notable changes to the Agentic Threat Hunting Framework (ATHF) will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+### Added
+- None
+### Changed
+- None
+### Deprecated
+- None
+### Removed
+- None
+### Fixed
+- None
+### Security
+- None
+## [0.2.2] - 2024-12-17
+### Fixed
+- Type errors in `athf/core/attack_matrix.py` (added TypedDict for proper mypy checking)
+- Python 3.8 compatibility: `list[str]` → `List[str]` in `athf/core/attack_matrix.py`
+- Python 3.8 compatibility: `tuple[...]` → `Tuple[...]` in `athf/core/investigation_parser.py`
+- Python 3.8 compatibility: `tuple[...]`, `list[str]` → `Tuple[...]`, `List[str]` in `athf/commands/investigate.py`
+- Python 3.8 compatibility: `set[str]` → `Set[str]` in `athf/core/hunt_manager.py`
+- Python 3.8 compatibility: `int | str` → `Union[int, str]` in `athf/commands/env.py`
+- Windows UTF-8 encoding errors in `athf/commands/context.py` (3 instances) and `athf/commands/similar.py` (2 instances)
+- Test assertion errors in `tests/commands/test_env.py` for env info and activate commands
+- Mypy unused-ignore errors in `athf/commands/similar.py` (sklearn imports handled by --ignore-missing-imports flag)
+- CI/CD pipeline errors blocking builds on Python 3.8-3.12 across all platforms
+## [0.2.1] - 2024-12-17
+### Fixed
+- Type errors in `athf/core/attack_matrix.py` (added TypedDict for proper mypy checking)
+- Python 3.8 compatibility: `list[str]` → `List[str]` in `athf/core/attack_matrix.py`
+- Python 3.8 compatibility: `tuple[...]` → `Tuple[...]` in `athf/core/investigation_parser.py`
+- Python 3.8 compatibility: `tuple[...]`, `list[str]` → `Tuple[...]`, `List[str]` in `athf/commands/investigate.py`
+- Python 3.8 compatibility: `set[str]` → `Set[str]` in `athf/core/hunt_manager.py`
+- Python 3.8 compatibility: `int | str` → `Union[int, str]` in `athf/commands/env.py`
+- Windows UTF-8 encoding errors in `athf/commands/context.py` (3 instances) and `athf/commands/similar.py` (2 instances)
+- Test assertion errors in `tests/commands/test_env.py` for env info and activate commands
+- Mypy unused-ignore errors in `athf/commands/similar.py` (sklearn imports handled by --ignore-missing-imports flag)
+- CI/CD pipeline errors blocking builds on Python 3.8-3.12 across all platforms
+## [0.2.0] - 2024-12-17
+### Added
+- **CLI Commands**
+  - `athf context` - AI-optimized context loading (replaces ~5 Read operations, 75% token savings)
+  - `athf env` - Environment setup and management (setup, info, activate, clean)
+  - `athf investigate` - Investigation workflow for exploratory work (separate from hunt metrics)
+  - `athf similar` - Semantic search for similar hunts using scikit-learn embeddings
+- **Core Modules**
+  - `athf/core/attack_matrix.py` - MITRE ATT&CK coverage tracking and analysis
+  - `athf/core/investigation_parser.py` - Parser for I-XXXX investigation files
+- **Testing Infrastructure**
+  - Comprehensive test suite for all new commands (tests/commands/)
+  - Command-specific test modules (test_context.py, test_env.py, test_similar.py)
+  - Integration tests for multi-command workflows
+- **Rich Content CLI Flags**
+  - `--hypothesis`, `--threat-context`, `--actor`, `--behavior`, `--location`, `--evidence`
+  - Enable fully-populated hunt files via single CLI command
+  - AI-friendly one-liner hunt creation without manual editing
+### Changed
+- Enhanced `athf hunt` command with investigation integration
+- Updated CLI help system with improved command descriptions
+- Improved context bundling for AI workflows (structured JSON/YAML output)
+- Updated documentation to reflect new commands and workflows
+### Fixed
+- Python 3.8 compatibility issues
+- Testing framework stability improvements
+## [0.1.0] - 2024-12-10
+### Added
+- Initial ATHF framework documentation
+  - LOCK pattern (Learn, Observe, Check, Keep)
+  - 5-level maturity model
+  - USING_ATHF.md adoption guide
+  - INSTALL.md installation guide
+- Example hunt implementations
+  - H-0001: macOS Data Collection via AppleScript Detection
+  - H-0002: Linux Crontab Persistence Detection
+  - H-0003: AWS Lambda Persistence Detection
+- Templates
+  - HUNT_LOCK.md template
+  - Query templates for Splunk, KQL, Elastic
+- Documentation
+  - README.md with visual enhancements
+  - SHOWCASE.md with real results
+  - docs/CLI_REFERENCE.md (planned for CLI implementation)
+- Knowledge base
+  - hunting-knowledge.md expert hunting frameworks
+  - AGENTS.md AI assistant instructions
+  - environment.md template
+- Integration guides
+  - MCP_CATALOG.md for tool integrations
+  - SIEM integration examples
+  - EDR integration examples
+### Philosophy
+- Framework-first approach: "Structure over software, adapt to your environment"
+- Document-first methodology: Works with markdown, git, and AI assistants
+- Optional tooling: CLI enhances but doesn't replace core workflow
+- Progression-minded: Start simple, scale when complexity demands it
+---
+## Version History
+**Legend:**
+- `[Unreleased]` - Changes in development
+- `[X.Y.Z]` - Released versions
+**Version Format:**
+- `X` - Major version (breaking changes)
+- `Y` - Minor version (new features, backward compatible)
+- `Z` - Patch version (bug fixes, backward compatible)
+**Change Categories:**
+- `Added` - New features
+- `Changed` - Changes to existing functionality
+- `Deprecated` - Soon-to-be removed features
+- `Removed` - Removed features
+- `Fixed` - Bug fixes
+- `Security` - Security improvements
+---
+## Contribution Notes
+ATHF is a framework to internalize, not a platform to extend. However, if you've adapted ATHF in interesting ways or have feedback, we'd love to hear about it in [GitHub Discussions](https://github.com/Nebulock-Inc/agentic-threat-hunting-framework/discussions).
+For more on the philosophy, see [USING_ATHF.md](../../../USING_ATHF.md).

agentic-threat-hunting-framework 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

agentic-threat-hunting-framework 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl