agentic-threat-hunting-framework 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/METADATA +38 -40
  2. agentic_threat_hunting_framework-0.3.0.dist-info/RECORD +51 -0
  3. athf/__version__.py +1 -1
  4. athf/cli.py +7 -2
  5. athf/commands/__init__.py +4 -0
  6. athf/commands/agent.py +452 -0
  7. athf/commands/context.py +6 -9
  8. athf/commands/env.py +2 -2
  9. athf/commands/hunt.py +3 -3
  10. athf/commands/init.py +45 -0
  11. athf/commands/research.py +530 -0
  12. athf/commands/similar.py +5 -5
  13. athf/core/research_manager.py +419 -0
  14. athf/core/web_search.py +340 -0
  15. athf/data/__init__.py +19 -0
  16. athf/data/docs/CHANGELOG.md +147 -0
  17. athf/data/docs/CLI_REFERENCE.md +1797 -0
  18. athf/data/docs/INSTALL.md +594 -0
  19. athf/data/docs/README.md +31 -0
  20. athf/data/docs/environment.md +256 -0
  21. athf/data/docs/getting-started.md +419 -0
  22. athf/data/docs/level4-agentic-workflows.md +480 -0
  23. athf/data/docs/lock-pattern.md +149 -0
  24. athf/data/docs/maturity-model.md +400 -0
  25. athf/data/docs/why-athf.md +44 -0
  26. athf/data/hunts/FORMAT_GUIDELINES.md +507 -0
  27. athf/data/hunts/H-0001.md +453 -0
  28. athf/data/hunts/H-0002.md +436 -0
  29. athf/data/hunts/H-0003.md +546 -0
  30. athf/data/hunts/README.md +231 -0
  31. athf/data/integrations/MCP_CATALOG.md +45 -0
  32. athf/data/integrations/README.md +129 -0
  33. athf/data/integrations/quickstart/splunk.md +162 -0
  34. athf/data/knowledge/hunting-knowledge.md +2375 -0
  35. athf/data/prompts/README.md +172 -0
  36. athf/data/prompts/ai-workflow.md +581 -0
  37. athf/data/prompts/basic-prompts.md +316 -0
  38. athf/data/templates/HUNT_LOCK.md +228 -0
  39. agentic_threat_hunting_framework-0.2.3.dist-info/RECORD +0 -23
  40. {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/WHEEL +0 -0
  41. {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/entry_points.txt +0 -0
  42. {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/licenses/LICENSE +0 -0
  43. {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,340 @@
1
+ """Web search integration for threat research."""
2
+
3
+ import os
4
+ import time
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
9
@dataclass
class SearchResult:
    """One hit returned by a web search.

    Attributes:
        title: Title of the result.
        url: URL of the result.
        content: Snippet or full content.
        score: Relevance score (0-1).
    """

    title: str
    url: str
    content: str
    score: float
17
+
18
+
19
@dataclass
class SearchResponse:
    """Outcome of a single web search call.

    Attributes:
        query: The query string that was searched.
        results: Parsed search hits.
        answer: AI-generated answer summary, if one was returned.
        response_time_ms: Measured duration of the search call in milliseconds.
        search_depth: Search depth used for the call (defaults to "basic").
        images: Image entries returned with the response (empty by default).
    """

    query: str
    results: List[SearchResult]
    answer: Optional[str] = None
    response_time_ms: int = 0
    search_depth: str = "basic"
    # default_factory gives every instance its own list instead of a shared one.
    images: List[Dict[str, str]] = field(default_factory=list)
29
+
30
+
31
class TavilySearchClient:
    """Tavily Search API client for threat research.

    Thin wrapper around the ``tavily-python`` ``TavilyClient`` that converts
    its dictionary responses into ``SearchResponse`` / ``SearchResult``
    objects and offers pre-configured searches for common research tasks.

    Tavily is designed for AI/LLM integration and provides:
    - Basic and advanced search depth
    - AI-generated answer summaries
    - Domain filtering
    - Structured results for LLM consumption

    Features of this wrapper:
    - Security-focused domain filtering (see the ``*_DOMAINS`` constants)
    - Configurable search depth (basic=fast, advanced=thorough)
    - Lazy import of ``tavily-python`` on first search
    - Response time measurement for every search

    Errors raised by the underlying Tavily client are not caught here; they
    propagate to the caller.

    Environment:
        TAVILY_API_KEY: API key from https://tavily.com
    """

    # Domains used by search_threat_intel().
    SECURITY_DOMAINS = [
        "attack.mitre.org",
        "github.com",
        "elastic.co",
        "microsoft.com",
        "crowdstrike.com",
        "mandiant.com",
        "redcanary.com",
        "thehackernews.com",
        "bleepingcomputer.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "thedfirreport.com",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Technical documentation sources used by search_system_internals().
    TECHNICAL_DOMAINS = [
        "microsoft.com",
        "learn.microsoft.com",
        "docs.microsoft.com",
        "developer.apple.com",
        "man7.org",
        "linux.die.net",
        "kernel.org",
        "aws.amazon.com",
        "docs.aws.amazon.com",
        "cloud.google.com",
        "en.wikipedia.org",
    ]

    # Threat intelligence sources used by search_adversary_tradecraft().
    THREAT_INTEL_DOMAINS = [
        "attack.mitre.org",
        "thedfirreport.com",
        "mandiant.com",
        "crowdstrike.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "redcanary.com",
        "elastic.co",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Detection and SIEM sources used by search_detection_methods().
    DETECTION_DOMAINS = [
        "github.com",
        "elastic.co",
        "splunk.com",
        "microsoft.com",
        "redcanary.com",
        "sigma-hq.github.io",
        "detection.fyi",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Initialize client with API key.

        Args:
            api_key: Tavily API key (defaults to TAVILY_API_KEY env var)

        Raises:
            ValueError: If no API key is provided or found
        """
        self.api_key = api_key or os.getenv("TAVILY_API_KEY")
        if not self.api_key:
            raise ValueError("TAVILY_API_KEY not set. Get your API key from https://tavily.com")
        # Created lazily by _get_client() so that constructing this object
        # does not require tavily-python to be importable.
        self._client: Optional[Any] = None

    def _get_client(self) -> Any:
        """Get or create the underlying Tavily client instance.

        Returns:
            The cached ``TavilyClient``, created on first use.

        Raises:
            ImportError: If the ``tavily-python`` package is not installed.
        """
        if self._client is None:
            # Keep the try body limited to the import so that only a missing
            # package is reported as "not installed".
            try:
                from tavily import TavilyClient
            except ImportError as err:
                raise ImportError(
                    "tavily-python package not installed. Run: pip install tavily-python"
                ) from err
            self._client = TavilyClient(api_key=self.api_key)
        return self._client

    @staticmethod
    def _parse_result(item: Dict[str, Any], prefer_raw_content: bool = False) -> SearchResult:
        """Convert one raw result dictionary into a SearchResult.

        Missing or null fields fall back to empty strings / a 0.0 score.

        Args:
            item: Raw result dictionary from the search response.
            prefer_raw_content: Use the ``raw_content`` field as the content
                when it is present and non-empty, otherwise the snippet.

        Returns:
            The parsed SearchResult.
        """
        content = item.get("content") or ""
        if prefer_raw_content:
            content = item.get("raw_content") or content

        score = item.get("score")
        return SearchResult(
            title=item.get("title") or "",
            url=item.get("url") or "",
            content=content,
            score=0.0 if score is None else score,
        )

    def search(
        self,
        query: str,
        search_depth: str = "basic",
        max_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: bool = True,
        include_raw_content: bool = False,
    ) -> SearchResponse:
        """Execute search query.

        Args:
            query: Search query string
            search_depth: "basic" (fast) or "advanced" (thorough)
            max_results: Maximum number of results to request; passed to
                Tavily without validation
            include_domains: Limit search to these domains
            exclude_domains: Exclude these domains from search
            include_answer: Include AI-generated answer summary
            include_raw_content: Request full page content (increases response
                size); when a result carries it, it is used as the result's
                ``content`` instead of the snippet

        Returns:
            SearchResponse with results

        Raises:
            ImportError: If the ``tavily-python`` package is not installed
            Exception: Any error raised by the Tavily client propagates
        """
        client = self._get_client()

        # Build search parameters; domain filters are only sent when non-empty.
        search_params: Dict[str, Any] = {
            "query": query,
            "search_depth": search_depth,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
        }
        if include_domains:
            search_params["include_domains"] = include_domains
        if exclude_domains:
            search_params["exclude_domains"] = exclude_domains

        # A monotonic clock is immune to wall-clock adjustments, so the
        # measured duration can never be negative or skewed.
        started = time.monotonic()
        response = client.search(**search_params)
        response_time_ms = int((time.monotonic() - started) * 1000)

        # "or []" also covers keys that are present but null.
        results = [
            self._parse_result(item, prefer_raw_content=include_raw_content)
            for item in response.get("results") or []
        ]

        return SearchResponse(
            query=query,
            results=results,
            answer=response.get("answer"),
            response_time_ms=response_time_ms,
            search_depth=search_depth,
            images=response.get("images") or [],
        )

    def search_threat_intel(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search with security-focused parameters.

        Optimized for threat hunting research with:
        - Security-focused domain filtering (SECURITY_DOMAINS)
        - Advanced search depth by default
        - AI-generated answer summary

        Args:
            topic: Research topic (e.g., "LSASS memory dumping")
            technique: Optional MITRE ATT&CK technique (e.g., "T1003.001")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with security-focused results
        """
        query = f"{topic} threat hunting detection"
        if technique:
            query += f" MITRE ATT&CK {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            # Pass a copy so the shared class-level list cannot be mutated downstream.
            include_domains=list(self.SECURITY_DOMAINS),
            include_answer=True,
        )

    def search_system_internals(
        self,
        topic: str,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for system/technology internals.

        Focused on understanding how systems work normally,
        useful for the "System Research" skill. Restricted to the
        technical documentation sources in TECHNICAL_DOMAINS.

        Args:
            topic: Technology/system topic (e.g., "LSASS", "Windows Authentication")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with technical documentation
        """
        return self.search(
            query=f"{topic} how it works internals documentation",
            search_depth=search_depth,
            include_domains=list(self.TECHNICAL_DOMAINS),
            include_answer=True,
        )

    def search_adversary_tradecraft(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for adversary tradecraft and attack techniques.

        Focused on how adversaries abuse systems,
        useful for the "Adversary Tradecraft" skill. Restricted to the
        threat intelligence sources in THREAT_INTEL_DOMAINS.

        Args:
            topic: Attack topic (e.g., "credential dumping", "lateral movement")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with adversary technique information
        """
        query = f"{topic} adversary technique attack method"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            include_domains=list(self.THREAT_INTEL_DOMAINS),
            include_answer=True,
        )

    def search_detection_methods(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for detection methods and analytics.

        Focused on how to detect specific behaviors,
        useful for detection engineering. Restricted to the detection
        and SIEM sources in DETECTION_DOMAINS.

        Args:
            topic: Detection topic (e.g., "LSASS access detection")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with detection method information
        """
        query = f"{topic} detection rule query sigma"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            include_domains=list(self.DETECTION_DOMAINS),
            include_answer=True,
        )
326
+
327
+
328
def create_search_client(api_key: Optional[str] = None) -> Optional[TavilySearchClient]:
    """Build a Tavily search client when an API key can be resolved.

    Unlike constructing ``TavilySearchClient`` directly, a missing key is
    reported by returning None instead of raising.

    Args:
        api_key: Optional API key (defaults to TAVILY_API_KEY env var)

    Returns:
        TavilySearchClient if API key is available, None otherwise
    """
    resolved_key = api_key if api_key else os.getenv("TAVILY_API_KEY")
    if resolved_key:
        return TavilySearchClient(api_key=resolved_key)
    return None
athf/data/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """ATHF reference data and templates."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ if sys.version_info >= (3, 9):
7
+ from importlib.resources import files
8
+ else:
9
+ from importlib_resources import files # type: ignore[import-not-found,no-redef]
10
+
11
+
12
def get_data_path() -> Path:
    """Return the location of the packaged ATHF data directory.

    Returns:
        Path to the athf/data directory containing templates, knowledge,
        prompts, hunts, docs, and integrations.
    """
    # Resolve the package resource root, then convert it to a pathlib.Path
    # through its string form.
    data_root = files("athf.data")
    return Path(str(data_root))
@@ -0,0 +1,147 @@
1
+ # Changelog
2
+
3
+ All notable changes to the Agentic Threat Hunting Framework (ATHF) will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - None
12
+
13
+ ### Changed
14
+ - None
15
+
16
+ ### Deprecated
17
+ - None
18
+
19
+ ### Removed
20
+ - None
21
+
22
+ ### Fixed
23
+ - None
24
+
25
+ ### Security
26
+ - None
27
+
28
+ ## [0.2.2] - 2024-12-17
29
+
30
+ ### Fixed
31
+ - Type errors in `athf/core/attack_matrix.py` (added TypedDict for proper mypy checking)
32
+ - Python 3.8 compatibility: `list[str]` → `List[str]` in `athf/core/attack_matrix.py`
33
+ - Python 3.8 compatibility: `tuple[...]` → `Tuple[...]` in `athf/core/investigation_parser.py`
34
+ - Python 3.8 compatibility: `tuple[...]`, `list[str]` → `Tuple[...]`, `List[str]` in `athf/commands/investigate.py`
35
+ - Python 3.8 compatibility: `set[str]` → `Set[str]` in `athf/core/hunt_manager.py`
36
+ - Python 3.8 compatibility: `int | str` → `Union[int, str]` in `athf/commands/env.py`
37
+ - Windows UTF-8 encoding errors in `athf/commands/context.py` (3 instances) and `athf/commands/similar.py` (2 instances)
38
+ - Test assertion errors in `tests/commands/test_env.py` for env info and activate commands
39
+ - Mypy unused-ignore errors in `athf/commands/similar.py` (sklearn imports handled by --ignore-missing-imports flag)
40
+ - CI/CD pipeline errors blocking builds on Python 3.8-3.12 across all platforms
41
+
42
+ ## [0.2.1] - 2024-12-17
43
+
44
+ ### Fixed
45
+ - Type errors in `athf/core/attack_matrix.py` (added TypedDict for proper mypy checking)
46
+ - Python 3.8 compatibility: `list[str]` → `List[str]` in `athf/core/attack_matrix.py`
47
+ - Python 3.8 compatibility: `tuple[...]` → `Tuple[...]` in `athf/core/investigation_parser.py`
48
+ - Python 3.8 compatibility: `tuple[...]`, `list[str]` → `Tuple[...]`, `List[str]` in `athf/commands/investigate.py`
49
+ - Python 3.8 compatibility: `set[str]` → `Set[str]` in `athf/core/hunt_manager.py`
50
+ - Python 3.8 compatibility: `int | str` → `Union[int, str]` in `athf/commands/env.py`
51
+ - Windows UTF-8 encoding errors in `athf/commands/context.py` (3 instances) and `athf/commands/similar.py` (2 instances)
52
+ - Test assertion errors in `tests/commands/test_env.py` for env info and activate commands
53
+ - Mypy unused-ignore errors in `athf/commands/similar.py` (sklearn imports handled by --ignore-missing-imports flag)
54
+ - CI/CD pipeline errors blocking builds on Python 3.8-3.12 across all platforms
55
+
56
+ ## [0.2.0] - 2024-12-17
57
+
58
+ ### Added
59
+ - **CLI Commands**
60
+ - `athf context` - AI-optimized context loading (replaces ~5 Read operations, 75% token savings)
61
+ - `athf env` - Environment setup and management (setup, info, activate, clean)
62
+ - `athf investigate` - Investigation workflow for exploratory work (separate from hunt metrics)
63
+ - `athf similar` - Semantic search for similar hunts using scikit-learn embeddings
64
+ - **Core Modules**
65
+ - `athf/core/attack_matrix.py` - MITRE ATT&CK coverage tracking and analysis
66
+ - `athf/core/investigation_parser.py` - Parser for I-XXXX investigation files
67
+ - **Testing Infrastructure**
68
+ - Comprehensive test suite for all new commands (tests/commands/)
69
+ - Command-specific test modules (test_context.py, test_env.py, test_similar.py)
70
+ - Integration tests for multi-command workflows
71
+ - **Rich Content CLI Flags**
72
+ - `--hypothesis`, `--threat-context`, `--actor`, `--behavior`, `--location`, `--evidence`
73
+ - Enable fully-populated hunt files via single CLI command
74
+ - AI-friendly one-liner hunt creation without manual editing
75
+
76
+ ### Changed
77
+ - Enhanced `athf hunt` command with investigation integration
78
+ - Updated CLI help system with improved command descriptions
79
+ - Improved context bundling for AI workflows (structured JSON/YAML output)
80
+ - Updated documentation to reflect new commands and workflows
81
+
82
+ ### Fixed
83
+ - Python 3.8 compatibility issues
84
+ - Testing framework stability improvements
85
+
86
+ ## [0.1.0] - 2024-12-10
87
+
88
+ ### Added
89
+ - Initial ATHF framework documentation
90
+ - LOCK pattern (Learn, Observe, Check, Keep)
91
+ - 5-level maturity model
92
+ - USING_ATHF.md adoption guide
93
+ - INSTALL.md installation guide
94
+ - Example hunt implementations
95
+ - H-0001: macOS Data Collection via AppleScript Detection
96
+ - H-0002: Linux Crontab Persistence Detection
97
+ - H-0003: AWS Lambda Persistence Detection
98
+ - Templates
99
+ - HUNT_LOCK.md template
100
+ - Query templates for Splunk, KQL, Elastic
101
+ - Documentation
102
+ - README.md with visual enhancements
103
+ - SHOWCASE.md with real results
104
+ - docs/CLI_REFERENCE.md (planned for CLI implementation)
105
+ - Knowledge base
106
+ - hunting-knowledge.md expert hunting frameworks
107
+ - AGENTS.md AI assistant instructions
108
+ - environment.md template
109
+ - Integration guides
110
+ - MCP_CATALOG.md for tool integrations
111
+ - SIEM integration examples
112
+ - EDR integration examples
113
+
114
+ ### Philosophy
115
+ - Framework-first approach: "Structure over software, adapt to your environment"
116
+ - Document-first methodology: Works with markdown, git, and AI assistants
117
+ - Optional tooling: CLI enhances but doesn't replace core workflow
118
+ - Progression-minded: Start simple, scale when complexity demands it
119
+
120
+ ---
121
+
122
+ ## Version History
123
+
124
+ **Legend:**
125
+ - `[Unreleased]` - Changes in development
126
+ - `[X.Y.Z]` - Released versions
127
+
128
+ **Version Format:**
129
+ - `X` - Major version (breaking changes)
130
+ - `Y` - Minor version (new features, backward compatible)
131
+ - `Z` - Patch version (bug fixes, backward compatible)
132
+
133
+ **Change Categories:**
134
+ - `Added` - New features
135
+ - `Changed` - Changes to existing functionality
136
+ - `Deprecated` - Soon-to-be removed features
137
+ - `Removed` - Removed features
138
+ - `Fixed` - Bug fixes
139
+ - `Security` - Security improvements
140
+
141
+ ---
142
+
143
+ ## Contribution Notes
144
+
145
+ ATHF is a framework to internalize, not a platform to extend. However, if you've adapted ATHF in interesting ways or have feedback, we'd love to hear about it in [GitHub Discussions](https://github.com/Nebulock-Inc/agentic-threat-hunting-framework/discussions).
146
+
147
+ For more on the philosophy, see [USING_ATHF.md](../../../USING_ATHF.md).