agentic-threat-hunting-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
athf/commands/init.py ADDED
@@ -0,0 +1,411 @@
1
+ """Initialize ATHF directory structure."""
2
+
3
+ from pathlib import Path
4
+
5
+ import click
6
+ import yaml
7
+ from rich.console import Console
8
+ from rich.prompt import Confirm, Prompt
9
+
10
+ console = Console()
11
+
12
+
13
@click.command()
@click.option("--path", default=".", help="Directory to initialize ATHF in")
@click.option("--non-interactive", is_flag=True, help="Skip interactive prompts")
def init(path: str, non_interactive: bool) -> None:
    """Initialize a new ATHF threat hunting workspace.

    \b
    Creates directory structure:
      config/         Configuration files
      hunts/          Hunt hypothesis cards
      queries/        Reusable query library
      runs/           Hunt execution results
      templates/      Hunt templates (LOCK pattern)
      knowledge/      Domain expertise and frameworks
      prompts/        AI workflow prompts
      integrations/   Tool integration configs
      docs/           Documentation

    \b
    Generates files:
      • config/.athfconfig.yaml (workspace configuration)
      • AGENTS.md (AI assistant context)
      • templates/HUNT_LOCK.md (hunt template)

    \b
    Examples:
      # Interactive setup (recommended for first time)
      athf init

      # Non-interactive with defaults
      athf init --non-interactive

      # Initialize in specific directory
      athf init --path /path/to/workspace

    \b
    Interactive setup will ask you:
      1. Workspace name (default: directory name)
      2. SIEM platform (Splunk, Sentinel, Elastic, etc.)
      3. EDR platform (CrowdStrike, SentinelOne, etc.)
      4. Hunt ID prefix (default: H-)
      5. Hunt retention period (default: 365 days)

    \b
    After initialization:
      1. Customize AGENTS.md with your environment details
      2. Add data sources to config/.athfconfig.yaml
      3. Create your first hunt: athf hunt new
    """
    # NOTE: the docstring above is the text click renders for `athf init --help`;
    # the lines containing only "\b" tell click not to re-wrap the paragraph
    # that follows, so keep them on their own lines.
    base_path = Path(path).resolve()

    # A workspace counts as initialized when a config file exists in either the
    # legacy location (workspace root) or the current one (config/).
    old_config_path = base_path / ".athfconfig.yaml"
    new_config_path = base_path / "config" / ".athfconfig.yaml"

    # NOTE(review): this confirmation is asked even with --non-interactive, so a
    # scripted re-run against an existing workspace still waits on stdin.
    # Deciding whether that case should abort or overwrite is a product call,
    # so the behavior is left unchanged here.
    if (old_config_path.exists() or new_config_path.exists()) and not Confirm.ask(
        f"ATHF already initialized in {base_path}. Reinitialize?", default=False
    ):
        console.print("[yellow]Initialization cancelled.[/yellow]")
        return

    # New configuration is always written to the config/ location.
    config_path = new_config_path

    console.print("\n[bold cyan]🎯 Initializing Agentic Threat Hunting Framework[/bold cyan]\n")

    # Gather configuration
    if non_interactive:
        config = _default_config(base_path)
    else:
        config = _interactive_config(base_path)

    # Create directory structure
    directories = ["config", "hunts", "queries", "runs", "templates", "knowledge", "prompts", "integrations", "docs"]

    console.print("\n[bold]Creating directory structure...[/bold]")
    for dir_name in directories:
        dir_path = base_path / dir_name
        # parents=True so that `--path` may point at a workspace directory that
        # does not exist yet; without it mkdir raises FileNotFoundError.
        dir_path.mkdir(parents=True, exist_ok=True)
        console.print(f"  ✓ Created [cyan]{dir_name}/[/cyan]")

    # Save configuration (sort_keys=False keeps the keys in insertion order)
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
    console.print("  ✓ Created [cyan]config/.athfconfig.yaml[/cyan]")

    # Create AGENTS.md only if it doesn't exist, so user edits survive a re-init
    agents_path = base_path / "AGENTS.md"
    if not agents_path.exists():
        _create_agents_file(agents_path, config)
        console.print("  ✓ Created [cyan]AGENTS.md[/cyan]")

    # Write the hunt template only if it doesn't exist, for the same reason
    templates_path = base_path / "templates"
    if not (templates_path / "HUNT_LOCK.md").exists():
        _create_hunt_template(templates_path / "HUNT_LOCK.md")
        console.print("  ✓ Created [cyan]templates/HUNT_LOCK.md[/cyan]")

    console.print("\n[bold green]✅ ATHF initialized successfully![/bold green]")
    console.print("\n[bold]Next steps:[/bold]")
    console.print("  1. Customize [cyan]AGENTS.md[/cyan] with your environment details")
    console.print("  2. Create your first hunt: [cyan]athf hunt new[/cyan]")
    console.print("  3. Check out the docs at [cyan]docs/getting-started.md[/cyan]")
115
+
116
+
117
def _default_config(base_path: Path) -> dict:
    """Build the configuration used when no questions are asked.

    Args:
        base_path: Workspace directory; its final path component becomes the
            workspace name.

    Returns:
        A new dict with the default workspace settings. Keys are inserted in a
        fixed order, which is the order they appear in when the dict is dumped
        without key sorting.
    """
    defaults: dict = {"workspace_name": base_path.name}
    defaults["hunt_prefix"] = "H-"
    defaults["siem"] = "Splunk"
    defaults["edr"] = "CrowdStrike"
    defaults["query_language"] = "SPL"
    defaults["hunt_retention_days"] = 365
    return defaults
127
+
128
+
129
def _interactive_config(base_path: Path) -> dict:
    """Gather configuration interactively.

    Asks five questions on the terminal (workspace name, SIEM, EDR, hunt ID
    prefix, retention) and derives the query language from the chosen SIEM.

    Args:
        base_path: Workspace directory; its final path component is offered as
            the default workspace name.

    Returns:
        Dict with the keys ``workspace_name``, ``siem``, ``query_language``,
        ``edr``, ``hunt_prefix`` and ``hunt_retention_days``.
    """
    console.print("[bold]📋 Quick setup questions:[/bold]")

    config: dict = {}

    # Workspace name
    workspace_name = Prompt.ask(
        "1. Workspace name (e.g., 'Production Hunts', 'Client-Acme', 'SOC Team')", default=base_path.name
    )
    config["workspace_name"] = workspace_name

    # SIEM
    siem = Prompt.ask(
        "2. What SIEM do you use?", choices=["Splunk", "Sentinel", "Elastic", "Chronicle", "Other"], default="Splunk"
    )
    config["siem"] = siem

    # Query language mapping (falls back to SPL for an unrecognized SIEM value)
    query_lang_map = {"Splunk": "SPL", "Sentinel": "KQL", "Elastic": "Lucene", "Chronicle": "YARA-L", "Other": "Custom"}
    config["query_language"] = query_lang_map.get(siem, "SPL")

    # EDR
    edr = Prompt.ask(
        "3. What's your primary EDR?",
        choices=["CrowdStrike", "SentinelOne", "Defender", "Carbon Black", "Other"],
        default="CrowdStrike",
    )
    config["edr"] = edr

    # Hunt prefix
    hunt_prefix = Prompt.ask("4. Hunt ID prefix (e.g., H-, HUNT-)", default="H-")
    config["hunt_prefix"] = hunt_prefix

    # Retention: ask again on non-numeric input instead of letting int() raise
    # ValueError and abort the whole setup after four answered questions.
    while True:
        retention = Prompt.ask("5. Hunt retention (days)", default="365")
        if not isinstance(retention, str):
            # A non-string answer is stored unchanged, as before.
            config["hunt_retention_days"] = retention
            break
        try:
            config["hunt_retention_days"] = int(retention)
        except ValueError:
            console.print("[red]Please enter a whole number of days.[/red]")
            continue
        break

    return config
168
+
169
+
170
def _create_agents_file(path: Path, config: dict) -> None:
    """Render the AGENTS.md context document and write it to ``path``.

    Args:
        path: Destination file; it is created or overwritten as UTF-8 text.
        config: Workspace configuration. The keys ``workspace_name``, ``siem``,
            ``query_language``, ``edr``, ``hunt_prefix`` and
            ``hunt_retention_days`` are read.

    Raises:
        KeyError: If one of the keys listed above is missing from ``config``.
    """
    # Read every value up front, in the order it first appears in the document.
    workspace = config["workspace_name"]
    siem = config["siem"]
    query_language = config["query_language"]
    edr = config["edr"]
    prefix = config["hunt_prefix"]
    retention_days = config["hunt_retention_days"]

    document = f"""# ATHF Agent Context

**Workspace:** {workspace}

This file provides context to AI assistants about your threat hunting environment.

## Data Sources

### SIEM / Log Aggregation
- **Platform:** {siem}
- **Query Language:** {query_language}
- **Indexes:** [Add your indexes here]
- **Retention:** 90 days
- **Access:** [Add access method]

### EDR / Endpoint Security
- **Platform:** {edr}
- **Telemetry:** Process execution, network connections, file modifications
- **Query Access:** [Add query method]

### Other Data Sources
[Add additional data sources]

## Technology Stack

### Security Tools
- SIEM: {siem}
- EDR: {edr}
- [Add more tools]

### Cloud Platforms
[Add cloud platforms if applicable]

## Known Visibility Gaps

Document what you can't see:
- [Add visibility gaps]

## Hunt Numbering Convention

- **Prefix:** {prefix}
- **Format:** {prefix}XXXX (e.g., {prefix}0001)
- **Retention:** {retention_days} days

## Team Context

[Add information about your team, shift coverage, escalation procedures]
"""

    Path(path).write_text(document, encoding="utf-8")
223
+
224
+
225
def _create_hunt_template(path: Path) -> None:
    """Write the blank LOCK hunt template to ``path``.

    The template is a Markdown document with a YAML front-matter header
    followed by the LEARN, OBSERVE, CHECK and KEEP sections.

    Args:
        path: Destination file; it is created or overwritten as UTF-8 text.
    """
    template_text = """---
hunt_id: H-XXXX
title: [Hunt Title]
status: planning
date: YYYY-MM-DD
hunter: [Your Name]
platform: [Windows/Linux/macOS/Cloud]
tactics: [persistence, credential-access, etc.]
techniques: [T1003.001, T1005, etc.]
data_sources: [SIEM, EDR, etc.]
related_hunts: []
findings_count: 0
true_positives: 0
false_positives: 0
customer_deliverables: []
tags: []
---

# H-XXXX: [Hunt Title]

**Hunt Metadata**

- **Date:** YYYY-MM-DD
- **Hunter:** [Your Name]
- **Status:** Planning
- **MITRE ATT&CK:** [Primary Technique]

---

## LEARN: Prepare the Hunt

### Hypothesis Statement

[What behavior are you looking for? What will you observe if the hypothesis is true?]

### Threat Context

[What threat actor/malware/TTP motivates this hunt?]

### ABLE Scoping

| **Field** | **Your Input** |
|-------------|----------------|
| **Actor** *(Optional)* | [Threat actor or malware family] |
| **Behavior** | [TTP or behavior pattern] |
| **Location** | [Systems, networks, or environments to hunt] |
| **Evidence** | [Data sources and key fields to examine] |

### Threat Intel & Research

- **MITRE ATT&CK Techniques:** [List relevant techniques]
- **CTI Sources & References:** [Links to reports, blogs, etc.]

### Related Tickets

| **Team** | **Ticket/Details** |
|----------|-------------------|
| **SOC/IR** | [Ticket numbers or N/A] |

---

## OBSERVE: Expected Behaviors

### What Normal Looks Like

[Describe legitimate activity that should not trigger alerts]

### What Suspicious Looks Like

[Describe adversary behavior patterns to hunt for]

### Expected Observables

- **Processes:** [Process names, command lines]
- **Network:** [Connections, protocols, domains]
- **Files:** [File paths, extensions, sizes]
- **Registry:** [Registry keys if applicable]
- **Authentication:** [Login patterns if applicable]

---

## CHECK: Execute & Analyze

### Data Source Information

- **Index/Data Source:** [SIEM index or data source]
- **Time Range:** [Date range for hunt]
- **Events Analyzed:** [Approximate count]
- **Data Quality:** [Assessment of data completeness]

### Hunting Queries

#### Initial Query

```
[Your initial query]
```

**Query Notes:**
- [What did this query return?]
- [What worked? What didn't?]

#### Refined Query

```
[Your refined query after iterations]
```

**Refinement Rationale:**
- [Why did you change the query?]
- [What improvements were made?]

### Visualization & Analytics

[Describe any visualizations, timelines, or statistical analysis]

### Query Performance

**What Worked Well:**
- [Effective filters or techniques]

**What Didn't Work:**
- [Challenges or limitations]

**Iterations Made:**
- [Document query evolution]

---

## KEEP: Findings & Response

### Executive Summary

[Concise summary of hunt results and key findings]

### Findings

| **Finding** | **Ticket** | **Description** |
|-------------|-----------|-----------------|
| True Positive | [Ticket] | [Description] |
| False Positive | N/A | [Description] |

**True Positives:** [Count]
**False Positives:** [Count]

### Detection Logic

**Automation Opportunity:**

[Can this hunt become an automated detection rule?]

**Proposed Detection:**

```
[Detection rule if applicable]
```

### Lessons Learned

**What Worked Well:**
- [Successes]

**What Could Be Improved:**
- [Areas for improvement]

**Telemetry Gaps Identified:**
- [Missing data sources or visibility gaps]

### Follow-up Actions

- [ ] [Action item 1]
- [ ] [Action item 2]

### Follow-up Hunts

- [Related hunt ideas for future investigation]

---

**Hunt Completed:** YYYY-MM-DD
**Next Review:** [Date for recurring hunt if applicable]
"""

    Path(path).write_text(template_text, encoding="utf-8")
athf/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Core ATHF functionality."""
@@ -0,0 +1,245 @@
1
+ """Manage hunt files and operations."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+ from athf.core.hunt_parser import parse_hunt_file
8
+
9
+
10
class HuntManager:
    """Manage hunt files and operations.

    Hunts are the ``*.md`` files found directly inside ``hunts_dir``. Each file
    is read with ``parse_hunt_file``; this class only relies on the parsed
    result offering ``.get("frontmatter", {})`` and on that front matter being
    a mapping of hunt metadata (hunt_id, title, status, date, platform,
    tactics, techniques and the finding counters).
    """

    def __init__(self, hunts_dir: Optional[Path] = None):
        """Initialize hunt manager.

        Args:
            hunts_dir: Directory containing hunt files (default: ./hunts).
                The directory is created, including parents, if it is missing.
        """
        self.hunts_dir = Path(hunts_dir) if hunts_dir else Path.cwd() / "hunts"

        if not self.hunts_dir.exists():
            self.hunts_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _as_list(value) -> List:
        """Return a front-matter field as a list, wrapping scalars.

        YAML lets an author write ``tactics: persistence`` (a string) or leave
        the field empty (``None``); treating those as sequences would iterate
        characters or raise, so they are normalized here.
        """
        if value is None or value == "":
            return []
        if isinstance(value, (list, tuple, set)):
            return list(value)
        return [value]

    @staticmethod
    def _as_count(value) -> int:
        """Return a counter field as an int, treating missing/invalid as 0."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def list_hunts(
        self,
        status: Optional[str] = None,
        tactic: Optional[str] = None,
        technique: Optional[str] = None,
        platform: Optional[str] = None,
    ) -> List[Dict]:
        """List all hunts with optional filters.

        Args:
            status: Filter by status (planning, active, completed, etc.)
            tactic: Filter by MITRE tactic
            technique: Filter by MITRE technique (e.g., T1003.001)
            platform: Filter by platform (Windows, Linux, macOS, Cloud)

        Returns:
            List of hunt metadata dicts, ordered by file name. Files that
            cannot be parsed are left out rather than raising.
        """
        hunts: List[Dict] = []

        for hunt_file in sorted(self.hunts_dir.glob("*.md")):
            try:
                hunt_data = parse_hunt_file(hunt_file)
                frontmatter = hunt_data.get("frontmatter", {})

                # Apply filters. List-valued fields are normalized first so a
                # scalar string is matched exactly (not by substring) and an
                # empty field simply fails the filter.
                if status and frontmatter.get("status") != status:
                    continue

                if tactic and tactic not in self._as_list(frontmatter.get("tactics")):
                    continue

                if technique and technique not in self._as_list(frontmatter.get("techniques")):
                    continue

                if platform and platform not in self._as_list(frontmatter.get("platform")):
                    continue

                # Convert date objects to strings for JSON serialization
                date_val = frontmatter.get("date")
                if hasattr(date_val, "isoformat"):
                    date_str = date_val.isoformat()
                else:
                    date_str = str(date_val) if date_val else None

                hunts.append(
                    {
                        "hunt_id": frontmatter.get("hunt_id"),
                        "title": frontmatter.get("title"),
                        "status": frontmatter.get("status"),
                        "date": date_str,
                        "platform": frontmatter.get("platform", []),
                        "tactics": frontmatter.get("tactics", []),
                        "techniques": frontmatter.get("techniques", []),
                        "findings_count": frontmatter.get("findings_count", 0),
                        "true_positives": frontmatter.get("true_positives", 0),
                        "false_positives": frontmatter.get("false_positives", 0),
                        "file_path": str(hunt_file),
                    }
                )

            except Exception:
                # Deliberate best-effort: one unreadable or malformed file must
                # not prevent the remaining hunts from being listed.
                continue

        return hunts

    def get_hunt(self, hunt_id: str) -> Optional[Dict]:
        """Get a specific hunt by ID.

        The hunt is looked up by file name: ``<hunts_dir>/<hunt_id>.md``.

        Args:
            hunt_id: Hunt ID (e.g., H-0001)

        Returns:
            Whatever ``parse_hunt_file`` returns for that file, or None if the
            file does not exist.
        """
        hunt_file = self.hunts_dir / f"{hunt_id}.md"

        if not hunt_file.exists():
            return None

        return parse_hunt_file(hunt_file)

    def get_next_hunt_id(self, prefix: str = "H-") -> str:
        """Calculate the next available hunt ID.

        Args:
            prefix: Hunt ID prefix (default: H-)

        Returns:
            Next hunt ID (e.g., H-0023): one more than the highest number in
            use for ``prefix``, zero-padded to at least four digits, or
            ``<prefix>0001`` when the prefix is not in use yet.
        """
        pattern = re.compile(rf"{re.escape(prefix)}(\d+)")

        # IDs come from parsed front matter AND from file names. list_hunts()
        # skips files it cannot parse, so relying on it alone could return an
        # ID whose "<id>.md" file already exists on disk.
        candidates = [hunt.get("hunt_id") for hunt in self.list_hunts()]
        candidates.extend(hunt_file.stem for hunt_file in self.hunts_dir.glob("*.md"))

        numbers = []
        for candidate in candidates:
            if not candidate or not isinstance(candidate, str):
                continue
            match = pattern.fullmatch(candidate)
            if match:
                numbers.append(int(match.group(1)))

        if not numbers:
            return f"{prefix}0001"

        # Next number with zero-padding
        next_num = max(numbers) + 1
        return f"{prefix}{next_num:04d}"

    def search_hunts(self, query: str) -> List[Dict]:
        """Full-text search across all hunt files.

        The match is a case-insensitive substring test against the raw file
        content.

        Args:
            query: Search query string

        Returns:
            List of matching hunts (hunt_id, title, status, file_path), ordered
            by file name. Files that cannot be read or parsed are skipped.
        """
        results: List[Dict] = []
        query_lower = query.lower()

        # sorted() gives the same deterministic ordering as list_hunts().
        for hunt_file in sorted(self.hunts_dir.glob("*.md")):
            try:
                with open(hunt_file, "r", encoding="utf-8") as f:
                    content = f.read()

                if query_lower not in content.lower():
                    continue

                hunt_data = parse_hunt_file(hunt_file)
                frontmatter = hunt_data.get("frontmatter", {})

                results.append(
                    {
                        "hunt_id": frontmatter.get("hunt_id"),
                        "title": frontmatter.get("title"),
                        "status": frontmatter.get("status"),
                        "file_path": str(hunt_file),
                    }
                )

            except Exception:
                # Deliberate best-effort, same as list_hunts().
                continue

        return results

    def calculate_stats(self) -> Dict:
        """Calculate hunt program statistics.

        Returns:
            Dict with:
              - total_hunts, completed_hunts: hunt counts
              - total_findings, true_positives, false_positives: summed
                counters (a missing or non-numeric counter counts as 0)
              - success_rate: percentage of completed hunts that have at least
                one true positive, rounded to one decimal (0.0 when no hunt is
                completed)
              - tp_fp_ratio: true positives / false positives rounded to two
                decimals; the string "∞" when there are true positives but no
                false positives; 0.0 when there are neither
        """
        hunts = self.list_hunts()

        completed = [h for h in hunts if h.get("status") == "completed"]

        total_findings = sum(self._as_count(h.get("findings_count")) for h in hunts)
        total_tp = sum(self._as_count(h.get("true_positives")) for h in hunts)
        total_fp = sum(self._as_count(h.get("false_positives")) for h in hunts)

        # Numerator and denominator must cover the same population (completed
        # hunts); counting true positives from unfinished hunts as well lets
        # the rate exceed 100%.
        completed_with_tp = sum(1 for h in completed if self._as_count(h.get("true_positives")) > 0)
        success_rate = (completed_with_tp / len(completed) * 100) if completed else 0.0

        if total_fp > 0:
            tp_fp_ratio = round(total_tp / total_fp, 2)
        elif total_tp > 0:
            tp_fp_ratio = "∞"
        else:
            # 0/0: nothing found yet, reported as 0.0 like the no-hunts case.
            tp_fp_ratio = 0.0

        return {
            "total_hunts": len(hunts),
            "completed_hunts": len(completed),
            "total_findings": total_findings,
            "true_positives": total_tp,
            "false_positives": total_fp,
            "success_rate": round(success_rate, 1),
            "tp_fp_ratio": tp_fp_ratio,
        }

    def calculate_attack_coverage(self) -> Dict[str, List[str]]:
        """Calculate MITRE ATT&CK technique coverage.

        Every technique listed on a hunt is attributed to every tactic listed
        on that same hunt.

        Returns:
            Dict mapping tactics to sorted, de-duplicated lists of covered
            techniques
        """
        coverage: Dict = {}

        for hunt in self.list_hunts():
            techniques = self._as_list(hunt.get("techniques"))

            for tactic in self._as_list(hunt.get("tactics")):
                coverage.setdefault(tactic, set()).update(techniques)

        # Convert sets to sorted lists
        return {tactic: sorted(techniques) for tactic, techniques in coverage.items()}