agentic-threat-hunting-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ """Parse hunt files (YAML frontmatter + markdown)."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Dict, List, Tuple
6
+
7
+ import yaml
8
+
9
+
10
class HuntParser:
    """Parser for ATHF hunt files.

    A hunt file is a markdown document that may begin with a YAML frontmatter
    block delimited by ``---`` lines. The body is expected to contain the four
    LOCK sections (LEARN, OBSERVE, CHECK, KEEP) as level-2 (``##``) headings.
    """

    # Frontmatter: '---' on the very first line, the YAML payload, then a
    # closing '---' on its own line. The closing delimiter may be the last
    # line of the file (no trailing newline) and the payload may be empty.
    _FRONTMATTER_RE = re.compile(
        r"\A---[ \t]*\r?\n(.*?)^---[ \t]*\r?(?:\n|\Z)",
        re.DOTALL | re.MULTILINE,
    )

    # MULTILINE anchors headings to the start of a line so that '###'
    # sub-headings (which contain '##') are never mistaken for a LOCK heading
    # or for a section terminator. '\Z' is used instead of '$' because under
    # MULTILINE '$' would end the lazy match at the first line break.
    _SECTION_FLAGS = re.DOTALL | re.IGNORECASE | re.MULTILINE
    _SECTION_PATTERNS = {
        "learn": re.compile(
            r"^ {0,3}##[ \t]+LEARN(?=[:\s]|\Z).*?(?=^ {0,3}##[ \t]+OBSERVE|\Z)",
            _SECTION_FLAGS,
        ),
        "observe": re.compile(
            r"^ {0,3}##[ \t]+OBSERVE(?=[:\s]|\Z).*?(?=^ {0,3}##[ \t]+CHECK|\Z)",
            _SECTION_FLAGS,
        ),
        "check": re.compile(
            r"^ {0,3}##[ \t]+CHECK(?=[:\s]|\Z).*?(?=^ {0,3}##[ \t]+KEEP|\Z)",
            _SECTION_FLAGS,
        ),
        # KEEP is the last LOCK section: it runs to the next level-2 heading
        # of any name, or to the end of the document.
        "keep": re.compile(
            r"^ {0,3}##[ \t]+KEEP(?=[:\s]|\Z).*?(?=^ {0,3}##[ \t]+\S|\Z)",
            _SECTION_FLAGS,
        ),
    }

    _REQUIRED_FIELDS = ("hunt_id", "title", "status", "date")
    _HUNT_ID_RE = re.compile(r"[A-Z]+-\d+")

    def __init__(self, file_path: Path):
        """Initialize parser with hunt file path.

        Args:
            file_path: Path (or path-like string) of the hunt file. Nothing is
                read until ``parse()`` is called.
        """
        self.file_path = Path(file_path)
        self.frontmatter: Dict = {}
        self.content = ""
        self.lock_sections: Dict = {}

    def parse(self) -> Dict:
        """Parse hunt file and return structured data.

        Also stores the results on ``self.frontmatter``, ``self.content`` and
        ``self.lock_sections`` so that ``validate()`` can inspect them.

        Returns:
            Dict with keys ``file_path``, ``hunt_id``, ``frontmatter``,
            ``content`` and ``lock_sections``.

        Raises:
            FileNotFoundError: If the hunt file does not exist.
            ValueError: If the frontmatter is not valid YAML or is not a
                mapping.
        """
        try:
            # utf-8-sig decodes plain UTF-8 unchanged but drops a leading BOM,
            # which would otherwise hide the opening '---' delimiter.
            raw = self.file_path.read_text(encoding="utf-8-sig")
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"Hunt file not found: {self.file_path}") from exc

        self.frontmatter = self._parse_frontmatter(raw)
        self.content = self._extract_content(raw)
        self.lock_sections = self._parse_lock_sections(self.content)

        return {
            "file_path": str(self.file_path),
            "hunt_id": self.frontmatter.get("hunt_id"),
            "frontmatter": self.frontmatter,
            "content": self.content,
            "lock_sections": self.lock_sections,
        }

    def _parse_frontmatter(self, content: str) -> Dict:
        """Extract and parse YAML frontmatter.

        Args:
            content: Full file content

        Returns:
            Dict of frontmatter fields; empty when the file has no
            frontmatter block or the block is empty.

        Raises:
            ValueError: If the block is not valid YAML, or parses to
                something other than a mapping (e.g. a list or a scalar).
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            return {}

        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            raise ValueError(f"Invalid YAML frontmatter: {e}") from e

        if data is None:
            return {}
        if not isinstance(data, dict):
            # The rest of the class calls .get() / 'in' on the frontmatter, so
            # reject non-mapping documents here with a clear message.
            raise ValueError(
                f"Invalid YAML frontmatter: expected a mapping, got {type(data).__name__}"
            )
        return data

    def _extract_content(self, content: str) -> str:
        """Extract content after frontmatter.

        Args:
            content: Full file content

        Returns:
            The text with a leading frontmatter block (if any) removed and
            surrounding whitespace stripped.
        """
        return self._FRONTMATTER_RE.sub("", content, count=1).strip()

    def _parse_lock_sections(self, content: str) -> Dict[str, str]:
        """Parse LOCK pattern sections from content.

        A section starts at a level-2 heading (``## LEARN``, ``## OBSERVE``,
        ``## CHECK``, ``## KEEP``; case-insensitive, at the start of a line)
        and includes any ``###`` sub-headings beneath it. LEARN, OBSERVE and
        CHECK end at the heading of the following LOCK section; KEEP ends at
        the next level-2 heading. Each section also ends at end of document.

        Args:
            content: Hunt content (without frontmatter)

        Returns:
            Dict mapping the keys ``learn``, ``observe``, ``check``, ``keep``
            to the stripped section text (heading included). Sections that
            are not found are omitted.
        """
        sections: Dict[str, str] = {}
        for section_name, pattern in self._SECTION_PATTERNS.items():
            match = pattern.search(content)
            if match:
                sections[section_name] = match.group(0).strip()
        return sections

    def validate(self) -> Tuple[bool, List[str]]:
        """Validate hunt structure.

        Inspects the state populated by ``parse()``; call ``parse()`` first.

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors: List[str] = []

        if not self.frontmatter:
            errors.append("Missing YAML frontmatter")

        errors.extend(
            f"Missing required frontmatter field: {field}"
            for field in self._REQUIRED_FIELDS
            if field not in self.frontmatter
        )

        # Validate hunt_id format (e.g., H-0001). YAML may deliver a
        # non-string value (``hunt_id: 1`` is an int); report that as a
        # format error instead of letting the regex raise TypeError.
        hunt_id = self.frontmatter.get("hunt_id", "")
        if hunt_id and not (
            isinstance(hunt_id, str) and self._HUNT_ID_RE.fullmatch(hunt_id)
        ):
            errors.append(f"Invalid hunt_id format: {hunt_id} (expected format: H-0001)")

        errors.extend(
            f"Missing LOCK section: {section.upper()}"
            for section in self._SECTION_PATTERNS
            if section not in self.lock_sections
        )

        return (len(errors) == 0, errors)
143
+
144
+
145
def parse_hunt_file(file_path: Path) -> Dict:
    """Parse a single hunt file in one call.

    Builds a throwaway ``HuntParser`` for ``file_path`` and hands back
    whatever its ``parse()`` method returns.

    Args:
        file_path: Location of the hunt file to read

    Returns:
        The dictionary produced by ``HuntParser.parse()``
    """
    return HuntParser(file_path).parse()
156
+
157
+
158
def validate_hunt_file(file_path: Path) -> Tuple[bool, List[str]]:
    """Parse a hunt file and report whether its structure is valid.

    Args:
        file_path: Location of the hunt file to check

    Returns:
        A ``(is_valid, errors)`` pair as returned by ``HuntParser.validate()``
    """
    hunt = HuntParser(file_path)
    # validate() inspects state that only parse() populates, so parse first.
    hunt.parse()
    verdict = hunt.validate()
    return verdict
@@ -0,0 +1,224 @@
1
+ """Render hunt templates with metadata."""
2
+
3
+ from datetime import datetime
4
+ from typing import Optional
5
+
6
+ from jinja2 import Template
7
+
8
# Jinja2 source for a new hunt file: a YAML frontmatter block followed by a
# markdown body with the four LOCK sections (LEARN, OBSERVE, CHECK, KEEP).
#
# Variables referenced by the template:
#   hunt_id, title, status, date, hunter   - interpolated as-is
#   platform, tactics, data_sources, tags  - interpolated as-is in frontmatter
#   techniques                             - interpolated in the frontmatter,
#                                            indexed with [0] and joined with
#                                            ', ' in the body
#   hypothesis, threat_context, actor,
#   behavior, location, evidence           - optional; a bracketed placeholder
#                                            is rendered when falsy
#
# NOTE(review): frontmatter values are interpolated without YAML quoting, so
# a value containing YAML syntax (e.g. ': ') changes how the frontmatter
# parses.
# NOTE(review): `data_sources` is both interpolated verbatim in the
# frontmatter and indexed with [0] in the CHECK section; if a pre-formatted
# string is supplied, the index yields that string's first character.
HUNT_TEMPLATE = """---
hunt_id: {{ hunt_id }}
title: {{ title }}
status: {{ status }}
date: {{ date }}
hunter: {{ hunter }}
platform: {{ platform }}
tactics: {{ tactics }}
techniques: {{ techniques }}
data_sources: {{ data_sources }}
related_hunts: []
findings_count: 0
true_positives: 0
false_positives: 0
customer_deliverables: []
tags: {{ tags }}
---

# {{ hunt_id }}: {{ title }}

**Hunt Metadata**

- **Date:** {{ date }}
- **Hunter:** {{ hunter }}
- **Status:** {{ status }}
- **MITRE ATT&CK:** {{ techniques[0] if techniques else '[Primary Technique]' }}

---

## LEARN: Prepare the Hunt

### Hypothesis Statement

{{ hypothesis if hypothesis else '[What behavior are you looking for? What will you observe if the hypothesis is true?]' }}

### Threat Context

{{ threat_context if threat_context else '[What threat actor/malware/TTP motivates this hunt?]' }}

### ABLE Scoping

| **Field** | **Your Input** |
|-------------|----------------|
| **Actor** *(Optional)* | {{ actor if actor else '[Threat actor or malware family]' }} |
| **Behavior** | {{ behavior if behavior else '[TTP or behavior pattern]' }} |
| **Location** | {{ location if location else '[Systems, networks, or environments to hunt]' }} |
| **Evidence** | {{ evidence if evidence else '[Data sources and key fields to examine]' }} |

### Threat Intel & Research

- **MITRE ATT&CK Techniques:** {{ ', '.join(techniques) if techniques else '[List relevant techniques]' }}
- **CTI Sources & References:** [Links to reports, blogs, etc.]

### Related Tickets

| **Team** | **Ticket/Details** |
|----------|-------------------|
| **SOC/IR** | [Ticket numbers or N/A] |

---

## OBSERVE: Expected Behaviors

### What Normal Looks Like

[Describe legitimate activity that should not trigger alerts]

### What Suspicious Looks Like

[Describe adversary behavior patterns to hunt for]

### Expected Observables

- **Processes:** [Process names, command lines]
- **Network:** [Connections, protocols, domains]
- **Files:** [File paths, extensions, sizes]
- **Registry:** [Registry keys if applicable]
- **Authentication:** [Login patterns if applicable]

---

## CHECK: Execute & Analyze

### Data Source Information

- **Index/Data Source:** {{ data_sources[0] if data_sources else '[SIEM index or data source]' }}
- **Time Range:** [Date range for hunt]
- **Events Analyzed:** [Approximate count]
- **Data Quality:** [Assessment of data completeness]

### Hunting Queries

#### Initial Query

```
[Your initial query]
```

**Query Notes:**
- [What did this query return?]
- [What worked? What didn't?]

### Query Performance

**What Worked Well:**
- [Effective filters or techniques]

**What Didn't Work:**
- [Challenges or limitations]

**Iterations Made:**
- [Document query evolution]

---

## KEEP: Findings & Response

### Executive Summary

[Concise summary of hunt results and key findings]

### Findings

| **Finding** | **Ticket** | **Description** |
|-------------|-----------|-----------------|
| [Type] | [Ticket] | [Description] |

**True Positives:** 0
**False Positives:** 0

### Lessons Learned

**What Worked Well:**
- [Successes]

**What Could Be Improved:**
- [Areas for improvement]

**Telemetry Gaps Identified:**
- [Missing data sources or visibility gaps]

### Follow-up Actions

- [ ] [Action item 1]
- [ ] [Action item 2]

---

**Hunt Completed:** [Date]
**Next Review:** [Date for recurring hunt if applicable]
"""
159
+
160
+
161
class _YamlFlowList(list):
    """List of strings whose ``str()`` is an unquoted YAML flow sequence.

    The hunt template interpolates ``platform``, ``tactics`` and
    ``data_sources`` verbatim into the frontmatter (which needs ``[a, b]``
    text) and also evaluates ``data_sources[0] if data_sources else ...`` in
    the body (which needs a real sequence). This type satisfies both uses.
    """

    def __str__(self) -> str:
        return f"[{', '.join(self)}]"


def _as_flow_list(values: Optional[list]) -> _YamlFlowList:
    """Normalize an optional list argument into a ``_YamlFlowList`` of strings."""
    if values is None:
        return _YamlFlowList()
    if isinstance(values, str):
        # A bare string is one item, not a sequence of characters.
        values = [values]
    return _YamlFlowList(str(item) for item in values)


def render_hunt_template(
    hunt_id: str,
    title: str,
    technique: Optional[str] = None,
    tactics: Optional[list] = None,
    platform: Optional[list] = None,
    data_sources: Optional[list] = None,
    hunter: str = "[Your Name]",
    hypothesis: Optional[str] = None,
    threat_context: Optional[str] = None,
    actor: Optional[str] = None,
    behavior: Optional[str] = None,
    location: Optional[str] = None,
    evidence: Optional[str] = None,
) -> str:
    """Render a hunt template with provided metadata.

    The rendered hunt always has status ``planning``, today's local date
    (``YYYY-MM-DD``) and an empty ``tags`` list.

    Args:
        hunt_id: Hunt identifier (e.g., H-0001)
        title: Hunt title
        technique: Primary MITRE technique (e.g., T1003.001)
        tactics: List of MITRE tactics
        platform: List of platforms (Windows, Linux, macOS, Cloud)
        data_sources: List of data sources; the first entry is also shown as
            the "Index/Data Source" in the CHECK section
        hunter: Hunter name
        hypothesis: Hypothesis statement
        threat_context: Threat context description
        actor: Threat actor (for ABLE)
        behavior: Behavior description (for ABLE)
        location: Location/scope (for ABLE)
        evidence: Evidence description (for ABLE)

    Returns:
        Rendered hunt markdown content

    Note:
        Values are written into the YAML frontmatter without quoting, so
        text containing YAML syntax (for example ``": "`` in a title or a
        ``","`` inside a list item) can change how the frontmatter parses.
    """
    # The template joins and indexes `techniques`, so it stays a plain list.
    techniques_list = [technique] if technique else []

    # Passing pre-formatted strings here made the template's
    # `data_sources[0]` pick the first *character* ("["), and "[]" was truthy
    # so the placeholder never appeared. _YamlFlowList renders the same
    # frontmatter text while still indexing and truth-testing like a list.
    tactics_list = _as_flow_list(tactics)
    platform_list = _as_flow_list(platform)
    data_sources_list = _as_flow_list(data_sources)
    tags_str = "[]"

    template = Template(HUNT_TEMPLATE)

    return template.render(
        hunt_id=hunt_id,
        title=title,
        status="planning",
        date=datetime.now().strftime("%Y-%m-%d"),
        hunter=hunter,
        platform=platform_list,
        tactics=tactics_list,
        techniques=techniques_list,
        data_sources=data_sources_list,
        tags=tags_str,
        hypothesis=hypothesis,
        threat_context=threat_context,
        actor=actor,
        behavior=behavior,
        location=location,
        evidence=evidence,
    )
athf/utils/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """ATHF utility functions."""