agentic-threat-hunting-framework 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/METADATA +38 -40
- agentic_threat_hunting_framework-0.3.0.dist-info/RECORD +51 -0
- athf/__version__.py +1 -1
- athf/cli.py +7 -2
- athf/commands/__init__.py +4 -0
- athf/commands/agent.py +452 -0
- athf/commands/context.py +6 -9
- athf/commands/env.py +2 -2
- athf/commands/hunt.py +3 -3
- athf/commands/init.py +45 -0
- athf/commands/research.py +530 -0
- athf/commands/similar.py +5 -5
- athf/core/research_manager.py +419 -0
- athf/core/web_search.py +340 -0
- athf/data/__init__.py +19 -0
- athf/data/docs/CHANGELOG.md +147 -0
- athf/data/docs/CLI_REFERENCE.md +1797 -0
- athf/data/docs/INSTALL.md +594 -0
- athf/data/docs/README.md +31 -0
- athf/data/docs/environment.md +256 -0
- athf/data/docs/getting-started.md +419 -0
- athf/data/docs/level4-agentic-workflows.md +480 -0
- athf/data/docs/lock-pattern.md +149 -0
- athf/data/docs/maturity-model.md +400 -0
- athf/data/docs/why-athf.md +44 -0
- athf/data/hunts/FORMAT_GUIDELINES.md +507 -0
- athf/data/hunts/H-0001.md +453 -0
- athf/data/hunts/H-0002.md +436 -0
- athf/data/hunts/H-0003.md +546 -0
- athf/data/hunts/README.md +231 -0
- athf/data/integrations/MCP_CATALOG.md +45 -0
- athf/data/integrations/README.md +129 -0
- athf/data/integrations/quickstart/splunk.md +162 -0
- athf/data/knowledge/hunting-knowledge.md +2375 -0
- athf/data/prompts/README.md +172 -0
- athf/data/prompts/ai-workflow.md +581 -0
- athf/data/prompts/basic-prompts.md +316 -0
- athf/data/templates/HUNT_LOCK.md +228 -0
- agentic_threat_hunting_framework-0.2.3.dist-info/RECORD +0 -23
- {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/WHEEL +0 -0
- {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/entry_points.txt +0 -0
- {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {agentic_threat_hunting_framework-0.2.3.dist-info → agentic_threat_hunting_framework-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
"""Manage research files and operations."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ResearchParser:
    """Parser for research files (YAML frontmatter + markdown)."""

    def __init__(self, file_path: Path) -> None:
        """Initialize parser with research file path."""
        self.file_path = Path(file_path)
        self.frontmatter: Dict[str, Any] = {}
        self.content = ""
        self.sections: Dict[str, str] = {}

    def parse(self) -> Dict[str, Any]:
        """Parse research file and return structured data.

        Returns:
            Dict containing file path, research ID, frontmatter, content,
            and parsed sections.

        Raises:
            FileNotFoundError: If the research file does not exist.
            ValueError: If the YAML frontmatter is malformed.
        """
        if not self.file_path.exists():
            raise FileNotFoundError(f"Research file not found: {self.file_path}")

        content = self.file_path.read_text(encoding="utf-8")

        # Parse YAML frontmatter
        self.frontmatter = self._parse_frontmatter(content)

        # Extract main content (after frontmatter)
        self.content = self._extract_content(content)

        # Parse research sections
        self.sections = self._parse_sections(self.content)

        return {
            "file_path": str(self.file_path),
            "research_id": self.frontmatter.get("research_id"),
            "frontmatter": self.frontmatter,
            "content": self.content,
            "sections": self.sections,
        }

    def _parse_frontmatter(self, content: str) -> Dict[str, Any]:
        """Extract and parse YAML frontmatter.

        Returns an empty dict when no frontmatter block is present.
        """
        frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n"
        match = re.match(frontmatter_pattern, content, re.DOTALL)

        if not match:
            return {}

        try:
            return yaml.safe_load(match.group(1)) or {}
        except yaml.YAMLError as e:
            # Chain the original YAML error (PEP 3134) so the root cause
            # stays visible in tracebacks instead of being swallowed.
            raise ValueError(f"Invalid YAML frontmatter: {e}") from e

    def _extract_content(self, content: str) -> str:
        """Extract content after frontmatter."""
        frontmatter_pattern = r"^---\s*\n.*?\n---\s*\n"
        content_without_fm = re.sub(frontmatter_pattern, "", content, count=1, flags=re.DOTALL)
        return content_without_fm.strip()

    def _parse_sections(self, content: str) -> Dict[str, str]:
        """Parse research sections from content.

        Returns:
            Dict mapping section names to their markdown content.
        """
        sections: Dict[str, str] = {}

        # Each numbered "## N. ..." heading runs until the next numbered
        # heading; section 5 runs until any capitalized "## ..." or EOF.
        # These are the 5 research skills the framework defines.
        section_patterns = {
            "system_research": r"##\s+1\.\s+System Research.*?(?=##\s+2\.|$)",
            "adversary_tradecraft": r"##\s+2\.\s+Adversary Tradecraft.*?(?=##\s+3\.|$)",
            "telemetry_mapping": r"##\s+3\.\s+Telemetry Mapping.*?(?=##\s+4\.|$)",
            "related_work": r"##\s+4\.\s+Related Work.*?(?=##\s+5\.|$)",
            "synthesis": r"##\s+5\.\s+Research Synthesis.*?(?=##\s+[A-Z]|$)",
        }

        for section_name, pattern in section_patterns.items():
            match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
            if match:
                sections[section_name] = match.group(0).strip()

        return sections
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_research_file(file_path: Path) -> Dict[str, Any]:
    """Parse the research document at *file_path* into structured data."""
    return ResearchParser(file_path).parse()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ResearchManager:
    """Manage research files and operations.

    Similar pattern to HuntManager but for research documents.
    Research files use R-XXXX IDs and are stored in research/ directory.
    """

    def __init__(self, research_dir: Optional[Path] = None) -> None:
        """Initialize research manager.

        Args:
            research_dir: Directory containing research files (default: ./research)
        """
        self.research_dir = Path(research_dir) if research_dir else Path.cwd() / "research"

        # Create the directory eagerly so later reads/writes never hit a
        # missing-directory error.
        if not self.research_dir.exists():
            self.research_dir.mkdir(parents=True, exist_ok=True)

    def _find_all_research_files(self) -> List[Path]:
        """Find all research files (R-*.md) under the research directory.

        Returns:
            Sorted, de-duplicated list of paths to research files
        """
        research_files: List[Path] = []

        # rglob is recursive, so both flat (research/R-0001.md) and nested
        # (research/<subdir>/R-0001.md) layouts are picked up.
        research_files.extend(self.research_dir.rglob("R-*.md"))

        return sorted(set(research_files))

    def get_next_research_id(self, prefix: str = "R-") -> str:
        """Calculate the next available research ID.

        Args:
            prefix: Research ID prefix (default: R-)

        Returns:
            Next research ID (e.g., R-0023)
        """
        research_files = self._find_all_research_files()

        if not research_files:
            return f"{prefix}0001"

        # Extract numbers from research IDs with matching prefix
        numbers = []
        pattern = re.compile(rf"^{re.escape(prefix)}(\d+)$")

        for research_file in research_files:
            try:
                research_data = parse_research_file(research_file)
                research_id = research_data.get("frontmatter", {}).get("research_id")

                if not research_id or not isinstance(research_id, str):
                    continue

                match = pattern.match(research_id)
                if match:
                    numbers.append(int(match.group(1)))
            except Exception:
                # Fall back to the filename when parsing fails so a single
                # corrupt file never blocks ID allocation.
                match = pattern.match(research_file.stem)
                if match:
                    numbers.append(int(match.group(1)))

        if not numbers:
            return f"{prefix}0001"

        # Next number with zero-padding
        next_num = max(numbers) + 1
        return f"{prefix}{next_num:04d}"

    def list_research(
        self,
        status: Optional[str] = None,
        technique: Optional[str] = None,
        topic: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """List all research documents with optional filters.

        Args:
            status: Filter by status (draft, in_progress, completed)
            technique: Filter by MITRE technique
            topic: Filter by topic (case-insensitive substring match)

        Returns:
            List of research metadata dicts
        """
        research_list = []

        for research_file in self._find_all_research_files():
            try:
                research_data = parse_research_file(research_file)
                frontmatter = research_data.get("frontmatter", {})

                # Apply filters
                if status and frontmatter.get("status") != status:
                    continue

                if technique:
                    techniques = frontmatter.get("mitre_techniques", [])
                    if technique not in techniques:
                        continue

                if topic:
                    # "or ''" guards against an explicit `topic: null` in the
                    # frontmatter, which previously raised AttributeError and
                    # silently dropped the file from the listing.
                    research_topic = (frontmatter.get("topic") or "").lower()
                    if topic.lower() not in research_topic:
                        continue

                # Extract summary info
                research_list.append(
                    {
                        "research_id": frontmatter.get("research_id"),
                        "topic": frontmatter.get("topic"),
                        "status": frontmatter.get("status"),
                        "created_date": frontmatter.get("created_date"),
                        "depth": frontmatter.get("depth"),
                        "mitre_techniques": frontmatter.get("mitre_techniques", []),
                        "linked_hunts": frontmatter.get("linked_hunts", []),
                        "duration_minutes": frontmatter.get("duration_minutes"),
                        "total_cost_usd": frontmatter.get("total_cost_usd"),
                        "file_path": str(research_file),
                    }
                )

            except Exception:
                # Skip files that can't be parsed
                continue

        return research_list

    def get_research(self, research_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific research document by ID.

        Args:
            research_id: Research ID (e.g., R-0001)

        Returns:
            Research data dict or None if not found
        """
        # Try direct file
        research_file = self.research_dir / f"{research_id}.md"
        if research_file.exists():
            return parse_research_file(research_file)

        # Try nested search
        research_files = list(self.research_dir.rglob(f"{research_id}.md"))
        if research_files:
            return parse_research_file(research_files[0])

        return None

    def search_research(self, query: str) -> List[Dict[str, Any]]:
        """Full-text search across research documents.

        Args:
            query: Search query string (matched case-insensitively)

        Returns:
            List of matching research documents
        """
        results = []
        query_lower = query.lower()

        for research_file in self._find_all_research_files():
            try:
                content = research_file.read_text(encoding="utf-8")

                if query_lower in content.lower():
                    research_data = parse_research_file(research_file)
                    frontmatter = research_data.get("frontmatter", {})

                    results.append(
                        {
                            "research_id": frontmatter.get("research_id"),
                            "topic": frontmatter.get("topic"),
                            "status": frontmatter.get("status"),
                            "file_path": str(research_file),
                        }
                    )

            except Exception:
                # Unreadable/unparseable files are skipped, not fatal.
                continue

        return results

    def link_hunt_to_research(self, research_id: str, hunt_id: str) -> bool:
        """Link a hunt to its source research.

        Updates the research document's linked_hunts field in place.

        Args:
            research_id: Research ID (e.g., R-0001)
            hunt_id: Hunt ID to link (e.g., H-0001)

        Returns:
            True if successful (including when already linked), False otherwise
        """
        research_data = self.get_research(research_id)
        if not research_data:
            return False

        frontmatter = research_data.get("frontmatter", {})
        linked_hunts = frontmatter.get("linked_hunts", [])

        # Already linked: nothing to write — report success without
        # rewriting the file (the previous version rewrote it anyway).
        if hunt_id in linked_hunts:
            return True

        linked_hunts.append(hunt_id)
        # Emit an explicit YAML flow sequence so the rewritten field is
        # valid YAML regardless of hunt ID contents.
        hunts_yaml = "[" + ", ".join(f"'{h}'" for h in linked_hunts) + "]"

        file_path = Path(research_data["file_path"])

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            if "linked_hunts:" in content:
                # Replace the existing entry, which may span multiple lines
                # when written as a block sequence. Anchoring the closing
                # fence as "\n---" avoids matching a stray "---" inside a
                # value and prevents the doubled-newline the old "\n"-suffixed
                # replacement produced.
                pattern = r"linked_hunts:.*?(?=\n[a-z_]+:|\n---)"
                replacement = f"linked_hunts: {hunts_yaml}"
                content = re.sub(pattern, replacement, content, flags=re.DOTALL)
            else:
                # Add linked_hunts just before the closing frontmatter fence.
                pattern = r"\n---\s*\n"
                replacement = f"\nlinked_hunts: {hunts_yaml}\n---\n"
                content = re.sub(pattern, replacement, content, count=1)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)

            return True

        except Exception:
            # Best-effort: I/O or regex failures report False rather than
            # propagating to the caller.
            return False

    def create_research_file(
        self,
        research_id: str,
        topic: str,
        content: str,
        frontmatter: Dict[str, Any],
    ) -> Path:
        """Create a new research file.

        Args:
            research_id: Research ID (e.g., R-0001)
            topic: Research topic
            content: Markdown content
            frontmatter: YAML frontmatter dict (mutated: id/topic/defaults set)

        Returns:
            Path to created file
        """
        # Ensure research_id and topic are in frontmatter
        frontmatter["research_id"] = research_id
        frontmatter["topic"] = topic
        frontmatter.setdefault("created_date", datetime.now().strftime("%Y-%m-%d"))
        frontmatter.setdefault("status", "completed")

        # Build file content; sort_keys=False preserves caller-chosen order.
        yaml_content = yaml.dump(frontmatter, default_flow_style=False, sort_keys=False)
        file_content = f"---\n{yaml_content}---\n\n{content}"

        # Write file
        file_path = self.research_dir / f"{research_id}.md"
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(file_content)

        return file_path

    def calculate_stats(self) -> Dict[str, Any]:
        """Calculate research program statistics.

        Returns:
            Dict with counts, costs, and other metrics
        """
        research_list = self.list_research()

        if not research_list:
            return {
                "total_research": 0,
                "completed_research": 0,
                "total_cost_usd": 0.0,
                "total_duration_minutes": 0,
                "avg_duration_minutes": 0.0,
                "by_status": {},
                "total_linked_hunts": 0,
            }

        total_research = len(research_list)
        completed_research = sum(1 for r in research_list if r.get("status") == "completed")

        # "or 0" coerces missing/None cost and duration fields to zero.
        total_cost = sum(r.get("total_cost_usd", 0) or 0 for r in research_list)
        total_duration = sum(r.get("duration_minutes", 0) or 0 for r in research_list)
        avg_duration = total_duration / total_research if total_research > 0 else 0.0

        # Count by status
        by_status: Dict[str, int] = {}
        for research in research_list:
            status = research.get("status", "unknown")
            by_status[status] = by_status.get(status, 0) + 1

        # Count linked hunts
        total_linked_hunts = sum(len(r.get("linked_hunts", [])) for r in research_list)

        return {
            "total_research": total_research,
            "completed_research": completed_research,
            "total_cost_usd": round(total_cost, 4),
            "total_duration_minutes": total_duration,
            "avg_duration_minutes": round(avg_duration, 1),
            "by_status": by_status,
            "total_linked_hunts": total_linked_hunts,
        }
|