agentic-threat-hunting-framework 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ """Parse investigation files (YAML frontmatter + markdown).
2
+
3
+ Investigation parser is simpler than hunt parser:
4
+ - Minimal validation (only ID, title, date required)
5
+ - No LOCK section validation (optional/flexible content)
6
+ - No findings count validation (investigations not tracked in metrics)
7
+ """
8
+
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List
12
+
13
+ import yaml
14
+
15
+
16
class InvestigationParser:
    """Parser for ATHF investigation files.

    An investigation file is a markdown document that may begin with a YAML
    frontmatter block delimited by ``---`` lines. The parser splits the file
    into the frontmatter mapping and the remaining markdown body, and offers a
    lightweight structural validation of the frontmatter.
    """

    # Frontmatter block anchored at the very start of the text. The closing
    # delimiter may be followed by a newline OR by end-of-input, so a file that
    # ends exactly at "---" (no trailing newline) is still recognised.
    _FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)

    # Canonical investigation ID: "I-" followed by exactly four digits.
    _ID_RE = re.compile(r"I-\d{4}")

    # Frontmatter keys that must be present and non-empty.
    _REQUIRED_FIELDS = ("investigation_id", "title", "date")

    # Accepted values for the optional "type" frontmatter key.
    _VALID_TYPES = ("finding", "baseline", "exploratory", "other")

    def __init__(self, file_path: Path):
        """Initialize parser with investigation file path.

        Args:
            file_path: Location of the investigation file; coerced to ``Path``.
        """
        self.file_path = Path(file_path)
        self.frontmatter: Dict[str, Any] = {}
        self.content = ""

    def parse(self) -> Dict[str, Any]:
        """Parse investigation file and return structured data.

        Also stores the results on ``self.frontmatter`` and ``self.content``.

        Returns:
            Dict with keys ``file_path`` (str), ``investigation_id`` (the
            frontmatter value, or None when absent), ``frontmatter`` (dict)
            and ``content`` (markdown body with surrounding whitespace
            stripped).

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the frontmatter is not valid YAML or is not a
                mapping.
        """
        if not self.file_path.exists():
            raise FileNotFoundError(f"Investigation file not found: {self.file_path}")

        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise stop the "^---" anchor from matching.
        with open(self.file_path, "r", encoding="utf-8-sig") as f:
            content = f.read()

        self.frontmatter = self._parse_frontmatter(content)
        self.content = self._extract_content(content)

        return {
            "file_path": str(self.file_path),
            "investigation_id": self.frontmatter.get("investigation_id"),
            "frontmatter": self.frontmatter,
            "content": self.content,
        }

    def _parse_frontmatter(self, content: str) -> Dict[str, Any]:
        """Extract and parse YAML frontmatter.

        Args:
            content: Full file content

        Returns:
            Dict of frontmatter fields; empty when there is no frontmatter
            block or the block is empty.

        Raises:
            ValueError: If the block is not valid YAML, or parses to something
                other than a mapping (e.g. a list or a bare scalar).
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            return {}

        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            # Chain the original error so the YAML location info is kept.
            raise ValueError(f"Invalid YAML frontmatter: {e}") from e

        if not data:
            return {}
        if not isinstance(data, dict):
            # Callers use .get() on the result; reject other YAML documents
            # here with a clear message instead of failing later.
            raise ValueError(
                f"Invalid YAML frontmatter: expected a mapping, got {type(data).__name__}"
            )
        return data

    def _extract_content(self, content: str) -> str:
        """Extract content after frontmatter.

        Args:
            content: Full file content

        Returns:
            Content after frontmatter, stripped of leading/trailing
            whitespace. When no frontmatter block is found the whole
            (stripped) text is returned.
        """
        # The pattern is anchored at the start, so at most the leading block
        # is removed.
        return self._FRONTMATTER_RE.sub("", content, count=1).strip()

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """Return True for None or a string that is empty/whitespace-only."""
        return value is None or (isinstance(value, str) and not value.strip())

    def validate(self) -> tuple[bool, List[str]]:
        """Validate investigation structure.

        Lightweight validation - only checks minimal required fields.
        Does NOT validate LOCK sections or findings counts.

        Operates on ``self.frontmatter``, so ``parse()`` should be called
        first; otherwise the result is "Missing YAML frontmatter".

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors: List[str] = []

        # Nothing else can be checked without frontmatter.
        if not self.frontmatter:
            errors.append("Missing YAML frontmatter")
            return (False, errors)

        # A key that is present but null/empty (e.g. "title:") counts as
        # missing, not as satisfied.
        for field in self._REQUIRED_FIELDS:
            if self._is_blank(self.frontmatter.get(field)):
                errors.append(f"Missing required frontmatter field: {field}")

        investigation_id = self.frontmatter.get("investigation_id")
        if not self._is_blank(investigation_id):
            # YAML may yield a non-string here (e.g. "investigation_id: 42");
            # report that as a format error instead of passing it to the regex.
            if not isinstance(investigation_id, str) or not self._ID_RE.fullmatch(investigation_id):
                errors.append(f"Invalid investigation_id format: {investigation_id} (expected format: I-0001)")

            # The file must be named after the ID it declares.
            expected_filename = f"{investigation_id}.md"
            if self.file_path.name != expected_filename:
                errors.append(f"File name mismatch: {self.file_path.name} (expected: {expected_filename})")

        # "type" is optional; only checked when a value is given.
        investigation_type = self.frontmatter.get("type")
        if investigation_type and investigation_type not in self._VALID_TYPES:
            errors.append(
                f"Invalid investigation type: {investigation_type} (expected one of: {', '.join(self._VALID_TYPES)})"
            )

        return (len(errors) == 0, errors)
128
+
129
+
130
def parse_investigation_file(file_path: Path) -> Dict[str, Any]:
    """Parse a single investigation file in one call.

    Shorthand for building an ``InvestigationParser`` for ``file_path`` and
    invoking its ``parse()`` method.

    Args:
        file_path: Path to investigation file

    Returns:
        Parsed investigation data, exactly as returned by
        ``InvestigationParser.parse()``
    """
    return InvestigationParser(file_path).parse()
141
+
142
+
143
def validate_investigation_file(file_path: Path) -> tuple[bool, List[str]]:
    """Parse and then validate a single investigation file.

    The file is parsed first so that validation runs against its actual
    frontmatter; any exception raised by parsing propagates to the caller.

    Args:
        file_path: Path to investigation file

    Returns:
        Tuple of (is_valid, list of error messages)
    """
    investigation = InvestigationParser(file_path)
    investigation.parse()  # populates the frontmatter that validate() inspects
    outcome = investigation.validate()
    return outcome
155
+
156
+
157
def get_all_investigations(investigations_dir: Path) -> List[Dict[str, Any]]:
    """Collect the parsed form of every investigation file in a directory.

    Only files matching ``I-*.md`` directly inside ``investigations_dir`` are
    considered. Files that fail to parse are skipped and a warning line is
    printed for each of them.

    Args:
        investigations_dir: Path to investigations directory

    Returns:
        List of parsed investigation data, in sorted file-path order; empty
        when the directory does not exist.
    """
    root = Path(investigations_dir)
    parsed_items: List[Dict[str, Any]] = []

    if root.exists():
        # sorted() fixes the processing order before any file is read.
        for file_path in sorted(root.glob("I-*.md")):
            try:
                item = parse_investigation_file(file_path)
            except Exception as e:
                # Best effort: report the unreadable file and move on.
                print(f"Warning: Failed to parse {file_path}: {e}")
            else:
                parsed_items.append(item)

    return parsed_items
185
+
186
+
187
def get_next_investigation_id(investigations_dir: Path) -> str:
    """Get the next available investigation ID.

    Looks at every investigation returned by ``get_all_investigations`` and
    returns one more than the highest ``I-NNNN`` number found. For each
    investigation the frontmatter ID is used when it is a well-formed string;
    otherwise the file name (without extension) is used, so a file whose
    frontmatter lacks a valid ID still reserves the number in its name.

    Args:
        investigations_dir: Path to investigations directory

    Returns:
        Next investigation ID (e.g., "I-0001", "I-0042"); "I-0001" when no
        numbered investigation is found.
    """
    id_pattern = re.compile(r"^I-(\d{4})$")
    max_id = 0

    for investigation in get_all_investigations(investigations_dir):
        # The parsed dict always contains the "investigation_id" key, so a
        # .get() default would never apply: the value may be None (no ID in
        # the frontmatter) or a non-string YAML scalar. Guard the type before
        # handing it to the regex.
        candidates = (
            investigation.get("investigation_id"),
            Path(investigation.get("file_path") or "").stem,
        )
        for candidate in candidates:
            match = id_pattern.match(candidate) if isinstance(candidate, str) else None
            if match:
                max_id = max(max_id, int(match.group(1)))
                break  # first well-formed candidate wins

    # Zero-pad to at least four digits.
    return f"I-{max_id + 1:04d}"
@@ -1,17 +0,0 @@
1
- agentic_threat_hunting_framework-0.1.0.dist-info/licenses/LICENSE,sha256=_KObErRfiKoolznt-DF0nJnr3U9Rdh7Z4Ba7G5qqckk,1071
2
- athf/__init__.py,sha256=OrjZe8P97_BTEkscapnwSsqKSjwXNP9d8-HtGr19Ni0,241
3
- athf/__version__.py,sha256=esXptUrfVtDh81i72UK2ehkLx1LobFoISaPLeDdwcNM,59
4
- athf/cli.py,sha256=l7pptt14nWCkdRkLDo2e4KKDA90ZNyxW1wdqMLYIxTg,4280
5
- athf/commands/__init__.py,sha256=uDyr0bz-agpGO8fraXQl24wuQCxqbeCevZsJ2bDK29s,25
6
- athf/commands/hunt.py,sha256=BOHk8H5t1LVETUlNFbSmPmmKOEcnqlR5KpTYZxIVBIU,20132
7
- athf/commands/init.py,sha256=L_29fvZF8SZ1BKh2D6NyDuacCC5JXOTezIxdBnnK88E,10941
8
- athf/core/__init__.py,sha256=yG7C8ljx3UW4QZoYvDjUxsWHlbS8M-GLGB7Je7rRfqo,31
9
- athf/core/hunt_manager.py,sha256=tJywunHB_06e0Z3gPWoktGqsLtEyHAO5ZsrUjAXy-IQ,8064
10
- athf/core/hunt_parser.py,sha256=FUj0yyBIcZnaS9aItMImeBDhegQwpkewIwUMNXW_ZWU,5122
11
- athf/core/template_engine.py,sha256=vNTVhlxIXZpxU7VmQyrqCSt6ORS0IVjAV54TOmUDMTE,5636
12
- athf/utils/__init__.py,sha256=aEAPI1xnAsowOtc036cCb9ZOek5nrrfevu8PElhbNgk,30
13
- agentic_threat_hunting_framework-0.1.0.dist-info/METADATA,sha256=nn-YAzCd2zd-8UkfyT-ruhcmgfJ0yvmJ-M_ll0H5fzU,12900
14
- agentic_threat_hunting_framework-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- agentic_threat_hunting_framework-0.1.0.dist-info/entry_points.txt,sha256=GopR2iTiBs-yNMWiUZ2DaFIFglXxWJx1XPjTa3ePtfE,39
16
- agentic_threat_hunting_framework-0.1.0.dist-info/top_level.txt,sha256=Cxxg6SMLfawDJWBITsciRzq27XV8fiaAor23o9Byoes,5
17
- agentic_threat_hunting_framework-0.1.0.dist-info/RECORD,,