agentic-threat-hunting-framework 0.2.4__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,340 @@
1
+ """Web search integration for threat research."""
2
+
3
+ import os
4
+ import time
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
9
@dataclass
class SearchResult:
    """One entry returned by a web search.

    Attributes:
        title: Title of the result.
        url: Address of the result.
        content: Snippet or full content of the result.
        score: Relevance score (0-1).
    """

    title: str
    url: str
    # Snippet or full content
    content: str
    # Relevance score (0-1)
    score: float
17
+
18
+
19
@dataclass
class SearchResponse:
    """Outcome of a single web search call.

    Attributes:
        query: The query string that was searched for.
        results: Parsed result entries.
        answer: AI-generated answer summary, or ``None`` when absent.
        response_time_ms: Elapsed time of the search call in milliseconds.
        search_depth: Search depth label used for the call.
        images: Image entries attached to the response; a fresh empty list
            is created per instance when none are supplied.
    """

    query: str
    results: List[SearchResult]
    # AI-generated answer summary
    answer: Optional[str] = None
    response_time_ms: int = 0
    search_depth: str = "basic"
    # default_factory gives every instance its own list
    images: List[Dict[str, str]] = field(default_factory=list)
29
+
30
+
31
class TavilySearchClient:
    """Tavily Search API client for threat research.

    Wraps the ``tavily`` SDK's ``TavilyClient.search`` call and converts its
    dictionary response into ``SearchResponse`` / ``SearchResult`` objects.

    Provided by this class:
    - Generic search with configurable depth, result count and domain filters
    - AI-generated answer summaries (requested via ``include_answer``)
    - Preset searches (threat intel, system internals, adversary tradecraft,
      detection methods) that fix the query wording and a domain allow-list
    - Elapsed-time measurement of each search call

    Not provided here: errors raised by the SDK propagate unchanged to the
    caller (there is no fallback path), and no cost tracking is performed.

    Environment:
        TAVILY_API_KEY: API key from https://tavily.com
    """

    # Allow-list used by search_threat_intel().
    SECURITY_DOMAINS = [
        "attack.mitre.org",
        "github.com",
        "elastic.co",
        "microsoft.com",
        "crowdstrike.com",
        "mandiant.com",
        "redcanary.com",
        "thehackernews.com",
        "bleepingcomputer.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "thedfirreport.com",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Allow-list used by search_system_internals(): technical documentation.
    TECHNICAL_DOMAINS = [
        "microsoft.com",
        "learn.microsoft.com",
        "docs.microsoft.com",
        "developer.apple.com",
        "man7.org",
        "linux.die.net",
        "kernel.org",
        "aws.amazon.com",
        "docs.aws.amazon.com",
        "cloud.google.com",
        "en.wikipedia.org",
    ]

    # Allow-list used by search_adversary_tradecraft(): threat intelligence.
    THREAT_INTEL_DOMAINS = [
        "attack.mitre.org",
        "thedfirreport.com",
        "mandiant.com",
        "crowdstrike.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "redcanary.com",
        "elastic.co",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Allow-list used by search_detection_methods(): detection and SIEM sources.
    DETECTION_DOMAINS = [
        "github.com",
        "elastic.co",
        "splunk.com",
        "microsoft.com",
        "redcanary.com",
        "sigma-hq.github.io",
        "detection.fyi",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Initialize client with API key.

        The underlying SDK client is not created here; it is built lazily on
        the first call to ``_get_client``.

        Args:
            api_key: Tavily API key (defaults to TAVILY_API_KEY env var)

        Raises:
            ValueError: If no API key is provided or found
        """
        self.api_key = api_key or os.getenv("TAVILY_API_KEY")
        if not self.api_key:
            raise ValueError("TAVILY_API_KEY not set. Get your API key from https://tavily.com")
        self._client: Optional[Any] = None

    def _get_client(self) -> Any:
        """Get or create the Tavily SDK client instance.

        Returns:
            The cached ``TavilyClient``; created on first use.

        Raises:
            ImportError: If the ``tavily`` package cannot be imported. The
                original import failure is attached as ``__cause__``.
        """
        if self._client is None:
            # Only the import is guarded, so an error raised while
            # constructing the client is not mislabelled as a missing package.
            try:
                from tavily import TavilyClient
            except ImportError as err:
                raise ImportError(
                    "tavily-python package not installed. Run: pip install tavily-python"
                ) from err
            self._client = TavilyClient(api_key=self.api_key)
        return self._client

    def search(
        self,
        query: str,
        search_depth: str = "basic",
        max_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: bool = True,
        include_raw_content: bool = False,
    ) -> SearchResponse:
        """Execute search query.

        Args:
            query: Search query string
            search_depth: "basic" or "advanced"; passed through to the SDK
                without validation
            max_results: Maximum number of results; passed through to the SDK
                without validation
            include_domains: Limit search to these domains (omitted from the
                request when empty or None)
            exclude_domains: Exclude these domains from search (omitted from
                the request when empty or None)
            include_answer: Include AI-generated answer summary
            include_raw_content: Include full page content (increases response size)

        Returns:
            SearchResponse with parsed results, the answer (if any), the
            elapsed time of the SDK call in milliseconds, and any images.

        Raises:
            ImportError: If the ``tavily`` package is not installed
            Exception: Whatever the SDK raises when the search fails; it is
                not caught here
        """
        client = self._get_client()

        # Build search parameters
        search_params: Dict[str, Any] = {
            "query": query,
            "search_depth": search_depth,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
        }

        # Pass copies so the callee never holds a reference to a shared
        # class-level allow-list.
        if include_domains:
            search_params["include_domains"] = list(include_domains)
        if exclude_domains:
            search_params["exclude_domains"] = list(exclude_domains)

        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed (or go negative) when the system clock is adjusted.
        started = time.perf_counter()
        response = client.search(**search_params)
        response_time_ms = int((time.perf_counter() - started) * 1000)

        # `or []` also covers a key that is present but null.
        results = [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                content=item.get("content", ""),
                score=item.get("score", 0.0),
            )
            for item in response.get("results") or []
        ]

        return SearchResponse(
            query=query,
            results=results,
            answer=response.get("answer"),
            response_time_ms=response_time_ms,
            search_depth=search_depth,
            images=response.get("images") or [],
        )

    def search_threat_intel(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search with security-focused parameters.

        Builds the query "<topic> threat hunting detection", appends
        " MITRE ATT&CK <technique>" when a technique is given, and restricts
        the search to ``SECURITY_DOMAINS`` with an answer summary requested.

        Args:
            topic: Research topic (e.g., "LSASS memory dumping")
            technique: Optional MITRE ATT&CK technique (e.g., "T1003.001")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with security-focused results
        """
        query = f"{topic} threat hunting detection"
        if technique:
            query += f" MITRE ATT&CK {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            include_domains=self.SECURITY_DOMAINS,
            include_answer=True,
        )

    def search_system_internals(
        self,
        topic: str,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for system/technology internals.

        Focused on understanding how systems work normally,
        useful for the "System Research" skill. Builds the query
        "<topic> how it works internals documentation" and restricts the
        search to ``TECHNICAL_DOMAINS``.

        Args:
            topic: Technology/system topic (e.g., "LSASS", "Windows Authentication")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with technical documentation
        """
        return self.search(
            query=f"{topic} how it works internals documentation",
            search_depth=search_depth,
            include_domains=self.TECHNICAL_DOMAINS,
            include_answer=True,
        )

    def search_adversary_tradecraft(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for adversary tradecraft and attack techniques.

        Focused on how adversaries abuse systems,
        useful for the "Adversary Tradecraft" skill. Builds the query
        "<topic> adversary technique attack method", appends the technique
        when given, and restricts the search to ``THREAT_INTEL_DOMAINS``.

        Args:
            topic: Attack topic (e.g., "credential dumping", "lateral movement")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with adversary technique information
        """
        query = f"{topic} adversary technique attack method"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            include_domains=self.THREAT_INTEL_DOMAINS,
            include_answer=True,
        )

    def search_detection_methods(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for detection methods and analytics.

        Focused on how to detect specific behaviors,
        useful for detection engineering. Builds the query
        "<topic> detection rule query sigma", appends the technique when
        given, and restricts the search to ``DETECTION_DOMAINS``.

        Args:
            topic: Detection topic (e.g., "LSASS access detection")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with detection method information
        """
        query = f"{topic} detection rule query sigma"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            include_domains=self.DETECTION_DOMAINS,
            include_answer=True,
        )
326
+
327
+
328
def create_search_client(api_key: Optional[str] = None) -> Optional[TavilySearchClient]:
    """Build a ``TavilySearchClient`` when an API key can be resolved.

    Args:
        api_key: Explicit Tavily API key; when falsy, the ``TAVILY_API_KEY``
            environment variable is consulted instead.

    Returns:
        A ``TavilySearchClient`` constructed with the resolved key, or
        ``None`` when neither source yields a non-empty key.
    """
    resolved_key = api_key if api_key else os.getenv("TAVILY_API_KEY")
    if resolved_key:
        return TavilySearchClient(api_key=resolved_key)
    return None
athf/data/__init__.py CHANGED
@@ -1,8 +1,13 @@
1
1
  """ATHF reference data and templates."""
2
2
 
3
- from importlib.resources import files
3
+ import sys
4
4
  from pathlib import Path
5
5
 
6
+ if sys.version_info >= (3, 9):
7
+ from importlib.resources import files
8
+ else:
9
+ from importlib_resources import files # type: ignore[import-not-found,no-redef]
10
+
6
11
 
7
12
  def get_data_path() -> Path:
8
13
  """Get the path to ATHF data directory.
@@ -25,6 +25,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
25
25
  ### Security
26
26
  - None
27
27
 
28
+ ## [0.3.1] - 2026-01-13
29
+
30
+ ### Fixed
31
+ - **Packaging Bug** - Fixed `ModuleNotFoundError: No module named 'athf.agents'` when installing via pip/pipx
32
+ - Added missing packages to `pyproject.toml`: `athf.agents`, `athf.agents.llm`
33
+ - Packages list now includes all subdirectories: athf, athf.agents, athf.agents.llm, athf.commands, athf.core, athf.data, athf.utils
34
+ - Verified wheel build includes all agent module files
35
+
36
+ ## [0.3.0] - 2026-01-11
37
+
38
+ ### Added
39
+ - **Agent Framework** - Autonomous agents for threat hunting workflows
40
+ - `athf.agents` - Base agent framework and orchestration
41
+ - `athf.agents.llm` - LLM-powered agents (hypothesis generation, research, finding analysis)
42
+ - Agent orchestration with task delegation and result aggregation
43
+ - **Research Workflow** - Pre-hunt research and investigation (`athf research`)
44
+ - **Drift Detection** - Behavioral anomaly detection infrastructure (`athf drift`)
45
+ - **Signal Investigation** - Low-fidelity pattern scoring and investigation (`athf signals`)
46
+
47
+ ### Changed
48
+ - CLI refactored to support agent-based workflows
49
+ - Enhanced hunt creation with agent-generated hypotheses
50
+
28
51
  ## [0.2.2] - 2024-12-17
29
52
 
30
53
  ### Fixed