agentic-threat-hunting-framework 0.2.4__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/METADATA +38 -40
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/RECORD +24 -15
- athf/__version__.py +1 -1
- athf/agents/__init__.py +14 -0
- athf/agents/base.py +141 -0
- athf/agents/llm/__init__.py +27 -0
- athf/agents/llm/hunt_researcher.py +762 -0
- athf/agents/llm/hypothesis_generator.py +238 -0
- athf/cli.py +6 -1
- athf/commands/__init__.py +4 -0
- athf/commands/agent.py +452 -0
- athf/commands/context.py +6 -9
- athf/commands/env.py +2 -2
- athf/commands/hunt.py +3 -1
- athf/commands/research.py +530 -0
- athf/commands/similar.py +3 -3
- athf/core/research_manager.py +419 -0
- athf/core/web_search.py +340 -0
- athf/data/__init__.py +6 -1
- athf/data/docs/CHANGELOG.md +23 -0
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/WHEEL +0 -0
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/entry_points.txt +0 -0
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {agentic_threat_hunting_framework-0.2.4.dist-info → agentic_threat_hunting_framework-0.3.1.dist-info}/top_level.txt +0 -0
athf/core/web_search.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""Web search integration for threat research."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class SearchResult:
    """One hit from a web search.

    Field order is part of the constructor signature and must stay stable.
    """

    # Title reported for the hit.
    title: str
    # Location of the hit.
    url: str
    # Text attached to the hit: either a snippet or the full content.
    content: str
    # Relevance score for the hit, expected in the 0-1 range.
    score: float
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SearchResponse:
    """Outcome of one web search call.

    Only ``query`` and ``results`` are required; every other field has a
    default so callers can build a response from partial data.
    """

    # The query string that was searched for.
    query: str
    # Parsed hits for the query.
    results: List[SearchResult]
    # AI-generated answer summary, when one is available.
    answer: Optional[str] = None
    # Elapsed time of the search call, in milliseconds.
    response_time_ms: int = 0
    # Search depth label that was requested for the call.
    search_depth: str = "basic"
    # Image entries attached to the response; each instance gets its own list.
    images: List[Dict[str, str]] = field(default_factory=list)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TavilySearchClient:
    """Tavily Search API client for threat research.

    Wraps the ``tavily-python`` SDK and converts its dictionary payloads into
    ``SearchResponse`` / ``SearchResult`` objects.

    Features:
    - Generic ``search`` with optional include/exclude domain filtering
    - Configurable search depth (the value is forwarded to Tavily as-is)
    - Optional AI-generated answer summary
    - Convenience searches restricted to curated domain allow-lists
    - Lazy SDK import, so the package is only required when a search runs

    Errors raised by the SDK are not caught here; they propagate to the caller.

    Environment:
        TAVILY_API_KEY: API key from https://tavily.com
    """

    # Allow-list used by ``search_threat_intel``.
    SECURITY_DOMAINS = [
        "attack.mitre.org",
        "github.com",
        "elastic.co",
        "microsoft.com",
        "crowdstrike.com",
        "mandiant.com",
        "redcanary.com",
        "thehackernews.com",
        "bleepingcomputer.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "thedfirreport.com",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Allow-list used by ``search_system_internals`` (technical documentation).
    TECHNICAL_DOMAINS = [
        "microsoft.com",
        "learn.microsoft.com",
        "docs.microsoft.com",
        "developer.apple.com",
        "man7.org",
        "linux.die.net",
        "kernel.org",
        "aws.amazon.com",
        "docs.aws.amazon.com",
        "cloud.google.com",
        "en.wikipedia.org",
    ]

    # Allow-list used by ``search_adversary_tradecraft`` (threat intelligence).
    THREAT_INTEL_DOMAINS = [
        "attack.mitre.org",
        "thedfirreport.com",
        "mandiant.com",
        "crowdstrike.com",
        "unit42.paloaltonetworks.com",
        "blog.talosintelligence.com",
        "securelist.com",
        "redcanary.com",
        "elastic.co",
        "atomicredteam.io",
        "lolbas-project.github.io",
        "gtfobins.github.io",
    ]

    # Allow-list used by ``search_detection_methods`` (detection / SIEM content).
    DETECTION_DOMAINS = [
        "github.com",
        "elastic.co",
        "splunk.com",
        "microsoft.com",
        "redcanary.com",
        "sigma-hq.github.io",
        "detection.fyi",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Initialize client with API key.

        Args:
            api_key: Tavily API key (defaults to TAVILY_API_KEY env var)

        Raises:
            ValueError: If no API key is provided or found
        """
        self.api_key = api_key or os.getenv("TAVILY_API_KEY")
        if not self.api_key:
            raise ValueError("TAVILY_API_KEY not set. Get your API key from https://tavily.com")
        # The SDK client is created on first use by ``_get_client``.
        self._client: Optional[Any] = None

    def _get_client(self) -> Any:
        """Return the cached Tavily SDK client, creating it on first use.

        Raises:
            ImportError: If the ``tavily-python`` package is not installed. The
                original import failure is attached as ``__cause__``.
        """
        if self._client is None:
            # Keep the try body to the import alone so that only a missing
            # package is reported as "not installed".
            try:
                from tavily import TavilyClient
            except ImportError as exc:
                raise ImportError(
                    "tavily-python package not installed. Run: pip install tavily-python"
                ) from exc

            self._client = TavilyClient(api_key=self.api_key)
        return self._client

    def search(
        self,
        query: str,
        search_depth: str = "basic",
        max_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: bool = True,
        include_raw_content: bool = False,
    ) -> SearchResponse:
        """Execute search query.

        Args:
            query: Search query string
            search_depth: "basic" or "advanced"; forwarded to Tavily unchanged
            max_results: Maximum number of results; forwarded unchanged and not
                validated here
            include_domains: Limit search to these domains (omitted when empty)
            exclude_domains: Exclude these domains from search (omitted when empty)
            include_answer: Include AI-generated answer summary
            include_raw_content: Include full page content (increases response size)

        Returns:
            SearchResponse with the parsed results, the answer (if any), the
            measured call duration in milliseconds and any images returned.

        Raises:
            ImportError: If the ``tavily-python`` package is not installed
            Exception: Whatever the Tavily SDK raises when the search fails
        """
        client = self._get_client()

        # Build search parameters
        search_params: Dict[str, Any] = {
            "query": query,
            "search_depth": search_depth,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
        }

        # Domain filters are only sent when they actually contain entries.
        if include_domains:
            search_params["include_domains"] = include_domains

        if exclude_domains:
            search_params["exclude_domains"] = exclude_domains

        # perf_counter is monotonic, so the measured duration cannot go negative
        # or jump when the system clock is adjusted mid-request.
        started = time.perf_counter()
        response = client.search(**search_params)
        response_time_ms = int((time.perf_counter() - started) * 1000)

        # Parse results; ``or []`` tolerates a missing key as well as a null value.
        results = [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                content=item.get("content", ""),
                score=item.get("score", 0.0),
            )
            for item in response.get("results") or []
        ]

        return SearchResponse(
            query=query,
            results=results,
            answer=response.get("answer"),
            response_time_ms=response_time_ms,
            search_depth=search_depth,
            images=response.get("images") or [],
        )

    def search_threat_intel(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search with security-focused parameters.

        Appends threat-hunting keywords to the topic, restricts the search to
        ``SECURITY_DOMAINS`` and requests an answer summary.

        Args:
            topic: Research topic (e.g., "LSASS memory dumping")
            technique: Optional MITRE ATT&CK technique (e.g., "T1003.001")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with security-focused results
        """
        # Build security-focused query
        query = f"{topic} threat hunting detection"
        if technique:
            query += f" MITRE ATT&CK {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            # Pass a copy so the shared class-level list cannot be mutated downstream.
            include_domains=list(self.SECURITY_DOMAINS),
            include_answer=True,
        )

    def search_system_internals(
        self,
        topic: str,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for system/technology internals.

        Focused on understanding how systems work normally,
        useful for the "System Research" skill. The search is restricted to
        ``TECHNICAL_DOMAINS``.

        Args:
            topic: Technology/system topic (e.g., "LSASS", "Windows Authentication")
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with technical documentation
        """
        query = f"{topic} how it works internals documentation"

        return self.search(
            query=query,
            search_depth=search_depth,
            # Pass a copy so the shared class-level list cannot be mutated downstream.
            include_domains=list(self.TECHNICAL_DOMAINS),
            include_answer=True,
        )

    def search_adversary_tradecraft(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for adversary tradecraft and attack techniques.

        Focused on how adversaries abuse systems,
        useful for the "Adversary Tradecraft" skill. The search is restricted to
        ``THREAT_INTEL_DOMAINS``.

        Args:
            topic: Attack topic (e.g., "credential dumping", "lateral movement")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with adversary technique information
        """
        query = f"{topic} adversary technique attack method"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            # Pass a copy so the shared class-level list cannot be mutated downstream.
            include_domains=list(self.THREAT_INTEL_DOMAINS),
            include_answer=True,
        )

    def search_detection_methods(
        self,
        topic: str,
        technique: Optional[str] = None,
        search_depth: str = "advanced",
    ) -> SearchResponse:
        """Search for detection methods and analytics.

        Focused on how to detect specific behaviors,
        useful for detection engineering. The search is restricted to
        ``DETECTION_DOMAINS``.

        Args:
            topic: Detection topic (e.g., "LSASS access detection")
            technique: Optional MITRE ATT&CK technique
            search_depth: Search depth ("basic" or "advanced")

        Returns:
            SearchResponse with detection method information
        """
        query = f"{topic} detection rule query sigma"
        if technique:
            query += f" {technique}"

        return self.search(
            query=query,
            search_depth=search_depth,
            # Pass a copy so the shared class-level list cannot be mutated downstream.
            include_domains=list(self.DETECTION_DOMAINS),
            include_answer=True,
        )
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def create_search_client(api_key: Optional[str] = None) -> Optional[TavilySearchClient]:
    """Build a Tavily search client when an API key can be resolved.

    The explicit ``api_key`` wins; if it is missing or empty, the
    ``TAVILY_API_KEY`` environment variable is consulted instead.

    Args:
        api_key: Optional API key (defaults to TAVILY_API_KEY env var)

    Returns:
        A TavilySearchClient when a non-empty key was found, otherwise None
    """
    resolved_key = api_key if api_key else os.getenv("TAVILY_API_KEY")
    # Return None rather than raising so callers can treat search as optional.
    return TavilySearchClient(api_key=resolved_key) if resolved_key else None
|
athf/data/__init__.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
"""ATHF reference data and templates."""
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import sys
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
+
if sys.version_info >= (3, 9):
|
|
7
|
+
from importlib.resources import files
|
|
8
|
+
else:
|
|
9
|
+
from importlib_resources import files # type: ignore[import-not-found,no-redef]
|
|
10
|
+
|
|
6
11
|
|
|
7
12
|
def get_data_path() -> Path:
|
|
8
13
|
"""Get the path to ATHF data directory.
|
athf/data/docs/CHANGELOG.md
CHANGED
|
@@ -25,6 +25,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
25
25
|
### Security
|
|
26
26
|
- None
|
|
27
27
|
|
|
28
|
+
## [0.3.1] - 2026-01-13
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
- **Packaging Bug** - Fixed `ModuleNotFoundError: No module named 'athf.agents'` when installing via pip/pipx
|
|
32
|
+
- Added missing packages to `pyproject.toml`: `athf.agents`, `athf.agents.llm`
|
|
33
|
+
- Packages list now includes all subdirectories: athf, athf.agents, athf.agents.llm, athf.commands, athf.core, athf.data, athf.utils
|
|
34
|
+
- Verified wheel build includes all agent module files
|
|
35
|
+
|
|
36
|
+
## [0.3.0] - 2026-01-11
|
|
37
|
+
|
|
38
|
+
### Added
|
|
39
|
+
- **Agent Framework** - Autonomous agents for threat hunting workflows
|
|
40
|
+
- `athf.agents` - Base agent framework and orchestration
|
|
41
|
+
- `athf.agents.llm` - LLM-powered agents (hypothesis generation, research, finding analysis)
|
|
42
|
+
- Agent orchestration with task delegation and result aggregation
|
|
43
|
+
- **Research Workflow** - Pre-hunt research and investigation (`athf research`)
|
|
44
|
+
- **Drift Detection** - Behavioral anomaly detection infrastructure (`athf drift`)
|
|
45
|
+
- **Signal Investigation** - Low-fidelity pattern scoring and investigation (`athf signals`)
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
- CLI refactored to support agent-based workflows
|
|
49
|
+
- Enhanced hunt creation with agent-generated hypotheses
|
|
50
|
+
|
|
28
51
|
## [0.2.2] - 2024-12-17
|
|
29
52
|
|
|
30
53
|
### Fixed
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|