agentic-threat-hunting-framework 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ """Splunk REST API client for ATHF.
2
+
3
+ This module provides direct Splunk API integration using authentication tokens.
4
+ Use this when MCP integration is not available or for programmatic access.
5
+ """
6
+
7
+ import time
8
+ from typing import Any, Dict, List, Optional
9
+ from urllib.parse import urljoin
10
+
11
+ import requests
12
+ from requests.adapters import HTTPAdapter
13
+ from urllib3.util.retry import Retry
14
+
15
+
16
class SplunkClient:
    """Client for Splunk REST API operations.

    All requests go through one ``requests.Session`` that carries the bearer
    token and retries on 429/5xx responses.

    Args:
        host: Splunk host (e.g., "splunk.example.com" or "https://splunk.example.com:8089")
        token: Splunk authentication token
        verify_ssl: Whether to verify SSL certificates (default: True)
        timeout: Request timeout in seconds (default: 30)

    Raises:
        ValueError: If ``host`` contains a port that is not a valid number

    Example:
        >>> client = SplunkClient(host="splunk.example.com", token="your-token")
        >>> results = client.search("index=main | head 10", max_count=10)
        >>> for event in results:
        ...     print(event)
    """

    # Port appended to ``host`` when the caller does not specify one.
    DEFAULT_MANAGEMENT_PORT = 8089

    def __init__(self, host: str, token: str, verify_ssl: bool = True, timeout: int = 30):
        self.base_url = self._normalize_host(host)
        self.token = token
        self.verify_ssl = verify_ssl
        self.timeout = timeout

        # Create session with retry logic
        self.session = requests.Session()
        retry_strategy = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Set default headers
        self.session.headers.update(
            {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/x-www-form-urlencoded",
            }
        )

    @staticmethod
    def _normalize_host(host: str) -> str:
        """Return ``host`` as a base URL with a scheme and an explicit port.

        ``https://`` is assumed when no scheme is given. The default
        management port is added only when the URL carries no port at all, so
        a caller-supplied port (e.g. ``:8000``) is left untouched instead of
        having ``:8089`` appended after it.

        Args:
            host: Bare host name or full URL

        Returns:
            Base URL without a trailing slash

        Raises:
            ValueError: If the port in ``host`` is not a valid number
        """
        from urllib.parse import urlsplit, urlunsplit

        if not host.startswith(("http://", "https://")):
            host = f"https://{host}"

        parts = urlsplit(host)
        if parts.port is None:
            # Insert the port into the network location, not at the end of the
            # string, so a URL that also has a path stays well formed.
            netloc = f"{parts.netloc.rstrip(':')}:{SplunkClient.DEFAULT_MANAGEMENT_PORT}"
            parts = parts._replace(netloc=netloc)
        return urlunsplit(parts).rstrip("/")

    @staticmethod
    def _normalize_query(query: str) -> str:
        """Return ``query`` in the form sent to the search endpoints.

        A query is passed through when its first word is the ``search``
        command or when it starts with ``|`` (a generating command such as
        ``| tstats`` must stay first). Anything else gets ``search `` prepended.
        The first *word* is compared, so a term like ``searchterm=foo`` is
        still prefixed.

        Args:
            query: SPL search query

        Returns:
            Query string ready to submit
        """
        stripped = query.strip()
        if not stripped:
            return f"search {query}"
        if stripped.startswith("|") or stripped.split(None, 1)[0] == "search":
            return stripped
        return f"search {stripped}"

    @staticmethod
    def _job_endpoint(sid: str, suffix: str = "") -> str:
        """Build the endpoint path for a search job.

        The search ID is percent-encoded so characters such as ``/`` or spaces
        cannot alter the request path.

        Args:
            sid: Search job ID
            suffix: Optional trailing path segment, e.g. "/results"

        Returns:
            Endpoint path starting with "/services/search/jobs/"
        """
        from urllib.parse import quote

        return f"/services/search/jobs/{quote(sid, safe='')}{suffix}"

    @staticmethod
    def _extract_results(response: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Pull result rows out of a parsed search response.

        Args:
            response: Parsed JSON body from a search endpoint

        Returns:
            The "results" list when present, otherwise the "content" of every
            item under "entry" that has one; an empty list if neither exists
        """
        if "results" in response:
            return response["results"]  # type: ignore[no-any-return]
        # Handle alternative response format
        return [entry["content"] for entry in response.get("entry", []) if "content" in entry]

    def _request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        json_response: bool = True,
    ) -> Any:
        """Make HTTP request to Splunk API.

        Args:
            method: HTTP method (GET, POST, DELETE)
            endpoint: API endpoint path
            params: Query parameters
            data: Form data for POST requests
            json_response: Whether to parse JSON response

        Returns:
            Parsed JSON when ``json_response`` is true (an empty dict if the
            response has no body), otherwise the raw response object

        Raises:
            requests.HTTPError: If request fails
        """
        url = urljoin(self.base_url, endpoint)

        response = self.session.request(
            method=method, url=url, params=params, data=data, verify=self.verify_ssl, timeout=self.timeout
        )

        response.raise_for_status()

        if not json_response:
            return response
        # A successful response may have no body (e.g. 204 No Content);
        # decoding it as JSON would raise, so report "no data" instead.
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()

    def test_connection(self) -> Dict[str, Any]:
        """Test connection and authentication to Splunk.

        Returns:
            Dict with server info if successful

        Raises:
            requests.HTTPError: If authentication fails
        """
        return self._request("GET", "/services/server/info", params={"output_mode": "json"})  # type: ignore[no-any-return]

    def get_indexes(self) -> List[str]:
        """List available Splunk indexes.

        Returns:
            List of index names
        """
        # Collection endpoints are paginated; count=0 requests every entry
        # rather than only the server's default first page.
        response = self._request("GET", "/services/data/indexes", params={"output_mode": "json", "count": 0})
        return [entry["name"] for entry in response.get("entry", [])]

    def search(
        self,
        query: str,
        earliest_time: str = "-24h",
        latest_time: str = "now",
        max_count: int = 100,
        output_mode: str = "json",
    ) -> List[Dict[str, Any]]:
        """Execute a Splunk search query (oneshot search for quick results).

        Args:
            query: SPL search query
            earliest_time: Start time (e.g., "-24h", "2024-01-01T00:00:00")
            latest_time: End time (e.g., "now", "2024-01-02T00:00:00")
            max_count: Maximum number of results to return
            output_mode: Output format sent to Splunk. The response body is
                always decoded as JSON here, so only "json" is usable.

        Returns:
            List of search results

        Example:
            >>> results = client.search(
            ...     'index=main sourcetype=linux_secure "Failed password"',
            ...     earliest_time="-1h",
            ...     max_count=50
            ... )
        """
        # Use oneshot search for quick results (no job creation)
        data = {
            "search": self._normalize_query(query),
            "earliest_time": earliest_time,
            "latest_time": latest_time,
            "max_count": max_count,
            "output_mode": output_mode,
        }

        response = self._request("POST", "/services/search/jobs/oneshot", data=data)
        return self._extract_results(response)

    def create_search_job(self, query: str, earliest_time: str = "-24h", latest_time: str = "now", **kwargs: Any) -> str:
        """Create an async search job for long-running queries.

        Args:
            query: SPL search query
            earliest_time: Start time
            latest_time: End time
            **kwargs: Additional search parameters; these override the
                defaults above when the same key is given

        Returns:
            Search job ID (sid)

        Raises:
            ValueError: If the response contains no search job ID

        Example:
            >>> sid = client.create_search_job(
            ...     'index=* | stats count by sourcetype',
            ...     earliest_time="-7d"
            ... )
            >>> results = client.get_search_results(sid)
        """
        data = {
            "search": self._normalize_query(query),
            "earliest_time": earliest_time,
            "latest_time": latest_time,
            "output_mode": "json",
            **kwargs,
        }

        response = self._request("POST", "/services/search/jobs", data=data)

        # Extract search ID from response
        if "sid" in response:
            return response["sid"]  # type: ignore[no-any-return]
        entries = response.get("entry") or []
        if entries:
            return entries[0]["name"]  # type: ignore[no-any-return]

        raise ValueError("Could not extract search job ID from response")

    def get_search_job_status(self, sid: str) -> Dict[str, Any]:
        """Get status of a search job.

        Args:
            sid: Search job ID

        Returns:
            Dict with job status information
        """
        return self._request("GET", self._job_endpoint(sid), params={"output_mode": "json"})  # type: ignore[no-any-return]

    def wait_for_search_job(self, sid: str, poll_interval: int = 2, max_wait: int = 300) -> bool:
        """Wait for search job to complete.

        The status is checked at least once. The time limit is measured on a
        monotonic clock, so time spent inside the status requests counts
        towards ``max_wait`` as well as the time spent sleeping.

        Args:
            sid: Search job ID
            poll_interval: Seconds between status checks
            max_wait: Maximum seconds to wait

        Returns:
            True if job completed, False if timeout
        """
        deadline = time.monotonic() + max_wait
        while True:
            status = self.get_search_job_status(sid)

            # Check if job is done
            entries = status.get("entry") or []
            if entries and entries[0].get("content", {}).get("isDone"):
                return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False
            # Never sleep past the deadline.
            time.sleep(min(poll_interval, remaining))

    def get_search_results(
        self, sid: str, offset: int = 0, count: int = 100, output_mode: str = "json"
    ) -> List[Dict[str, Any]]:
        """Get results from a completed search job.

        Args:
            sid: Search job ID
            offset: Result offset (for pagination)
            count: Number of results to return
            output_mode: Output format sent to Splunk. The response body is
                always decoded as JSON here, so only "json" is usable.

        Returns:
            List of search results (empty when the response has no body)
        """
        params = {
            "output_mode": output_mode,
            "offset": offset,
            "count": count,
        }

        response = self._request("GET", self._job_endpoint(sid, "/results"), params=params)
        return self._extract_results(response)

    def delete_search_job(self, sid: str) -> None:
        """Delete a search job.

        Args:
            sid: Search job ID

        Raises:
            requests.HTTPError: If the delete request fails
        """
        # No JSON output is requested for this call, so the body must not be
        # decoded as JSON; a successful delete would otherwise raise.
        self._request("DELETE", self._job_endpoint(sid), json_response=False)

    def search_async(
        self,
        query: str,
        earliest_time: str = "-24h",
        latest_time: str = "now",
        max_results: int = 100,
        wait: bool = True,
        max_wait: int = 300,
    ) -> List[Dict[str, Any]]:
        """Execute a search asynchronously and return results.

        This is useful for longer-running queries that may timeout with oneshot.
        The search job is always deleted before this method returns.

        Args:
            query: SPL search query
            earliest_time: Start time
            latest_time: End time
            max_results: Maximum results to return
            wait: Whether to wait for job completion. When False the results
                are requested immediately, whatever state the job is in.
            max_wait: Maximum seconds to wait for job

        Returns:
            List of search results

        Raises:
            TimeoutError: If ``wait`` is true and the job does not complete
                within ``max_wait`` seconds

        Example:
            >>> results = client.search_async(
            ...     'index=* | stats count by sourcetype',
            ...     earliest_time="-7d",
            ...     max_results=1000
            ... )
        """
        # Create search job
        sid = self.create_search_job(query, earliest_time, latest_time)

        try:
            # Wait for completion
            if wait and not self.wait_for_search_job(sid, max_wait=max_wait):
                raise TimeoutError(f"Search job {sid} did not complete within {max_wait}s")

            # Get results
            return self.get_search_results(sid, count=max_results)

        finally:
            # Clean up search job. This is deliberately best effort: a cleanup
            # failure must not replace the results or the original error.
            try:
                self.delete_search_job(sid)
            except Exception:
                pass  # Ignore cleanup errors
332
+
333
+
334
def create_client_from_env() -> SplunkClient:
    """Create Splunk client from environment variables.

    Surrounding whitespace is stripped from every value before it is used, so
    a stray space or newline cannot turn ``SPLUNK_VERIFY_SSL=true`` into
    "verification off" or let a blank host or token pass the required check.

    Environment variables:
        SPLUNK_HOST: Splunk host
        SPLUNK_TOKEN: Authentication token
        SPLUNK_VERIFY_SSL: Whether to verify SSL (default: true). Only
            "true", "1" and "yes" (case-insensitive) enable verification.

    Returns:
        Configured SplunkClient instance

    Raises:
        ValueError: If required environment variables are missing
    """
    import os

    host = (os.getenv("SPLUNK_HOST") or "").strip()
    token = (os.getenv("SPLUNK_TOKEN") or "").strip()

    if not host:
        raise ValueError("SPLUNK_HOST environment variable is required")
    if not token:
        raise ValueError("SPLUNK_TOKEN environment variable is required")

    verify_flag = os.getenv("SPLUNK_VERIFY_SSL", "true").strip().lower()
    verify_ssl = verify_flag in ("true", "1", "yes")

    return SplunkClient(host=host, token=token, verify_ssl=verify_ssl)
@@ -16,7 +16,8 @@ tactics: {{ tactics }}
16
16
  techniques: {{ techniques }}
17
17
  data_sources: {{ data_sources }}
18
18
  related_hunts: []
19
- findings_count: 0
19
+ {% if spawned_from %}spawned_from: {{ spawned_from }}
20
+ {% endif %}findings_count: 0
20
21
  true_positives: 0
21
22
  false_positives: 0
22
23
  customer_deliverables: []
@@ -57,6 +58,8 @@ tags: {{ tags }}
57
58
 
58
59
  - **MITRE ATT&CK Techniques:** {{ ', '.join(techniques) if techniques else '[List relevant techniques]' }}
59
60
  - **CTI Sources & References:** [Links to reports, blogs, etc.]
61
+ {% if spawned_from %}- **Research Document:** See [{{ spawned_from }}](../research/{{ spawned_from }}.md) for detailed pre-hunt research
62
+ {% endif %}
60
63
 
61
64
  ### Related Tickets
62
65
 
@@ -172,6 +175,7 @@ def render_hunt_template(
172
175
  behavior: Optional[str] = None,
173
176
  location: Optional[str] = None,
174
177
  evidence: Optional[str] = None,
178
+ spawned_from: Optional[str] = None,
175
179
  ) -> str:
176
180
  """Render a hunt template with provided metadata.
177
181
 
@@ -189,6 +193,7 @@ def render_hunt_template(
189
193
  behavior: Behavior description (for ABLE)
190
194
  location: Location/scope (for ABLE)
191
195
  evidence: Evidence description (for ABLE)
196
+ spawned_from: Research document ID (e.g., R-0001) that this hunt is based on
192
197
 
193
198
  Returns:
194
199
  Rendered hunt markdown content
@@ -221,4 +226,5 @@ def render_hunt_template(
221
226
  behavior=behavior,
222
227
  location=location,
223
228
  evidence=evidence,
229
+ spawned_from=spawned_from,
224
230
  )
@@ -25,6 +25,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
25
25
  ### Security
26
26
  - None
27
27
 
28
+ ## [0.4.0] - 2026-01-14
29
+
30
+ ### Added
31
+ - **Splunk Integration** - Native Splunk data source support
32
+ - `athf/commands/splunk.py` - Splunk CLI command for query execution
33
+ - `athf/core/splunk_client.py` - Splunk REST API client
34
+ - Optional dependencies in pyproject.toml: `splunk = ["requests>=2.25.0"]`
35
+ - Integration quickstart guide at `integrations/quickstart/splunk.md`
36
+ - **Documentation Expansion** - Comprehensive CLI reference and user guides
37
+ - CLI_REFERENCE.md expanded by +530 lines with complete command documentation
38
+ - Enhanced getting-started.md with improved onboarding workflow
39
+ - Improved level4-agentic-workflows.md with agent orchestration patterns
40
+ - Enhanced maturity-model.md with +70 lines of maturity progression guidance
41
+ - **Workspace Structure** - Standard directory initialization
42
+ - docs/, hunts/, integrations/, knowledge/, prompts/, templates/ directories
43
+ - environment.md template for documenting data sources and tech stack
44
+
45
+ ### Changed
46
+ - **AGENTS.md** - Updated AI assistant instructions with Splunk integration context
47
+ - **CLI Enhancements** - Improved command structure and error handling
48
+ - **Template Engine** - Enhanced template rendering capabilities
49
+ - **Web Search** - Updated Tavily integration for research workflows
50
+
51
+ ### Removed
52
+ - **Testing Infrastructure** - Removed testing/ directory (8 files)
53
+ - Consolidated testing approach for cleaner repository structure
54
+ - Files removed: AGENTS.md, PRESENTATION_OUTLINE.md, README.md, TEST-SUMMARY.md, TESTING.md
55
+ - Scripts removed: test-fresh-install.sh, test-local.sh, test-quick.sh
56
+
28
57
  ## [0.3.1] - 2026-01-13
29
58
 
30
59
  ### Fixed