agentic-threat-hunting-framework 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/METADATA +4 -1
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/RECORD +26 -19
- athf/__version__.py +1 -1
- athf/agents/__init__.py +14 -0
- athf/agents/base.py +141 -0
- athf/agents/llm/__init__.py +27 -0
- athf/agents/llm/hunt_researcher.py +762 -0
- athf/agents/llm/hypothesis_generator.py +238 -0
- athf/cli.py +17 -10
- athf/commands/__init__.py +19 -3
- athf/commands/agent.py +43 -1
- athf/commands/hunt.py +63 -12
- athf/commands/similar.py +2 -2
- athf/commands/splunk.py +323 -0
- athf/core/splunk_client.py +360 -0
- athf/core/template_engine.py +7 -1
- athf/core/web_search.py +1 -1
- athf/data/docs/CHANGELOG.md +52 -0
- athf/data/docs/CLI_REFERENCE.md +518 -12
- athf/data/docs/getting-started.md +47 -3
- athf/data/docs/level4-agentic-workflows.md +9 -1
- athf/data/docs/maturity-model.md +56 -14
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/WHEEL +0 -0
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/entry_points.txt +0 -0
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {agentic_threat_hunting_framework-0.3.0.dist-info → agentic_threat_hunting_framework-0.4.0.dist-info}/top_level.txt +0 -0
athf/commands/splunk.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
"""Splunk integration commands for ATHF.
|
|
2
|
+
|
|
3
|
+
This module provides CLI commands for interacting with Splunk via REST API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.table import Table
|
|
13
|
+
|
|
14
|
+
from athf.core.splunk_client import SplunkClient
|
|
15
|
+
|
|
16
|
+
# Module-level Rich console shared by the commands in this module for styled
# terminal output (status lines, tables, error messages).
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_client(host: Optional[str], token: Optional[str], verify_ssl: Optional[bool]) -> SplunkClient:
    """Build a SplunkClient, preferring CLI values over environment variables.

    Args:
        host: Splunk host given on the command line, or None/empty to use SPLUNK_HOST
        token: Auth token given on the command line, or None/empty to use SPLUNK_TOKEN
        verify_ssl: Explicit SSL verification choice; None means "read SPLUNK_VERIFY_SSL"

    Returns:
        Configured SplunkClient

    Raises:
        click.UsageError: If no host or no token could be resolved
    """
    # An explicit (truthy) CLI value always wins; otherwise consult the environment.
    resolved_host = host or os.getenv("SPLUNK_HOST")
    resolved_token = token or os.getenv("SPLUNK_TOKEN")

    # Only the tri-state None falls back to the environment, so that an explicit
    # --no-verify-ssl (False) is respected. Unset variable defaults to verifying.
    if verify_ssl is None:
        verify_ssl = os.getenv("SPLUNK_VERIFY_SSL", "true").lower() in ("true", "1", "yes")

    if not (resolved_host and resolved_token):
        raise click.UsageError(
            "Splunk credentials required. Provide via:\n"
            " • CLI: --host and --token flags\n"
            " • Environment: SPLUNK_HOST and SPLUNK_TOKEN variables\n"
            " • Config file: Create .env with credentials"
        )

    return SplunkClient(host=resolved_host, token=resolved_token, verify_ssl=verify_ssl)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Root command group for the Splunk commands; the subcommands defined in this
# module register themselves on it through @splunk.command().
# NOTE: the function has no body besides its docstring. click shows that
# docstring as the group's help text, so it is user-facing output. The "\b"
# lines are click's marker for "do not re-wrap the following paragraph".
@click.group()
def splunk() -> None:
    """Splunk REST API integration.

    \b
    Execute SPL queries and interact with Splunk directly from ATHF CLI.

    \b
    Setup:
    1. Create a Splunk authentication token:
    Settings → Tokens → New Token

    2. Set environment variables (recommended):
    export SPLUNK_HOST="splunk.example.com"
    export SPLUNK_TOKEN="your-token-here"

    3. Or use --host and --token flags with each command

    \b
    Examples:
    # Test connection
    athf splunk test

    # List available indexes
    athf splunk indexes

    # Execute a query
    athf splunk search 'index=main "Failed password" | head 10'

    # Query with time range
    athf splunk search 'index=* | stats count by sourcetype' \\
    --earliest "-7d" --latest "now" --count 100
    """
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@splunk.command()
@click.option("--host", envvar="SPLUNK_HOST", help="Splunk host (e.g., splunk.example.com)")
@click.option("--token", envvar="SPLUNK_TOKEN", help="Splunk authentication token")
@click.option("--verify-ssl/--no-verify-ssl", default=None, help="Verify SSL certificates")
def test(host: Optional[str], token: Optional[str], verify_ssl: Optional[bool]) -> None:
    """Test Splunk connection and authentication.

    \b
    Validates that:
    • Host is reachable
    • Token is valid
    • API access is working

    \b
    Example:
    athf splunk test
    """
    # Everything, including credential resolution, runs inside the try so that
    # any failure is reported uniformly and the command aborts.
    try:
        server_info = get_client(host, token, verify_ssl).test_connection()

        console.print("\n[bold green]✓ Connection successful![/bold green]\n")

        # Server details are optional: only shown when the response carries
        # at least one entry.
        entries = server_info["entry"] if "entry" in server_info else []
        if len(entries) > 0:
            details = entries[0].get("content", {})
            console.print(f"[bold]Server:[/bold] {details.get('serverName', 'N/A')}")
            console.print(f"[bold]Version:[/bold] {details.get('version', 'N/A')}")
            console.print(f"[bold]Build:[/bold] {details.get('build', 'N/A')}")

    except Exception as exc:
        console.print(f"\n[bold red]✗ Connection failed:[/bold red] {exc}\n", style="red")
        raise click.Abort()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@splunk.command()
@click.option("--host", envvar="SPLUNK_HOST", help="Splunk host")
@click.option("--token", envvar="SPLUNK_TOKEN", help="Splunk authentication token")
@click.option("--verify-ssl/--no-verify-ssl", default=None, help="Verify SSL certificates")
@click.option("--format", "output_format", type=click.Choice(["table", "json", "list"]), default="list", help="Output format")
def indexes(host: Optional[str], token: Optional[str], verify_ssl: Optional[bool], output_format: str) -> None:
    """List available Splunk indexes.

    \b
    Shows all indexes accessible with current credentials.

    \b
    Example:
    athf splunk indexes
    athf splunk indexes --format json
    """
    try:
        available = get_client(host, token, verify_ssl).get_indexes()

        if not available:
            console.print("[yellow]No indexes found[/yellow]")
            return

        # JSON goes through click.echo (no Rich styling) and keeps the order
        # returned by the client.
        if output_format == "json":
            click.echo(json.dumps({"indexes": available}, indent=2))
            return

        if output_format == "table":
            listing = Table(title=f"Splunk Indexes ({len(available)} total)")
            listing.add_column("Index Name", style="cyan")
            for index_name in sorted(available):
                listing.add_row(index_name)
            console.print(listing)
            return

        # Remaining choice is "list": a sorted bullet list.
        console.print(f"\n[bold]Available Indexes ({len(available)}):[/bold]\n")
        for index_name in sorted(available):
            console.print(f" • {index_name}")
        console.print()

    except Exception as exc:
        console.print(f"[bold red]Error:[/bold red] {exc}", style="red")
        raise click.Abort()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@splunk.command()
@click.argument("query")
@click.option("--host", envvar="SPLUNK_HOST", help="Splunk host")
@click.option("--token", envvar="SPLUNK_TOKEN", help="Splunk authentication token")
@click.option("--verify-ssl/--no-verify-ssl", default=None, help="Verify SSL certificates")
@click.option("--earliest", default="-24h", help="Earliest time (e.g., '-24h', '2024-01-01T00:00:00')")
@click.option("--latest", default="now", help="Latest time (e.g., 'now', '2024-01-02T00:00:00')")
@click.option("--count", default=100, type=int, help="Maximum results to return")
@click.option("--format", "output_format", type=click.Choice(["json", "table", "raw"]), default="json", help="Output format")
@click.option("--async-search/--oneshot", "use_async", default=False, help="Use async search for long queries")
@click.option("--max-wait", default=300, type=int, help="Max wait time for async searches (seconds)")
def search(
    query: str,
    host: Optional[str],
    token: Optional[str],
    verify_ssl: Optional[bool],
    earliest: str,
    latest: str,
    count: int,
    output_format: str,
    use_async: bool,
    max_wait: int,
) -> None:
    """Execute a Splunk search query.

    \b
    Runs SPL (Splunk Processing Language) queries and returns results.

    \b
    Query Examples:
    'index=main "Failed password"'
    'index=* sourcetype=linux_secure | stats count by user'
    'index=web status>=400 | timechart count by status'

    \b
    Time Format Examples:
    --earliest "-1h" (last hour)
    --earliest "-7d" (last 7 days)
    --earliest "2024-01-01T00:00:00" (absolute time)

    \b
    Examples:
    # Basic search
    athf splunk search 'index=main error'

    # With time range
    athf splunk search 'index=* | stats count by sourcetype' \\
    --earliest "-7d" --count 1000

    # JSON output for parsing
    athf splunk search 'index=main | head 10' --format json

    # Long-running query (async)
    athf splunk search 'index=* | rare limit=20 sourcetype' \\
    --async-search --max-wait 600
    """
    # Imported locally so this block does not depend on a change to the
    # module's import section. User-supplied SPL (e.g. "[search ...]"
    # subsearches) and event text can contain square brackets that Rich would
    # otherwise interpret as style markup and drop from the output.
    from rich.markup import escape

    try:
        client = get_client(host, token, verify_ssl)

        console.print(f"\n[bold]Executing query:[/bold] {escape(query)}")
        console.print(f"[bold]Time range:[/bold] {escape(earliest)} to {escape(latest)}")
        console.print(f"[bold]Max results:[/bold] {count}\n")

        # Execute search
        if use_async:
            console.print("[dim]Using async search (for longer queries)...[/dim]")
            results = client.search_async(
                query=query, earliest_time=earliest, latest_time=latest, max_results=count, max_wait=max_wait
            )
        else:
            console.print("[dim]Using oneshot search (fast for small queries)...[/dim]")
            results = client.search(query=query, earliest_time=earliest, latest_time=latest, max_count=count)

        if not results:
            console.print("[yellow]No results found[/yellow]")
            return

        console.print(f"[green]✓ Found {len(results)} results[/green]\n")

        # Output results
        if output_format == "json":
            # click.echo bypasses Rich, so the JSON body is emitted verbatim.
            click.echo(json.dumps(results, indent=2, default=str))
        elif output_format == "table":
            # Events may carry different field sets; the table uses the union,
            # sorted once so headers and row cells stay aligned.
            columns = sorted({field for result in results for field in result.keys()})

            table = Table(title=f"Search Results ({len(results)} events)")
            for field in columns:
                table.add_column(escape(field), overflow="fold")

            for result in results[:count]:  # Limit display
                table.add_row(*(escape(str(result.get(field, ""))) for field in columns))

            console.print(table)
        else:  # raw
            for i, result in enumerate(results, 1):
                console.print(f"[bold cyan]Event {i}:[/bold cyan]")
                for key, value in result.items():
                    console.print(f" {escape(str(key))}: {escape(str(value))}")
                console.print()

    except TimeoutError as e:
        console.print(f"\n[bold red]Timeout:[/bold red] {escape(str(e))}", style="red")
        if use_async:
            # The async path is what raises TimeoutError when the job outlives
            # --max-wait, so recommending --async-search here would be circular.
            console.print("[yellow]Try increasing --max-wait or narrowing the time range[/yellow]")
        else:
            console.print("[yellow]Try using --async-search for long queries[/yellow]")
        raise click.Abort()
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {escape(str(e))}", style="red")
        raise click.Abort()
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@splunk.command()
@click.option("--host", envvar="SPLUNK_HOST", help="Splunk host")
@click.option("--token", envvar="SPLUNK_TOKEN", help="Splunk authentication token")
@click.option("--verify-ssl/--no-verify-ssl", default=None, help="Verify SSL certificates")
def config(host: Optional[str], token: Optional[str], verify_ssl: Optional[bool]) -> None:
    """Show current Splunk configuration.

    \b
    Displays configuration from environment variables and validates credentials.

    \b
    Example:
    athf splunk config
    """
    console.print("\n[bold]Splunk Configuration:[/bold]\n")

    # Report what the environment provides. The token value itself is never
    # printed, only whether it is present.
    host_from_env = os.getenv("SPLUNK_HOST")
    token_from_env = os.getenv("SPLUNK_TOKEN")
    verify_from_env = os.getenv("SPLUNK_VERIFY_SSL", "true")

    host_label = host_from_env or "[red]Not set[/red]"
    token_label = "[green]Set[/green]" if token_from_env else "[red]Not set[/red]"

    console.print(f"[bold]SPLUNK_HOST:[/bold] {host_label}")
    console.print(f"[bold]SPLUNK_TOKEN:[/bold] {token_label}")
    console.print(f"[bold]SPLUNK_VERIFY_SSL:[/bold] {verify_from_env}")

    # Without both a host and a token (from flags or environment) there is
    # nothing to test.
    if not ((host or host_from_env) and (token or token_from_env)):
        console.print("\n[yellow]⚠ Missing credentials - cannot test connection[/yellow]\n")
        console.print("[dim]Set SPLUNK_HOST and SPLUNK_TOKEN environment variables[/dim]\n")
        return

    console.print("\n[dim]Testing connection...[/dim]")
    try:
        # get_client reads SPLUNK_VERIFY_SSL itself when verify_ssl is None
        get_client(host, token, verify_ssl).test_connection()
        console.print("[bold green]✓ Connection successful[/bold green]\n")
    except Exception as exc:
        # A failed probe is reported but does not abort: this command is diagnostic.
        console.print(f"[bold red]✗ Connection failed:[/bold red] {exc}\n")
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""Splunk REST API client for ATHF.
|
|
2
|
+
|
|
3
|
+
This module provides direct Splunk API integration using authentication tokens.
|
|
4
|
+
Use this when MCP integration is not available or for programmatic access.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
from urllib.parse import urljoin
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
from requests.adapters import HTTPAdapter
|
|
13
|
+
from urllib3.util.retry import Retry
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SplunkClient:
    """Client for Splunk REST API operations.

    Args:
        host: Splunk host (e.g., "splunk.example.com" or "https://splunk.example.com:8089")
        token: Splunk authentication token
        verify_ssl: Whether to verify SSL certificates (default: True)
        timeout: Request timeout in seconds (default: 30)

    Example:
        >>> client = SplunkClient(host="splunk.example.com", token="your-token")
        >>> results = client.search("index=main | head 10", max_count=10)
        >>> for event in results:
        ...     print(event)
    """

    def __init__(self, host: str, token: str, verify_ssl: bool = True, timeout: int = 30):
        self.base_url = self._normalize_host(host)
        self.token = token
        self.verify_ssl = verify_ssl
        self.timeout = timeout

        # Create session with retry logic
        self.session = requests.Session()
        retry_strategy = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Set default headers
        self.session.headers.update(
            {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/x-www-form-urlencoded",
            }
        )

    @staticmethod
    def _normalize_host(host: str) -> str:
        """Return a base URL with scheme and port, without a trailing slash.

        A missing scheme defaults to https. Port 8089 is added only when the
        host carries no explicit port, so a host such as "splunk:9089" is left
        on its own port instead of becoming "splunk:9089:8089".

        Raises:
            ValueError: If the host contains a port that is not a valid number
        """
        # Local import keeps this block independent of the module's import lines.
        from urllib.parse import urlsplit

        host = host.strip()
        if not host.startswith(("http://", "https://")):
            host = f"https://{host}"

        parts = urlsplit(host)
        if parts.port is None:
            # Insert the port into the network location rather than appending
            # it to the whole string, so any path component stays after it.
            parts = parts._replace(netloc=f"{parts.netloc.rstrip(':')}:8089")

        return parts.geturl().rstrip("/")

    @staticmethod
    def _normalize_query(query: str) -> str:
        """Prefix *query* with the ``search`` command unless it already has a leading command.

        A query is left untouched when its first word is exactly ``search`` or
        when it starts with a pipe (a generating command such as ``| tstats``).
        Terms that merely begin with those letters (``searchterm=x``) are
        treated as search terms and receive the prefix.
        """
        stripped = query.lstrip()
        if stripped.startswith("|"):
            return query
        words = stripped.split(None, 1)
        if words and words[0] == "search":
            return query
        return f"search {query}"

    @staticmethod
    def _extract_results(response: Any) -> List[Dict[str, Any]]:
        """Pull result rows out of a parsed response.

        Uses the ``results`` list when present; otherwise collects the
        ``content`` of each item under ``entry``. Returns an empty list when
        neither key exists.
        """
        if "results" in response:
            return response["results"]  # type: ignore[no-any-return]
        if "entry" in response:
            # Handle alternative response format
            return [entry["content"] for entry in response["entry"] if "content" in entry]
        return []

    def _request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        json_response: bool = True,
    ) -> Any:
        """Make HTTP request to Splunk API.

        Args:
            method: HTTP method (GET, POST, DELETE)
            endpoint: API endpoint path
            params: Query parameters
            data: Form data for POST requests
            json_response: Whether to parse JSON response

        Returns:
            Response data (parsed JSON or raw response)

        Raises:
            requests.HTTPError: If request fails
        """
        url = urljoin(self.base_url, endpoint)

        response = self.session.request(
            method=method, url=url, params=params, data=data, verify=self.verify_ssl, timeout=self.timeout
        )

        response.raise_for_status()

        if json_response:
            return response.json()
        return response

    def test_connection(self) -> Dict[str, Any]:
        """Test connection and authentication to Splunk.

        Returns:
            Dict with server info if successful

        Raises:
            requests.HTTPError: If authentication fails
        """
        return self._request("GET", "/services/server/info", params={"output_mode": "json"})  # type: ignore[no-any-return]

    def get_indexes(self) -> List[str]:
        """List available Splunk indexes.

        Returns:
            List of index names
        """
        # Without an explicit count the endpoint returns only its default page
        # of entries, so larger deployments would be silently truncated.
        # NOTE(review): -1 is the "all entries" value used for list endpoints;
        # confirm against the target Splunk version.
        response = self._request("GET", "/services/data/indexes", params={"output_mode": "json", "count": -1})
        return [entry["name"] for entry in response.get("entry", [])]

    def search(
        self,
        query: str,
        earliest_time: str = "-24h",
        latest_time: str = "now",
        max_count: int = 100,
        output_mode: str = "json",
    ) -> List[Dict[str, Any]]:
        """Execute a Splunk search query (oneshot search for quick results).

        Args:
            query: SPL search query
            earliest_time: Start time (e.g., "-24h", "2024-01-01T00:00:00")
            latest_time: End time (e.g., "now", "2024-01-02T00:00:00")
            max_count: Maximum number of results to return
            output_mode: Output format (json, xml, csv). The response is
                always parsed as JSON, so only "json" can succeed here.

        Returns:
            List of search results

        Example:
            >>> results = client.search(
            ...     'index=main sourcetype=linux_secure "Failed password"',
            ...     earliest_time="-1h",
            ...     max_count=50
            ... )
        """
        # Use oneshot search for quick results (no job creation)
        data = {
            "search": self._normalize_query(query),
            "earliest_time": earliest_time,
            "latest_time": latest_time,
            "max_count": max_count,
            "output_mode": output_mode,
        }

        response = self._request("POST", "/services/search/jobs/oneshot", data=data)
        return self._extract_results(response)

    def create_search_job(self, query: str, earliest_time: str = "-24h", latest_time: str = "now", **kwargs: Any) -> str:
        """Create an async search job for long-running queries.

        Args:
            query: SPL search query
            earliest_time: Start time
            latest_time: End time
            **kwargs: Additional search parameters (these override the defaults above)

        Returns:
            Search job ID (sid)

        Raises:
            ValueError: If the response contains no job ID

        Example:
            >>> sid = client.create_search_job(
            ...     'index=* | stats count by sourcetype',
            ...     earliest_time="-7d"
            ... )
            >>> results = client.get_search_results(sid)
        """
        data = {
            "search": self._normalize_query(query),
            "earliest_time": earliest_time,
            "latest_time": latest_time,
            "output_mode": "json",
            **kwargs,
        }

        response = self._request("POST", "/services/search/jobs", data=data)

        # Extract search ID from response
        if "sid" in response:
            return response["sid"]  # type: ignore[no-any-return]
        elif "entry" in response and len(response["entry"]) > 0:
            return response["entry"][0]["name"]  # type: ignore[no-any-return]

        raise ValueError("Could not extract search job ID from response")

    def get_search_job_status(self, sid: str) -> Dict[str, Any]:
        """Get status of a search job.

        Args:
            sid: Search job ID

        Returns:
            Dict with job status information
        """
        return self._request("GET", f"/services/search/jobs/{sid}", params={"output_mode": "json"})  # type: ignore[no-any-return]

    def wait_for_search_job(self, sid: str, poll_interval: int = 2, max_wait: int = 300) -> bool:
        """Wait for search job to complete.

        The status is checked at least once, then again after each sleep until
        the wall-clock deadline passes.

        Args:
            sid: Search job ID
            poll_interval: Seconds between status checks
            max_wait: Maximum seconds to wait

        Returns:
            True if job completed, False if timeout
        """
        # A monotonic deadline accounts for time spent inside the status
        # requests themselves and still terminates when poll_interval is 0.
        deadline = time.monotonic() + max_wait

        while True:
            status = self.get_search_job_status(sid)

            # Check if job is done
            if "entry" in status and len(status["entry"]) > 0:
                content = status["entry"][0].get("content", {})
                if content.get("isDone"):
                    return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False

            # Never sleep past the deadline.
            time.sleep(min(max(poll_interval, 0), remaining))

    def get_search_results(
        self, sid: str, offset: int = 0, count: int = 100, output_mode: str = "json"
    ) -> List[Dict[str, Any]]:
        """Get results from a completed search job.

        Args:
            sid: Search job ID
            offset: Result offset (for pagination)
            count: Number of results to return
            output_mode: Output format

        Returns:
            List of search results
        """
        params = {
            "output_mode": output_mode,
            "offset": offset,
            "count": count,
        }

        response = self._request("GET", f"/services/search/jobs/{sid}/results", params=params)
        return self._extract_results(response)

    def delete_search_job(self, sid: str) -> None:
        """Delete a search job.

        Args:
            sid: Search job ID

        Raises:
            requests.HTTPError: If the delete request fails
        """
        # The body of the DELETE response is not needed and JSON output is not
        # requested, so skip JSON decoding; HTTP errors are still raised.
        self._request("DELETE", f"/services/search/jobs/{sid}", json_response=False)

    def search_async(
        self,
        query: str,
        earliest_time: str = "-24h",
        latest_time: str = "now",
        max_results: int = 100,
        wait: bool = True,
        max_wait: int = 300,
    ) -> List[Dict[str, Any]]:
        """Execute a search asynchronously and return results.

        This is useful for longer-running queries that may timeout with oneshot.
        The search job is deleted afterwards whether or not results were obtained.

        Args:
            query: SPL search query
            earliest_time: Start time
            latest_time: End time
            max_results: Maximum results to return
            wait: Whether to wait for job completion
            max_wait: Maximum seconds to wait for job

        Returns:
            List of search results

        Raises:
            TimeoutError: If wait is True and the job does not finish within max_wait

        Example:
            >>> results = client.search_async(
            ...     'index=* | stats count by sourcetype',
            ...     earliest_time="-7d",
            ...     max_results=1000
            ... )
        """
        # Create search job
        sid = self.create_search_job(query, earliest_time, latest_time)

        try:
            if wait:
                # Wait for completion
                if not self.wait_for_search_job(sid, max_wait=max_wait):
                    raise TimeoutError(f"Search job {sid} did not complete within {max_wait}s")

            # Get results
            return self.get_search_results(sid, count=max_results)

        finally:
            # Clean up search job
            try:
                self.delete_search_job(sid)
            except Exception:
                # Deliberate best-effort: a cleanup failure must not mask the
                # search result or the original error.
                pass
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def create_client_from_env() -> SplunkClient:
    """Build a SplunkClient purely from the process environment.

    Environment variables:
        SPLUNK_HOST: Splunk host (required)
        SPLUNK_TOKEN: Authentication token (required)
        SPLUNK_VERIFY_SSL: "true", "1" or "yes" (any case) enables SSL
            verification; any other value disables it (default: true)

    Returns:
        Configured SplunkClient instance

    Raises:
        ValueError: If SPLUNK_HOST or SPLUNK_TOKEN is missing or empty
    """
    import os

    splunk_host = os.getenv("SPLUNK_HOST")
    splunk_token = os.getenv("SPLUNK_TOKEN")

    # Host is validated before token, so a fully empty environment reports the host.
    if not splunk_host:
        raise ValueError("SPLUNK_HOST environment variable is required")
    if not splunk_token:
        raise ValueError("SPLUNK_TOKEN environment variable is required")

    verify_flag = os.getenv("SPLUNK_VERIFY_SSL", "true")
    should_verify = verify_flag.lower() in ("true", "1", "yes")

    return SplunkClient(host=splunk_host, token=splunk_token, verify_ssl=should_verify)
|