loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Splunk log exporter — pulls search results from Splunk REST API to local files.
|
|
2
|
+
|
|
3
|
+
Invoked via: loghunter export (or: loghunter export splunk)
|
|
4
|
+
Connects to the Splunk management port (default 8089), runs hourly-chunked oneshot
|
|
5
|
+
queries, and writes results as flat syslog text to the configured output file.
|
|
6
|
+
|
|
7
|
+
Credentials (in priority order):
|
|
8
|
+
LOGHUNTER_SPLUNK_USER, LOGHUNTER_SPLUNK_PASS environment variables
|
|
9
|
+
username, password in [export.splunk] config section
|
|
10
|
+
|
|
11
|
+
Splunk developer/free licenses enforce a hard per-query result cap at the binary
|
|
12
|
+
level that limits.conf cannot override. Hourly chunking keeps each query well
|
|
13
|
+
under this ceiling. For a 7-day pull this is 168 queries.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
from datetime import datetime, timedelta
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from tqdm import tqdm
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
import splunklib.client as splunk_client
|
|
28
|
+
import splunklib.results as splunk_results
|
|
29
|
+
except ImportError:
|
|
30
|
+
splunk_client = None # type: ignore[assignment]
|
|
31
|
+
splunk_results = None # type: ignore[assignment]
|
|
32
|
+
|
|
33
|
+
# RFC 3164 PRI field: <N> or <NN> or <NNN> at start of line
|
|
34
|
+
# Anchored at the start of the string: "<", one or more decimal digits, ">".
# Any digit count is accepted, not only one to three digits.
PRI_RE = re.compile(r"^<\d+>")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def is_configured(backend_cfg: dict[str, Any]) -> bool:
    """Report whether the Splunk backend section names a host.

    True when [export.splunk] has a non-empty host — preserves prior
    auto-detect behavior.

    Args:
        backend_cfg: The backend's config mapping; only the "host" entry is read.

    Returns:
        True if "host" holds a value that is non-blank once stripped, else False.
    """
    host = backend_cfg.get("host")
    if not host:
        # Missing key, None, or an empty value: not configured. This also avoids
        # calling .strip() on None when the key is present but unset.
        return False
    return bool(str(host).strip())
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def summary_descriptor(backend_cfg: dict[str, Any]) -> str:
    """Identifier shown in the final export summary's `Backend :` line.

    Args:
        backend_cfg: The backend's config mapping; "host" and "port" are read.

    Returns:
        The string "<host>:<port>".
    """
    host = backend_cfg.get("host", "")
    # Fall back to the same management port fetch() connects to when the
    # config omits one, so the summary never ends in a dangling colon.
    port = backend_cfg.get("port", 8089)
    return f"{host}:{port}"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_credentials(config: dict[str, Any]) -> tuple[str, str]:
    """Resolve the Splunk login as a ``(username, password)`` pair.

    Each value is looked up on its own: the LOGHUNTER_SPLUNK_USER /
    LOGHUNTER_SPLUNK_PASS environment variable is used when it holds a
    non-blank value, otherwise the "username" / "password" entry of ``config``.
    Surrounding whitespace is stripped from whichever value is used.

    Args:
        config: Mapping that may carry "username" and "password" strings.

    Returns:
        The resolved ``(username, password)`` tuple.

    Raises:
        ValueError: If either value is still empty after both lookups.
    """
    lookups = (
        ("LOGHUNTER_SPLUNK_USER", "username"),
        ("LOGHUNTER_SPLUNK_PASS", "password"),
    )
    resolved: list[str] = []
    for env_name, cfg_key in lookups:
        value = os.environ.get(env_name, "").strip()
        if not value:
            # The config value is only consulted when the env var is unset or blank.
            value = config.get(cfg_key, "").strip()
        resolved.append(value)

    user, passwd = resolved
    if not (user and passwd):
        raise ValueError(
            "Splunk credentials not found — set LOGHUNTER_SPLUNK_USER and "
            "LOGHUNTER_SPLUNK_PASS, or add username/password to [export.splunk] in config"
        )
    return user, passwd
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _sdk_error_message(exc: Exception, host: str, port: int) -> str:
    """Build the user-facing text for a failed Splunk SDK call.

    The exception is classified by class name only: a class called
    "AuthenticationError" yields the login hint, anything else yields the
    generic connectivity hint that names ``host`` and ``port``.

    Args:
        exc: The exception raised by the SDK call.
        host: Host the call was addressed to.
        port: Port the call was addressed to.

    Returns:
        A one-line message suitable for showing to the user.
    """
    if exc.__class__.__name__ != "AuthenticationError":
        connect_hint = [
            f"Could not connect to Splunk management API at {host}:{port} — ",
            "check [export.splunk].host, [export.splunk].port, network reachability, and credentials",
        ]
        return "".join(connect_hint)

    login_hint = [
        "Splunk login failed — check [export.splunk].username/password in config ",
        "and LOGHUNTER_SPLUNK_USER/LOGHUNTER_SPLUNK_PASS environment overrides",
    ]
    return "".join(login_hint)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _build_hour_windows(
    since: datetime,
    until: datetime,
) -> list[tuple[datetime, datetime]]:
    """Split since..until into consecutive one-hour (start, end) pairs.

    Both endpoints are first floored to the top of their hour, so every pair
    spans exactly one hour and no partial-hour chunk is produced. Any part of
    the range after the floored ``until`` is therefore not covered.

    Args:
        since: Start of the window (timezone-aware or naive).
        until: End of the window (timezone-aware or naive).

    Returns:
        The (chunk_start, chunk_end) pairs, oldest first, carrying the tzinfo
        of ``since``. Empty when the floored range spans less than one hour
        or ``until`` precedes ``since``.
    """
    one_hour = timedelta(hours=1)

    # The datetimes are used as given: replace() leaves tzinfo untouched, so
    # hour boundaries stay in the caller's timezone instead of being
    # re-expressed in the process timezone.
    first_hour = since.replace(minute=0, second=0, microsecond=0)
    last_hour = until.replace(minute=0, second=0, microsecond=0)

    # Whole hours between the floored endpoints; negative when reversed.
    hour_count = (last_hour - first_hour) // one_hour

    windows: list[tuple[datetime, datetime]] = []
    chunk_start = first_hour
    for _ in range(max(hour_count, 0)):
        chunk_end = chunk_start + one_hour
        windows.append((chunk_start, chunk_end))
        chunk_start = chunk_end
    return windows
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def fetch(
    query_config: dict[str, Any],
    splunk_config: dict[str, Any],
    since: datetime,
    until: datetime,
    verbose: bool,
    *,
    skip_confirm: bool = False,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Connect to Splunk and pull all rows in hourly chunks.

    Args:
        query_config: Single query stanza from config (must have a non-blank "spl" key).
        splunk_config: [export.splunk] section of config (host, port, credentials).
        since: Start of window.
        until: End of window.
        verbose: Threaded from the orchestrator. Export stdout is kept terse,
            so this function does not read the flag.
        skip_confirm: Part of the uniform backend contract — Splunk has no
            cost-prompt and ignores this. Accepted so the orchestrator can
            invoke every backend with the same signature.

    Returns:
        A tuple ``(rows, fetch_meta)``:
        - ``rows``: the dict results of every hourly query, concatenated in
          window order.
        - ``fetch_meta``: ``{"units": <hour-window count>, "unit_label": "chunks"}``
          — used by the orchestrator to render the run-summary span string.

    Raises:
        ValueError: If the Splunk SDK is missing, credentials cannot be
            resolved, the query has no SPL, or connecting / running a query fails.
    """
    if splunk_client is None:
        # The install hint must name the published distribution (loghunter-cli).
        raise ValueError("splunk-sdk not installed — run: pip install loghunter-cli[splunk]")

    user, passwd = _get_credentials(splunk_config)
    host = splunk_config.get("host", "")
    port = int(splunk_config.get("port", 8089))
    spl = query_config.get("spl", "")
    if not str(spl).strip():
        # Fail before connecting rather than sending a blank search once per
        # hour window and surfacing it as a connection problem.
        raise ValueError('Splunk query has no "spl" search string — add one to the query stanza in config')

    try:
        service = splunk_client.connect(
            host=host,
            port=port,
            username=user,
            password=passwd,
        )
    except Exception as exc:
        # Any SDK failure is re-raised as ValueError with an actionable message.
        raise ValueError(_sdk_error_message(exc, host, port)) from exc

    windows = _build_hour_windows(since, until)
    all_rows: list[dict[str, Any]] = []

    for chunk_start, chunk_end in tqdm(
        windows,
        desc="fetching",
        unit="hr",
        leave=True,
        bar_format="{desc}: {n_fmt} hours [{elapsed}]",
    ):
        # Window bounds are sent as whole epoch seconds.
        earliest = str(int(chunk_start.timestamp()))
        latest = str(int(chunk_end.timestamp()))
        try:
            job = service.jobs.oneshot(
                spl,
                count=0,
                output_mode="json",
                earliest_time=earliest,
                latest_time=latest,
            )
        except Exception as exc:
            raise ValueError(_sdk_error_message(exc, host, port)) from exc
        # Keep only dict items; anything else the reader yields is dropped.
        chunk = [r for r in splunk_results.JSONResultsReader(job) if isinstance(r, dict)]
        all_rows.extend(chunk)

    return all_rows, {"units": len(windows), "unit_label": "chunks"}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def write(
    rows: list[dict[str, Any]],
    outpath: Path,
    verbose: bool,
) -> tuple[int, dict[str, Any]]:
    """Dump fetched events to ``outpath`` as plain text, one event per line.

    Rows are ordered by comparing their "_time" values (missing ones count as
    ""). Each "_raw" value is stripped of surrounding whitespace, a leading
    PRI token is removed via PRI_RE, and the result is written followed by a
    newline. Rows whose text ends up empty are skipped. Missing parent
    directories are created and an existing file is overwritten.

    Args:
        rows: Result rows from fetch(), each with _time and _raw fields.
        outpath: Destination file path.
        verbose: Not used by this function.

    Returns:
        ``(line_count, write_meta)`` where ``write_meta`` is
        ``{"bytes": <UTF-8 size of the lines written>, "paths": [outpath]}``.

    Raises:
        ValueError: If creating the directory or writing the file raises OSError.
    """

    def _event_time(row: dict[str, Any]) -> Any:
        return row.get("_time", "")

    ordered = sorted(rows, key=_event_time)
    written = 0
    size = 0
    try:
        outpath.parent.mkdir(parents=True, exist_ok=True)
        with outpath.open("w", encoding="utf-8") as out:
            for event in ordered:
                text = PRI_RE.sub("", event.get("_raw", "").strip())
                if not text:
                    continue
                record = text + "\n"
                out.write(record)
                size += len(record.encode("utf-8"))
                written += 1
    except OSError as exc:
        raise ValueError(f"Could not write export file {outpath}: {exc}") from exc
    return written, {"bytes": size, "paths": [outpath]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Output format handlers. Each module registers itself via output.register_handler()."""
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Allowlist export renderer — writes flat allowlist lines to stdout.
|
|
2
|
+
|
|
3
|
+
Invoked when --export-allowlist is passed. Bypasses the normal output pipeline
|
|
4
|
+
entirely. Output is ready to paste directly into a flat allowlist file.
|
|
5
|
+
|
|
6
|
+
Format (one line per finding, sorted by score descending):
|
|
7
|
+
192.0.2.10 192.0.2.1 :22/tcp # score=0.610 period=60.0s
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from loghunter.common.finding import Finding
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def render(findings: list[Finding]) -> None:
    """Write flat allowlist lines to stdout, sorted by score descending.

    Each line encodes src_ip, dst_ip, and an optional :port/proto token drawn
    from finding.evidence, followed by a ``# score=…`` comment and, when
    evidence carries "period_str", a ``period=…`` suffix. Only findings that
    carry at least src_ip and dst_ip are emitted; findings from detectors that
    don't populate those fields are silently skipped. Nothing at all is
    printed when no finding qualifies.

    Example output:
    192.0.2.10 192.0.2.1 :22/tcp # score=0.610 period=60.0s

    Args:
        findings: Findings to export; only their ``evidence`` mapping is read.
    """
    exportable = [f for f in findings if _has_ip_pair(f)]
    # Rank by the same value that is printed, so findings that only carry
    # "score" (no "beacon_score") are ordered correctly instead of tying at 0.
    exportable.sort(key=lambda f: _evidence_score(f.evidence), reverse=True)

    if not exportable:
        return

    print("# loghunter --export-allowlist — review each entry before merging into allowlist")
    print()

    for finding in exportable:
        ev = finding.evidence
        parts = [ev.get("src_ip", ""), ev.get("dst_ip", "")]
        port_token = _port_token(ev.get("dst_port"), ev.get("proto", ""))
        if port_token:
            parts.append(port_token)

        comment = f"# score={_evidence_score(ev):.3f}"
        period_str = ev.get("period_str", "")
        if period_str:
            comment += f" period={period_str}"

        print(" ".join(parts) + " " + comment, file=sys.stdout)


def _evidence_score(evidence: dict[str, object]) -> float:
    """Return "beacon_score" if present, else "score", else 0.0, as a float."""
    return float(evidence.get("beacon_score", evidence.get("score", 0.0)))


def _has_ip_pair(finding: Finding) -> bool:
    """True when the finding's evidence has non-empty "src_ip" and "dst_ip"."""
    ev = finding.evidence
    return bool(ev.get("src_ip")) and bool(ev.get("dst_ip"))


def _port_token(port: int | str | None, proto: str | None) -> str:
    """Build a :port/proto token. Returns empty string if port is absent or zero.

    Args:
        port: Port as an int or numeric string; None or a non-numeric value
            yields an empty token.
        proto: Protocol name; when blank or None the token is just ``:port``.
    """
    if port is None:
        return ""
    try:
        port_int = int(port)
    except (TypeError, ValueError):
        return ""
    if port_int == 0:
        return ""
    proto = (proto or "").strip()
    if proto:
        return f":{port_int}/{proto}"
    return f":{port_int}"
|
loghunter/outputs/csv.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""CSV output handler — flattened findings for spreadsheet import.
|
|
2
|
+
|
|
3
|
+
Evidence dict is flattened with dot-notation keys (e.g. evidence.score).
|
|
4
|
+
One row per finding. Suitable for Excel, Google Sheets, or pandas downstream.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import csv
|
|
10
|
+
import sys
|
|
11
|
+
from typing import Any, TextIO
|
|
12
|
+
|
|
13
|
+
from loghunter.common.finding import Finding, RunSummary
|
|
14
|
+
from loghunter.common.output import OutputHandler, register_handler
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CsvHandler(OutputHandler):
    """Buffer findings and emit them as a single CSV document.

    Rows are collected by write() and only written by end(), because the
    header needs the union of evidence keys across all findings.
    """

    def __init__(self, stream: TextIO | None = None, verbose_level: int = 0) -> None:
        """Set up the handler.

        Args:
            stream: Text stream to write to. None (the default) means the
                ``sys.stdout`` in effect when the handler is constructed.
            verbose_level: Descriptions are included only when this is >= 1.
        """
        # Resolve stdout here rather than in the signature: a default of
        # ``sys.stdout`` is evaluated once at import time and would ignore any
        # later redirection of sys.stdout.
        self._stream: TextIO = sys.stdout if stream is None else stream
        self._verbose_level = verbose_level
        self._writer: csv.DictWriter | None = None
        self._rows: list[dict[str, Any]] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Remember the run summary; it is not written to the CSV by this class."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Flatten the findings and buffer one row per finding until end()."""
        self._rows.extend(self._flatten_finding(f) for f in findings)

    def end(self) -> None:
        """Write the CSV header and rows once all evidence keys are known."""
        base_fields = [
            "detector",
            "severity",
            "title",
            "description",
            "ts_generated",
            "data_window_start",
            "data_window_end",
        ]
        # Evidence columns are the sorted union over every buffered row; rows
        # lacking a column get DictWriter's default empty value.
        evidence_fields = sorted(
            {key for row in self._rows for key in row if key.startswith("evidence.")}
        )
        fieldnames = base_fields + evidence_fields
        self._writer = csv.DictWriter(self._stream, fieldnames=fieldnames)
        self._writer.writeheader()
        for row in self._rows:
            self._writer.writerow(row)

    def _flatten_finding(self, finding: Finding) -> dict[str, Any]:
        """Flatten a Finding into a dict with dot-notation evidence keys.

        The description is blanked unless verbose_level >= 1. Evidence
        entries are copied under ``evidence.<key>`` without conversion.
        """
        row: dict[str, Any] = {
            "detector": finding.detector,
            "severity": finding.severity.name.lower(),
            "title": finding.title,
            "description": finding.description if self._verbose_level >= 1 else "",
            "ts_generated": finding.ts_generated.isoformat(),
            "data_window_start": finding.data_window[0].isoformat(),
            "data_window_end": finding.data_window[1].isoformat(),
        }
        for key, value in finding.evidence.items():
            row[f"evidence.{key}"] = value
        return row
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Runs when this module is imported: hands CsvHandler to register_handler under the name "csv".
register_handler("csv", CsvHandler)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Email output handler — plain text suitable for piping to sendmail. (planned)
|
|
2
|
+
|
|
3
|
+
Formats findings as a plain-text email body. Handler is dormant — not registered
|
|
4
|
+
in the output registry and has no shipped config section. Wiring up will land a
|
|
5
|
+
new config surface and handler registration.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from loghunter.common.finding import Finding, RunSummary
|
|
11
|
+
from loghunter.common.output import OutputHandler, register_handler
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EmailHandler(OutputHandler):
    """Placeholder for a plain-text email output handler (planned).

    The constructor records the SMTP settings, but begin(), write() and end()
    have no implementation yet: nothing is stored, composed, or sent.
    """

    def __init__(
        self,
        smtp_host: str = "localhost",
        smtp_port: int = 25,
        to: str = "",
        from_addr: str = "",
    ) -> None:
        # State the planned implementation is expected to fill in.
        self._run_summary: RunSummary | None = None
        self._findings: list[Finding] = []
        # Delivery settings, kept exactly as passed.
        self._from = from_addr
        self._to = to
        self._smtp_port = smtp_port
        self._smtp_host = smtp_host

    def begin(self, run_summary: RunSummary) -> None:
        """Meant to keep the run summary for the subject line; currently does nothing."""
        return None

    def write(self, findings: list[Finding]) -> None:
        """Meant to accumulate findings for end(); currently does nothing."""
        return None

    def end(self) -> None:
        """Meant to compose and send the email via SMTP; currently does nothing."""
        return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Runs when this module is imported: hands EmailHandler to register_handler under the name "email".
# NOTE(review): the module docstring says this handler is "not registered in the
# output registry", yet this call registers it — confirm which is intended.
register_handler("email", EmailHandler)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""HTML output handler — self-contained report file for browser viewing.
|
|
2
|
+
|
|
3
|
+
Produces a single .html file with the same content as text output, formatted
|
|
4
|
+
for readability in a browser. No external dependencies — all styles inline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import html
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from loghunter.common.finding import Finding, RunSummary
|
|
13
|
+
from loghunter.common.output import OutputHandler, register_handler
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HtmlHandler(OutputHandler):
    """Collect findings and write them out as one standalone HTML page."""

    # Page skeleton filled in by _render_html() through str.format; literal
    # CSS braces are doubled so only the four named fields are substituted.
    _PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>LogHunter report</title>
<style>
body {{ font: 15px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; margin: 32px; color: #1f2933; }}
header {{ border-bottom: 1px solid #c9d1d9; margin-bottom: 24px; padding-bottom: 16px; }}
h1 {{ font-size: 24px; margin: 0 0 12px; }}
h2 {{ font-size: 18px; margin: 28px 0 10px; }}
.meta {{ color: #52606d; }}
.finding {{ border-top: 1px solid #e5e8eb; padding: 12px 0; }}
.tag {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-weight: 700; }}
pre {{ background: #f6f8fa; padding: 12px; overflow-x: auto; }}
</style>
</head>
<body>
<header>
<h1>LogHunter report</h1>
<div class="meta">Data found: {window}</div>
<div class="meta">Records: {records}</div>
<div class="meta">Detectors: {detectors}</div>
</header>
<main>
{findings}
</main>
</body>
</html>
"""

    def __init__(self, output_path: Path | None = None, verbose_level: int = 0) -> None:
        # Without an explicit path the report goes to loghunter-report.html,
        # relative to the current working directory.
        self._output_path = Path("loghunter-report.html") if output_path is None else output_path
        self._verbose_level = verbose_level
        self._findings: list[Finding] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Keep the run summary so the page header can show it."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Queue findings; nothing is written to disk until end()."""
        self._findings.extend(findings)

    def end(self) -> None:
        """Create the parent directory if needed and write the page as UTF-8."""
        target = self._output_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(self._render_html(), encoding="utf-8")

    def _render_html(self) -> str:
        """Build the whole page; header fields stay empty when begin() was never called."""
        window = records = detectors = ""
        summary = self._run_summary
        if summary is not None:
            start, end = summary.data_window
            window = f"{start:%Y-%m-%d %H:%M} → {end:%Y-%m-%d %H:%M}"
            record_parts = [
                f"{count:,} {html.escape(name)}"
                for name, count in summary.record_counts.items()
            ]
            records = " · ".join(record_parts)
            detectors = " ".join(map(html.escape, summary.detectors_run))

        findings = "\n".join(map(self._render_finding, self._findings))
        return self._PAGE.format(
            window=window,
            records=records,
            detectors=detectors,
            findings=findings,
        )

    def _render_finding(self, finding: Finding) -> str:
        """Return the HTML section for one finding, with every value HTML-escaped.

        The description paragraph is included only when verbose_level >= 1.
        """
        evidence = html.escape(str(finding.evidence))
        if self._verbose_level >= 1:
            description = f"<p>{html.escape(finding.description)}</p>"
        else:
            description = ""
        severity = html.escape(str(finding.severity))
        detector = html.escape(finding.detector)
        title = html.escape(finding.title)
        pieces = [
            '<section class="finding">',
            f'<div><span class="tag">{severity}</span> ',
            f"{detector} - {title}</div>",
            f"{description}<pre>{evidence}</pre>",
            "</section>",
        ]
        return "".join(pieces)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Runs when this module is imported: hands HtmlHandler to register_handler under the name "html".
register_handler("html", HtmlHandler)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""JSON output handler — structured findings, one object per finding.
|
|
2
|
+
|
|
3
|
+
Suitable for piping to jq or downstream tooling. Outputs a single JSON object with "run_summary" and "findings" keys; "findings" is an array.
|
|
4
|
+
All datetime fields serialized as ISO 8601 strings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
from typing import Any, TextIO
|
|
12
|
+
|
|
13
|
+
from loghunter.common.finding import Finding, RunSummary
|
|
14
|
+
from loghunter.common.output import OutputHandler, register_handler
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class JsonHandler(OutputHandler):
    """Write the run as one JSON object to stdout or a file.

    The object has two keys: "run_summary" (an object, or null when begin()
    was never called) and "findings" (an array with one object per finding).
    """

    def __init__(self, stream: TextIO | None = None, verbose_level: int = 0) -> None:
        """Set up the handler.

        Args:
            stream: Text stream to write to. None (the default) means the
                ``sys.stdout`` in effect when the handler is constructed.
            verbose_level: Stored, but not consulted anywhere in this class.
        """
        # Resolve stdout here rather than in the signature: a default of
        # ``sys.stdout`` is evaluated once at import time and would ignore any
        # later redirection of sys.stdout.
        self._stream: TextIO = sys.stdout if stream is None else stream
        self._verbose_level = verbose_level
        self._findings: list[Finding] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Store run summary for inclusion in output."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Accumulate findings for serialization at end()."""
        self._findings.extend(findings)

    def end(self) -> None:
        """Serialize the summary and all accumulated findings, then end with a newline."""
        payload = {
            "run_summary": self._run_summary_to_dict(self._run_summary),
            "findings": [self._finding_to_dict(f) for f in self._findings],
        }
        # default=str: values json cannot encode natively are written via str().
        json.dump(payload, self._stream, indent=2, default=str)
        print(file=self._stream)

    def _finding_to_dict(self, finding: Finding) -> dict[str, Any]:
        """Convert a Finding to a dict; datetimes become ISO 8601 strings.

        "severity" is the lower-cased enum member name and "severity_tag" is
        ``str(finding.severity)``. Evidence and next_steps are passed through
        unchanged.
        """
        return {
            "detector": finding.detector,
            "severity": finding.severity.name.lower(),
            "severity_tag": str(finding.severity),
            "title": finding.title,
            "description": finding.description,
            "evidence": finding.evidence,
            "next_steps": finding.next_steps,
            "ts_generated": finding.ts_generated.isoformat(),
            "data_window": [
                finding.data_window[0].isoformat(),
                finding.data_window[1].isoformat(),
            ],
        }

    def _run_summary_to_dict(self, run_summary: RunSummary | None) -> dict[str, Any] | None:
        """Convert RunSummary to a dict, or return None when there is no summary."""
        if run_summary is None:
            return None
        return {
            "data_window": [
                run_summary.data_window[0].isoformat(),
                run_summary.data_window[1].isoformat(),
            ],
            "record_counts": run_summary.record_counts,
            "data_size_bytes": run_summary.data_size_bytes,
            "detectors_run": run_summary.detectors_run,
            "detectors_skipped": run_summary.detectors_skipped,
            "notes": run_summary.notes,
        }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Runs when this module is imported: hands JsonHandler to register_handler under the name "json".
register_handler("json", JsonHandler)
|