loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,222 @@
1
+ """Splunk log exporter — pulls search results from Splunk REST API to local files.
2
+
3
+ Invoked via: loghunter export (or: loghunter export splunk)
4
+ Connects to the Splunk management port (default 8089), runs hourly-chunked oneshot
5
+ queries, and writes results as flat syslog text to the configured output file.
6
+
7
+ Credentials (in priority order):
8
+ LOGHUNTER_SPLUNK_USER, LOGHUNTER_SPLUNK_PASS environment variables
9
+ username, password in [export.splunk] config section
10
+
11
+ Splunk developer/free licenses enforce a hard per-query result cap at the binary
12
+ level that limits.conf cannot override. Hourly chunking keeps each query well
13
+ under this ceiling. For a 7-day pull this is 168 queries.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import re
20
+ from datetime import datetime, timedelta
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from tqdm import tqdm
25
+
26
+ try:
27
+ import splunklib.client as splunk_client
28
+ import splunklib.results as splunk_results
29
+ except ImportError:
30
+ splunk_client = None # type: ignore[assignment]
31
+ splunk_results = None # type: ignore[assignment]
32
+
33
+ # RFC 3164 PRI field: <N> or <NN> or <NNN> at start of line
34
+ PRI_RE = re.compile(r"^<\d+>")
35
+
36
+
37
def is_configured(backend_cfg: dict[str, Any]) -> bool:
    """Report whether the Splunk backend has a usable ``host`` setting.

    Args:
        backend_cfg: The [export.splunk] config section.

    Returns:
        True only when ``host`` is present and contains something other than
        whitespace; a missing or blank host counts as not configured.
    """
    host = backend_cfg.get("host", "")
    return host.strip() != ""
40
+
41
+
42
def summary_descriptor(backend_cfg: dict[str, Any]) -> str:
    """Build the ``host:port`` label shown on the export summary's `Backend :` line.

    Args:
        backend_cfg: The [export.splunk] config section.

    Returns:
        ``"<host>:<port>"``; either side is empty when its key is missing.
    """
    endpoint = (backend_cfg.get("host", ""), backend_cfg.get("port", ""))
    return "{}:{}".format(*endpoint)
47
+
48
+
49
+ def _get_credentials(config: dict[str, Any]) -> tuple[str, str]:
50
+ """Return (username, password) from env vars or config.
51
+
52
+ Environment variables take priority over config-file values.
53
+ """
54
+ user = os.environ.get("LOGHUNTER_SPLUNK_USER", "").strip() or config.get("username", "").strip()
55
+ passwd = os.environ.get("LOGHUNTER_SPLUNK_PASS", "").strip() or config.get("password", "").strip()
56
+ if not user or not passwd:
57
+ raise ValueError(
58
+ "Splunk credentials not found — set LOGHUNTER_SPLUNK_USER and "
59
+ "LOGHUNTER_SPLUNK_PASS, or add username/password to [export.splunk] in config"
60
+ )
61
+ return user, passwd
62
+
63
+
64
+ def _sdk_error_message(exc: Exception, host: str, port: int) -> str:
65
+ """Return an actionable user-facing message for Splunk SDK failures."""
66
+ exc_name = exc.__class__.__name__
67
+ if exc_name == "AuthenticationError":
68
+ return (
69
+ "Splunk login failed — check [export.splunk].username/password in config "
70
+ "and LOGHUNTER_SPLUNK_USER/LOGHUNTER_SPLUNK_PASS environment overrides"
71
+ )
72
+ return (
73
+ f"Could not connect to Splunk management API at {host}:{port} — "
74
+ f"check [export.splunk].host, [export.splunk].port, network reachability, and credentials"
75
+ )
76
+
77
+
78
+ def _build_hour_windows(
79
+ since: datetime,
80
+ until: datetime,
81
+ ) -> list[tuple[datetime, datetime]]:
82
+ """Return one-hour (start, end) pairs spanning since..until.
83
+
84
+ Both since and until are floored to their hour boundary so every emitted
85
+ chunk is exactly one hour — no partial-hour chunks, mirroring the migration.
86
+
87
+ Args:
88
+ since: Start of the window (timezone-aware or naive).
89
+ until: End of the window (timezone-aware or naive).
90
+
91
+ Returns:
92
+ List of (chunk_start, chunk_end) in the timezone of the passed-in
93
+ datetimes, oldest first.
94
+ """
95
+ # Use datetimes as-is — honor the tzinfo already embedded in them.
96
+ # Calling .astimezone() with no argument would re-express in the process
97
+ # timezone (UTC on a server), shifting hour boundaries away from the user's
98
+ # calendar day. replace() below preserves tzinfo unchanged.
99
+ local_since = since
100
+ local_until = until
101
+
102
+ # Floor both endpoints to their hour boundary
103
+ window_start = local_since.replace(minute=0, second=0, microsecond=0)
104
+ window_end = local_until.replace(minute=0, second=0, microsecond=0)
105
+
106
+ total_hours = int((window_end - window_start).total_seconds() // 3600)
107
+ return [
108
+ (window_start + timedelta(hours=i), window_start + timedelta(hours=i + 1))
109
+ for i in range(max(total_hours, 0))
110
+ ]
111
+
112
+
113
def fetch(
    query_config: dict[str, Any],
    splunk_config: dict[str, Any],
    since: datetime,
    until: datetime,
    verbose: bool,
    *,
    skip_confirm: bool = False,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Connect to Splunk and pull all rows in hourly chunks.

    Args:
        query_config: Single query stanza from config; its "spl" value is the
            search string sent for every chunk.
        splunk_config: [export.splunk] section of config (host, port, credentials).
        since: Start of window.
        until: End of window.
        verbose: Accepted for the uniform backend signature; not used here.
        skip_confirm: Accepted for the uniform backend signature; not used here.

    Returns:
        A tuple ``(rows, fetch_meta)``:
        - ``rows``: every dict yielded by the results reader, across all chunks,
          in chunk order.
        - ``fetch_meta``: ``{"units": <hour-window count>, "unit_label": "chunks"}``.

    Raises:
        ValueError: If the Splunk SDK is not installed, credentials are missing,
            or connecting, running a chunk query, or reading its results fails.
    """
    if splunk_client is None:
        # NOTE(review): the wheel is published as loghunter-cli; confirm that
        # "loghunt[splunk]" is the intended install target.
        raise ValueError("splunk-sdk not installed — run: pip install loghunt[splunk]")

    user, passwd = _get_credentials(splunk_config)
    host = splunk_config.get("host", "")
    port = int(splunk_config.get("port", 8089))
    spl = query_config.get("spl", "")

    try:
        service = splunk_client.connect(
            host=host,
            port=port,
            username=user,
            password=passwd,
        )
    except Exception as exc:
        raise ValueError(_sdk_error_message(exc, host, port)) from exc

    windows = _build_hour_windows(since, until)
    all_rows: list[dict[str, Any]] = []

    for chunk_start, chunk_end in tqdm(
        windows,
        desc="fetching",
        unit="hr",
        leave=True,
        bar_format="{desc}: {n_fmt} hours [{elapsed}]",
    ):
        # Chunk bounds are sent as epoch seconds.
        earliest = str(int(chunk_start.timestamp()))
        latest = str(int(chunk_end.timestamp()))
        try:
            job = service.jobs.oneshot(
                spl,
                count=0,
                output_mode="json",
                earliest_time=earliest,
                latest_time=latest,
            )
            # Consuming the reader stays inside the try: a failure while the
            # response is being read must surface as the same actionable
            # ValueError as a failed query, not as a raw SDK/socket traceback.
            # Non-dict items yielded by the reader are skipped.
            chunk = [r for r in splunk_results.JSONResultsReader(job) if isinstance(r, dict)]
        except Exception as exc:
            raise ValueError(_sdk_error_message(exc, host, port)) from exc
        all_rows.extend(chunk)

    return all_rows, {"units": len(windows), "unit_label": "chunks"}
186
+
187
+
188
def write(
    rows: list[dict[str, Any]],
    outpath: Path,
    verbose: bool,
) -> tuple[int, dict[str, Any]]:
    """Dump exported rows to a flat UTF-8 text file.

    Rows are ordered by their ``_time`` value (missing values sort as ""),
    each ``_raw`` is stripped and has a leading PRI prefix removed, and rows
    that end up empty are skipped. Parent directories are created as needed.

    Args:
        rows: Result rows from fetch(); ``_time`` and ``_raw`` are read from each.
        outpath: Destination file path (overwritten if it exists).
        verbose: Not used by this function.

    Returns:
        ``(written, write_meta)`` where ``written`` counts the rows emitted and
        ``write_meta`` is ``{"bytes": int, "paths": [outpath]}``; ``bytes`` is
        the UTF-8 encoded size of the text passed to the file object.

    Raises:
        ValueError: If the directory or file cannot be created or written.
    """

    def _time_key(row: dict[str, Any]) -> Any:
        return row.get("_time", "")

    ordered = list(rows)
    ordered.sort(key=_time_key)

    written = 0
    n_bytes = 0
    try:
        outpath.parent.mkdir(parents=True, exist_ok=True)
        with outpath.open("w", encoding="utf-8") as sink:
            for event in ordered:
                text = PRI_RE.sub("", event.get("_raw", "").strip())
                if not text:
                    continue
                text += "\n"
                sink.write(text)
                n_bytes += len(text.encode("utf-8"))
                written += 1
    except OSError as exc:
        raise ValueError(f"Could not write export file {outpath}: {exc}") from exc
    return written, {"bytes": n_bytes, "paths": [outpath]}
@@ -0,0 +1 @@
1
+ """Output format handlers. Each module registers itself via output.register_handler()."""
@@ -0,0 +1,75 @@
1
+ """Allowlist export renderer — writes flat allowlist lines to stdout.
2
+
3
+ Invoked when --export-allowlist is passed. Bypasses the normal output pipeline
4
+ entirely. Output is ready to paste directly into a flat allowlist file.
5
+
6
+ Format (one line per finding, sorted by score descending):
7
+ 192.0.2.10 192.0.2.1 :22/tcp # score=0.610 period=60.0s
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import sys
13
+
14
+ from loghunter.common.finding import Finding
15
+
16
+
17
def render(findings: list[Finding]) -> None:
    """Write flat allowlist lines to stdout, sorted by score descending.

    Each line encodes src_ip, dst_ip, and an optional :port/proto token drawn
    from finding.evidence. Only findings that carry at least src_ip and dst_ip
    are emitted; all others are silently skipped. Nothing at all is printed
    (not even the header) when no finding qualifies.

    The ordering and the printed ``score=`` value use the same number:
    ``beacon_score`` when present, otherwise ``score``, otherwise 0.0.
    Findings with equal scores keep their input order.

    Example output:
        192.0.2.10 192.0.2.1 :22/tcp # score=0.610 period=60.0s
    """
    exportable = sorted(
        (f for f in findings if _has_ip_pair(f)),
        key=_finding_score,
        reverse=True,
    )
    if not exportable:
        return

    out = sys.stdout
    print("# loghunter --export-allowlist — review each entry before merging into allowlist", file=out)
    print(file=out)

    for finding in exportable:
        ev = finding.evidence
        parts = [ev.get("src_ip", ""), ev.get("dst_ip", "")]
        port_token = _port_token(ev.get("dst_port"), ev.get("proto", ""))
        if port_token:
            parts.append(port_token)

        comment = f"# score={_finding_score(finding):.3f}"
        period_str = ev.get("period_str", "")
        if period_str:
            comment += f" period={period_str}"

        print(" ".join(parts) + " " + comment, file=out)


def _finding_score(finding: Finding) -> float:
    """Return the score used for both ordering and display of a finding."""
    ev = finding.evidence
    return float(ev.get("beacon_score", ev.get("score", 0.0)))


def _has_ip_pair(finding: Finding) -> bool:
    """True when the finding's evidence has non-empty src_ip and dst_ip."""
    ev = finding.evidence
    return bool(ev.get("src_ip")) and bool(ev.get("dst_ip"))


def _port_token(port: int | str | None, proto: str | None) -> str:
    """Build a :port/proto token.

    Returns an empty string if the port is absent, not convertible to int, or
    zero. The ``/proto`` suffix is omitted when proto is empty or blank.
    """
    if port is None:
        return ""
    try:
        port_int = int(port)
    except (TypeError, ValueError):
        return ""
    if port_int == 0:
        return ""
    proto = (proto or "").strip()
    if proto:
        return f":{port_int}/{proto}"
    return f":{port_int}"
@@ -0,0 +1,70 @@
1
+ """CSV output handler — flattened findings for spreadsheet import.
2
+
3
+ Evidence dict is flattened with dot-notation keys (e.g. evidence.score).
4
+ One row per finding. Suitable for Excel, Google Sheets, or pandas downstream.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import sys
11
+ from typing import Any, TextIO
12
+
13
+ from loghunter.common.finding import Finding, RunSummary
14
+ from loghunter.common.output import OutputHandler, register_handler
15
+
16
+
17
class CsvHandler(OutputHandler):
    """Buffer findings and emit them as a single CSV document at end()."""

    def __init__(self, stream: TextIO | None = None, verbose_level: int = 0) -> None:
        """Create a handler writing to ``stream`` (stdout when omitted).

        Args:
            stream: Text stream that receives the CSV. ``None`` means the
                ``sys.stdout`` in effect when the handler is constructed.
            verbose_level: The ``description`` column is filled only when this
                is >= 1; otherwise it is left empty.
        """
        # A default of ``sys.stdout`` in the signature would be captured once at
        # import time and ignore any later redirection of stdout, so resolve it here.
        self._stream = sys.stdout if stream is None else stream
        self._verbose_level = verbose_level
        self._writer: csv.DictWriter | None = None
        self._rows: list[dict[str, Any]] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Remember the run summary (it is stored but not written into the rows)."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Flatten and buffer one row per finding; nothing is emitted until end()."""
        self._rows.extend(self._flatten_finding(f) for f in findings)

    def end(self) -> None:
        """Write the CSV header and rows once all evidence keys are known.

        The header is the fixed base columns followed by every ``evidence.*``
        key seen in any buffered row, sorted alphabetically. A header line is
        written even when no findings were buffered.
        """
        base_fields = [
            "detector",
            "severity",
            "title",
            "description",
            "ts_generated",
            "data_window_start",
            "data_window_end",
        ]
        evidence_fields = sorted(
            {key for row in self._rows for key in row if key.startswith("evidence.")}
        )
        fieldnames = base_fields + evidence_fields
        self._writer = csv.DictWriter(self._stream, fieldnames=fieldnames)
        self._writer.writeheader()
        for row in self._rows:
            self._writer.writerow(row)

    def _flatten_finding(self, finding: Finding) -> dict[str, Any]:
        """Flatten a Finding into a dict with dot-notation evidence keys."""
        row: dict[str, Any] = {
            "detector": finding.detector,
            "severity": finding.severity.name.lower(),
            "title": finding.title,
            "description": finding.description if self._verbose_level >= 1 else "",
            "ts_generated": finding.ts_generated.isoformat(),
            "data_window_start": finding.data_window[0].isoformat(),
            "data_window_end": finding.data_window[1].isoformat(),
        }
        for key, value in finding.evidence.items():
            row[f"evidence.{key}"] = value
        return row


register_handler("csv", CsvHandler)
@@ -0,0 +1,44 @@
1
+ """Email output handler — plain text suitable for piping to sendmail. (planned)
2
+
3
+ Formats findings as a plain-text email body. Handler is dormant — not registered
4
+ in the output registry and has no shipped config section. Wiring up will land a
5
+ new config surface and handler registration.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from loghunter.common.finding import Finding, RunSummary
11
+ from loghunter.common.output import OutputHandler, register_handler
12
+
13
+
14
class EmailHandler(OutputHandler):
    """Collect findings for a plain-text email; SMTP delivery is not implemented yet."""

    def __init__(
        self,
        smtp_host: str = "localhost",
        smtp_port: int = 25,
        to: str = "",
        from_addr: str = "",
    ) -> None:
        """Record the SMTP endpoint and addresses; no connection is opened."""
        self._smtp_host = smtp_host
        self._smtp_port = smtp_port
        self._to = to
        self._from = from_addr
        self._findings: list[Finding] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Store run summary for the email subject line."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Accumulate findings for transmission at end()."""
        self._findings.extend(findings)

    def end(self) -> None:
        """Placeholder for composing and sending the email via SMTP.

        Currently does nothing: the accumulated findings are neither formatted
        nor sent.
        """
        ...


# NOTE(review): the module docstring describes this handler as dormant and
# "not registered in the output registry", yet this call runs on import —
# confirm which of the two is intended.
register_handler("email", EmailHandler)
@@ -0,0 +1,99 @@
1
+ """HTML output handler — self-contained report file for browser viewing.
2
+
3
+ Produces a single .html file with the same content as text output, formatted
4
+ for readability in a browser. No external dependencies — all styles inline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import html
10
+ from pathlib import Path
11
+
12
+ from loghunter.common.finding import Finding, RunSummary
13
+ from loghunter.common.output import OutputHandler, register_handler
14
+
15
+
16
class HtmlHandler(OutputHandler):
    """Collect findings and write them out as one standalone HTML report."""

    def __init__(self, output_path: Path | None = None, verbose_level: int = 0) -> None:
        """Set the destination file; defaults to ``loghunter-report.html`` in the cwd."""
        self._output_path = Path("loghunter-report.html") if output_path is None else output_path
        self._verbose_level = verbose_level
        self._findings: list[Finding] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Keep the run summary so the header can be rendered later."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Buffer findings; the file is only produced by end()."""
        self._findings.extend(findings)

    def end(self) -> None:
        """Create parent directories as needed and write the rendered report as UTF-8."""
        target = self._output_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(self._render_html(), encoding="utf-8")

    def _render_html(self) -> str:
        """Build the complete HTML document.

        The three header fields stay empty when begin() was never called.
        Record and detector names are HTML-escaped before insertion.
        """
        window = records = detectors = ""
        summary = self._run_summary
        if summary is not None:
            start, end = summary.data_window
            window = f"{start:%Y-%m-%d %H:%M} &rarr; {end:%Y-%m-%d %H:%M}"
            record_bits = [
                f"{count:,} {html.escape(name)}"
                for name, count in summary.record_counts.items()
            ]
            records = " &middot; ".join(record_bits)
            detectors = " ".join(map(html.escape, summary.detectors_run))

        findings = "\n".join(map(self._render_finding, self._findings))
        return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>LogHunter report</title>
<style>
body {{ font: 15px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; margin: 32px; color: #1f2933; }}
header {{ border-bottom: 1px solid #c9d1d9; margin-bottom: 24px; padding-bottom: 16px; }}
h1 {{ font-size: 24px; margin: 0 0 12px; }}
h2 {{ font-size: 18px; margin: 28px 0 10px; }}
.meta {{ color: #52606d; }}
.finding {{ border-top: 1px solid #e5e8eb; padding: 12px 0; }}
.tag {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-weight: 700; }}
pre {{ background: #f6f8fa; padding: 12px; overflow-x: auto; }}
</style>
</head>
<body>
<header>
<h1>LogHunter report</h1>
<div class="meta">Data found: {window}</div>
<div class="meta">Records: {records}</div>
<div class="meta">Detectors: {detectors}</div>
</header>
<main>
{findings}
</main>
</body>
</html>
"""

    def _render_finding(self, finding: Finding) -> str:
        """Render one finding as a ``<section>``; every inserted value is HTML-escaped.

        The description paragraph is included only when verbose_level >= 1.
        """
        evidence = html.escape(str(finding.evidence))
        if self._verbose_level >= 1:
            description = f"<p>{html.escape(finding.description)}</p>"
        else:
            description = ""
        severity = html.escape(str(finding.severity))
        detector = html.escape(finding.detector)
        title = html.escape(finding.title)
        pieces = [
            '<section class="finding">',
            f'<div><span class="tag">{severity}</span> ',
            f"{detector} - {title}</div>",
            f"{description}<pre>{evidence}</pre>",
            "</section>",
        ]
        return "".join(pieces)


register_handler("html", HtmlHandler)
@@ -0,0 +1,77 @@
1
+ """JSON output handler — structured findings, one object per finding.
2
+
3
+ Suitable for piping to jq or downstream tooling. Outputs a JSON array.
4
+ All datetime fields serialized as ISO 8601 strings.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ from typing import Any, TextIO
12
+
13
+ from loghunter.common.finding import Finding, RunSummary
14
+ from loghunter.common.output import OutputHandler, register_handler
15
+
16
+
17
class JsonHandler(OutputHandler):
    """Write the run summary and all findings as one JSON object."""

    def __init__(self, stream: TextIO | None = None, verbose_level: int = 0) -> None:
        """Create a handler writing to ``stream`` (stdout when omitted).

        Args:
            stream: Text stream that receives the JSON. ``None`` means the
                ``sys.stdout`` in effect when the handler is constructed.
            verbose_level: Stored; it does not affect the JSON output.
        """
        # A default of ``sys.stdout`` in the signature would be captured once at
        # import time and ignore any later redirection of stdout, so resolve it here.
        self._stream = sys.stdout if stream is None else stream
        self._verbose_level = verbose_level
        self._findings: list[Finding] = []
        self._run_summary: RunSummary | None = None

    def begin(self, run_summary: RunSummary) -> None:
        """Store run summary for inclusion in output."""
        self._run_summary = run_summary

    def write(self, findings: list[Finding]) -> None:
        """Accumulate findings for serialization at end()."""
        self._findings.extend(findings)

    def end(self) -> None:
        """Serialize the summary and accumulated findings and write them to the stream.

        The payload is ``{"run_summary": ..., "findings": [...]}``, indented by
        two spaces and followed by a newline. Values the json module cannot
        encode natively are converted with ``str``.
        """
        payload = {
            "run_summary": self._run_summary_to_dict(self._run_summary),
            "findings": [self._finding_to_dict(f) for f in self._findings],
        }
        json.dump(payload, self._stream, indent=2, default=str)
        print(file=self._stream)

    def _finding_to_dict(self, finding: Finding) -> dict[str, Any]:
        """Convert a Finding to a dict; timestamps become ISO 8601 strings."""
        return {
            "detector": finding.detector,
            "severity": finding.severity.name.lower(),
            "severity_tag": str(finding.severity),
            "title": finding.title,
            "description": finding.description,
            "evidence": finding.evidence,
            "next_steps": finding.next_steps,
            "ts_generated": finding.ts_generated.isoformat(),
            "data_window": [
                finding.data_window[0].isoformat(),
                finding.data_window[1].isoformat(),
            ],
        }

    def _run_summary_to_dict(self, run_summary: RunSummary | None) -> dict[str, Any] | None:
        """Convert RunSummary to a dict, or return None when begin() was never called."""
        if run_summary is None:
            return None
        return {
            "data_window": [
                run_summary.data_window[0].isoformat(),
                run_summary.data_window[1].isoformat(),
            ],
            "record_counts": run_summary.record_counts,
            "data_size_bytes": run_summary.data_size_bytes,
            "detectors_run": run_summary.detectors_run,
            "detectors_skipped": run_summary.detectors_skipped,
            "notes": run_summary.notes,
        }


register_handler("json", JsonHandler)