loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,1422 @@
|
|
|
1
|
+
"""Text output handler — default stdout format.
|
|
2
|
+
|
|
3
|
+
Output is grouped by detector, each section with a header and ───── separator.
|
|
4
|
+
Default output: title, severity tag, key evidence fields only.
|
|
5
|
+
Verbose adds: description, full evidence dict, next_steps, data window.
|
|
6
|
+
next_steps are never shown in default output.
|
|
7
|
+
|
|
8
|
+
Looks crafted, not generated. Minimal ASCII decoration.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
import textwrap
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import timedelta
|
|
18
|
+
from typing import Any, TextIO
|
|
19
|
+
|
|
20
|
+
from loghunter.common.display import (
|
|
21
|
+
TEXT_RULE,
|
|
22
|
+
TEXT_RULE_DOUBLE,
|
|
23
|
+
TEXT_RULE_WIDTH,
|
|
24
|
+
human_bytes,
|
|
25
|
+
paint,
|
|
26
|
+
)
|
|
27
|
+
from loghunter.common.finding import (
|
|
28
|
+
BlobCard,
|
|
29
|
+
DigestCard,
|
|
30
|
+
Finding,
|
|
31
|
+
MethodTag,
|
|
32
|
+
RunSummary,
|
|
33
|
+
Severity,
|
|
34
|
+
)
|
|
35
|
+
from loghunter.common.output import OutputHandler, register_handler
|
|
36
|
+
|
|
37
|
+
# Local aliases for the rule width / rule strings imported from
# loghunter.common.display.
_WIDTH = TEXT_RULE_WIDTH
_SEP = TEXT_RULE
_SEP_DOUBLE = TEXT_RULE_DOUBLE
# Column width the label is left-padded to in ``_summary_line``.
_SUMMARY_LABEL_WIDTH = 14

# Minimum (requested_span − data_span) before the data-found line discloses an
# underfilled window — below this the operator effectively got what they asked for.
_UNDERFILL_TOLERANCE = timedelta(hours=1)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _fmt_window(window: tuple) -> str:
|
|
48
|
+
s, e = window
|
|
49
|
+
span = e - s
|
|
50
|
+
days = span.days + span.seconds / 86400
|
|
51
|
+
return (
|
|
52
|
+
f"{s.strftime('%Y-%m-%d %H:%M')} → {e.strftime('%Y-%m-%d %H:%M')}"
|
|
53
|
+
f" ({days:.1f}d)"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _fmt_span(td: timedelta) -> str:
|
|
58
|
+
"""Compact span for the data-found underfill parenthetical.
|
|
59
|
+
|
|
60
|
+
``< 24h`` → integer hours (``"18h"``); ``>= 24h`` → days, integer when whole
|
|
61
|
+
else one decimal (``"2d"``, ``"1.5d"``). Rounding never crosses a unit
|
|
62
|
+
surprisingly: an hours value that rounds up to a full day prints ``"1d"``,
|
|
63
|
+
not ``"24h"``.
|
|
64
|
+
"""
|
|
65
|
+
hours = td.total_seconds() / 3600
|
|
66
|
+
if hours < 24:
|
|
67
|
+
rounded = int(round(hours))
|
|
68
|
+
if rounded < 24:
|
|
69
|
+
return f"{rounded}h"
|
|
70
|
+
# rounded up to a full day — promote the unit rather than print "24h"
|
|
71
|
+
return "1d"
|
|
72
|
+
days = td.total_seconds() / 86400
|
|
73
|
+
if abs(days - round(days)) < 1e-9:
|
|
74
|
+
return f"{int(round(days))}d"
|
|
75
|
+
return f"{days:.1f}d"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _aws_span_str(seconds: float) -> str:
|
|
79
|
+
"""Compact span used by burst rows: 45s / 7m / 3h / 2d."""
|
|
80
|
+
s = int(seconds)
|
|
81
|
+
if s < 60:
|
|
82
|
+
return f"{s}s"
|
|
83
|
+
if s < 3600:
|
|
84
|
+
return f"{s // 60}m"
|
|
85
|
+
if s < 86400:
|
|
86
|
+
return f"{s // 3600}h"
|
|
87
|
+
return f"{s // 86400}d"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ── digest helpers (used by TextHandler.render_digest) ───────────────────────
|
|
91
|
+
|
|
92
|
+
_HIST_GLYPHS = "▁▂▃▄▅▆▇█" # U+2581..U+2588
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _bar_glyph(value: int, peak: int) -> str:
|
|
96
|
+
"""Map a per-bin count to one of 8 block-character glyphs.
|
|
97
|
+
|
|
98
|
+
Zero and below render as the lowest glyph (▁) — visual continuity beats
|
|
99
|
+
a blank space when the histogram band is meant to be read as a line.
|
|
100
|
+
Values at or above peak render as the highest glyph (█).
|
|
101
|
+
"""
|
|
102
|
+
if peak <= 0 or value <= 0:
|
|
103
|
+
return _HIST_GLYPHS[0]
|
|
104
|
+
if value >= peak:
|
|
105
|
+
return _HIST_GLYPHS[-1]
|
|
106
|
+
idx = int((value / peak) * (len(_HIST_GLYPHS) - 1))
|
|
107
|
+
return _HIST_GLYPHS[max(0, min(len(_HIST_GLYPHS) - 1, idx))]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _format_count(n: int) -> str:
|
|
111
|
+
"""Compact-number formatter for histogram peak anchors."""
|
|
112
|
+
if n < 1000:
|
|
113
|
+
return str(n)
|
|
114
|
+
if n < 1_000_000:
|
|
115
|
+
return f"{n / 1000:.1f}k"
|
|
116
|
+
if n < 1_000_000_000:
|
|
117
|
+
return f"{n / 1_000_000:.1f}M"
|
|
118
|
+
return f"{n / 1_000_000_000:.1f}B"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _render_histogram(
|
|
122
|
+
counts: list[int], unit: str, peak: int, *, unavailable: bool = False,
|
|
123
|
+
) -> str:
|
|
124
|
+
"""Render the temporal histogram as a single-line band, flush-left.
|
|
125
|
+
|
|
126
|
+
The line carries BOTH an axis unit label ("hourly bins" / "daily bins")
|
|
127
|
+
AND a scale anchor ("peak: N"). Without both, a busy-flat and a
|
|
128
|
+
quiet-flat timeline render identically — the unit names the bar width
|
|
129
|
+
and the anchor names the bar height.
|
|
130
|
+
|
|
131
|
+
Three rendering branches, in precedence order:
|
|
132
|
+
|
|
133
|
+
1. ``unavailable=True`` → bare ``(timeline unavailable)`` — the caller
|
|
134
|
+
suppressed the histogram because timestamps in the source frame
|
|
135
|
+
could not be parsed with confidence. Distinct from "no events".
|
|
136
|
+
Both former failure modes (low-coverage / zero-span) render here
|
|
137
|
+
identically; the flat card has no footer to differentiate them.
|
|
138
|
+
2. ``peak <= 0`` or empty ``counts`` → ``(no events in window)`` — the
|
|
139
|
+
valid empty-timeline case: no records in the loaded window.
|
|
140
|
+
3. Otherwise → the bar render with unit label + peak anchor.
|
|
141
|
+
"""
|
|
142
|
+
if unavailable:
|
|
143
|
+
return "(timeline unavailable)"
|
|
144
|
+
if peak <= 0 or not counts:
|
|
145
|
+
return "(no events in window)"
|
|
146
|
+
bars = "".join(_bar_glyph(c, peak) for c in counts)
|
|
147
|
+
unit_label = "hourly bins" if unit == "hr" else "daily bins"
|
|
148
|
+
return f"{bars} {unit_label} · peak: {_format_count(peak)}"
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _render_label_value_block(rows: list[tuple[str, str]]) -> list[str]:
|
|
152
|
+
"""Flush-left ``label: value`` block with the value column aligned.
|
|
153
|
+
|
|
154
|
+
Shared by the ambient block (Zone 1) and the fields block (former
|
|
155
|
+
Zone 3) on every digest card. Label width is computed from the rows
|
|
156
|
+
in this block only — no cross-block alignment. The labels are
|
|
157
|
+
flush-left at column 0; alignment is in the value column.
|
|
158
|
+
|
|
159
|
+
Long entities (flows, domains) render in FULL — never truncated. The
|
|
160
|
+
text-output rail forbids truncating naturally-long values on schema
|
|
161
|
+
cards. Blob's wide-list slots use a separate blob-local clamp; see
|
|
162
|
+
``_wrap_blob_slot_value`` below.
|
|
163
|
+
"""
|
|
164
|
+
if not rows:
|
|
165
|
+
return []
|
|
166
|
+
label_w = max(len(label) for label, _ in rows)
|
|
167
|
+
return [
|
|
168
|
+
f"{(label + ':').ljust(label_w + 2)}{value}"
|
|
169
|
+
for label, value in rows
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ── Blob-only: two-line clamp for the wide-list slot (`fields:` / `tokens:`)
|
|
174
|
+
#
|
|
175
|
+
# Blob's `fields:` row carries a top-level-keys list that on a Zeek conn
|
|
176
|
+
# log can easily run 20+ names; the `tokens:` row defensively shares the
|
|
177
|
+
# same clamp so a degenerate token row cannot blow past the 80-col frame.
|
|
178
|
+
# Schema cards keep rendering through ``_render_label_value_block`` and
|
|
179
|
+
# are exempt from this clamp — long entities like flows/domains must
|
|
180
|
+
# render in full per the text-output rail.
|
|
181
|
+
|
|
182
|
+
def _greedy_take(parts: list[str], sep: str, available: int) -> int:
    """Count how many leading ``parts`` fit in ``available`` columns when joined by ``sep``.

    The first part is always taken, even when it alone is wider than
    ``available`` — parts are never split.
    """
    used = 0
    for taken, part in enumerate(parts):
        cost = len(part) if taken == 0 else len(sep) + len(part)
        if taken and used + cost > available:
            return taken
        used += cost
    return len(parts)


def _wrap_blob_slot_value(
    value: str, *, label_col: int, sep: str,
) -> list[str]:
    """Two-line clamp for blob's wide-list slot value.

    ``label_col`` is the column at which line 1 starts; line 2 is
    hang-indented by ``label_col`` spaces. The room per line is
    ``_WIDTH - label_col``. The value is split ONLY on ``sep`` — a part is
    never broken.

    Returns one line when ``value`` fits as-is. Otherwise parts are packed
    greedily onto line 1 and then line 2. When parts are still left over,
    line 2 is shortened until the ``… +N more`` suffix also fits
    (N = parts not rendered); with a single part left on line 2 the
    overflow is accepted. A part longer than the available width lands on
    its own line and may exceed the frame.
    """
    available = _WIDTH - label_col

    # Single-line short-circuit (checked before splitting: nothing to pack).
    if len(value) <= available:
        return [value]

    parts = value.split(sep)
    on_line1 = _greedy_take(parts, sep, available)
    line1 = sep.join(parts[:on_line1])

    remaining = parts[on_line1:]
    if not remaining:
        return [line1]

    indent = " " * label_col
    kept = _greedy_take(remaining, sep, available)
    if kept == len(remaining):
        line2 = indent + sep.join(remaining)
        # An empty second line (possible only with label_col == 0 and a
        # lone empty trailing part) is dropped rather than emitted blank.
        return [line1, line2] if line2 else [line1]

    # Parts are left over: give back line-2 parts until the suffix fits.
    # N grows as parts are removed, so the suffix is recomputed each pass.
    while True:
        suffix = f"{sep}… +{len(remaining) - kept} more"
        packed = sep.join(remaining[:kept])
        if len(packed) + len(suffix) <= available or kept <= 1:
            break  # fits, or cannot shrink further — accept overflow
        kept -= 1
    return [line1, indent + packed + suffix]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _summary_line(label: str, value: object) -> list[str]:
    """Render one run-summary row, wrapping long values under the value column.

    The first output line starts with ``label`` left-padded to
    ``_SUMMARY_LABEL_WIDTH``; continuation lines are indented by the same
    width so wrapped text lines up. Words and hyphenated tokens are never
    split, so a single long token may overflow. An empty value still yields
    one line carrying just the padded label.
    """
    head = f"{label:<{_SUMMARY_LABEL_WIDTH}} "
    hang = " " * len(head)
    chunks = textwrap.wrap(
        str(value),
        width=max(20, _WIDTH - len(head)),
        break_long_words=False,
        break_on_hyphens=False,
    ) or [""]
    return [head + chunks[0]] + [hang + chunk for chunk in chunks[1:]]
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# ── W2 card pipeline — structured findings before row formatting ────────────
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@dataclass
class Section:
    """A single subsection of one detector's findings, ready to render.

    By the time a renderer sees a Section it has been level-filtered,
    severity-sorted and capped; per-detector row formatters must not filter,
    sort or cap again.
    """

    # Subsection heading; None for a flat detector (no subsection line emitted).
    label: str | None
    # Findings to render (post-cap).
    findings: list[Finding]
    # Level-visible size of this section BEFORE the cap; the subsection label
    # always reports this pre-cap count.
    pre_cap_count: int
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@dataclass
class DetectorRenderable:
    """Result of the per-detector pipeline, built by ``_build_renderable``
    before any row formatting happens.

    The totals and severity breakdown are carried alongside the sections as
    pre-cap sidecars, so the group header NEVER has to re-derive severity
    from the post-cap ``Section.findings``.
    """

    # Sections in render order.
    sections: list[Section]
    # Count of level-visible findings before the cap.
    level_visible_total: int
    # Per-severity counts of level-visible findings before the cap.
    severity_breakdown: dict[Severity, int]
    # Number of findings dropped by the cap.
    cap_truncated: int = 0
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# Most urgent first; a severity's position in this tuple is its sort rank.
_SEVERITY_ORDER: tuple[Severity, ...] = (
    Severity.HIGH,
    Severity.MEDIUM,
    Severity.LOW,
    Severity.INFO,
)


def _severity_sort_key(f: Finding) -> int:
    """Sort key ranking a finding by severity alone (HIGH=0 … INFO=3).

    Because Python's sort is stable, findings of equal severity keep the
    order the detector emitted them in (entropy desc, composite-z desc,
    etc.). A severity missing from ``_SEVERITY_ORDER`` raises ``ValueError``.
    """
    rank = _SEVERITY_ORDER.index(f.severity)
    return rank
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _partition_dns(findings: list[Finding]) -> list[Section]:
    """Split DNS findings into "singletons" then "groups".

    A finding whose evidence carries ``subdomain_count`` is a group; every
    other finding is a singleton. Singletons come FIRST (Dave's call — that
    tier is consistently the more interesting one). An empty subsection is
    omitted entirely rather than emitted with no rows.
    """
    singletons: list[Finding] = []
    groups: list[Finding] = []
    for item in findings:
        bucket = groups if "subdomain_count" in item.evidence else singletons
        bucket.append(item)

    ordered = (("singletons", singletons), ("groups", groups))
    return [
        Section(label, members, len(members))
        for label, members in ordered
        if members
    ]
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _partition_aws(findings: list[Finding]) -> list[Section]:
    """Split AWS findings into "burst sweeps" then "ranked principals".

    Routing is by ``evidence["tier"]``: ``burst`` goes to the first section;
    ``ranked`` and the synthetic ``ranked_summary`` line are bundled together
    in the second. Empty sections are omitted.
    """
    bursts: list[Finding] = []
    ranked: list[Finding] = []
    for item in findings:
        tier = item.evidence.get("tier")
        if tier == "burst":
            bursts.append(item)
        elif tier in ("ranked", "ranked_summary"):
            ranked.append(item)
        # NOTE(review): a finding with any other (or missing) tier lands in
        # neither section and is not rendered — confirm that is intended.

    sections: list[Section] = []
    for label, members in (("burst sweeps", bursts), ("ranked principals", ranked)):
        if members:
            sections.append(Section(label, members, len(members)))
    return sections
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _partition_flat(findings: list[Finding]) -> list[Section]:
    """Flat detector — one section with no label.

    The section gets its own copy of the list (as the comprehension-based
    partitioners already do), so later in-place work on
    ``Section.findings`` cannot reorder or extend the caller's list.
    """
    return [Section(None, list(findings), len(findings))]
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# Detector name → partitioner. Detectors not listed here are looked up with a
# ``_partition_flat`` fallback in ``_build_renderable``.
_PARTITIONERS = {
    "dns": _partition_dns,
    "aws": _partition_aws,
}

# Per-detector severity-sort opt-out (CR #2 from James). Severity sort is the
# right DEFAULT — within a flat or per-section list, H → M → L → I reads as
# urgency-first. But syslog's row order CARRIES meaning: the detector emits
# chronologically so a synthetic reboot INFO annotation sits AMONG the rare
# MEDIUM template events near it (the "these rare events cluster around this
# reboot" narrative). Severity-sorting regroups all-MEDIUM-then-all-INFO and
# divorces each reboot from its context. Detectors listed here keep their
# incoming order.
_SEVERITY_SORT_EXEMPT: frozenset[str] = frozenset({"syslog"})

# Synthetic always-show finding tiers (CR #4 from James). These are
# all-clear / quiet-summary rows the detector designed to render
# unconditionally. They are exempt from the W5 cap budget — they neither
# count against the budget nor get dropped when the budget runs out. Today
# the only entry is aws's ``ranked_summary`` (the "nothing stood out" line).
# New synthetic always-show tiers join this set; the renderer is otherwise
# unchanged.
_ALWAYS_SHOW_TIERS: frozenset[str] = frozenset({"ranked_summary"})
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _is_always_show(finding: Finding) -> bool:
    """Report whether ``finding`` is a synthetic always-show row (CR #4).

    Decided solely by ``evidence["tier"]`` being in ``_ALWAYS_SHOW_TIERS``;
    such findings are exempt from the cap.
    """
    tier = finding.evidence.get("tier")
    return tier in _ALWAYS_SHOW_TIERS
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _level_filter(detector: str, findings: list[Finding], verbose_level: int) -> list[Finding]:
|
|
407
|
+
"""W2 pipeline step 1 — the one finding-visibility-by-level rule.
|
|
408
|
+
|
|
409
|
+
Duration hides LOW findings at verbose_level 0 (W6 moved this off the
|
|
410
|
+
detector's run() and into the text-render seam). Every other detector is
|
|
411
|
+
a no-op. The result-set returned to machine handlers is invariant; only
|
|
412
|
+
the text handler applies this filter.
|
|
413
|
+
"""
|
|
414
|
+
if detector == "duration" and verbose_level == 0:
|
|
415
|
+
return [f for f in findings if f.severity != Severity.LOW]
|
|
416
|
+
return findings
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _build_renderable(
    detector: str,
    findings: list[Finding],
    verbose_level: int,
    max_per_detector: int,
) -> DetectorRenderable:
    """Run the W2 pipeline on one detector's findings.

    Order is binding:
      1. level-filter (duration LOW at level 0)
      2. partition into Sections (detector-specific)
      3. capture pre-cap level_visible_total + severity_breakdown
      4. severity-sort each section (skipped for ``_SEVERITY_SORT_EXEMPT``)
      5. cap walks sections in declared order; truncates findings; sets
         cap_truncated; later sections may end up with findings=[] and
         vanish at render time

    Both ``level_visible_total`` and ``severity_breakdown`` are captured
    BEFORE the cap so the group header NEVER drifts to post-cap counts —
    the pre-cap regression test in tests/test_text_output.py guards this.

    ``max_per_detector <= 0`` disables the cap. The caller's ``findings``
    list is never reordered or extended: sorting and truncation always
    rebind ``Section.findings`` to a new list.
    """
    level_visible = _level_filter(detector, findings, verbose_level)

    partition = _PARTITIONERS.get(detector, _partition_flat)
    sections = partition(level_visible)

    level_visible_total = len(level_visible)
    breakdown: dict[Severity, int] = {}
    for f in level_visible:
        breakdown[f.severity] = breakdown.get(f.severity, 0) + 1

    if detector not in _SEVERITY_SORT_EXEMPT:
        for s in sections:
            # sorted() rather than list.sort(): a flat section can hold the
            # very list the caller passed in, and sorting that in place would
            # reorder it for everyone else holding a reference. sorted() is
            # stable, so within-band detector order is still preserved.
            s.findings = sorted(s.findings, key=_severity_sort_key)

    # CR #4: synthetic always-show findings are exempt from the cap. Pull
    # them out per-section before the budget walk so they neither consume
    # the budget nor risk being dropped, then re-append them at the tail
    # of their section (preserving the existing aws renderer's
    # per-principal-then-summary order). Renderer code is unchanged.
    always_show_by_section: list[list[Finding]] = []
    for s in sections:
        always = [f for f in s.findings if _is_always_show(f)]
        if always:
            s.findings = [f for f in s.findings if not _is_always_show(f)]
        always_show_by_section.append(always)

    cap_truncated = 0
    # Cap accounting runs against the cappable count only (always-show
    # findings live outside the budget).
    cappable_total = sum(len(s.findings) for s in sections)
    if max_per_detector > 0 and cappable_total > max_per_detector:
        remaining = max_per_detector
        for s in sections:
            # remaining never goes negative, so the slice keeps at most the
            # unspent budget (nothing once it is exhausted).
            kept = s.findings[:remaining]
            cap_truncated += len(s.findings) - len(kept)
            s.findings = kept
            remaining -= len(kept)

    # Re-append the held-back always-show findings at the tail of their
    # section. This preserves the existing aws renderer's "per-principal
    # rows, then summary line" layout and keeps the all-clear visible even
    # when the cap empties the cappable rows. A section with always-show
    # rows was rebound to a fresh list above, so extend() is safe here.
    for s, always in zip(sections, always_show_by_section):
        if always:
            s.findings.extend(always)

    return DetectorRenderable(
        sections=sections,
        level_visible_total=level_visible_total,
        severity_breakdown=breakdown,
        cap_truncated=cap_truncated,
    )
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _verbose_tail(finding: Finding, indent: str, extras: dict[str, Any] | None = None) -> list[str]:
|
|
501
|
+
"""Curated 'why it scored' — level 1. Returns [] when no material to show.
|
|
502
|
+
|
|
503
|
+
Vanish discipline: a Finding with empty description / next_steps and an
|
|
504
|
+
empty curated-evidence subset renders the title line ALONE — no empty
|
|
505
|
+
headers, no dangling indents, NO trailing ``data window:`` line. The
|
|
506
|
+
data-window line appears only when at least one other body element is
|
|
507
|
+
present.
|
|
508
|
+
"""
|
|
509
|
+
body: list[str] = []
|
|
510
|
+
if finding.description:
|
|
511
|
+
body.append(f"{indent}{finding.description}")
|
|
512
|
+
if extras:
|
|
513
|
+
body.append(f"{indent}evidence:")
|
|
514
|
+
for k, v in extras.items():
|
|
515
|
+
body.append(f"{indent} {k}: {v}")
|
|
516
|
+
if finding.next_steps:
|
|
517
|
+
body.append(f"{indent}next steps:")
|
|
518
|
+
for step in finding.next_steps:
|
|
519
|
+
body.append(f"{indent} · {step}")
|
|
520
|
+
if not body:
|
|
521
|
+
return []
|
|
522
|
+
body.append(f"{indent}data window: {_fmt_window(finding.data_window)}")
|
|
523
|
+
return body
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def _debug_tail(finding: Finding, indent: str) -> list[str]:
    """Raw debug — level 2. Same layout as ``_verbose_tail`` but with the
    FULL evidence dict instead of a curated subset.

    Delegates to ``_verbose_tail`` so the two levels cannot drift apart in
    layout or vanish discipline: empty description / evidence / next_steps
    → ``[]``, and the ``data window:`` line appears only after some other
    body line.
    """
    return _verbose_tail(finding, indent, finding.evidence)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
# Per-detector curated-evidence subsets for level 1 — tolerant: omit absent
|
|
547
|
+
# keys rather than printing ``None``. Per-variant lookup uses existing
|
|
548
|
+
# evidence keys (scan's scan_type, dns's source, aws's tier, syslog by
|
|
549
|
+
# template-vs-reboot shape).
|
|
550
|
+
def _curated_evidence(finding: Finding) -> dict[str, Any]:
|
|
551
|
+
"""Return ONLY the keys present on this Finding from the curated set for
|
|
552
|
+
its detector (and variant where applicable)."""
|
|
553
|
+
ev = finding.evidence
|
|
554
|
+
keys: tuple[str, ...] = ()
|
|
555
|
+
det = finding.detector
|
|
556
|
+
|
|
557
|
+
if det == "beacon":
|
|
558
|
+
keys = (
|
|
559
|
+
"beacon_score", "spectral_ratio", "prominence_norm",
|
|
560
|
+
"jitter_cv", "conn_count", "period_str",
|
|
561
|
+
)
|
|
562
|
+
elif det == "dns":
|
|
563
|
+
src = ev.get("source")
|
|
564
|
+
if "subdomain_count" in ev: # group
|
|
565
|
+
base = ("sample_domains", "unique_sources", "min_entropy", "max_entropy")
|
|
566
|
+
extra = ("was_blocked", "block_ratio", "qtype_counts") if src == "pihole" else ()
|
|
567
|
+
keys = base + extra
|
|
568
|
+
elif src == "pihole": # pihole singleton
|
|
569
|
+
keys = (
|
|
570
|
+
"unique_sources", "querier_ips",
|
|
571
|
+
"was_blocked", "block_ratio",
|
|
572
|
+
"cache_ratio", "forward_ratio", "qtype_counts",
|
|
573
|
+
)
|
|
574
|
+
else: # zeek singleton (and both-mode Zeek with pihole enrichment)
|
|
575
|
+
base = ("rcode_distribution", "unique_sources", "querier_ips")
|
|
576
|
+
extra = ("was_blocked", "block_ratio") if "was_blocked" in ev else ()
|
|
577
|
+
keys = base + extra
|
|
578
|
+
elif det == "syslog":
|
|
579
|
+
if "template_str" in ev:
|
|
580
|
+
keys = ("template_str", "host", "count", "threshold")
|
|
581
|
+
else: # reboot annotation
|
|
582
|
+
keys = ("host", "reboot_ts", "suppressed_window_seconds")
|
|
583
|
+
elif det == "scan":
|
|
584
|
+
keys = ("scan_state_ratio", "top_states", "direction", "pattern_tag")
|
|
585
|
+
elif det == "duration":
|
|
586
|
+
keys = ("avg_bytes_per_second", "conn_states", "connection_count")
|
|
587
|
+
elif det == "aws":
|
|
588
|
+
tier = ev.get("tier")
|
|
589
|
+
if tier == "burst":
|
|
590
|
+
keys = ("new_actions", "new_services", "error_rate", "mean_rarity")
|
|
591
|
+
else: # ranked / ranked_summary
|
|
592
|
+
keys = (
|
|
593
|
+
"composite_z", "z_error_rate", "event_count",
|
|
594
|
+
"top_actions", "distinct_event_source",
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
return {k: ev[k] for k in keys if k in ev and not _is_empty(ev[k])}
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def _is_empty(value: Any) -> bool:
|
|
601
|
+
"""numpy-safe emptiness test for curated evidence values.
|
|
602
|
+
|
|
603
|
+
The naive ``value not in (None, [], {})`` idiom evaluates ``value == []``
|
|
604
|
+
which a numpy scalar broadcasts into an empty boolean array; ``bool()`` of
|
|
605
|
+
that array raises ``ValueError`` and propagates straight out through
|
|
606
|
+
``reporter.write``. ``aws`` burst ``error_rate``, ``scan``
|
|
607
|
+
``scan_state_ratio`` (a rounded pandas mean), and beacon's spectral
|
|
608
|
+
scores all reach this path as numpy scalars under real data —
|
|
609
|
+
``loghunter aws -v`` / ``scan -v`` died on every run.
|
|
610
|
+
|
|
611
|
+
Guard explicitly on the container types we want to omit (None / empty
|
|
612
|
+
str/list/tuple/dict). Anything else — including numpy scalars and
|
|
613
|
+
arrays — is treated as "has content" for display purposes; the renderer
|
|
614
|
+
formats them via the default ``f"{value}"`` path downstream.
|
|
615
|
+
"""
|
|
616
|
+
if value is None:
|
|
617
|
+
return True
|
|
618
|
+
if isinstance(value, (list, tuple, dict, str)) and len(value) == 0:
|
|
619
|
+
return True
|
|
620
|
+
return False
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def _level_tail(finding: Finding, indent: str, verbose_level: int) -> list[str]:
|
|
624
|
+
"""Dispatch to the right tail by level. Level 0 returns []; level 1 emits
|
|
625
|
+
the curated tail; level 2 emits the full debug tail. Both tails honor
|
|
626
|
+
vanish-don't-dash."""
|
|
627
|
+
if verbose_level <= 0:
|
|
628
|
+
return []
|
|
629
|
+
if verbose_level >= 2:
|
|
630
|
+
return _debug_tail(finding, indent)
|
|
631
|
+
return _verbose_tail(finding, indent, _curated_evidence(finding))
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _render_group_header(detector: str, renderable: DetectorRenderable) -> list[str]:
    """Build a detector group's header: a title line followed by ``TEXT_RULE``.

    The title reads ``detector — N findings``, optionally followed by
    `` · `` and one ``<count> <severity value>`` cell per severity, e.g.
    ``detector — N findings · 3 H 18 M 51 I``. Severities are visited in
    ``_SEVERITY_ORDER`` and those with a zero count are left out.

    Both the total and the breakdown are PRE-CAP figures: they are read from
    the renderable's ``level_visible_total`` and ``severity_breakdown``
    sidecars and are never recomputed from ``Section.findings``, which is
    post-cap.
    """
    visible = renderable.level_visible_total
    noun = "finding" if visible == 1 else "findings"
    title = f"{detector} — {visible} {noun}"

    per_severity = renderable.severity_breakdown
    tally = [
        f"{n} {sev.value}"
        for sev in _SEVERITY_ORDER
        if (n := per_severity.get(sev, 0)) > 0
    ]
    if tally:
        title += " · " + " ".join(tally)
    return [title, TEXT_RULE]
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _render_cap_disclosure(detector: str, renderable: DetectorRenderable, cap: int) -> str:
|
|
655
|
+
"""W5 disclosure: factual; deferred error-voice pass owns final wording.
|
|
656
|
+
|
|
657
|
+
Honesty rail (CR #3): the cap trims sections in DECLARED order, NOT
|
|
658
|
+
global severity. For a FLAT detector (one implicit section) the cap is
|
|
659
|
+
indeed by severity — sort-then-cap retains the highest tiers. For a
|
|
660
|
+
SUBSECTIONED detector (dns: singletons-first; aws: bursts-first) a
|
|
661
|
+
later section's HIGH row can be dropped while an earlier section's LOW
|
|
662
|
+
row survives the cap. So "by severity" is true only in the flat case.
|
|
663
|
+
|
|
664
|
+
Rather than spell out the cross-section non-guarantee in two arms, the
|
|
665
|
+
wording simply drops the severity claim. The hidden count and the cap
|
|
666
|
+
cap are what the operator needs to act on. Wording is placeholder-tier
|
|
667
|
+
pending the error-voice pass — the binding constraint is that we MUST
|
|
668
|
+
NOT claim severity-retention the cap doesn't provide.
|
|
669
|
+
"""
|
|
670
|
+
hidden = renderable.cap_truncated
|
|
671
|
+
return (
|
|
672
|
+
f"… {hidden:,} more not shown (showing first {cap:,}). "
|
|
673
|
+
f"Unusually high — narrow with the allowlist, or this detector may be "
|
|
674
|
+
f"misbehaving."
|
|
675
|
+
)
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
class TextHandler(OutputHandler):
|
|
679
|
+
"""Write findings as aligned plain text to stdout (or a given stream)."""
|
|
680
|
+
|
|
681
|
+
def __init__(
|
|
682
|
+
self,
|
|
683
|
+
stream: TextIO = sys.stdout,
|
|
684
|
+
verbose_level: int = 0,
|
|
685
|
+
max_findings_per_detector: int = 100,
|
|
686
|
+
) -> None:
|
|
687
|
+
self._stream = stream
|
|
688
|
+
self._verbose_level = verbose_level
|
|
689
|
+
self._max_findings_per_detector = max_findings_per_detector
|
|
690
|
+
|
|
691
|
+
def begin(self, run_summary: RunSummary) -> None:
    """Write a blank line and then the run-summary banner, before any findings."""
    target = self._stream
    print(file=target)
    banner_text = self._render_run_summary(run_summary)
    print(banner_text, file=target)
|
|
695
|
+
|
|
696
|
+
def write(self, findings: list[Finding]) -> None:
    """Print findings grouped by detector, one aligned block per detector.

    Findings are bucketed by ``finding.detector`` in first-seen order. Each
    bucket goes through the W2 pipeline via ``_build_renderable`` (level-
    filter → partition → pre-cap stats → severity-sort → cap). A detector
    whose level-visible total is zero renders NOTHING — no header, no rule,
    no label (the vanish rule). The cap disclosure line is printed only when
    the cap actually trimmed rows.

    An empty ``findings`` list produces no output at all.
    """
    if not findings:
        return

    buckets: dict[str, list[Finding]] = defaultdict(list)
    for finding in findings:
        buckets[finding.detector].append(finding)

    target = self._stream
    row_cap = self._max_findings_per_detector
    for name, members in buckets.items():
        prepared = _build_renderable(name, members, self._verbose_level, row_cap)
        if prepared.level_visible_total == 0:
            continue

        print(file=target)
        for header_line in _render_group_header(name, prepared):
            print(header_line, file=target)
        for row in self._render_group(name, prepared.sections):
            print(row, file=target)

        if prepared.cap_truncated > 0:
            print(file=target)
            print(_render_cap_disclosure(name, prepared, row_cap), file=target)

    # NOTE(review): trailing blank line is taken to be emitted once, after
    # all detector groups — confirm against the upstream indentation.
    print(file=target)
|
|
730
|
+
|
|
731
|
+
def end(self) -> None:
    """Finish the run. A no-op for text output — stdout needs no closing."""
    return None
|
|
733
|
+
|
|
734
|
+
def _render_run_summary(
    self,
    run_summary: RunSummary,
    banner: str = "LogHunter · Threat Hunt",
) -> str:
    """Render the detect-run banner as one newline-joined string.

    Layout: the banner title, a ``_SEP_DOUBLE`` rule, then whichever of the
    following rows have data — ``Data found:`` (only when both ends of the
    data window are set), ``Records:``, ``Detectors:``, one ``Skipped:`` row
    per skipped detector, one ``Note:`` row per note — and a closing
    ``_SEP_DOUBLE`` rule. Rows are produced by ``_summary_line``.

    Digest no longer flows through this helper — each digest card carries
    its own identity block. This stays the single source of truth for the
    detect-path banner; its output must be byte-identical to its
    pre-flat-digest form on a normal detect run.
    """
    rows = [banner, _SEP_DOUBLE]

    if run_summary.data_window[0] and run_summary.data_window[1]:
        rows += _summary_line("Data found:", self._fmt_data_found(run_summary))

    record_counts = run_summary.record_counts
    if record_counts:
        rendered_counts = " · ".join(
            f"{total:,} {kind}" for kind, total in record_counts.items()
        )
        rows += _summary_line("Records:", rendered_counts)

    if run_summary.detectors_run:
        rows += _summary_line("Detectors:", self._render_detectors_value(run_summary))

    skipped = run_summary.detectors_skipped
    if skipped:
        for detector_name, why in skipped.items():
            rows += _summary_line("Skipped:", f"{detector_name} — {why}")

    for remark in run_summary.notes:
        rows += _summary_line("Note:", remark)

    rows.append(_SEP_DOUBLE)
    return "\n".join(rows)
|
|
777
|
+
|
|
778
|
+
@staticmethod
def _fmt_data_found(run_summary: RunSummary) -> str:
    """Render the value of the ``Data found:`` row.

    Full / disjoint runs go through ``_fmt_window`` UNCHANGED (the same
    helper feeds the digest card and verbose finding tails — it must stay
    byte-identical). Only an underfilled default/explicit window — a data
    span short of ``run_summary.requested_span`` by at least
    ``_UNDERFILL_TOLERANCE`` — swaps in the informative parenthetical, with
    both spans rendered via ``_fmt_span``.
    """
    start, end = run_summary.data_window
    covered = end - start
    requested = run_summary.requested_span

    underfilled = requested is not None and (requested - covered) >= _UNDERFILL_TOLERANCE
    if not underfilled:
        return _fmt_window(run_summary.data_window)

    stamp = "%Y-%m-%d %H:%M"
    return (
        f"{start.strftime(stamp)} → {end.strftime(stamp)}"
        f" ({_fmt_span(covered)} data span in {_fmt_span(requested)} window)"
    )
|
|
797
|
+
|
|
798
|
+
def _render_detectors_value(self, run_summary: RunSummary) -> str:
|
|
799
|
+
"""Build the right-hand side of the Detectors: row.
|
|
800
|
+
|
|
801
|
+
Named methods (``MethodTag(named=True)``) render as ``name (label)``
|
|
802
|
+
with the label painted when ``self._stream`` is a real TTY (and
|
|
803
|
+
NO_COLOR / TERM=dumb don't opt out). Honest house badges
|
|
804
|
+
(``named=False``) render as ``name [label]`` plain. Detectors with
|
|
805
|
+
no ``DETECTOR_METHOD`` constant fall back to the bare name —
|
|
806
|
+
forward-compat for any future detector that ships without one.
|
|
807
|
+
Detectors joined by `` · ``.
|
|
808
|
+
"""
|
|
809
|
+
parts: list[str] = []
|
|
810
|
+
for name in run_summary.detectors_run:
|
|
811
|
+
tag: "MethodTag | None" = run_summary.detector_methods.get(name)
|
|
812
|
+
if tag is None:
|
|
813
|
+
parts.append(name)
|
|
814
|
+
elif tag.named:
|
|
815
|
+
parts.append(f"{name} ({paint(tag.label, stream=self._stream)})")
|
|
816
|
+
else:
|
|
817
|
+
parts.append(f"{name} [{tag.label}]")
|
|
818
|
+
return " · ".join(parts)
|
|
819
|
+
|
|
820
|
+
def _render_group(self, detector: str, sections: list[Section]) -> list[str]:
|
|
821
|
+
"""Render a detector's already-prepared sections (post level-filter,
|
|
822
|
+
sort, and cap). Per-detector renderers do pure row formatting only —
|
|
823
|
+
no filtering, sorting, counting, or capping leaks back here.
|
|
824
|
+
"""
|
|
825
|
+
# Drop empty sections (cap may have emptied a later section — its label
|
|
826
|
+
# vanishes, no lonely label).
|
|
827
|
+
live = [s for s in sections if s.findings]
|
|
828
|
+
if not live:
|
|
829
|
+
return []
|
|
830
|
+
if detector == "beacon":
|
|
831
|
+
return self._render_beacon_group(live)
|
|
832
|
+
if detector == "dns":
|
|
833
|
+
return self._render_dns_group(live)
|
|
834
|
+
if detector == "scan":
|
|
835
|
+
return self._render_scan_group(live)
|
|
836
|
+
if detector == "syslog":
|
|
837
|
+
return self._render_syslog_group(live)
|
|
838
|
+
if detector == "duration":
|
|
839
|
+
return self._render_duration_group(live)
|
|
840
|
+
if detector == "aws":
|
|
841
|
+
return self._render_aws_group(live)
|
|
842
|
+
# Generic fallback — flat detector, one Section with label=None.
|
|
843
|
+
out: list[str] = []
|
|
844
|
+
for s in live:
|
|
845
|
+
for f in s.findings:
|
|
846
|
+
out.append(self._render_finding(f))
|
|
847
|
+
return out
|
|
848
|
+
|
|
849
|
+
def _render_beacon_group(self, sections: list[Section]) -> list[str]:
    """Format beacon findings as fully aligned rows.

    Beacon is a flat detector — one Section with ``label=None`` — so only
    ``sections[0]`` is rendered and per-section column widths equal
    per-detector widths. The source and destination sub-fields are padded
    independently so the → arrows line up.

    Each returned entry is one finding: its row, followed by any verbose
    tail lines from ``_level_tail``, joined with newlines.
    """
    indent = " "
    flat_findings = sections[0].findings  # flat: single section

    table = []
    for finding in flat_findings:
        evidence = finding.evidence
        source = evidence.get("src_ip", "")
        endpoint = (
            f"{evidence.get('dst_ip', '')}:{evidence.get('dst_port', '')}"
            f"/{evidence.get('proto', '')}"
        )
        period = f"period={evidence.get('period_str', '?')}"
        score = f"score={evidence.get('beacon_score', 0):.3f}"
        conns = f"{evidence.get('conn_count', 0):,} conns"
        table.append((str(finding.severity), source, endpoint, period, score, conns, finding))

    # The score cell (index 4) is always "score=0.XXX" — 11 chars — so it
    # needs no width of its own.
    src_w, dst_w, period_w, conns_w = (
        max(len(row[col]) for row in table) for col in (1, 2, 3, 5)
    )

    rendered: list[str] = []
    for severity, source, endpoint, period, score, conns, finding in table:
        head = (
            f"{severity} {source:<{src_w}} → {endpoint:<{dst_w}} "
            f"{period:<{period_w}} {score} "
            f"{conns:>{conns_w}}"
        )
        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))
    return rendered
|
|
885
|
+
|
|
886
|
+
def _render_dns_group(self, sections: list[Section]) -> list[str]:
    """Format DNS findings section by section, in the order given.

    The design calls for singletons FIRST, then groups; this method simply
    preserves the order of ``sections``. Each section starts with a plain
    ``label (N)`` line, where N is the section's pre-cap count — no ──
    rules — and sections after the first are preceded by an empty entry.

    A section labelled ``"singletons"`` uses the singleton row layout; any
    other section uses the group layout, which adds a subdomain-count
    column and an entropy range. Column widths are derived from the
    section's findings before any row is formatted. The blocked column is
    omitted when no row in the section has a truthy ``was_blocked`` —
    preserving the pre-pihole Zeek-only output exactly.
    """
    indent = " "
    rendered: list[str] = []

    def blocked_flag(evidence) -> str:
        return "BLOCKED" if evidence.get("was_blocked") else ""

    def emit(head: str, finding) -> None:
        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))

    for position, section in enumerate(sections):
        heading = f"{section.label} ({section.pre_cap_count})"
        if position > 0:
            rendered.append("")
        rendered.append(heading)

        if section.label == "singletons":
            table = [
                (
                    f"{str(finding.severity):<4}",
                    f"ent={finding.evidence['entropy']:.2f}",
                    f"{finding.evidence['query_count']} qry",
                    f"{finding.evidence['unique_sources']} src",
                    blocked_flag(finding.evidence),
                    finding.title,
                    finding,
                )
                for finding in section.findings
            ]
            ent_w, qry_w, src_w, blocked_w = (
                max(len(row[col]) for row in table) for col in (1, 2, 3, 4)
            )
            show_blocked = blocked_w > 0

            for tag, ent, qry, src, blocked, domain, finding in table:
                head = f" {tag} {ent:<{ent_w}} {qry:>{qry_w}} {src:>{src_w}} "
                if show_blocked:
                    head += f"{blocked:<{blocked_w}} "
                emit(f"{head}{domain}", finding)
            continue

        # Any other label is rendered with the "groups" layout.
        table = []
        for finding in section.findings:
            evidence = finding.evidence
            highest, lowest = evidence["max_entropy"], evidence["min_entropy"]
            if highest == lowest:
                ent = f"ent={highest:.2f}"
            else:
                ent = f"ent={highest:.2f}–{lowest:.2f}"
            table.append((
                f"{str(finding.severity):<4}",
                f"{evidence['subdomain_count']} sub",
                ent,
                f"{evidence['total_queries']} qry",
                f"{evidence['unique_sources']} src",
                blocked_flag(evidence),
                evidence["registrable_domain"],
                finding,
            ))
        sub_w, ent_w, qry_w, src_w, blocked_w = (
            max(len(row[col]) for row in table) for col in (1, 2, 3, 4, 5)
        )
        show_blocked = blocked_w > 0

        for tag, sub, ent, qry, src, blocked, domain, finding in table:
            head = (
                f" {tag} {sub:>{sub_w}} {ent:<{ent_w}} "
                f"{qry:>{qry_w}} {src:>{src_w}} "
            )
            if show_blocked:
                head += f"{blocked:<{blocked_w}} "
            emit(f"{head}{domain}", finding)

    return rendered
|
|
978
|
+
|
|
979
|
+
def _render_scan_group(self, sections: list[Section]) -> list[str]:
    """Format scan findings with columns aligned across all scan types.

    Scan is a flat detector, so only ``sections[0]`` is rendered. Columns:
    severity | scan_type | ratio | src | type-specific middle | metric.
    The middle and metric cells depend on ``scan_type``:

    * ``vertical`` — ``→ dst`` and a port count;
    * ``horizontal`` — ``→ *:port`` and a host count;
    * ``block`` — ``→ *`` and ``<ports>p × <hosts>h``;
    * anything else (slow) — empty middle and ports per active window.

    Widths are derived from the section's findings before any row is
    formatted. Each returned entry is a row plus any ``_level_tail`` lines.
    """
    indent = " "
    flat_findings = sections[0].findings

    table = []
    for finding in flat_findings:
        evidence = finding.evidence
        kind = evidence.get("scan_type", "")
        ratio = f"ratio={evidence.get('scan_state_ratio', 0):.2f}"
        source = evidence.get("src", "")

        if kind == "vertical":
            middle = f"→ {evidence.get('dst', '')}"
            metric = f"{evidence.get('distinct_ports', 0)} ports"
        elif kind == "horizontal":
            middle = f"→ *:{evidence.get('port', '')}"
            metric = f"{evidence.get('distinct_hosts', 0)} hosts"
        elif kind == "block":
            middle = "→ *"
            metric = f"{evidence.get('distinct_ports', 0)}p × {evidence.get('distinct_hosts', 0)}h"
        else:  # slow
            middle = ""
            metric = f"{evidence.get('distinct_ports', 0)} ports/{evidence.get('active_buckets', 0)} win"

        table.append((f"{str(finding.severity):<4}", kind, ratio, source, middle, metric, finding))

    type_w, ratio_w, src_w, middle_w, metric_w = (
        max(len(row[col]) for row in table) for col in range(1, 6)
    )

    rendered: list[str] = []
    for tag, kind, ratio, source, middle, metric, finding in table:
        head = (
            f"{tag} {kind:<{type_w}} {ratio:<{ratio_w}} "
            f"{source:<{src_w}} {middle:<{middle_w}} {metric:>{metric_w}}"
        )
        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))
    return rendered
|
|
1029
|
+
|
|
1030
|
+
def _render_syslog_group(self, sections: list[Section]) -> list[str]:
    """Format syslog findings; flat detector, so only ``sections[0]`` is used.

    A ``Severity.MEDIUM`` finding renders as the severity followed by its
    title (the raw event line). Every other finding is treated as an INFO
    reboot annotation and renders as a compact single line: host (padded to
    the widest host in the section), reboot timestamp (``unknown`` when
    missing or falsy) and the suppressed window in seconds.

    Verbose tails are shared via ``_level_tail`` — for MEDIUM the curated
    subset surfaces the template & count details (drain3 internals that
    stay behind -v / -vv).
    """
    indent = " "
    flat_findings = sections[0].findings

    # Width spans every finding in the section, not only the reboot rows.
    host_w = max(len(f.evidence.get("host", "")) for f in flat_findings)

    rendered: list[str] = []
    for finding in flat_findings:
        tag = str(finding.severity)
        if finding.severity == Severity.MEDIUM:
            head = f"{tag} {finding.title}"
        else:  # INFO — reboot annotation
            evidence = finding.evidence
            host = evidence.get("host", "")
            rebooted_at = evidence.get("reboot_ts") or "unknown"
            window = evidence.get("suppressed_window_seconds", 0)
            head = (
                f"{tag} {host:<{host_w}} reboot @ {rebooted_at}"
                f" suppressed {window}s window"
            )

        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))

    return rendered
|
|
1064
|
+
|
|
1065
|
+
def _render_duration_group(self, sections: list[Section]) -> list[str]:
    """Format duration findings as aligned rows; flat detector.

    Only ``sections[0]`` is rendered. Each sub-field of the flow is padded
    independently so the → arrows align vertically. Columns: severity |
    src → dst:port/proto | max_dur_str | avg_bps | N_conns | states.

    The rate cell is empty when ``avg_bytes_per_second`` is ``None``;
    otherwise it is scaled to ``mbps`` (>= 1,000,000), ``kbps`` (>= 1,000)
    or plain ``bps``. A missing port shows as ``?``. Trailing whitespace is
    stripped from each row before any ``_level_tail`` lines are attached.
    """
    indent = " "
    flat_findings = sections[0].findings

    def rate_cell(bps) -> str:
        if bps is None:
            return ""
        if bps >= 1_000_000:
            return f"{bps / 1_000_000:.1f}mbps"
        if bps >= 1_000:
            return f"{bps / 1_000:.1f}kbps"
        return f"{bps:.0f}bps"

    table = []
    for finding in flat_findings:
        evidence = finding.evidence
        source = evidence.get("src", "")
        port = evidence.get("port")
        proto = evidence.get("proto", "")
        port_text = "?" if port is None else str(port)
        endpoint = f"{evidence.get('dst', '')}:{port_text}/{proto}"
        longest = evidence.get("max_duration_str", "")
        rate = rate_cell(evidence.get("avg_bytes_per_second"))
        count = evidence.get("connection_count", 1)
        conns = f"{count} conn" if count == 1 else f"{count} conns"
        states = evidence.get("conn_states", [])
        state_cell = ", ".join(states) if states else ""
        table.append(
            (str(finding.severity), source, endpoint, longest, rate, conns, state_cell, finding)
        )

    src_w, dst_w, dur_w, bps_w, conns_w, state_w = (
        max(len(row[col]) for row in table) for col in range(1, 7)
    )

    rendered: list[str] = []
    for tag, source, endpoint, longest, rate, conns, state_cell, finding in table:
        head = (
            f"{tag} {source:<{src_w}} → {endpoint:<{dst_w}} "
            f"{longest:<{dur_w}} {rate:>{bps_w}} {conns:>{conns_w}} {state_cell:>{state_w}}"
        ).rstrip()
        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))
    return rendered
|
|
1116
|
+
|
|
1117
|
+
def _render_aws_group(self, sections: list[Section]) -> list[str]:
    """Format AWS findings as subsections: burst sweeps, then ranked principals.

    Sections are rendered in the order given. Each starts with a plain
    ``label (N)`` line (N is the section's pre-cap count, no ── rules), and
    sections after the first are preceded by an empty entry.

    * ``"burst sweeps"`` rows: principal | new actions | new services |
      span | error rate.
    * Any other section is treated as "ranked principals". It bundles
      per-principal ``ranked`` rows (principal | z | error rate | events |
      source IPs) and the synthetic ``ranked_summary`` quiet line — the
      partitioner glued them together. Findings with any other ``tier`` in
      that section are not rendered.

    Each tier computes its own column widths.
    """
    indent = " "
    rendered: list[str] = []

    def emit(head: str, finding) -> None:
        # An empty tail leaves the row untouched.
        tail = _level_tail(finding, indent, self._verbose_level)
        rendered.append("\n".join([head, *tail]))

    def emit_table(table) -> None:
        # Rows are (tag, principal, four metric cells, finding). The
        # principal is left-aligned; the metric cells are right-aligned.
        principal_w, *metric_ws = (
            max(len(row[col]) for row in table) for col in range(1, 6)
        )
        for tag, principal, *metrics, finding in table:
            cells = " ".join(
                f"{cell:>{width}}" for cell, width in zip(metrics, metric_ws)
            )
            emit(f" {tag} {principal:<{principal_w}} {cells}", finding)

    for position, section in enumerate(sections):
        heading = f"{section.label} ({section.pre_cap_count})"
        if position > 0:
            rendered.append("")
        rendered.append(heading)

        if section.label == "burst sweeps":
            burst_rows = []
            for finding in section.findings:
                evidence = finding.evidence
                burst_rows.append((
                    f"{str(finding.severity):<4}",
                    str(evidence.get("principal", "")),
                    f"{int(evidence.get('new_action_count', 0))} new",
                    f"{int(evidence.get('new_service_count', 0))} svc",
                    _aws_span_str(float(evidence.get("span_seconds", 0.0))),
                    f"err={float(evidence.get('error_rate', 0.0)):.0%}",
                    finding,
                ))
            emit_table(burst_rows)
            continue

        # "ranked principals"
        per_principal = [
            f for f in section.findings if f.evidence.get("tier") == "ranked"
        ]
        quiet_lines = [
            f for f in section.findings if f.evidence.get("tier") == "ranked_summary"
        ]

        if per_principal:
            ranked_rows = []
            for finding in per_principal:
                evidence = finding.evidence
                ranked_rows.append((
                    f"{str(finding.severity):<4}",
                    str(evidence.get("principal", "")),
                    f"z={float(evidence.get('composite_z', 0.0)):.2f}",
                    f"err={float(evidence.get('error_rate', 0.0)):.0%}",
                    f"{int(evidence.get('event_count', 0))} ev",
                    f"{int(evidence.get('distinct_source_ip', 0))} ip",
                    finding,
                ))
            emit_table(ranked_rows)

        # NOTE(review): the summary loop is taken to run whether or not any
        # ranked rows exist — confirm against the upstream indentation.
        for finding in quiet_lines:
            evidence = finding.evidence
            tag = f"{str(finding.severity):<4}"
            head = (
                f" {tag} {finding.title} "
                f"({int(evidence.get('scorable_count', 0))} scored; "
                f"top {evidence.get('top_principal', '')} "
                f"z={float(evidence.get('top_composite_z', 0.0)):.2f})"
            )
            emit(head, finding)

    return rendered
|
|
1211
|
+
|
|
1212
|
+
def _render_finding(self, finding: Finding) -> str:
    """Format one finding for the generic fallback path.

    The result is ``<severity> <title>``, followed — when the verbosity
    level yields a tail from ``_level_tail`` — by the tail lines, each on
    its own line.
    """
    head = f"{str(finding.severity)} {finding.title}"
    tail_indent = " "
    # An empty tail leaves the head line untouched.
    tail = _level_tail(finding, tail_indent, self._verbose_level)
    return "\n".join([head, *tail])
|
|
1221
|
+
|
|
1222
|
+
def render_digest(self, card: DigestCard) -> None:
    """Print a digest schema card — flat, flush-left, no banner.

    Order: 3-line identity block · ambient block · histogram · insights ·
    fields. Blocks are separated by one blank line; the ambient, insights
    and fields blocks are skipped entirely (blank line included) when they
    have nothing to show. No header rule, no N.B. footer, no trailing rule.
    The inter-card separator on a multi-card run is emitted by the caller
    (run_digest) immediately before invoking this method.

    Called directly by ``runner.run_digest`` — bypassing the Finding-shaped
    Reporter.begin/write/end lifecycle, because a digest run produces ONE
    card. The Finding render path is intentionally untouched.
    """
    target = self._stream

    def emit_block(block_lines) -> None:
        # Vanish rule: an empty block prints nothing, not even its blank line.
        if block_lines:
            print(file=target)
            for text in block_lines:
                print(text, file=target)

    # ── Identity block ────────────────────────────────────────────────
    print(card.source_name, file=target)
    if card.data_window[0] and card.data_window[1]:
        print(_fmt_window(card.data_window), file=target)
    else:
        # Timeline unavailable: line 2 is a dash; the histogram line below
        # carries the descriptive "(timeline unavailable)".
        print("—", file=target)
    print(
        f"{card.schema} · {card.record_count:,} lines · "
        f"{human_bytes(card.data_size_bytes)}",
        file=target,
    )

    # ── Ambient (former Zone 1) block ─────────────────────────────────
    emit_block(_render_label_value_block(card.zone1_extras))

    # ── Histogram ─────────────────────────────────────────────────────
    print(file=target)
    histogram = _render_histogram(
        card.histogram_counts,
        card.histogram_unit,
        card.histogram_peak,
        unavailable=card.timeline_unavailable,
    )
    print(histogram, file=target)

    # ── Insights ──────────────────────────────────────────────────────
    emit_block(card.insights)

    # ── Fields (former Zone 3) block ──────────────────────────────────
    labelled_cells = []
    for slot in card.fields:
        if slot.cells is None:
            continue  # slot has no cells to show; skip it
        labelled_cells.append((slot.label, " ".join(slot.cells)))
    emit_block(_render_label_value_block(labelled_cells))
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
def render_blob(self, card: BlobCard) -> None:
    """Write a blob digest card to the stream as flat, flush-left text.

    Layout, top to bottom: the source name, one provenance line, a
    blank line, a labeled best-guess headline, a blank line, then the
    label/value slot rows. A slot with nothing to report is left out
    entirely instead of being shown as a dash. The card has no banner,
    footer, inner separator or trailing rule; on a multi-card run the
    caller (_render_blob_for_path) emits the inter-card separator
    right before calling this method.
    """

    def emit(*parts: object) -> None:
        # Every line of the card goes to the handler's stream.
        print(*parts, file=self._stream)

    # ── Identity: name + provenance (a blob has no time window) ──────
    emit(card.source_name)
    is_binary = card.file_type_guess is not None
    size_text = human_bytes(card.byte_size)
    # The terminal-binary case is tested first: a positive magic ID has
    # no notion of lines. Per the original notes, the gzip-container
    # path leaves file_type_guess as None, so this ordering does not
    # shadow the compressed branch. "sampled from head" is card
    # wording, not a literal trace of the reads that were performed.
    if is_binary:
        provenance = f"{size_text} · binary, sampled from head"
    elif card.is_compressed:
        provenance = f"{size_text} compressed · sampled from head"
    else:
        provenance = (
            f"{size_text} · "
            f"sampled {card.sampled_line_count:,} lines across "
            f"{card.sample_read_count} reads"
        )
    emit(provenance)
    emit()

    # ── Headline: labeled best guess ─────────────────────────────────
    emit(
        f"This looks like a {card.file_type_guess}, not a log."
        if is_binary
        else f"Unrecognized source — but this looks like {card.shape_guess}."
    )
    emit()

    # ── Slot rows, collected in display order ────────────────────────
    rows: list[tuple[str, str]] = []

    # "bytes" is the one row that is always present.
    if is_binary:
        if card.file_type_magic is None:
            magic_text = "?"
        else:
            # Drop the leading b' and the closing quote of the repr.
            magic_text = repr(card.file_type_magic)[2:-1]
        bytes_value = (
            f"binary ({card.printable_pct:.1f}% printable), "
            f"magic {magic_text}"
        )
    else:
        utf8_note = ", UTF-8 clean" if card.utf8_clean else ""
        bytes_value = f"text ({card.printable_pct:.1f}% printable){utf8_note}"
    rows.append(("bytes", bytes_value))

    # "shape" appears only when a shape guess was made.
    if card.shape_guess is not None:
        rows.append(("shape", card.shape_guess))

    # "lines" appears only when line statistics exist.
    if card.mean_line_length is not None:
        length_note = ""
        if card.line_length_shape:
            length_note = f", {card.line_length_shape}"
        rows.append((
            "lines",
            f"mean {card.mean_line_length:.0f} chars, "
            f"p95 {card.line_length_p95}, "
            f"max {card.max_line_length}{length_note}",
        ))

    # "fields" or "tokens", never both: JSON field names win when set,
    # otherwise the first five top tokens are shown quoted. Whichever
    # row is added is also the one routed through the wrapping helper.
    clamp_label = None
    clamp_sep = ", "
    if card.json_field_names:
        rows.append(("fields", ", ".join(card.json_field_names)))
        clamp_label = "fields"
    elif card.top_tokens:
        quoted = " ".join(f'"{tok}"' for tok, _ in card.top_tokens[:5])
        rows.append(("tokens", f"{quoted} [literal]"))
        clamp_label, clamp_sep = "tokens", " "

    # "templates" appears only when a distinct-template count exists.
    if card.distinct_templates is not None:
        rows.append((
            "templates",
            f"~{card.distinct_templates} distinct structures over "
            f"{card.sampled_line_count:,} sampled lines",
        ))

    # ── Render: labels padded to the widest label plus two columns ───
    gutter = max(len(name) for name, _ in rows) + 2
    for name, value in rows:
        if name == clamp_label:
            # The wrapped row may come back as several lines; the first
            # shares the label line, the rest are printed as returned.
            pieces = _wrap_blob_slot_value(
                value, label_col=gutter, sep=clamp_sep,
            )
        else:
            pieces = [value]
        emit(f"{(name + ':').ljust(gutter)}{pieces[0]}")
        for extra in pieces[1:]:
            emit(extra)
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
# Module-level registration: associates the name "text" with TextHandler.
# NOTE(review): presumably this is how the text output format is looked up
# by name at runtime — confirm against register_handler's definition.
register_handler("text", TextHandler)
|