loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,724 @@
|
|
|
1
|
+
"""Tests for the syslog digest card (three fixed slots, no fidelity branching).
|
|
2
|
+
|
|
3
|
+
Covers:
|
|
4
|
+
- cliff statistic over host-volume and program-volume (gate, floor, names rank-1)
|
|
5
|
+
- rate statistic over error-rate: kind definition (text matching, not severity),
|
|
6
|
+
word-boundary semantics (errors matches, terror does not, oom-killer matches,
|
|
7
|
+
out of memory matches as a phrase), message-not-raw scope, RATE_FLOOR floor,
|
|
8
|
+
top-host attribution, no badness language
|
|
9
|
+
- three-slot card has no absent slots — rendered card has no N.B. footer
|
|
10
|
+
- ledes derive from speaking gating slots and stay brief
|
|
11
|
+
- summariser shape: entity_label, slots in fixed order
|
|
12
|
+
- CLI dispatch: sniff-driven schema routing to syslog_dir, flag/config precedence,
|
|
13
|
+
cross-schema rejections at CLI and runner layers
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import io
|
|
19
|
+
from datetime import datetime, timedelta, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import pytest
|
|
25
|
+
|
|
26
|
+
import loghunter.cli as cli
|
|
27
|
+
import loghunter.runner as runner
|
|
28
|
+
from loghunter.common.finding import DigestCard, RunSummary
|
|
29
|
+
from loghunter.digest import syslog as syslog_digest
|
|
30
|
+
from loghunter.outputs.text import TextHandler
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ─── Fixtures ────────────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
_NOW = datetime(2026, 6, 11, 12, 0, tzinfo=timezone.utc)
|
|
36
|
+
_BASE_TS = _NOW.timestamp()
|
|
37
|
+
|
|
38
|
+
_SYSLOG_COLUMNS = ["ts", "host", "program", "raw", "message"]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _syslog_row(
    host: str = "host1",
    program: str = "sshd",
    message: str = "Accepted publickey for user from 192.0.2.10",
    ts: float = _BASE_TS,
    raw: str | None = None,
) -> dict:
    """Return one row dict carrying the five canonical syslog columns.

    When ``raw`` is omitted it is synthesised as a ``<14>``-prefixed header
    followed by ``host``, ``program`` and ``message``, so callers that only
    use this helper still get a plausible raw line. The ``message`` column
    is always ``"{program}: {message}"`` — the program name is prefixed to
    the ``message`` argument. Tests that need ``raw`` and ``message`` to
    disagree pass ``raw`` explicitly or build the dict by hand.
    """
    row = dict(ts=ts, host=host, program=program)
    # Explicit raw wins; otherwise fall back to the synthetic header form.
    row["raw"] = (
        raw
        if raw is not None
        else f"<14>Jun 11 12:00:00 {host} {program}: {message}"
    )
    row["message"] = f"{program}: {message}"
    return row
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _syslog_df(rows: list[dict]) -> pd.DataFrame:
    """Build a DataFrame with the canonical syslog columns from row dicts.

    An empty ``rows`` list yields an empty frame that still carries the
    ``_SYSLOG_COLUMNS`` column set.
    """
    if rows:
        return pd.DataFrame(rows, columns=_SYSLOG_COLUMNS)
    return pd.DataFrame(columns=_SYSLOG_COLUMNS)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _run_summary(
    window: tuple[datetime, datetime] = (_NOW - timedelta(days=1), _NOW),
) -> RunSummary:
    """Return a minimal ``RunSummary`` for a ``syslog_raw`` source.

    Only ``window`` varies; every other field is a fixed placeholder
    (100 records under the ``*.log*`` key, no detectors, no notes).
    """
    fixed_fields = {
        "record_counts": {"*.log*": 100},
        "data_size_bytes": 0,
        "detectors_run": [],
        "detectors_skipped": {},
        "notes": [],
        "data_sources": ["syslog_raw"],
    }
    return RunSummary(data_window=window, **fixed_fields)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _card_from_body(body: dict) -> DigestCard:
    """Wrap a summariser ``body`` in a ``DigestCard`` with a fixed envelope.

    ``body`` must provide the ``zone1_extras``, ``insights`` and ``fields``
    keys; the window, record count and histogram are constant test values.
    """
    window_end = _NOW
    window_start = window_end - timedelta(days=1)
    histogram = [1, 2, 3, 5, 8, 5, 3, 2, 1]
    return DigestCard(
        schema="syslog",
        source_name="syslog.log",
        data_window=(window_start, window_end),
        record_count=100,
        histogram_counts=histogram,
        histogram_unit="hr",
        histogram_peak=8,
        zone1_extras=body["zone1_extras"],
        insights=body["insights"],
        fields=body["fields"],
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _render(card: DigestCard) -> str:
    """Render ``card`` through ``TextHandler`` and return the emitted text.

    The output buffer is held locally, so the helper reads back exactly
    what was written to the stream it supplied instead of reaching into
    the handler's private ``_stream`` attribute.
    """
    buffer = io.StringIO()
    TextHandler(stream=buffer).render_digest(card)
    return buffer.getvalue()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ─── host-volume cliff ───────────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
def test_host_volume_dashes_below_population_floor() -> None:
    """Three distinct hosts leave the host-volume slot without cells."""
    # One dominant host plus two singletons: only 3 distinct hosts, which is
    # under POPULATION_FLOOR=5 regardless of how lopsided the counts are.
    rows = [_syslog_row(host="a") for _ in range(20)]
    rows += [_syslog_row(host=name) for name in ("b", "c")]
    slot = syslog_digest._slot_host_volume(_syslog_df(rows))
    assert slot.cells is None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_host_volume_dashes_below_gate() -> None:
    """Six hosts with rank1/rank2 = 15/10 = 1.5 stay under CLIFF_GATE=2.0."""
    lines_per_host = (15, 10, 9, 8, 7, 6)
    rows = [
        _syslog_row(host=f"host{idx}")
        for idx, line_count in enumerate(lines_per_host)
        for _ in range(line_count)
    ]
    slot = syslog_digest._slot_host_volume(_syslog_df(rows))
    assert slot.cells is None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_host_volume_names_rank1_when_speaking() -> None:
    """A 40-vs-10 host split makes the slot speak and name the top host."""
    # "loud" emits 40 lines; five quiet hosts emit 10 each → ratio 4.
    rows = [_syslog_row(host="loud") for _ in range(40)]
    rows += [
        _syslog_row(host=f"quiet{idx}")
        for idx in range(5)
        for _ in range(10)
    ]
    slot = syslog_digest._slot_host_volume(_syslog_df(rows))
    assert slot.cells is not None
    assert slot.entity == "loud"
    # Cell layout checked here: identity, a "%"-suffixed cell, an "x"-suffixed cell.
    assert slot.cells[0] == "loud"
    assert slot.cells[1].endswith("%")
    assert slot.cells[2].endswith("x")
    assert slot.ratio is not None and slot.ratio >= 2.0
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_host_volume_capped_display_above_threshold() -> None:
    """A cliff ratio at or above CLIFF_DISPLAY_CAP renders the capped cell.

    Exercises the shared cell formatter (the display cap imported from the
    conn card) through the host-volume slot.
    """
    # 100 lines from "loud" against one line from each of five quiet hosts
    # gives a ratio of 100, which must display in the capped form.
    rows = [_syslog_row(host="loud") for _ in range(100)]
    rows += [_syslog_row(host=f"quiet{idx}") for idx in range(5)]
    slot = syslog_digest._slot_host_volume(_syslog_df(rows))
    assert slot.cells is not None
    assert slot.cells[2] == ">50x"
    # The uncapped ratio is still carried on the slot (used for lede sort).
    assert slot.ratio == pytest.approx(100.0)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ─── program-volume cliff ────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
def test_program_volume_dashes_below_population_floor() -> None:
    """Three distinct programs leave the program-volume slot without cells."""
    rows = [_syslog_row(program="audisp") for _ in range(20)]
    rows += [_syslog_row(program=name) for name in ("sshd", "kernel")]
    slot = syslog_digest._slot_program_volume(_syslog_df(rows))
    assert slot.cells is None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_program_volume_names_rank1_when_speaking() -> None:
    """An audit-logging-dominated pile: audisp leads each other program 20:1.

    This is the motif the program-volume slot exists to flag — ``audisp``
    emits 60 lines while five other programs emit 3 each.
    """
    background_programs = ("sshd", "kernel", "postfix/smtpd", "systemd", "cron")
    rows = [
        _syslog_row(program="audisp", message="op=USYS_CONFIG res=success")
        for _ in range(60)
    ]
    rows += [
        _syslog_row(program=prog, message="routine line")
        for prog in background_programs
        for _ in range(3)
    ]
    slot = syslog_digest._slot_program_volume(_syslog_df(rows))
    assert slot.cells is not None
    assert slot.entity == "audisp"
    assert slot.cells[0] == "audisp"
    # Program-volume magnitude is the raw line count (mirrors dns domain-volume).
    assert slot.cells[1] == "60"
    assert slot.cells[2].endswith("x")
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ─── error-rate kind ─────────────────────────────────────────────────────────
|
|
194
|
+
|
|
195
|
+
def test_error_rate_fires_on_lines_with_error_token() -> None:
    """Ten lines that all carry an error token yield a 100% error-rate cell."""
    frame = _syslog_df(
        [_syslog_row(message="connect failed: connection refused") for _ in range(10)]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is not None
    assert result.cells[0] == "100%"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_error_rate_does_not_fire_on_clean_lines() -> None:
    """Lines without any error token leave the error-rate slot without cells."""
    frame = _syslog_df(
        [_syslog_row(message="accepted publickey for user") for _ in range(10)]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is None
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def test_error_rate_matches_plural_form_at_word_start() -> None:
    """'errors' matches 'error' — bounded at the start, free suffix."""
    frame = _syslog_df(
        [_syslog_row(message="five errors occurred during sync") for _ in range(10)]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is not None
    assert result.cells[0] == "100%"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_error_rate_does_not_match_substring_in_middle_of_word() -> None:
    """'terror' must NOT count as 'error' — there is no boundary before it."""
    frame = _syslog_df(
        [_syslog_row(message="cosmic terror of the void") for _ in range(10)]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def test_error_rate_matches_multiword_phrase() -> None:
    """'out of memory' is matched as a literal phrase."""
    frame = _syslog_df(
        [_syslog_row(message="reaping process: out of memory") for _ in range(10)]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is not None
    assert result.cells[0] == "100%"
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_error_rate_matches_oom_in_hyphenated_token() -> None:
    """'oom-killer' matches 'oom' — the hyphen is a non-word character."""
    # The row helper prefixes the program name, so the message column here
    # reads "kernel: kernel: oom-killer invoked for pid 4242".
    frame = _syslog_df(
        [
            _syslog_row(
                program="kernel",
                message="kernel: oom-killer invoked for pid 4242",
            )
            for _ in range(10)
        ]
    )
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is not None
    assert result.cells[0] == "100%"
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_error_rate_matches_message_only_never_raw() -> None:
    """Rows with 'error' only in ``raw`` must NOT be flagged.

    Locks the matching scope to the ``message`` column.
    """
    # Built by hand (not via the row helper) so raw and message can disagree:
    # the raw line carries "error" in its hostname, the message is clean.
    template = {
        "ts": _BASE_TS,
        "host": "host1",
        "program": "sshd",
        "raw": "<14>Jun 11 12:00:00 host-error-prefix sshd: clean line",
        "message": "sshd: clean line",
    }
    frame = _syslog_df([dict(template) for _ in range(10)])
    result = syslog_digest._slot_error_rate(frame, "syslog")
    assert result.cells is None
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def test_error_rate_dashes_below_rate_floor() -> None:
    """0.5% error-token lines is under RATE_FLOOR=0.01 → dashes."""
    # 1 error line among 200 total: 1/200 = 0.5%, below the 1% floor.
    rows = [_syslog_row(message="routine activity") for _ in range(199)]
    rows += [_syslog_row(message="connection refused")]
    result = syslog_digest._slot_error_rate(_syslog_df(rows), "syslog")
    assert result.cells is None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def test_error_rate_speaks_at_one_percent_pile() -> None:
    """Exactly 1% error-token lines meets RATE_FLOOR → the slot speaks."""
    # 99 clean lines plus a single error line from "noisy": 1/100 = 1%.
    rows = [_syslog_row(message="routine activity") for _ in range(99)]
    rows += [_syslog_row(host="noisy", message="operation failed")]
    result = syslog_digest._slot_error_rate(_syslog_df(rows), "syslog")
    assert result.cells is not None
    assert result.cells[0] == "1%"
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def test_error_rate_attributes_top_host() -> None:
    """The slot's entity is the host that emitted the most error-token lines."""
    # (host, message, repeat) in emission order; "noisy" has 5 error lines,
    # the two other error-emitting hosts have 2 each.
    plan = (
        ("quiet", "routine activity", 80),
        ("noisy", "connection refused", 5),
        ("other1", "operation failed", 2),
        ("other2", "login denied", 2),
    )
    rows = [
        _syslog_row(host=host, message=message)
        for host, message, repeat in plan
        for _ in range(repeat)
    ]
    result = syslog_digest._slot_error_rate(_syslog_df(rows), "syslog")
    assert result.cells is not None
    assert result.entity == "noisy"
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def test_error_rate_lede_carries_no_badness_adjective() -> None:
    """The error-rate lede states the fraction plainly, with no judgment words."""
    rows = [
        _syslog_row(host="host1", message="routine activity") for _ in range(95)
    ]
    rows += [
        _syslog_row(host="noisy", message="connection refused") for _ in range(5)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    ledes = [line for line in body["insights"] if "error token" in line]
    assert ledes, f"expected an error-rate lede; got: {body['insights']}"
    lede = ledes[0]
    lowered = lede.lower()
    # Plain substring checks against the lower-cased lede.
    forbidden_words = (
        "suspicious", "concerning", "dangerous", "malicious",
        "attack", "alarming", "bad",
    )
    for forbidden in forbidden_words:
        assert forbidden not in lowered, (
            f"error-rate lede must not contain {forbidden!r}; got: {lede}"
        )
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# ─── Three-slot card shape ──────────────────────────────────────────────────
|
|
319
|
+
|
|
320
|
+
def test_summarize_slot_computers_exist_in_fixed_order() -> None:
    """The three private slot computers are present on the digest module.

    The summariser body exposes only the post-selection ``fields`` display
    set, not the full slot list, so the computers are looked up directly.
    Only their existence is asserted here.
    """
    for label in ("host-volume", "program-volume", "error-rate"):
        attr = f"_slot_{label.replace('-', '_')}"
        assert hasattr(syslog_digest, attr), f"missing computer: {attr}"
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def test_summarize_returns_zone1_insights_fields_keys() -> None:
    """``summarize`` returns exactly the three documented top-level keys."""
    expected_keys = {"zone1_extras", "insights", "fields"}
    body = syslog_digest.summarize(_syslog_df([_syslog_row()]), "syslog")
    assert set(body.keys()) == expected_keys
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def test_zone1_extras_carry_distinct_host_and_program_counts() -> None:
    """zone1 extras report distinct host and program counts as strings."""
    # Full cross product: 3 hosts × 2 programs.
    rows = [
        _syslog_row(host=host, program=prog)
        for host in ("h1", "h2", "h3")
        for prog in ("sshd", "kernel")
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    extras = dict(body["zone1_extras"])
    assert extras["hosts"] == "3"
    assert extras["programs"] == "2"
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# ─── Ledes ───────────────────────────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
def test_ledes_silent_on_flat_pile() -> None:
    """When no gating slot speaks, the insights list is empty."""
    # Five hosts with one clean line each, alternating sshd / cron.
    rows = [
        _syslog_row(
            host=f"host{idx}",
            program="cron" if idx % 2 else "sshd",
            message="routine line",
        )
        for idx in range(5)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    assert body["insights"] == []
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def test_ledes_verbalize_identity_and_magnitude() -> None:
    """A speaking host-volume cliff yields a lede that names the host."""
    rows = [_syslog_row(host="loud") for _ in range(40)]
    rows += [
        _syslog_row(host=f"quiet{idx}")
        for idx in range(5)
        for _ in range(10)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    insights = body["insights"]
    naming_loud = [line for line in insights if "loud" in line]
    assert naming_loud, f"expected host-volume lede mentioning 'loud'; got: {body['insights']}"
    # Internal statistic names must never leak into the prose.
    for line in insights:
        lowered = line.lower()
        assert "cliff" not in lowered
        assert "rank1" not in lowered
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def test_ledes_thin_card_stays_brief() -> None:
    """A three-slot card produces at most three insight lines."""
    rows = [
        _syslog_row(host="loud", program="audisp", message="connection refused")
        for _ in range(40)
    ]
    rows += [
        _syslog_row(host=f"q{idx}", program="sshd", message="routine")
        for idx in range(5)
        for _ in range(10)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    assert len(body["insights"]) <= 3
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# ─── Renderer: no footer for syslog cards ───────────────────────────────────
|
|
386
|
+
|
|
387
|
+
def test_render_syslog_card_has_no_absent_footer() -> None:
    """A rendered syslog card must contain neither 'N.B.' nor 'ABSENT'.

    The syslog card has three slots and no absent ones, so the absent-footer
    machinery (exercised by dns for its feed-specific slots) must not light
    up here after some future change.
    """
    rows = [_syslog_row(host="loud", program="audisp") for _ in range(40)]
    rows += [
        _syslog_row(host=f"q{idx}", program="sshd")
        for idx in range(5)
        for _ in range(10)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    rendered = _render(_card_from_body(body))
    assert "N.B." not in rendered
    assert "ABSENT" not in rendered
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def test_render_syslog_card_surfaces_each_speaking_slot_exactly_once() -> None:
    """Speaking slots surface in the rendered card; silent ones leave no trace.

    Under the flat grammar a speaking slot appears either as an insight or
    as a fields row, and non-speaking slots vanish rather than render as
    dashed rows. The assertions check presence only, not occurrence counts.
    """
    # Five distinct hosts: host-a emits 20 "connection refused" lines under
    # audisp; host-0..host-3 emit 5 clean sshd lines each. Note the two
    # programs tie at 20 lines apiece, so only host-a stands out by volume.
    rows = [
        _syslog_row(host="host-a", program="audisp", message="connection refused")
        for _ in range(20)
    ]
    rows += [
        _syslog_row(host=f"host-{idx}", program="sshd", message="accepted publickey")
        for idx in range(4)
        for _ in range(5)
    ]
    body = syslog_digest.summarize(_syslog_df(rows), "syslog")
    rendered = _render(_card_from_body(body))
    # host-a leads host-volume and is the only host with error-token lines,
    # so its name must appear somewhere in the card.
    assert "host-a" in rendered
    assert "%" in rendered
    assert "ABSENT" not in rendered
    assert "N.B." not in rendered
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
# ─── CLI dispatch ───────────────────────────────────────────────────────────
|
|
431
|
+
|
|
432
|
+
def _spy_run_digest(monkeypatch) -> dict:
    """Swap ``runner.run_digest`` for a recorder and return its capture dict.

    The replacement accepts keyword arguments only, merges them into the
    returned dict, and returns ``None``.
    """
    recorded: dict[str, Any] = {}
    monkeypatch.setattr(
        runner, "run_digest", lambda **kwargs: recorded.update(kwargs)
    )
    return recorded
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def _stub_config(monkeypatch, cfg_dict: dict) -> None:
    """Patch ``cli.cfg.load`` so it returns ``cfg_dict`` for any path."""

    def _fake_load(_path):
        return cfg_dict

    monkeypatch.setattr(cli.cfg, "load", _fake_load)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
_SYSLOG_LINE = (
|
|
447
|
+
"<13>Jun 1 12:00:00 examplehost sshd[1234]: Accepted publickey for placeholder\n"
|
|
448
|
+
)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _write_syslog_sniff_file(tmp_path: Path) -> Path:
    """Write ``_SYSLOG_LINE`` to ``tmp_path/syslog.log`` and return that path."""
    target = tmp_path.joinpath("syslog.log")
    target.write_text(_SYSLOG_LINE, encoding="utf-8")
    return target
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def test_cli_digest_syslog_file_sniffs_and_routes_to_syslog_dir(
    tmp_path, monkeypatch,
) -> None:
    """A positional syslog file is sniffed and threaded through as ``syslog_dir``."""
    recorded = _spy_run_digest(monkeypatch)
    _stub_config(monkeypatch, {"loghunter": {}})
    log_file = _write_syslog_sniff_file(tmp_path)
    argv = ["digest", str(log_file)]
    cli._main(argv)
    assert recorded.get("schema") == "syslog"
    assert recorded.get("syslog_dir") == str(log_file)
    # No other source directory may be passed alongside it.
    assert recorded.get("zeek_dir") is None
    assert recorded.get("pihole_dir") is None
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def test_cli_digest_syslog_bare_falls_back_to_configured_dir(tmp_path, monkeypatch) -> None:
    """A bare ``digest`` defaults to schema=conn even with syslog configured.

    With no positional argument the path is config-driven and the schema is
    conn; a configured ``syslog_dir`` alone cannot drive a bare digest under
    the sniff surface — a documented consequence of removing the schema token.
    """
    recorded = _spy_run_digest(monkeypatch)
    configured_dir = tmp_path / "syslog"
    configured_dir.mkdir()
    _stub_config(monkeypatch, {"loghunter": {"syslog_dir": str(configured_dir)}})
    cli._main(["digest"])
    # The configured syslog_dir is deliberately not threaded through.
    assert recorded.get("schema") == "conn"
    assert recorded.get("syslog_dir") is None
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def test_cli_digest_syslog_file_with_since_flag(tmp_path, monkeypatch) -> None:
    """``--since`` is forwarded alongside the sniffed syslog routing."""
    recorded = _spy_run_digest(monkeypatch)
    _stub_config(monkeypatch, {"loghunter": {}})
    log_file = _write_syslog_sniff_file(tmp_path)
    argv = ["digest", str(log_file), "--since=7d"]
    cli._main(argv)
    assert recorded.get("schema") == "syslog"
    assert recorded.get("syslog_dir") == str(log_file)
    assert recorded.get("since") is not None
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# ─── Runner-level dispatch ───────────────────────────────────────────────────
|
|
495
|
+
|
|
496
|
+
def test_run_digest_rejects_both_zeek_and_syslog_dir_at_programmatic_boundary(
    tmp_path,
) -> None:
    """Passing BOTH ``zeek_dir`` and ``syslog_dir`` is rejected by the runner.

    Since the v1 promotion ``zeek_dir`` on its own is valid for the syslog
    schema (Zeek syslog.log), so that case no longer raises. Supplying both
    directories is the contradictory combination — it mirrors the dns
    "zeek + pihole" xor-ladder rejection.
    """
    config: dict[str, Any] = {"loghunter": {}}
    expected_error = "cannot use both zeek_dir and syslog_dir"
    with pytest.raises(ValueError, match=expected_error):
        runner.run_digest(
            config=config,
            schema="syslog",
            syslog_dir=tmp_path,
            zeek_dir=tmp_path / "zeek",
        )
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def test_run_digest_rejects_pihole_dir_at_programmatic_boundary(tmp_path) -> None:
    """``pihole_dir`` is rejected by the runner for the syslog schema."""
    config: dict[str, Any] = {"loghunter": {}}
    expected_error = "pihole_dir is not valid for the syslog schema"
    with pytest.raises(ValueError, match=expected_error):
        runner.run_digest(
            config=config,
            schema="syslog",
            syslog_dir=tmp_path,
            pihole_dir=tmp_path / "pihole",
        )
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def test_run_digest_rejects_missing_syslog_dir(tmp_path) -> None:
    """With neither ``zeek_dir`` nor ``syslog_dir`` the runner reports no source.

    The error is expected to advertise only ``--zeek-dir`` (the one
    source-dir flag in _DIGEST_ALLOWED_LONG_FLAGS) plus the two config keys;
    ``--syslog-dir`` is not an allowed digest flag and must not be mentioned.
    """
    config: dict[str, Any] = {"loghunter": {}}
    with pytest.raises(ValueError, match="no syslog source configured") as exc_info:
        runner.run_digest(config=config, schema="syslog")
    error_text = str(exc_info.value)
    assert "--syslog-dir" not in error_text
    assert "--zeek-dir" in error_text
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def _write_syslog_file(path: Path, lines: list[str]) -> None:
|
|
538
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
539
|
+
path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def test_run_digest_syslog_loads_full_no_default_window(tmp_path, capsys) -> None:
    """A syslog DIRECTORY digest loads everything; no default window applies.

    Digest default-windowing is Zeek-only, so rows older than
    ``default_window`` must survive an unqualified run (no --all, no
    --since). Pins the caller-side Zeek-only gate after the window-model
    unification.
    """
    source_dir = tmp_path / "syslog"
    # NOTE(review): RFC 3164 pads single-digit days with an extra space; the
    # date strings are reproduced exactly as they appear in this view — confirm.
    older_line = "Jun 1 12:00:00 hostA sshd[1]: old line"  # ~5 days before the newer row
    newer_line = "Jun 6 12:00:00 hostB sshd[1]: new line"
    _write_syslog_file(source_dir / "syslog.log", [older_line, newer_line])

    config: dict[str, Any] = {"loghunter": {"default_window": "1d"}}
    runner.run_digest(
        config=config,
        schema="syslog",
        syslog_dir=source_dir,
        skip_confirm=True,
    )
    printed = capsys.readouterr().out
    # Both rows are counted on the identity line — not trimmed to the last 1d.
    assert "2 lines" in printed
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def test_run_digest_syslog_end_to_end_renders_a_card(tmp_path, capsys) -> None:
    """End-to-end: synthetic syslog file -> ``run_digest`` -> rendered card."""
    # 30 dominant audisp lines + 6 background lines from 6 hosts/programs.
    dominant = ["Jun 11 12:00:00 router audisp: op=USYS_CONFIG res=success"] * 30
    background_programs = (
        "sshd", "kernel", "postfix/smtpd", "systemd", "cron", "rsyslogd",
    )
    background = [
        f"Jun 11 12:00:0{i} host{i} {prog}: routine line"
        for i, prog in enumerate(background_programs)
    ]

    log_dir = tmp_path / "syslog"
    _write_syslog_file(log_dir / "syslog.log", dominant + background)

    runner.run_digest(
        config={"loghunter": {}},
        schema="syslog",
        syslog_dir=log_dir,
        load_all=True,
        skip_confirm=True,
    )
    rendered = capsys.readouterr().out

    # Flat-card identity (no banner / no header rule).
    for expected in ("syslog ·", "lines ·"):
        assert expected in rendered
    # The dominant program (audisp) is named, either as an insight sentence
    # or as a fields-block row.
    assert "audisp" in rendered
    # No ABSENT slots, no footer / Zeek-nudge surface under the flat grammar.
    for forbidden in ("ABSENT", "N.B.", "keyword heuristic"):
        assert forbidden not in rendered
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
# ─── Fidelity-aware v1: Zeek feed (severity) vs. flat feed (keyword) ─────────
#
# The Zeek arm of the syslog summariser reads `severity` (RFC 5424 enum) and
# defines "error" as {EMERG, ALERT, CRIT, ERR}. The flat arm keeps the
# keyword-token heuristic. Neither arm renders a footer: the former
# Zeek-evangelisation nudge (which mirrored the DNS Zeek-nudge pattern) went
# away with the N.B. block under the flat card grammar, as the no-footer tests
# below pin. Lede wording forks: Zeek MUST NOT say "token".
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# Column order of the Zeek-feed syslog frames built by the helpers in this
# section.
_ZEEK_SYSLOG_COLUMNS = [
    "ts",
    "host",
    "program",
    "raw",
    "message",
    "facility",
    "severity",
]
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def _zeek_syslog_row(
    host: str = "host1",
    program: str = "sshd",
    message: str = "Accepted publickey for user",
    severity: str = "INFO",
    facility: str = "DAEMON",
    ts: float = _BASE_TS,
) -> dict:
    """Build one row dict for a Zeek-feed syslog frame.

    The returned keys are ``ts``, ``host``, ``program``, ``raw``, ``message``,
    ``facility`` and ``severity``. ``raw`` is a full line with a fixed
    ``Jun 11 12:00:00`` timestamp prefix; ``message`` is the
    ``"<program>: <message>"`` portion only.
    """
    raw_line = f"Jun 11 12:00:00 {host} {program}: {message}"
    tagged_message = f"{program}: {message}"
    row = {
        "ts": ts,
        "host": host,
        "program": program,
        "raw": raw_line,
        "message": tagged_message,
        "facility": facility,
        "severity": severity,
    }
    return row
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _zeek_syslog_df(rows: list[dict]) -> pd.DataFrame:
    """Wrap ``rows`` in a DataFrame with the ``_ZEEK_SYSLOG_COLUMNS`` layout.

    An empty ``rows`` list yields an empty frame that still carries the
    columns.
    """
    # ``None`` is the constructor's default ``data``, so an empty list takes
    # exactly the "columns only" construction path.
    return pd.DataFrame(rows or None, columns=_ZEEK_SYSLOG_COLUMNS)
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def test_error_rate_zeek_feed_uses_severity_not_keyword() -> None:
    """The Zeek arm keys the error-rate slot on ``severity``, not on message text.

    The frame's ``message`` values carry no error tokens, yet 5 of 100 rows
    have an error-set ``severity``; the slot MUST still fire, which proves it
    is severity-based rather than message-token based.
    """
    quiet_rows = [
        _zeek_syslog_row(host="quiet", severity="INFO") for _ in range(95)
    ]
    # "routine activity" contains NO error tokens; severity=CRIT is what the
    # Zeek arm reads. If the slot speaks, it's reading severity.
    noisy_rows = [
        _zeek_syslog_row(
            host="noisy", message="routine activity", severity="CRIT",
        )
        for _ in range(5)
    ]
    frame = _zeek_syslog_df(quiet_rows + noisy_rows)

    slot = syslog_digest._slot_error_rate(frame, "zeek")

    assert slot.cells is not None
    assert slot.cells[0] == "5%"
    assert slot.entity == "noisy"
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def test_error_rate_zeek_feed_absent_severity_dashes() -> None:
    """Zeek arm without a ``severity`` column renders dashes, not "0%".

    The detector's source-blindness rail also asserts that this column may be
    absent.
    """
    # Flat-feed helpers: 5-col frame, no severity.
    frame = _syslog_df([_syslog_row() for _ in range(20)])
    slot = syslog_digest._slot_error_rate(frame, "zeek")
    assert slot.cells is None
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def test_error_rate_zeek_feed_zero_errors_dashes_not_zero_pct() -> None:
    """Glenn rev-1: a severity column with NO error-set values gives a DASH.

    The slot must not render "0%". The shared `_rate()` primitive enforces
    this via RATE_FLOOR; both feeds converge on the same dash semantics for a
    zero kind-count.
    """
    info_rows = [_zeek_syslog_row(severity="INFO") for _ in range(50)]
    debug_rows = [_zeek_syslog_row(severity="DEBUG") for _ in range(50)]
    frame = _zeek_syslog_df(info_rows + debug_rows)

    slot = syslog_digest._slot_error_rate(frame, "zeek")

    assert slot.cells is None
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def test_error_rate_zeek_feed_case_insensitive_severity_match() -> None:
    """Severity matching is case-insensitive.

    Zeek severity is uppercase by convention, but a lowercase ``"err"`` must
    still count as an error so that mixed-case emission is absorbed.
    """
    baseline = [_zeek_syslog_row(severity="INFO") for _ in range(95)]
    lowercase_errors = [
        _zeek_syslog_row(host="noisy", severity="err") for _ in range(5)
    ]
    frame = _zeek_syslog_df(baseline + lowercase_errors)

    slot = syslog_digest._slot_error_rate(frame, "zeek")

    assert slot.cells is not None
    assert slot.cells[0] == "5%"
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def test_error_rate_lede_zeek_feed_says_severity_not_token() -> None:
    """Glenn rev-1: the Zeek lede MUST NOT say "token" or imply keyword matching."""
    quiet_rows = [
        _zeek_syslog_row(host="quiet", severity="INFO") for _ in range(95)
    ]
    noisy_rows = [
        _zeek_syslog_row(host="noisy", severity="ERR") for _ in range(5)
    ]
    body = syslog_digest.summarize(_zeek_syslog_df(quiet_rows + noisy_rows), "zeek")

    # First insight sentence that talks about errors is the error-rate lede.
    lede = next(
        (line for line in body["insights"] if "error" in line.lower()),
        None,
    )
    assert lede is not None, f"expected error-rate lede; got: {body['insights']}"

    lowered = lede.lower()
    assert "token" not in lowered, (
        f"Zeek lede must not mention 'token'; got: {lede!r}"
    )
    # Any lede containing "error-severity" also contains "severity" once
    # lowercased, so the single lowercase check covers both spellings.
    assert "severity" in lowered, (
        f"Zeek lede must speak in severity terms; got: {lede!r}"
    )
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def test_summarize_flat_feed_emits_no_footer_reasons_under_flat_grammar() -> None:
    """The flat-feed summary has no footer surface under the flat card grammar.

    The footer and the Zeek-evangelisation nudge both went with the N.B.
    block; the summariser body now exposes only zone1_extras/insights/fields.
    """
    frame = _syslog_df([_syslog_row() for _ in range(20)])
    body = syslog_digest.summarize(frame, "syslog")
    assert "footer_reasons" not in body

    rendered = _render(_card_from_body(body))
    for forbidden in ("N.B.", "keyword heuristic"):
        assert forbidden not in rendered
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def test_summarize_zeek_feed_emits_no_footer_either() -> None:
|
|
720
|
+
"""Mirror of the flat-feed check on the Zeek feed."""
|
|
721
|
+
rows = [_zeek_syslog_row() for _ in range(20)]
|
|
722
|
+
df = _zeek_syslog_df(rows)
|
|
723
|
+
body = syslog_digest.summarize(df, "zeek")
|
|
724
|
+
assert "footer_reasons" not in body
|