loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
tests/test_digest_cli.py
ADDED
|
@@ -0,0 +1,1040 @@
|
|
|
1
|
+
"""Stage 3 fan-out behavior for ``loghunter digest`` — schema-agnostic tests.
|
|
2
|
+
|
|
3
|
+
The per-schema digest test files own single-path CLI routing; this file owns
|
|
4
|
+
the cross-schema fan-out contract: N positionals digested independently,
|
|
5
|
+
per-path outcomes (rendered / empty / error) tallied to a three-way exit
|
|
6
|
+
code, and a shared ``--out`` target receiving concatenated cards.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import io
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
import loghunter.cli as cli
|
|
19
|
+
import loghunter.runner as runner
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ─── Fixtures — single representative line per schema ───────────────────────
|
|
23
|
+
|
|
24
|
+
# One representative log line per input format. Each ends with a newline so
# the file written from it contains exactly one complete record.

# Zeek conn record serialised as NDJSON.
_ZEEK_NDJSON_CONN_LINE = (
    '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10",'
    ' "id.resp_h": "198.51.100.20", "id.resp_p": 443,'
    ' "proto": "tcp", "duration": 1.23}\n'
)

# Zeek DNS record serialised as NDJSON (carries a "query" field).
_ZEEK_DNS_NDJSON_LINE = '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10", "query": "example.test"}\n'

# dnsmasq / Pi-hole query line.
_PIHOLE_LINE = "Jun 1 12:00:00 piholehost dnsmasq[123]: query[A] example.test from 192.0.2.10\n"

# Syslog line with a leading <PRI> field.
_SYSLOG_LINE = "<13>Jun 1 12:00:00 examplehost sshd[1234]: Accepted publickey for placeholder\n"

# Free-form text that is not meant to match any of the formats above.
_BLOB_LINE = "totally-unrecognized-application-banner xyzzy 42 frobnicate\n"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _stub_config(monkeypatch, cfg_dict: dict | None = None) -> None:
    """Replace ``cli.cfg.load`` with a stub that ignores its path argument.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture; the patch is undone
            automatically at test teardown.
        cfg_dict: Configuration mapping the stub should return. When omitted
            (``None``) the stub returns a minimal ``{"loghunter": {}}``.
    """

    def _fake_load(_p):
        # Compare against None rather than relying on truthiness so that an
        # explicitly supplied empty dict is returned as-is instead of being
        # silently swapped for the default.
        if cfg_dict is None:
            return {"loghunter": {}}
        return cfg_dict

    monkeypatch.setattr(cli.cfg, "load", _fake_load)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _spy_run_digest_calls(monkeypatch) -> list[dict[str, Any]]:
    """Swap ``runner.run_digest`` for a recorder and return its call log.

    The returned list is appended to (one kwargs dict per invocation) each
    time the patched ``run_digest`` is called; the recorder itself returns
    ``None``.
    """
    recorded: list[dict[str, Any]] = []

    def _record(**kwargs):
        recorded.append(kwargs)

    monkeypatch.setattr(runner, "run_digest", _record)
    return recorded
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ─── Fan-out: multiple positionals digested in order ────────────────────────
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_digest_three_mixed_positionals_render_in_argv_order(
    tmp_path: Path, monkeypatch,
) -> None:
    """Three positionals in different formats yield three ``run_digest``
    calls, in argv order, each carrying the source-dir kwarg for its schema."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    # Insertion order of this mapping is the argv order under test.
    fixtures = {
        "conn.log": _ZEEK_NDJSON_CONN_LINE,
        "dns.log": _ZEEK_DNS_NDJSON_LINE,
        "syslog.log": _SYSLOG_LINE,
    }
    argv_paths = []
    for file_name, line in fixtures.items():
        target = tmp_path / file_name
        target.write_text(line, encoding="utf-8")
        argv_paths.append(str(target))
    conn_arg, dns_arg, syslog_arg = argv_paths

    exit_code = cli._main(["digest", *argv_paths])

    assert exit_code == 0
    assert [call["schema"] for call in recorded] == ["conn", "dns", "syslog"]
    assert recorded[0]["zeek_dir"] == conn_arg
    assert recorded[1]["zeek_dir"] == dns_arg
    assert recorded[2]["syslog_dir"] == syslog_arg
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_digest_pihole_positional_routes_to_pihole_dir_in_fanout(
    tmp_path: Path, monkeypatch,
) -> None:
    """Within one fan-out, a Zeek DNS file is passed as ``zeek_dir`` while a
    dnsmasq/Pi-hole file is passed as ``pihole_dir``; both report schema
    ``dns``."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    zeek_path = tmp_path / "zeek_dns.log"
    pihole_path = tmp_path / "pihole.log"
    zeek_path.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")
    pihole_path.write_text(_PIHOLE_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(zeek_path), str(pihole_path)])

    assert exit_code == 0
    assert len(recorded) == 2
    zeek_call, pihole_call = recorded
    assert zeek_call["schema"] == "dns" and zeek_call["zeek_dir"] == str(zeek_path)
    assert pihole_call["schema"] == "dns" and pihole_call["pihole_dir"] == str(pihole_path)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ─── Three-way exit policy ───────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def test_digest_mixed_valid_empty_missing_renders_and_exits_zero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """One valid, one empty and one missing path: only the valid file reaches
    ``run_digest``, the empty notice goes to stdout, the not-found error goes
    to stderr, and the exit code is 0."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    valid_path = tmp_path / "conn.log"
    valid_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    empty_path = tmp_path / "empty.log"
    empty_path.write_text("", encoding="utf-8")
    missing_path = tmp_path / "missing.log"  # deliberately never created

    argv = ["digest", str(valid_path), str(empty_path), str(missing_path)]
    exit_code = cli._main(argv)

    out, err = capsys.readouterr()
    assert exit_code == 0
    assert len(recorded) == 1
    assert recorded[0]["schema"] == "conn"
    assert "empty.log is empty. Nothing to do!" in out
    assert "digest: path not found" in err
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_digest_all_empty_exits_zero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Two empty files and nothing else: ``run_digest`` is never called, each
    file gets a "Nothing to do!" line on stdout, and the exit code is 0."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    empty_files = [tmp_path / "a.log", tmp_path / "b.log"]
    for empty_file in empty_files:
        empty_file.write_text("", encoding="utf-8")

    exit_code = cli._main(["digest", *(str(p) for p in empty_files)])

    out, _err = capsys.readouterr()
    assert exit_code == 0
    assert recorded == []
    assert out.count("Nothing to do!") == 2
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_digest_all_error_exits_nonzero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Every positional is a missing path: stderr reports it and the exit
    code is 1.

    Directory positionals are covered separately by
    test_digest_multipath_directory_is_silently_skipped; this test only
    exercises the missing-path error, which keeps its stderr message.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    # Second missing path lives under tmp_path and is never created.
    second_missing = tmp_path / "also_missing.log"
    argv = ["digest", "/no/such/file.log", str(second_missing)]

    exit_code = cli._main(argv)

    _out, err = capsys.readouterr()
    assert exit_code == 1
    assert recorded == []
    assert "path not found" in err
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_digest_mixed_empty_and_error_no_render_exits_nonzero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """An empty file plus a missing path, with no card rendered, exits 1:
    the empty notice still prints on stdout and the error on stderr."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    empty_path = tmp_path / "e.log"
    empty_path.write_text("", encoding="utf-8")

    exit_code = cli._main(["digest", str(empty_path), "/no/such/file.log"])

    out, err = capsys.readouterr()
    assert exit_code == 1
    assert "Nothing to do!" in out
    assert "path not found" in err
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ─── Directory positionals: silent skip in fan-out, error on lone ───────────
|
|
190
|
+
#
|
|
191
|
+
# A directory positional in shell-expanded multi-path fan-out should not
|
|
192
|
+
# interleave error noise between cards. The v1 contract for a lone-directory
|
|
193
|
+
# positional (single positional, hits a directory) stays — actionable stderr
|
|
194
|
+
# message and exit 1. Other per-path errors (missing path, sniff failure)
|
|
195
|
+
# continue to surface in fan-out — only directories get the silent treatment.
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def test_digest_multipath_directory_is_silently_skipped(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A directory next to a valid file in a fan-out produces no stderr
    output about the directory; the valid file is still routed and the exit
    code is 0."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    directory = tmp_path / "subdir"
    directory.mkdir()
    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_path), str(directory)])

    _out, err = capsys.readouterr()
    # At least one card rendered, so the run counts as a success.
    assert exit_code == 0
    # Only the conn file was handed to run_digest.
    assert len(recorded) == 1
    # Neither the directory error text nor the directory path reaches stderr.
    assert "must be a file, not a directory" not in err
    assert str(directory) not in err
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def test_digest_multipath_all_directories_exits_zero_silently(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A fan-out made up solely of directories prints nothing on either
    stream, never calls ``run_digest``, and exits 0.

    With nothing rendered and nothing errored, the exit-code policy is
    expected to return 0 through the ``errored == 0`` branch in cli.py.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    directories = [tmp_path / name for name in ("a", "b", "c")]
    for directory in directories:
        directory.mkdir()

    exit_code = cli._main(["digest", *(str(d) for d in directories)])

    out, err = capsys.readouterr()
    assert exit_code == 0
    assert recorded == []
    assert out == ""
    assert err == ""
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_digest_lone_directory_positional_still_errors(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A directory given as the only positional is an error: stderr names
    the directory with a "must be a file" message and the exit code is 1.
    Whole-directory positionals are not supported in v1."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    directory = tmp_path / "logs"
    directory.mkdir()

    exit_code = cli._main(["digest", str(directory)])

    _out, err = capsys.readouterr()
    assert exit_code == 1
    assert "must be a file, not a directory" in err
    assert str(directory) in err
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def test_digest_multipath_non_directory_errors_still_surface(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """The silent treatment is reserved for directories: a missing path in a
    fan-out still writes its error to stderr, even when a sibling renders."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_path), "/no/such/file.log"])

    _out, err = capsys.readouterr()
    # The conn file rendered, so the overall exit code is still 0.
    assert exit_code == 0
    assert "path not found" in err
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# ─── Real-route regression: notice-shape pathless NDJSON → blob ─────────────
|
|
285
|
+
#
|
|
286
|
+
# Reproduces the original incident: a notice.log-shaped pathless Zeek NDJSON
|
|
287
|
+
# (id.orig_h plus native src) historically reached the conn summariser via
|
|
288
|
+
# the field-set fallback and crashed it with the Grouper-not-1-dimensional
|
|
289
|
+
# pandas error. The collision guard now rejects the false claim at sniff,
|
|
290
|
+
# the orchestrator drops to the blob floor, and the real summariser is
|
|
291
|
+
# never invoked — so the defence-in-depth net never fires either.
|
|
292
|
+
# Unmocked end-to-end: this test fails if a future change accidentally
|
|
293
|
+
# bypasses the guard, even if the recognizer unit tests still pass.
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def test_digest_notice_no_path_routes_to_blob_with_no_breadcrumb(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """End-to-end (``run_digest`` not patched): a notice-shaped NDJSON record
    renders an "Unrecognized source" card and leaves no summariser-failure
    text on stderr."""
    _stub_config(monkeypatch)

    # Record carries both the id.orig_h/id.resp_h tuple and native src/dst.
    notice_record = (
        '{"ts": 1779750000.0, "uid": "Cxxxxxx",'
        ' "id.orig_h": "192.0.2.10", "id.orig_p": 41514,'
        ' "id.resp_h": "198.51.100.20", "id.resp_p": 443, "proto": "tcp",'
        ' "src": "192.0.2.10", "dst": "198.51.100.20",'
        ' "note": "Placeholder::Note", "msg": "placeholder message"}\n'
    )
    notice_path = tmp_path / "notice.log"
    notice_path.write_text(notice_record, encoding="utf-8")

    exit_code = cli._main(["digest", str(notice_path)])

    out, err = capsys.readouterr()
    assert exit_code == 0
    # The card on stdout shows the blob headline and the source file name.
    assert "Unrecognized source" in out
    assert "notice.log" in out
    # None of the failure markers may appear on stderr: no breadcrumb, no
    # raw pandas error text, no exception name, no traceback.
    for leaked in (
        "summariser failed",
        "Grouper for 'src'",
        "ValueError",
        "Traceback",
    ):
        assert leaked not in err
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# ─── Blob fallback on summariser raise (item 2) ─────────────────────────────
|
|
329
|
+
#
|
|
330
|
+
# Defence-in-depth for a recognised-schema summariser raising on a pathological
|
|
331
|
+
# frame (e.g. duplicate `src` column → pandas Grouper failure). The narrow
|
|
332
|
+
# try/except in run_digest catches Exception (NOT BaseException), is silent on
|
|
333
|
+
# stderr by default and emits a one-line breadcrumb under --verbose, and
|
|
334
|
+
# always falls back to a blob card for THE SAME file on THE SAME stream.
|
|
335
|
+
# Sibling fan-out iterations continue to render.
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def test_digest_summariser_failure_falls_back_to_blob(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """When the summariser raises for a conn file, stdout still carries an
    "Unrecognized source" card, stderr stays free of the breadcrumb and of
    the exception text, and the exit code is 0.

    The failure is induced by patching ``loghunter.digest.get_summarizer``
    so that it hands back a callable which always raises, rather than by
    crafting a pathological fixture file.
    """
    _stub_config(monkeypatch)

    # A well-formed conn NDJSON file, so the only failure is the induced one.
    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    def _always_raise(*_a, **_kw):
        raise RuntimeError("induced summariser failure")

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer",
        lambda _schema_name: _always_raise,
    )

    exit_code = cli._main(["digest", str(conn_path)])

    out, err = capsys.readouterr()
    # The fallback card counts as a successful render.
    assert exit_code == 0
    # Without --verbose nothing about the failure is written to stderr.
    assert "summariser failed" not in err
    assert "RuntimeError: induced summariser failure" not in err
    assert "Traceback" not in err
    # Fallback card: blob headline plus the source file name.
    assert "Unrecognized source" in out
    assert "conn.log" in out
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def test_digest_summariser_failure_breadcrumb_shown_under_verbose(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """With ``--verbose`` an induced summariser failure leaves a breadcrumb
    on stderr (message, exception text and file name) but no traceback; the
    fallback card is still written to stdout and the exit code is 0."""
    _stub_config(monkeypatch)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    def _always_raise(*_a, **_kw):
        raise RuntimeError("induced summariser failure")

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer",
        lambda _schema_name: _always_raise,
    )

    exit_code = cli._main(["digest", "--verbose", str(conn_path)])

    out, err = capsys.readouterr()
    assert exit_code == 0
    # Breadcrumb content expected on stderr in verbose mode.
    for expected in (
        "summariser failed",
        "RuntimeError: induced summariser failure",
        "conn.log",
    ):
        assert expected in err
    # Verbose adds the one-line breadcrumb only, never a stack trace.
    assert "Traceback" not in err
    # The fallback card is unaffected by --verbose.
    assert "Unrecognized source" in out
    assert "conn.log" in out
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def test_digest_summariser_failure_does_not_abort_sibling_paths(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A summariser failure on the first of two conn files does not stop the
    second: stdout contains both the fallback "Unrecognized source" card and
    a regular conn card, and the exit code is 0."""
    _stub_config(monkeypatch)

    # Two files with identical conn content.
    for file_name in ("a.log", "b.log"):
        (tmp_path / file_name).write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    first_path = tmp_path / "a.log"
    second_path = tmp_path / "b.log"

    # Capture the genuine factory before patching so later summariser calls
    # can delegate to it.
    from loghunter import digest as _digest_pkg
    genuine_factory = _digest_pkg.get_summarizer
    summariser_calls = 0

    def _flaky_get(schema_name: str):
        def _wrap(*a, **kw):
            nonlocal summariser_calls
            summariser_calls += 1
            # Only the very first summariser invocation fails.
            if summariser_calls == 1:
                raise RuntimeError("induced summariser failure")
            return genuine_factory(schema_name)(*a, **kw)
        return _wrap

    monkeypatch.setattr("loghunter.digest.get_summarizer", _flaky_get)

    exit_code = cli._main(["digest", str(first_path), str(second_path)])

    out, err = capsys.readouterr()
    assert exit_code == 0
    # Default (non-verbose) mode: the failure leaves nothing on stderr.
    assert "summariser failed" not in err
    assert "Traceback" not in err
    # A regular conn card is present (its identity line contains "conn ·").
    assert "conn ·" in out
    # The fallback card's headline is present as well.
    assert "Unrecognized source" in out
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def test_digest_runner_value_error_does_not_abort_loop(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A ``ValueError`` raised by ``run_digest`` for one path does not stop
    the fan-out: the next path is still attempted, the error message reaches
    stderr, and the exit code is 0."""
    _stub_config(monkeypatch)

    # Holds the ``zeek_dir`` kwarg of each call. The fan-out ordering test in
    # this file compares that kwarg against ``str(path)``, so the elements
    # are strings (or None if the kwarg is absent), not Path objects.
    calls: list[str | None] = []

    def flaky_run_digest(**kwargs):
        # First call (conn) raises; second call (dns) returns normally.
        calls.append(kwargs.get("zeek_dir"))
        if len(calls) == 1:
            raise ValueError("induced parser failure")

    monkeypatch.setattr(runner, "run_digest", flaky_run_digest)

    conn = tmp_path / "conn.log"
    conn.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    dns = tmp_path / "dns.log"
    dns.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")

    rc = cli._main(["digest", str(conn), str(dns)])

    captured = capsys.readouterr()
    assert rc == 0  # the dns path completed, so at least one path rendered
    assert len(calls) == 2
    assert "induced parser failure" in captured.err
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
# ─── Shared --out concatenation ──────────────────────────────────────────────
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def test_digest_out_directory_writes_single_timestamped_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """Two valid paths with ``--out=<dir>/`` produce exactly one
    ``digest_*.txt`` file in that directory, holding both cards in argv
    order, and both ``run_digest`` calls share the same stream object."""
    _stub_config(monkeypatch)

    received_streams: list[Any] = []

    def fake_run_digest(**kwargs):
        # Stand-in for rendering: tag the shared stream with the schema name.
        target_stream = kwargs.get("stream")
        received_streams.append(target_stream)
        target_stream.write(f"[card {kwargs['schema']}]\n")

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    syslog_path = tmp_path / "sl.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    syslog_path.write_text(_SYSLOG_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    exit_code = cli._main(
        ["digest", str(conn_path), str(syslog_path), f"--out={out_dir}/"]
    )

    assert exit_code == 0
    produced = sorted(out_dir.iterdir())
    assert len(produced) == 1
    report = produced[0]
    assert report.name.startswith("digest_")
    assert report.suffix == ".txt"
    # The report name must not embed either input file's basename.
    assert "conn" not in report.name
    assert "sl" not in report.name
    # Both invocations were handed the very same stream object.
    assert received_streams[0] is received_streams[1]
    assert report.read_text(encoding="utf-8") == "[card conn]\n[card syslog]\n"
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def test_digest_out_explicit_file_honors_path(
    tmp_path: Path, monkeypatch,
) -> None:
    """``--out=<file>`` with two paths writes both cards, in argv order, to
    exactly that file."""
    _stub_config(monkeypatch)

    def fake_run_digest(**kwargs):
        schema_tag = f"[card {kwargs['schema']}]\n"
        kwargs["stream"].write(schema_tag)

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    dns_path = tmp_path / "dns.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    dns_path.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")
    report_path = tmp_path / "my_report.txt"

    exit_code = cli._main(
        ["digest", str(conn_path), str(dns_path), f"--out={report_path}"]
    )

    assert exit_code == 0
    written = report_path.read_text(encoding="utf-8")
    assert written == "[card conn]\n[card dns]\n"
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def test_digest_single_positional_with_out_directory_uses_same_naming(
    tmp_path: Path, monkeypatch,
) -> None:
    """A single positional with ``--out=<dir>/`` also lands in one
    ``digest_*`` file — the one-path case gets no special naming."""
    _stub_config(monkeypatch)

    def fake_run_digest(**kw):
        # Hand back the write() result, as the one-line stub it replaces did.
        return kw["stream"].write(f"[card {kw['schema']}]\n")

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    exit_code = cli._main(["digest", str(conn_path), f"--out={out_dir}/"])

    assert exit_code == 0
    produced = sorted(out_dir.iterdir())
    assert len(produced) == 1 and produced[0].name.startswith("digest_")
    assert produced[0].read_text(encoding="utf-8") == "[card conn]\n"
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
# ─── Lazy stream — no file created when nothing renders ─────────────────────
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def test_digest_out_directory_with_all_empty_creates_no_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """When every input is empty, ``--out=<dir>/`` leaves no file behind
    (the directory is either absent or empty) and the exit code is 0."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    empty_files = [tmp_path / "a.log", tmp_path / "b.log"]
    for empty_file in empty_files:
        empty_file.write_text("", encoding="utf-8")
    out_dir = tmp_path / "out"

    argv = ["digest", *(str(p) for p in empty_files), f"--out={out_dir}/"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    assert not out_dir.exists() or list(out_dir.iterdir()) == []
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def test_digest_out_directory_with_all_error_creates_no_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """Fan-out where every positional is missing: exit 1 and no output file."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    target_dir = tmp_path / "out"
    argv = [
        "digest",
        "/no/such.log",
        "/also/missing.log",
        f"--out={target_dir}/",
    ]

    exit_code = cli._main(argv)

    assert exit_code == 1
    if target_dir.exists():
        assert list(target_dir.iterdir()) == []
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
# ─── Dry-run sidesteps --out ────────────────────────────────────────────────
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def test_digest_dry_run_with_out_creates_no_file(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """`digest *.log --dry-run --out=<dir>/` → no file materialises.

    ``runner.run_digest`` is replaced by a spy that only records the
    ``stream`` keyword it receives; the real runner is NOT executed. The
    test then checks that the CLI itself created nothing under the
    ``--out`` directory and that the stream it passed is ``sys.stdout``
    rather than a file it opened.
    """
    _stub_config(monkeypatch)

    # Every ``stream=`` value handed to run_digest, in call order.
    seen_streams: list[Any] = []

    def fake_run_digest(**kwargs):
        # Record the stream and return without rendering anything, so any
        # file found in out_dir afterwards could only come from the CLI.
        seen_streams.append(kwargs.get("stream"))

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn = tmp_path / "conn.log"
    conn.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    rc = cli._main(["digest", str(conn), "--dry-run", f"--out={out_dir}/"])

    assert rc == 0
    # Dry-run MUST NOT have opened a file in out_dir.
    assert not out_dir.exists() or list(out_dir.iterdir()) == []
    # Fail with a readable message (not a bare IndexError) if the runner
    # was never reached.
    assert seen_streams, "run_digest was never invoked"
    # The stream handed in is stdout itself; never a file we opened.
    assert seen_streams[0] is sys.stdout
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
# ─── Bare ``digest`` (no positional) still uses config-driven flow ──────────
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def test_digest_bare_no_positional_resolves_output_via_kwargs(
    tmp_path: Path, monkeypatch,
) -> None:
    """Bare ``digest`` (no positional) keeps the config-driven output path.

    With only ``--out=<dir>/`` on the command line, the runner must be
    called with ``output_dir`` set, no ``stream``, and the ``conn`` schema.
    """
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    report_dir = tmp_path / "report"
    _stub_config(monkeypatch, {"loghunter": {"zeek_dir": str(zeek_root)}})

    runner_kwargs: dict[str, Any] = {}

    def _record(**kwargs):
        # Capture the keyword arguments the CLI passes to run_digest.
        runner_kwargs.update(kwargs)

    monkeypatch.setattr(runner, "run_digest", _record)

    exit_code = cli._main(["digest", f"--out={report_dir}/"])

    assert exit_code == 0
    # Output is still resolved through the runner kwargs.
    assert runner_kwargs.get("output_dir") == report_dir
    # The CLI never threads a stream on this path.
    assert runner_kwargs.get("stream") is None
    assert runner_kwargs.get("schema") == "conn"
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
# ─── Detect-path regression: parsed["paths"] does not bleed into detector ──
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def test_detect_path_unaffected_by_new_paths_key(
    tmp_path: Path, monkeypatch,
) -> None:
    """A detector run with one positional still forwards it as ``zeek_dir``.

    Regression guard for the detect path: ``beacon <file>`` must reach
    ``runner.run`` with ``zeek_dir`` set to the positional and
    ``detect="beacon"``.
    """
    _stub_config(monkeypatch)

    run_kwargs: dict[str, Any] = {}

    def _record(**kwargs):
        # Capture the keyword arguments the CLI passes to runner.run.
        run_kwargs.update(kwargs)

    monkeypatch.setattr(runner, "run", _record)

    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    cli._main(["beacon", str(conn_log)])

    # The positional is routed to the detector's source key (zeek_dir).
    assert run_kwargs.get("zeek_dir") == str(conn_log)
    assert run_kwargs.get("detect") == "beacon"
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
# ─── Source-dir flags rejected in fan-out ──────────────────────────────────
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def test_digest_source_dir_flag_rejected_with_positional(
    tmp_path: Path, monkeypatch,
) -> None:
    """Every source-dir flag combined with a positional raises ValueError.

    Two error messages are expected:

    * ``--zeek-dir`` → "... is not valid alongside"
    * ``--pihole-dir``, ``--syslog-dir``, ``--cloudtrail-dir`` →
      "... is not valid for digest"
    """
    _stub_config(monkeypatch)
    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    positional = str(conn_log)

    with pytest.raises(ValueError, match="--zeek-dir is not valid alongside"):
        cli._main(["digest", positional, "--zeek-dir=/x"])

    pruned_flags = ("--pihole-dir", "--syslog-dir", "--cloudtrail-dir")
    for flag in pruned_flags:
        expected = f"{flag} is not valid for digest"
        with pytest.raises(ValueError, match=expected):
            cli._main(["digest", positional, f"{flag}=/x"])
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def test_digest_unrecognized_single_file_still_routes_to_blob(
    tmp_path: Path, monkeypatch,
) -> None:
    """Unrecognised content is digested as a blob.

    A single file holding ``_BLOB_LINE`` must produce exactly one
    ``run_digest`` call with ``schema="blob"`` and ``blob_path`` set to
    that file.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    junk_file = tmp_path / "garbage.dat"
    junk_file.write_text(_BLOB_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(junk_file)])

    assert exit_code == 0
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["schema"] == "blob"
    assert only_call["blob_path"] == junk_file
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
def test_digest_pruned_source_dir_flags_rejected_without_positional(
    monkeypatch,
) -> None:
    """Pruned source-dir flags are rejected even with no positional.

    ``--pihole-dir``, ``--syslog-dir`` and ``--cloudtrail-dir`` must each
    raise ValueError matching "... is not valid for digest" on bare
    ``digest``.
    """
    _stub_config(monkeypatch)

    pruned_flags = ("--pihole-dir", "--syslog-dir", "--cloudtrail-dir")
    for flag in pruned_flags:
        expected = f"{flag} is not valid for digest"
        with pytest.raises(ValueError, match=expected):
            cli._main(["digest", f"{flag}=/x"])
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
# ─── Zeek syslog.log v1 promotion — fan-out routing + kwargs xor ladder ─────
|
|
760
|
+
|
|
761
|
+
# One newline-terminated Zeek NDJSON record tagged `_path: "syslog"`,
# used as fixture content for the Zeek syslog.log routing tests.
_ZEEK_NDJSON_SYSLOG_LINE = (
    '{"_path":"syslog","ts":1779750000.0,"uid":"CSL01",'
    '"id.orig_h":"192.0.2.10","id.resp_h":"198.51.100.20","id.resp_p":514,'
    '"proto":"udp","facility":"DAEMON","severity":"INFO",'
    '"message":"Jun 11 12:00:00 host1 sshd[1234]: ok"}\n'
)
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def test_digest_zeek_syslog_positional_routes_to_zeek_dir(
    tmp_path: Path, monkeypatch,
) -> None:
    """A Zeek-NDJSON `syslog.log` positional is routed through ``zeek_dir``.

    The single ``run_digest`` call must carry ``schema="syslog"``, the file
    path as ``zeek_dir``, and ``syslog_dir`` left as ``None``.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    zeek_log = tmp_path / "syslog.log"
    zeek_log.write_text(_ZEEK_NDJSON_SYSLOG_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(zeek_log)])

    assert exit_code == 0
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["schema"] == "syslog"
    assert only_call["zeek_dir"] == str(zeek_log)
    assert only_call["syslog_dir"] is None
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def test_digest_flat_syslog_positional_still_routes_to_syslog_dir(
    tmp_path: Path, monkeypatch,
) -> None:
    """A flat syslog positional keeps routing through ``syslog_dir``.

    The single ``run_digest`` call must carry ``schema="syslog"``, the file
    path as ``syslog_dir``, and ``zeek_dir`` left as ``None``.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    flat_log = tmp_path / "syslog"
    flat_log.write_text(_SYSLOG_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(flat_log)])

    assert exit_code == 0
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["schema"] == "syslog"
    assert only_call["syslog_dir"] == str(flat_log)
    assert only_call["zeek_dir"] is None
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
# The three CLI-layer ``_digest_runner_kwargs`` ladder tests were DELETED.
|
|
811
|
+
# After the single-ownership refactor, ``_digest_runner_kwargs`` does NOT
|
|
812
|
+
# resolve source dirs — it passes raw strings (or None) to ``run_digest``,
|
|
813
|
+
# which calls ``common.sources.resolve_digest_source``. The ladder + XOR +
|
|
814
|
+
# config-preference logic now lives there:
|
|
815
|
+
#
|
|
816
|
+
# syslog_dir > zeek_dir fallback → tests/test_sources.py:
|
|
817
|
+
# test_digest_syslog_syslog_preference_on_config_fallback
|
|
818
|
+
# test_digest_syslog_zeek_when_only_zeek_configured
|
|
819
|
+
# syslog XOR (zeek_dir + syslog_dir) → tests/test_sources.py:
|
|
820
|
+
# test_digest_syslog_xor_byte_preserved
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def test_digest_zeek_syslog_without_path_renders_syslog_card(
    tmp_path: Path, monkeypatch,
) -> None:
    """P1 regression: Zeek-NDJSON syslog without `_path` stays a syslog card.

    The record below has no ``_path`` key. The run must still produce one
    ``run_digest`` call with ``schema="syslog"`` routed via ``zeek_dir``
    (``syslog_dir`` is ``None``) rather than falling back to conn.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    # Same shape as a Zeek syslog record, minus the `_path` discriminator.
    record_without_path = (
        '{"ts":1779750000.0,"uid":"CSL01",'
        '"id.orig_h":"192.0.2.10","id.orig_p":41514,'
        '"id.resp_h":"198.51.100.20","id.resp_p":514,'
        '"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: placeholder"}\n'
    )
    log_file = tmp_path / "syslog.log"
    log_file.write_text(record_without_path, encoding="utf-8")

    exit_code = cli._main(["digest", str(log_file)])

    assert exit_code == 0
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["schema"] == "syslog"
    assert only_call["zeek_dir"] == str(log_file)
    assert only_call["syslog_dir"] is None
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
# ─── Inter-card separator matrix ────────────────────────────────────────────
|
|
853
|
+
#
|
|
854
|
+
# A 40-column rule ("─" * 40) separates adjacent RENDERED cards on a multi-card
|
|
855
|
+
# run; single-card runs draw no rule at all. Render-commit placement:
|
|
856
|
+
# `run_digest` / `_render_blob_for_path` emit the rule immediately before
|
|
857
|
+
# `handler.render_*(card)`, so a separator only ever precedes a card that
|
|
858
|
+
# reaches its render call. Skipped/empty/errored paths never trigger a rule.
|
|
859
|
+
|
|
860
|
+
# The separator the tests search for in rendered output: 40 box-drawing
# "─" characters.
_INTER_CARD_RULE = "─" * 40

# One newline-terminated Zeek NDJSON record tagged `_path: "dns"`.
_ZEEK_NDJSON_DNS_LINE = (
    '{"_path": "dns", "ts": 1779750000.0, "id.orig_h": "192.0.2.10",'
    ' "id.resp_h": "198.51.100.20", "id.resp_p": 53, "proto": "udp",'
    ' "query": "example.test", "qtype": 1, "rcode": 0}\n'
)
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
def test_inter_card_separator_single_card_run_draws_no_rule(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A single positional yields a single card, so stdout holds no rule."""
    _stub_config(monkeypatch)
    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log)])
    stdout_text = capsys.readouterr().out

    assert exit_code == 0
    assert _INTER_CARD_RULE not in stdout_text
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def test_inter_card_separator_two_schema_cards_get_one_rule_between(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Two rendered schema cards are separated by exactly one rule.

    Also checks the edges: the first stdout line is ``a.log`` (no leading
    rule) and the output does not finish on a rule.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    second_log = tmp_path / "b.log"
    for log in (first_log, second_log):
        log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(first_log), str(second_log)])
    stdout_text = capsys.readouterr().out

    assert exit_code == 0
    assert stdout_text.count(_INTER_CARD_RULE) == 1
    # Card 1's first identity line opens the output; no rule precedes it.
    opening_line = stdout_text.splitlines()[0]
    assert opening_line == "a.log"
    # No trailing rule after the last card.
    body = stdout_text.rstrip("\n")
    assert not body.endswith(_INTER_CARD_RULE)
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def test_inter_card_separator_skipped_path_does_not_get_rule(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """An empty file between two valid logs adds no extra separator.

    With inputs ``a.log``, ``empty.log`` (zero bytes), ``b.log`` the run
    exits 0 and stdout contains the rule exactly once.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    blank_log = tmp_path / "empty.log"
    blank_log.write_text("", encoding="utf-8")
    last_log = tmp_path / "b.log"
    last_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    argv = ["digest", str(first_log), str(blank_log), str(last_log)]
    exit_code = cli._main(argv)
    stdout_text = capsys.readouterr().out

    assert exit_code == 0
    # A single rule in total: the empty input contributes none.
    assert stdout_text.count(_INTER_CARD_RULE) == 1
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
def test_inter_card_separator_schema_to_blob_top_level(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A schema card followed by an unrecognised text file gets one rule."""
    _stub_config(monkeypatch)
    conn_log = tmp_path / "a.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    # 100 lines of plain words that match no known log schema.
    mystery_text = "alpha beta gamma\ndelta epsilon\n" * 50
    mystery_file = tmp_path / "mystery.txt"
    mystery_file.write_text(mystery_text, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log), str(mystery_file)])
    stdout_text = capsys.readouterr().out

    assert exit_code == 0
    assert stdout_text.count(_INTER_CARD_RULE) == 1
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
def test_inter_card_separator_schema_to_internal_blob_fallback(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A summariser failure on the second card still yields a single rule.

    The patched ``get_summarizer`` returns wrappers sharing one call
    counter; the second summariser invocation raises RuntimeError. The run
    must still exit 0 with the rule appearing exactly once in stdout.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    # Let the first summariser call through and fail the second one.
    from loghunter import digest as _digest_pkg
    original_factory = _digest_pkg.get_summarizer
    invocations = 0

    def _flaky_get(schema_name: str):
        def _summarise(*args, **kwargs):
            nonlocal invocations
            invocations += 1
            if invocations == 2:
                raise RuntimeError("induced summariser failure")
            return original_factory(schema_name)(*args, **kwargs)
        return _summarise

    monkeypatch.setattr("loghunter.digest.get_summarizer", _flaky_get)

    exit_code = cli._main(["digest", str(first_log), str(second_log)])
    stdout_text = capsys.readouterr().out

    assert exit_code == 0
    assert stdout_text.count(_INTER_CARD_RULE) == 1
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
# ─── Loader-progress suppression on multi-file fan-out ──────────────────────
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
def test_digest_single_positional_keeps_loader_progress(
    tmp_path: Path, monkeypatch,
) -> None:
    """One positional: ``run_digest`` is called with ``show_progress=True``."""
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log)])

    assert exit_code == 0
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["show_progress"] is True
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def test_digest_multi_positional_suppresses_loader_progress(
    tmp_path: Path, monkeypatch,
) -> None:
    """Two positionals: every ``run_digest`` call gets ``show_progress=False``.

    Suppression is batch-wide, so the first card is checked as well as the
    second.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    first_log = tmp_path / "a.log"
    second_log = tmp_path / "b.log"
    for log in (first_log, second_log):
        log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(first_log), str(second_log)])

    assert exit_code == 0
    assert len(recorded) == 2
    for call in recorded:
        assert call["show_progress"] is False
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def test_inter_card_separator_out_concatenation_matches_stdout_fanout(
    tmp_path: Path, monkeypatch,
) -> None:
    """`--out=<file>` output carries the same separator as stdout fan-out.

    Two rendered cards written to one file contain the rule exactly once,
    and the file does not finish on a rule.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    second_log = tmp_path / "b.log"
    for log in (first_log, second_log):
        log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    report_file = tmp_path / "digest.txt"

    argv = ["digest", str(first_log), str(second_log), f"--out={report_file}"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    report_text = report_file.read_text(encoding="utf-8")
    assert report_text.count(_INTER_CARD_RULE) == 1
    body = report_text.rstrip("\n")
    assert not body.endswith(_INTER_CARD_RULE)
|