loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
tests/test_config_cli.py
ADDED
|
@@ -0,0 +1,1006 @@
|
|
|
1
|
+
"""Tests for config defaults and CLI user-facing errors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import tomllib
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from loghunter import cli
|
|
12
|
+
from loghunter.cli import _runner_kwargs
|
|
13
|
+
from loghunter.common import config as cfg
|
|
14
|
+
from loghunter.detectors import dns
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_detector_defaults_are_owned_by_detector_modules(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A config loaded with an empty search path has no detector section of its own.

    The effective dns settings must then equal the defaults declared by the
    dns detector module itself.
    """
    # Empty the search-path list before loading with no explicit file.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    loaded = cfg.load(config_file=None)

    assert loaded["detectors"] == {}

    dns_defaults = dns.DEFAULT_CONFIG
    assert cfg.get_detector_config(loaded, "dns", dns_defaults) == dns_defaults
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_detector_config_overrides_detector_defaults() -> None:
    """A per-detector override wins for its key; untouched keys keep the default."""
    user_config = {"detectors": {"dns": {"min_cluster_size": 42}}}

    effective = cfg.get_detector_config(user_config, "dns", dns.DEFAULT_CONFIG)

    # Overridden key takes the user value ...
    assert effective["min_cluster_size"] == 42
    # ... while a key the user did not mention falls back to the module default.
    assert effective["min_samples"] == dns.DEFAULT_CONFIG["min_samples"]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_cli_formats_missing_config_file_as_actionable_error(
    capsys: pytest.CaptureFixture[str],
    tmp_path,
) -> None:
    """--config pointing at a nonexistent file exits 1 with a hint on stderr."""
    absent_config = tmp_path / "missing.toml"
    argv = [f"--config={absent_config}", "--dry-run"]

    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    for fragment in (
        "loghunter: Config file not found",
        "run 'loghunter init' to create a config",
    ):
        assert fragment in stderr
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_cli_formats_bad_since_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An unparseable --since value exits 1 with a usage-style message on stderr."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    with pytest.raises(SystemExit) as exit_info:
        cli.main(["--since=tomorrow", "--dry-run"])

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    for fragment in (
        "loghunter: --since expects a date like 2026-05-01",
        "Run 'loghunter --help' for usage.",
    ):
        assert fragment in stderr
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_cli_formats_bad_days_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An unparseable --days value exits 1 and names the expected range shape."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["--days=soon", "--dry-run"]

    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --days expects a range like 3-5" in stderr
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_cli_formats_unknown_output_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
) -> None:
    """An unrecognised --output format exits 1 and lists the available formats."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    # Minimal zeek directory: one empty conn.log.
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    conn_log = zeek_root / "conn.log"
    conn_log.write_text("", encoding="utf-8")

    with pytest.raises(SystemExit) as exit_info:
        cli.main([f"--zeek-dir={zeek_root}", "--output=bogus"])

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    for fragment in (
        "loghunter: Unknown output format 'bogus'.",
        "Available formats:",
    ):
        assert fragment in stderr
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_runner_kwargs_pihole_dir_arg(tmp_path: Path) -> None:
    """A parsed ``pihole_dir`` value reaches the runner kwargs as the same raw string.

    _runner_kwargs is a pure pass-through for source-dir strings; resolving
    them is the resolver's job (covered by tests/test_sources.py).
    """
    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    raw_value = str(pihole_root)

    kwargs = _runner_kwargs({"pihole_dir": raw_value}, config={})

    assert kwargs["pihole_dir"] == str(pihole_root)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_runner_kwargs_cloudtrail_dir_arg(tmp_path: Path) -> None:
    """A parsed ``cloudtrail_dir`` value reaches the runner kwargs as the raw string."""
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    raw_value = str(ct_root)

    kwargs = _runner_kwargs({"cloudtrail_dir": raw_value}, config={})

    assert kwargs["cloudtrail_dir"] == str(ct_root)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_runner_kwargs_none_when_flag_absent() -> None:
    """Absent source-dir flags yield None overrides even when config names a directory.

    Whether to fill from config is the resolver's decision. This replaces the
    old _from_config / unconfigured tests; that logic now lives in
    resolve_sources (covered by tests/test_sources.py).
    """
    config_with_source = {"loghunter": {"cloudtrail_dir": "/cfg/ct"}}
    kwargs = _runner_kwargs({}, config=config_with_source)

    # The CLI seam hands over None for every absent flag regardless of config;
    # the runner routes override + config into resolve_sources.
    for source_key in ("cloudtrail_dir", "zeek_dir", "syslog_dir", "pihole_dir"):
        assert kwargs[source_key] is None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_usage_advertises_cloudtrail_dir(capsys) -> None:
    """The usage text (first-run / --help) must list --cloudtrail-dir with the other source-dir flags."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "--cloudtrail-dir" in usage_text
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ── Bare source-dir flag (no =value) → actionable CLI error ──────────────────
|
|
139
|
+
#
|
|
140
|
+
# _parse_args records a bare ``--zeek-dir`` (no =value) as
|
|
141
|
+
# parsed["zeek_dir"] = True. Pre-fix, ``Path(True)`` downstream raised a raw
|
|
142
|
+
# TypeError that escaped the CLI error boundary as a traceback. Post-fix,
|
|
143
|
+
# _coerce_source_dir catches the boolean at the seam and raises an actionable
|
|
144
|
+
# ``loghunter:`` ValueError with exit 1.
|
|
145
|
+
|
|
146
|
+
@pytest.mark.parametrize(
    "flag",
    ["--zeek-dir", "--syslog-dir", "--pihole-dir", "--cloudtrail-dir"],
)
def test_bare_source_dir_flag_in_detect_raises_actionable_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
    flag: str,
) -> None:
    """A source-dir flag given without ``=value`` on the detect route fails cleanly.

    Expected outcome: exit code 1 and an actionable ``loghunter:`` message on
    stderr, not a raw TypeError.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    with pytest.raises(SystemExit) as exit_info:
        cli.main([flag, "--dry-run"])

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    for fragment in (
        f"loghunter: {flag} needs a value: {flag}=…",
        "Run 'loghunter --help' for usage.",
    ):
        assert fragment in stderr
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def test_bare_zeek_dir_flag_in_bare_digest_raises_actionable_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``digest --zeek-dir`` with no value and no positional fails like the detect route.

    On the bare-digest path ``--zeek-dir`` is the only source-dir flag in
    _DIGEST_ALLOWED_LONG_FLAGS, so it is the only one checked here.

    --pihole-dir, --syslog-dir and --cloudtrail-dir are deliberately absent
    from the digest allow-list: they raise "unknown digest flag --…" through
    the existing _validate_digest_flags rail, and that behaviour is preserved
    (the digest CLI surface stays narrow per CODE.md).
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["digest", "--zeek-dir"]

    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --zeek-dir needs a value: --zeek-dir=…" in stderr
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_bare_value_flag_with_short_form_mentions_short(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When a value-taking flag also has a short form, the error names both spellings."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    with pytest.raises(SystemExit) as exit_info:
        cli.main(["--out"])

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --out (-o) needs a value: -o=… or --out=…" in stderr
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# The previous two mock-the-seam dns routing tests were DELETED. Their
|
|
204
|
+
# intent (content-sniff routes Zeek dns → zeek_dir, Pi-hole → pihole_dir)
|
|
205
|
+
# lives in tests/test_sources.py:
|
|
206
|
+
# - test_router_dns_pihole_content_under_neutral_name (the key locking
|
|
207
|
+
# content-not-name routing — fixture name does NOT match pihole*.log*)
|
|
208
|
+
# - test_router_dns_zeek_content_routes_to_zeek_dir
|
|
209
|
+
# The end-to-end scope rail (sibling source-dirs stay unloaded across the
|
|
210
|
+
# CLI ↔ runner seam) is locked by tests/test_source_resolution_seam.py.
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_config_example_is_valid_toml() -> None:
    """The bundled config_example.toml parses and carries the expected allowlist paths.

    The example file is located relative to the ``loghunter`` package
    (``loghunter/cli.py`` sits next to ``loghunter/data/``) rather than the
    current working directory, so the test gives the same result no matter
    which directory pytest is invoked from.
    """
    package_dir = Path(cli.__file__).resolve().parent
    example_path = package_dir / "data" / "config_example.toml"

    with example_path.open("rb") as fh:
        parsed = tomllib.load(fh)

    allowlist = parsed["allowlist"]
    assert allowlist["domain_patterns"] == ["~/.loghunter/allowlist.d/domains_user.txt"]
    assert allowlist["connection_rules"] == ["~/.loghunter/allowlist.d/connections.txt"]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ── Stage 4: default_window + --all ───────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
from datetime import timedelta
|
|
226
|
+
|
|
227
|
+
from loghunter.common.config import parse_window_span
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_default_window_in_config_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no config file in play, ``default_window`` defaults to ``"1d"``."""
    # Clear the search path first, as the other default-loading tests in this
    # file do. NOTE(review): this assumes cfg.load(None) consults
    # cfg.SEARCH_PATHS, in which case a config file on the developer's machine
    # could otherwise override the built-in default and fail this test.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    config = cfg.load(None)

    assert config["loghunter"]["default_window"] == "1d"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def test_invalid_default_window_raises_at_load(tmp_path: Path) -> None:
    """A malformed ``default_window`` is rejected by cfg.load with a ConfigError."""
    toml_path = tmp_path / "lh.toml"
    toml_body = '[loghunter]\ndefault_window = "1week"\n'
    toml_path.write_text(toml_body, encoding="utf-8")

    with pytest.raises(cfg.ConfigError, match="not a valid duration"):
        cfg.load(toml_path)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_zero_default_window_raises_at_load(tmp_path: Path) -> None:
    """A zero-length ``default_window`` ("0d") is rejected by cfg.load."""
    toml_path = tmp_path / "lh.toml"
    toml_body = '[loghunter]\ndefault_window = "0d"\n'
    toml_path.write_text(toml_body, encoding="utf-8")

    with pytest.raises(cfg.ConfigError):
        cfg.load(toml_path)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_empty_string_default_window_loads_cleanly(tmp_path: Path) -> None:
    """An empty-string ``default_window`` loads without error and is kept verbatim."""
    toml_path = tmp_path / "lh.toml"
    toml_body = '[loghunter]\ndefault_window = ""\n'
    toml_path.write_text(toml_body, encoding="utf-8")

    loaded = cfg.load(toml_path)

    assert loaded["loghunter"]["default_window"] == ""
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def test_all_keyword_default_window_loads_cleanly(tmp_path: Path) -> None:
    """The keyword ``"all"`` is an accepted ``default_window`` and is kept verbatim."""
    toml_path = tmp_path / "lh.toml"
    toml_body = '[loghunter]\ndefault_window = "all"\n'
    toml_path.write_text(toml_body, encoding="utf-8")

    loaded = cfg.load(toml_path)

    assert loaded["loghunter"]["default_window"] == "all"
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def test_parse_window_span_days() -> None:
    """``<N>d`` spans parse to a timedelta of N days."""
    for text, day_count in (("1d", 1), ("7d", 7)):
        assert parse_window_span(text) == timedelta(days=day_count)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def test_parse_window_span_hours() -> None:
    """``<N>h`` spans parse to a timedelta of N hours."""
    for text, hour_count in (("24h", 24), ("12h", 12)):
        assert parse_window_span(text) == timedelta(hours=hour_count)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def test_parse_window_span_empty_and_all_disable() -> None:
    """None, the empty string and "all" in either case all parse to None."""
    for disabling_value in (None, "", "all", "ALL"):
        assert parse_window_span(disabling_value) is None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def test_parse_window_span_invalid_raises() -> None:
    """Free text, an unsupported unit suffix and a negative span each raise ConfigError."""
    for bad_span in ("nonsense", "7days", "-1d"):
        with pytest.raises(cfg.ConfigError):
            parse_window_span(bad_span)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def test_runner_kwargs_all_with_since_raises() -> None:
    """--all together with --since is rejected with a ValueError."""
    conflicting = {"all": True, "since": "7d"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_runner_kwargs_all_with_until_raises() -> None:
    """--all together with --until is rejected with a ValueError."""
    conflicting = {"all": True, "until": "2026-01-01"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def test_runner_kwargs_all_with_days_raises() -> None:
    """--all together with --days is rejected with a ValueError."""
    conflicting = {"all": True, "days": "3-5"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def test_runner_kwargs_all_with_hours_raises() -> None:
    """--all together with --hours is rejected with a ValueError."""
    conflicting = {"all": True, "hours": "2-6"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def test_runner_kwargs_all_flag_sets_load_all() -> None:
    """A parsed ``all`` flag turns into ``load_all=True`` for the runner."""
    runner_args = _runner_kwargs({"all": True}, config={})
    assert runner_args["load_all"] is True
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def test_runner_kwargs_no_all_flag_sets_load_all_false() -> None:
    """Without the ``all`` flag the runner receives ``load_all=False``."""
    runner_args = _runner_kwargs({}, config={})
    assert runner_args["load_all"] is False
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
# ── --yes / -y wiring ─────────────────────────────────────────────────────────
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def test_parse_args_recognizes_long_yes() -> None:
    """--yes parses as a boolean flag on verbs that allow it (analyze does)."""
    parsed = cli._parse_args(["--yes"], "")
    assert parsed.get("yes") is True
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def test_parse_args_recognizes_short_y() -> None:
    """-y, the canonical short form of --yes, sets the same key (allowed on analyze)."""
    parsed = cli._parse_args(["-y"], "")
    assert parsed.get("yes") is True
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def test_parse_args_rejects_unknown_short_flag() -> None:
    """An unknown short flag raises instead of being silently ignored as before."""
    argv = ["-x", "PATH"]
    with pytest.raises(ValueError, match="unknown flag -x"):
        cli._parse_args(argv, "")
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def test_parse_args_rejects_unknown_long_flag() -> None:
    """An unknown long flag raises a ValueError naming the flag."""
    argv = ["--foo", "PATH"]
    with pytest.raises(ValueError, match="unknown flag --foo"):
        cli._parse_args(argv, "")
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def test_parse_args_captures_path_and_paths() -> None:
    """Positionals populate ``path`` (the first one) and ``paths`` (all of them)."""
    positionals = ["a.log", "b.log"]
    parsed = cli._parse_args(positionals, "digest")
    assert parsed["path"] == "a.log"
    assert parsed["paths"] == ["a.log", "b.log"]
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def test_parse_args_wrong_verb_long_form_lead_spelling() -> None:
    """``digest --detect=…`` is a wrong-verb error that leads with the long spelling."""
    expected = r"--detect \(-d\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--detect=all"], "digest")
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def test_parse_args_wrong_verb_short_form_lead_spelling() -> None:
    """``digest -d=…`` is a wrong-verb error that leads with the short spelling."""
    expected = r"-d \(--detect\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-d=all"], "digest")
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def test_parse_args_wrong_verb_beats_value_shape_for_bare_short() -> None:
    """For a bare ``digest -d``, the wrong-verb error wins over needs-a-value."""
    expected = r"-d \(--detect\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-d"], "digest")
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def test_parse_args_wrong_verb_beats_value_shape_for_bare_long() -> None:
    """For a bare ``digest --detect``, the wrong-verb error wins over needs-a-value."""
    expected = r"--detect \(-d\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--detect"], "digest")
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def test_parse_args_value_on_bool_raises_long() -> None:
    """Attaching ``=value`` to the boolean --verbose flag is rejected."""
    expected = r"--verbose \(-v\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--verbose=1"], "")
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def test_parse_args_value_on_bool_raises_short() -> None:
    """Attaching ``=value`` to -v is rejected with the same long-form-led message."""
    expected = r"--verbose \(-v\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-v=1"], "")
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def test_parse_args_bundling_known_shorts_suggests_separation() -> None:
    """Bundling two known short flags (``-vy``) is refused with a bundling message."""
    bundled = ["-vy"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(bundled, "")
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def test_parse_args_bundling_unknown_short_is_plain_unknown() -> None:
    """A ``-vq`` bundle is reported as a plain unknown flag, not as a bundling error."""
    bundled = ["-vq"]
    with pytest.raises(ValueError, match="unknown flag -vq"):
        cli._parse_args(bundled, "")
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def test_parse_args_help_eq_value_is_takes_no_value() -> None:
    """``--help=foo`` does not short-circuit to help; the strict parser rejects it."""
    expected = r"--help \(-h\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--help=foo"], "")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def test_parse_args_duplicate_flag_last_wins() -> None:
    """Repeating a value flag keeps a single value: the last occurrence."""
    repeated = ["--out=a", "--out=b"]
    parsed = cli._parse_args(repeated, "")
    assert parsed["out"] == "b"
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
# ── W3: -vv literal token + verbose-level resolution ─────────────────────────
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def test_parse_args_short_v_sets_verbose_true() -> None:
    """A single -v sets the ``verbose`` boolean and records no ``verbose_level``."""
    parsed = cli._parse_args(["-v"], "")
    assert parsed.get("verbose") is True
    assert "verbose_level" not in parsed
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def test_parse_args_long_verbose_sets_verbose_true() -> None:
    """--verbose sets the ``verbose`` boolean."""
    parsed = cli._parse_args(["--verbose"], "")
    assert parsed.get("verbose") is True
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def test_parse_args_literal_vv_sets_verbose_level_two() -> None:
    """`-vv` is matched as an explicit literal token and yields ``verbose_level == 2``.

    The literal is recognised BEFORE the bundling refusal fires; this is a
    regression guard against the old `pass separately` error.
    """
    parsed = cli._parse_args(["-vv"], "")
    assert parsed.get("verbose_level") == 2
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def test_parse_args_combined_v_and_vv_resolves_to_level_two() -> None:
    """`-v -vv` together resolve to level 2 through _resolve_verbose_level (last wins)."""
    argv = ["-v", "-vv"]
    level = cli._resolve_verbose_level(cli._parse_args(argv, ""))
    assert level == 2
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def test_parse_args_vvv_still_rejected_as_bundling() -> None:
    """`-vvv` is not a registered literal, so it is refused as bundling.

    It falls through to the bundling refusal lattice and gets the existing
    pass-separately message.
    """
    token = ["-vvv"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(token, "")
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def test_parse_args_vy_still_rejected_as_bundling() -> None:
    """`-vy` is not the `-vv` literal, so it still gets the bundling refusal."""
    token = ["-vy"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(token, "")
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def test_parse_args_vv_wrong_verb_matches_v_error_shape() -> None:
    """On `init`, which disallows verbose, `-vv` fails with the SAME message as `-v`.

    Validation order: identity → verb-membership → value-shape.
    """
    expected = r"-v \(--verbose\) is not valid for init"
    # `-vv` is checked first; `-v` is the parity check.
    for token in ("-vv", "-v"):
        with pytest.raises(ValueError, match=expected):
            cli._parse_args([token], "init")
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def test_resolve_verbose_level_collapses_states() -> None:
    """Verbosity states collapse to a level: none → 0; -v → 1; -vv → 2; combined → 2."""
    cases = (
        ({}, 0),
        ({"verbose": True}, 1),
        ({"verbose_level": 2}, 2),
        ({"verbose": True, "verbose_level": 2}, 2),
    )
    for parsed, expected_level in cases:
        assert cli._resolve_verbose_level(parsed) == expected_level
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def test_runner_kwargs_yes_flag_sets_skip_confirm() -> None:
    """A parsed ``yes`` flag turns into ``skip_confirm=True`` for the runner."""
    runner_args = _runner_kwargs({"yes": True}, config={})
    assert runner_args.get("skip_confirm") is True
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def test_runner_kwargs_no_yes_flag_skip_confirm_false() -> None:
    """Without the ``yes`` flag the runner receives ``skip_confirm=False``."""
    runner_args = _runner_kwargs({}, config={})
    assert runner_args.get("skip_confirm") is False
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def test_usage_includes_yes_flag(capsys) -> None:
    """--help / first-run usage must advertise --yes and -y.

    The short form is matched as a standalone token. A plain ``"-y" in out``
    check is vacuous because ``"-y"`` is a substring of ``"--yes"``, so it
    would still pass if the short flag were dropped from the usage text.
    """
    import re

    cli._print_usage()
    out = capsys.readouterr().out
    assert "--yes" in out
    # -y must not be preceded or followed by a word character or hyphen, which
    # excludes the "-y" embedded in "--yes".
    assert re.search(r"(?<![\w-])-y(?![\w-])", out)
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def test_yes_threads_to_run_export(monkeypatch, tmp_path: Path) -> None:
    """`loghunter export <backend> --yes` must reach run_export with skip_confirm=True."""
    seen_kwargs: dict = {}

    def _record_run_export(*args, **kwargs):
        # Stand-in for the real exporter: remember the keyword arguments only.
        seen_kwargs.update(kwargs)

    def _stub_load(_=None):
        # Fresh minimal splunk export config on every call.
        return {
            "export": {"splunk": {"host": "192.0.2.20", "port": 8089,
                                  "query": {"default": {"spl": "x"}}}},
        }

    # _run_export does `from loghunter.exporters import run_export` inside the
    # function, so re-binding the attribute on the package is what it picks up.
    monkeypatch.setattr("loghunter.exporters.run_export", _record_run_export)
    monkeypatch.setattr(cfg, "load", _stub_load)

    cli.main(["export", "splunk", "--yes"])

    assert seen_kwargs.get("skip_confirm") is True
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def test_yes_threads_to_runner_run(monkeypatch, tmp_path: Path) -> None:
    """A detector invocation with --yes must reach runner.run with skip_confirm=True."""
    captured: dict = {}

    def _fake_run(**kwargs):
        # Record what the CLI hands to the runner instead of running detectors.
        captured.update(kwargs)

    # Clear the config search path, as the other cli.main tests in this file do.
    # NOTE(review): assumes cli.main discovers config via cfg.SEARCH_PATHS, in
    # which case a config file on the machine running the tests could otherwise
    # influence this run.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.setattr("loghunter.runner.run", _fake_run)

    # Use a single-detector subcommand path that already takes a zeek_dir.
    cli.main(["beacon", f"--zeek-dir={tmp_path}", "--yes"])

    assert captured.get("skip_confirm") is True
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
# ── CLI flag rename: --output-dir → --out (user-facing contract) ─────────────
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def test_usage_advertises_out_not_output_dir(capsys) -> None:
    """Usage text mentions --out (the new flag) and not --output-dir (the dropped name).

    We deliberately do NOT test runtime rejection of --output-dir — the generic
    --flag=value parser would still produce an inert `output_dir` key, which is
    harmless dead state. The user-facing contract is the help text.

    ``--out`` is matched with a token boundary. A plain ``"--out" in out`` is
    also satisfied by ``--output``, a flag the CLI accepts, so it would not
    catch ``--out`` going missing from the usage text.
    """
    import re

    cli._print_usage()
    out = capsys.readouterr().out
    # --out must not be followed by a word character or hyphen
    # (rules out --output and --output-dir).
    assert re.search(r"--out(?![\w-])", out)
    assert "--output-dir" not in out
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
# ── Thread C — CLI polish for the aws detector ────────────────────────────────
|
|
534
|
+
|
|
535
|
+
# Fix 1: aws subcommand + usage
|
|
536
|
+
|
|
537
|
+
def test_aws_is_a_single_detector_command() -> None:
    """`loghunter aws PATH` requires "aws" to be a registered single-detector subcommand."""
    registered = cli._SINGLE_DETECTOR_COMMANDS
    assert "aws" in registered
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def test_usage_lists_aws_subcommand(capsys) -> None:
    """The usage text includes a ``loghunter aws `` invocation line."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "loghunter aws " in usage_text
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def test_usage_lists_duration_subcommand(capsys) -> None:
    """Regression guard: the usage text must keep listing the duration subcommand."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "loghunter duration " in usage_text
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
# Fix 2: positional PATH → cloudtrail_dir in single-detector mode
|
|
556
|
+
|
|
557
|
+
def test_aws_positional_path_routes_to_cloudtrail_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """`loghunter aws PATH` sends the positional to cloudtrail_dir, not zeek_dir.

    The positional also scopes the run, so the sibling source dirs stay None.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    seen: dict[str, object] = {}

    def _record_run(**kwargs: object) -> None:
        # Stand-in for runner.run: keep the keyword arguments for inspection.
        seen.update(kwargs)

    monkeypatch.setattr("loghunter.runner.run", _record_run)

    trail_file = tmp_path / "cloudtrail_2026.json.log"
    trail_file.write_text("", encoding="utf-8")

    cli._run_single_detector("aws", [str(trail_file)])

    assert seen["detect"] == "aws"
    # The CLI passes raw strings; the resolver owns Path conversion.
    assert seen["cloudtrail_dir"] == str(trail_file)
    for sibling in ("zeek_dir", "syslog_dir", "pihole_dir"):
        assert seen[sibling] is None
    # Scope rail: the positional scopes the run to its routed source.
    assert seen["scope"] == frozenset({"cloudtrail_dir"})
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
# The four source-dir `_tilde_expands` tests were DELETED. ~-expansion of
|
|
586
|
+
# explicit overrides now happens inside common.sources._resolve_one (the
|
|
587
|
+
# sole site for string→Path conversion), tested directly at
|
|
588
|
+
# tests/test_sources.py:test_resolve_sources_tilde_override_expands.
|
|
589
|
+
#
|
|
590
|
+
# The end-to-end `aws ~/path` CLI test is preserved as a seam-style
|
|
591
|
+
# dry-run test in tests/test_source_resolution_seam.py (stage 5).
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def test_runner_kwargs_out_tilde_expands_and_preserves_trailing_slash(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``--out=~/reports/`` must have ``~`` expanded AND keep its trailing slash.

    ``be_like_water`` needs that trailing slash to fire the directory-intent
    gate, so the value it receives is captured and inspected here.
    """
    from types import SimpleNamespace

    monkeypatch.setenv("HOME", str(tmp_path))
    seen: dict[str, str] = {}

    def recording_be_like_water(target: str):
        # Record what the CLI passed, then return a minimal result object.
        seen["target"] = target
        return SimpleNamespace(is_file=False, path=Path(target))

    monkeypatch.setattr("loghunter.cli.be_like_water", recording_be_like_water)

    _runner_kwargs({"out": "~/reports/"}, config={})

    received = seen["target"]
    assert received == f"{tmp_path}/reports/"
    assert received.endswith("/")
    assert "~" not in received
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# The end-to-end ``aws ~/path`` CLI test (~-positional routes to
|
|
619
|
+
# cloudtrail_dir AND expands ~) moved to tests/test_source_resolution_seam.py
|
|
620
|
+
# as a real CLI dry-run seam test. ~-expansion now happens inside
|
|
621
|
+
# common.sources._resolve_one, not at the CLI seam.
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
# Analyze positional → named-detector-source routing.
|
|
625
|
+
#
|
|
626
|
+
# Replaces the deleted wrong-source "hint" scold and its five silence tests
|
|
627
|
+
# (the scold became meaningless once `route_positional_source` does the right
|
|
628
|
+
# thing on its own). The router itself is unit-tested in
|
|
629
|
+
# tests/test_sources.py — this test pins the CLI seam wiring: the positional
|
|
630
|
+
# lands on the detector's REQUIRED_LOGS source and the scope rail keeps
|
|
631
|
+
# siblings unloaded.
|
|
632
|
+
|
|
633
|
+
def test_analyze_positional_reroutes_to_named_detector_source(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``loghunter --detect=aws PATH`` routes the positional to ``cloudtrail_dir``.

    ``cloudtrail_dir`` is the detector's REQUIRED_LOGS source. The scope rail
    must keep sibling sources ``None`` even when the loaded config sets
    ``zeek_dir``.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    captured: dict[str, object] = {}
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: captured.update(kw))
    # The config names a zeek_dir on purpose; the assertions below check that
    # it does not reach the runner.
    monkeypatch.setattr(cfg, "load", lambda _=None: {
        "loghunter": {"zeek_dir": str(tmp_path / "should-not-be-loaded")},
    })

    fake_path = tmp_path / "events.json.log"
    fake_path.write_text("", encoding="utf-8")
    # Plain literal: the flag has no placeholders, so no f-string prefix.
    cli._run_all_detectors(["--detect=aws", str(fake_path)])

    assert captured["cloudtrail_dir"] == str(fake_path)
    assert captured["zeek_dir"] is None
    assert captured["scope"] == frozenset({"cloudtrail_dir"})
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
# ── top-level KeyboardInterrupt handler ──────────────────────────────────────
|
|
657
|
+
#
|
|
658
|
+
# Two Ctrl-C moments coexist:
|
|
659
|
+
# 1. mid-run (load, detect, digest, export compute) → cli.main()'s new arm
|
|
660
|
+
# prints "Stopped." to stderr and exits 130.
|
|
661
|
+
# 2. at the records-found "Continue? [y/N]" prompt in runner.py → the
|
|
662
|
+
# existing (EOFError, KeyboardInterrupt) handler raises ExportAborted,
|
|
663
|
+
# which cli.main() catches and exits 0 with the "aborted by user"
|
|
664
|
+
# message on stdout. Locking both halves prevents a future refactor
|
|
665
|
+
# from collapsing them.
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def _write_tiny_zeek_dir(tmp_path: Path) -> Path:
    """Write a two-row flat Zeek conn.log just rich enough to load.

    Kept local to this module so test_config_cli stays independent of
    test_runner's fixture helpers.

    Args:
        tmp_path: Directory under which the ``zeek`` directory is created.

    Returns:
        The ``zeek`` directory, containing a ``conn.log`` with one JSON
        object per line.
    """
    import json
    from datetime import datetime, timezone

    zeek_dir = tmp_path / "zeek"
    # exist_ok keeps the helper idempotent: calling it twice on the same
    # tmp_path rewrites conn.log instead of raising FileExistsError.
    zeek_dir.mkdir(exist_ok=True)

    # The two rows differ only in their timestamp (1 Jan and 5 Jan 2026, UTC).
    rows = [
        {
            "ts": datetime(2026, 1, day, tzinfo=timezone.utc).timestamp(),
            "id.orig_h": "192.0.2.10",
            "id.resp_h": "198.51.100.20",
            "id.resp_p": 443,
            "proto": "tcp",
        }
        for day in (1, 5)
    ]
    (zeek_dir / "conn.log").write_text(
        "\n".join(json.dumps(r) for r in rows) + "\n", encoding="utf-8"
    )
    return zeek_dir
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def test_cli_top_level_keyboard_interrupt_exits_cleanly(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Ctrl-C during compute work ends in 'Stopped.' on stderr, exit 130, no traceback.

    stderr here is capsys's captured (non-TTY) stream, so the output must be
    exactly 'Stopped.' followed by a newline, with no leading blank line.
    Script/log capture must see the same string today and after the TTY-only
    blank-line polish.
    """
    def _interrupt(_argv=None):
        raise KeyboardInterrupt

    monkeypatch.setattr(cli, "_main", _interrupt)

    with pytest.raises(SystemExit) as exit_info:
        cli.main([])
    streams = capsys.readouterr()

    assert exit_info.value.code == 130
    assert streams.err == "Stopped.\n"
    for text in (streams.err, streams.out):
        assert "Traceback" not in text
    # Sanity: this is the new path, not the prompt-cancel path.
    for text in (streams.err, streams.out):
        assert "aborted by user" not in text
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def test_cli_top_level_keyboard_interrupt_prepends_blank_line_on_tty(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Ctrl-C on a TTY writes a blank line before 'Stopped.'.

    The blank line keeps the terminal's '^C' echo from gluing onto 'Stopped.'
    on one row. The cli is the only place that sees this discipline; runner
    liveness narration handles its own clears.
    """
    def _interrupt(_argv=None):
        raise KeyboardInterrupt

    monkeypatch.setattr(cli, "_main", _interrupt)
    # capsys has already swapped sys.stderr; make that stream claim to be a TTY.
    monkeypatch.setattr(sys.stderr, "isatty", lambda: True)

    with pytest.raises(SystemExit) as exit_info:
        cli.main([])
    streams = capsys.readouterr()

    assert exit_info.value.code == 130
    assert streams.err == "\nStopped.\n"
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def test_cli_keyboard_interrupt_at_confirm_prompt_still_exit_zero(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
    tmp_path: Path,
) -> None:
    """Ctrl-C AT the records-found prompt gives ExportAborted and exit 0, not Stopped./130.

    Drives cli.main() end-to-end to lock the user-visible CLI behavior.
    --warn-above is not threaded by _runner_kwargs, so it is injected through
    cfg.load(), the same way test_cloudtrail_exporter.py does.
    """
    zeek_dir = _write_tiny_zeek_dir(tmp_path)

    # A fresh dict per call, carrying the warn_above value that the flag
    # cannot supply.
    monkeypatch.setattr(
        cfg,
        "load",
        lambda _path=None: {
            "loghunter": {
                "detect": "beacon",
                "warn_above": 1,
                "default_window": "all",
            }
        },
    )

    def _interrupting_input(*_a, **_kw):
        raise KeyboardInterrupt

    monkeypatch.setattr("builtins.input", _interrupting_input)

    with pytest.raises(SystemExit) as exit_info:
        cli.main(["beacon", f"--zeek-dir={zeek_dir}"])
    streams = capsys.readouterr()

    assert exit_info.value.code == 0
    # ExportAborted prints to stdout via cli.main()'s existing arm.
    assert "aborted by user" in streams.out
    # The new top-level path must NOT have fired here.
    for text in (streams.err, streams.out):
        assert "Stopped." not in text
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
# ── Single-detector positional routing: syslog (v1 promotion) ────────────────
|
|
793
|
+
#
|
|
794
|
+
# Glenn rev-1 required tests. The syslog detector's REQUIRED_LOGS is now
|
|
795
|
+
# empty (dns shape), so _source_for_single_detector_path falls through to
|
|
796
|
+
# OPTIONAL_LOGS pattern matching — and `syslog.log` matches BOTH `*.log*`
|
|
797
|
+
# AND `syslog*.log*`, which previously routed to the zeek_dir default.
|
|
798
|
+
# That regresses flat-syslog. The fix special-cases syslog: directories
|
|
799
|
+
# default to syslog_dir (preserves /var/log convention), files content-sniff
|
|
800
|
+
# (Zeek-origin → zeek_dir, anything else → syslog_dir).
|
|
801
|
+
|
|
802
|
+
def test_syslog_positional_flat_file_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A flat RFC 3164 syslog file positional is content-sniffed onto ``syslog_dir``."""
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    syslog_line = "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"
    flat_file = tmp_path / "auth.log"
    flat_file.write_text(syslog_line, encoding="utf-8")
    positional = str(flat_file)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["detect"] == "syslog"
    assert run_kwargs["syslog_dir"] == positional
    # Scope rail: positional routes ONE source; siblings stay None.
    assert run_kwargs["zeek_dir"] is None
    assert run_kwargs["scope"] == frozenset({"syslog_dir"})
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
def test_syslog_positional_directory_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A directory positional keeps the /var/log flat-syslog convention.

    Without the special-case, _source_for_single_detector_path's directory
    branch would default to zeek_dir, which is wrong for the historical
    syslog flow.
    """
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    log_dir = tmp_path / "log"
    log_dir.mkdir()
    auth_log = log_dir / "auth.log"
    auth_log.write_text("<134>Jun 11 12:00:00 host1 sshd[1234]: ok\n", encoding="utf-8")
    positional = str(log_dir)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["syslog_dir"] == positional
    assert run_kwargs["zeek_dir"] is None
    assert run_kwargs["scope"] == frozenset({"syslog_dir"})
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def test_syslog_positional_zeek_tsv_file_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A Zeek-TSV ``syslog.log`` positional content-sniffs to ``zeek_dir``.

    The filename ``syslog.log`` matches BOTH OPTIONAL_LOGS patterns, so the
    tie is broken by content sniff (sniff_format_detailed), the same
    machinery the digest verb uses. Zeek-origin content goes to zeek_dir.
    """
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    # Zeek TSV header directives followed by one data row.
    zeek_tsv = (
        "#separator \\x09\n"
        "#set_separator\t,\n"
        "#empty_field\t(empty)\n"
        "#unset_field\t-\n"
        "#path\tsyslog\n"
        "#fields\tts\tuid\tid.orig_h\tmessage\n"
        "#types\ttime\tstring\taddr\tstring\n"
        "1779750000.000000\tCSL01\t192.0.2.10\thello\n"
    )
    zeek_file = tmp_path / "syslog.log"
    zeek_file.write_text(zeek_tsv, encoding="utf-8")
    positional = str(zeek_file)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["zeek_dir"] == positional
    assert run_kwargs["syslog_dir"] is None
    assert run_kwargs["scope"] == frozenset({"zeek_dir"})
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def test_syslog_positional_zeek_ndjson_file_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A Zeek NDJSON ``syslog.log`` positional also content-sniffs to ``zeek_dir``."""
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    # One NDJSON record that carries the "_path":"syslog" marker.
    zeek_ndjson = (
        '{"_path":"syslog","ts":1779750000.0,"uid":"CSL01",'
        '"id.orig_h":"192.0.2.10","id.resp_h":"198.51.100.20",'
        '"id.resp_p":514,"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: ok"}\n'
    )
    zeek_file = tmp_path / "syslog.log"
    zeek_file.write_text(zeek_ndjson, encoding="utf-8")
    positional = str(zeek_file)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["zeek_dir"] == positional
    assert run_kwargs["syslog_dir"] is None
    assert run_kwargs["scope"] == frozenset({"zeek_dir"})
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def test_syslog_positional_unrecognized_file_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A file the sniffer cannot identify as a Zeek syslog.log falls to ``syslog_dir``.

    That is the flat-syslog default, mirroring the "directory defaults to
    flat" convention.
    """
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    mystery = tmp_path / "mystery.log"
    mystery.write_text("lorem ipsum dolor\nsit amet\n", encoding="utf-8")
    positional = str(mystery)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["syslog_dir"] == positional
    assert run_kwargs["zeek_dir"] is None
    assert run_kwargs["scope"] == frozenset({"syslog_dir"})
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
def test_syslog_positional_missing_file_does_not_leak_traceback(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A missing/unreadable positional must not leak a raw traceback.

    Glenn caution #1: the sniff call inside _source_for_single_detector_path
    must not let the error escape. Routing degrades to syslog_dir, and the
    runner's actual file-discovery produces the canonical "not found" error
    downstream.
    """
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Patch runner.run so we exit cleanly before the runner tries to read.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    # Never created on disk.
    ghost = tmp_path / "does-not-exist.log"
    positional = str(ghost)

    # No OSError must propagate from the routing layer itself.
    cli._run_single_detector("syslog", [positional])

    # Degrades to syslog_dir per the convention.
    assert run_kwargs["syslog_dir"] == positional
    assert run_kwargs["scope"] == frozenset({"syslog_dir"})
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
def test_syslog_positional_zeek_ndjson_without_path_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """P1 regression (Glenn bug handoff): Zeek NDJSON without ``_path`` still routes to ``zeek_dir``.

    A Zeek-NDJSON syslog.log emitted without the ``_path`` directive must
    still content-sniff to Zeek. Pre-fix, the conn field-set fallback grabbed
    it and the positional landed at syslog_dir, leaving load_syslog with an
    empty frame.
    """
    run_kwargs: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Stand-in for the runner: record the keyword arguments and do no work.
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: run_kwargs.update(kw))

    # Note: NO _path directive, exactly the upstream-agent shape that
    # triggered the original misroute.
    zeek_ndjson = (
        '{"ts":1779750000.0,"uid":"CSL01",'
        '"id.orig_h":"192.0.2.10","id.orig_p":41514,'
        '"id.resp_h":"198.51.100.20","id.resp_p":514,'
        '"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: placeholder"}\n'
    )
    zeek_file = tmp_path / "syslog.log"
    zeek_file.write_text(zeek_ndjson, encoding="utf-8")
    positional = str(zeek_file)

    cli._run_single_detector("syslog", [positional])

    assert run_kwargs["zeek_dir"] == positional
    assert run_kwargs["syslog_dir"] is None
    assert run_kwargs["scope"] == frozenset({"zeek_dir"})
|