loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,1006 @@
1
+ """Tests for config defaults and CLI user-facing errors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ import pytest
8
+ import tomllib
9
+ from pathlib import Path
10
+
11
+ from loghunter import cli
12
+ from loghunter.cli import _runner_kwargs
13
+ from loghunter.common import config as cfg
14
+ from loghunter.detectors import dns
15
+
16
+
17
def test_detector_defaults_are_owned_by_detector_modules(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no config file, the loaded config has no detector section and the
    merged dns config equals ``dns.DEFAULT_CONFIG``."""
    # Blank out SEARCH_PATHS (presumably the config discovery locations) so
    # nothing on the host machine leaks into the loaded config.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    loaded = cfg.load(config_file=None)

    assert loaded["detectors"] == {}
    effective = cfg.get_detector_config(loaded, "dns", dns.DEFAULT_CONFIG)
    assert effective == dns.DEFAULT_CONFIG
25
+
26
+
27
def test_detector_config_overrides_detector_defaults() -> None:
    """A key set in the config replaces the default; other keys keep the default."""
    user_config = {"detectors": {"dns": {"min_cluster_size": 42}}}

    effective = cfg.get_detector_config(user_config, "dns", dns.DEFAULT_CONFIG)

    assert effective["min_cluster_size"] == 42
    assert effective["min_samples"] == dns.DEFAULT_CONFIG["min_samples"]
34
+
35
+
36
def test_cli_formats_missing_config_file_as_actionable_error(
    capsys: pytest.CaptureFixture[str],
    tmp_path,
) -> None:
    """A nonexistent ``--config`` path exits 1 and points the user at ``loghunter init``."""
    absent_config = tmp_path / "missing.toml"
    argv = [f"--config={absent_config}", "--dry-run"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: Config file not found" in stderr
    assert "run 'loghunter init' to create a config" in stderr
48
+
49
+
50
def test_cli_formats_bad_since_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An unparseable ``--since`` value exits 1 with a usage-style message on stderr."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["--since=tomorrow", "--dry-run"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --since expects a date like 2026-05-01" in stderr
    assert "Run 'loghunter --help' for usage." in stderr
62
+
63
+
64
def test_cli_formats_bad_days_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An unparseable ``--days`` value exits 1 and explains the expected range shape."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["--days=soon", "--dry-run"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --days expects a range like 3-5" in stderr
75
+
76
+
77
def test_cli_formats_unknown_output_as_usage_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
) -> None:
    """An unrecognised ``--output`` format exits 1 and lists the available formats."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # An empty conn.log inside a zeek directory is all the fixture provides.
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    conn_log = zeek_dir / "conn.log"
    conn_log.write_text("", encoding="utf-8")

    argv = [f"--zeek-dir={zeek_dir}", "--output=bogus"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: Unknown output format 'bogus'." in stderr
    assert "Available formats:" in stderr
94
+
95
+
96
def test_runner_kwargs_pihole_dir_arg(tmp_path: Path) -> None:
    """``--pihole-dir=PATH`` (parsed under the ``pihole_dir`` key) is passed
    through ``_runner_kwargs`` as the raw string.

    Source-dir resolution is not the CLI's job; per the original note it is
    covered by tests/test_sources.py.
    """
    pihole_dir = tmp_path / "pihole"
    pihole_dir.mkdir()
    raw_value = str(pihole_dir)

    kwargs = _runner_kwargs({"pihole_dir": raw_value}, config={})

    assert kwargs["pihole_dir"] == str(pihole_dir)
106
+
107
+
108
def test_runner_kwargs_cloudtrail_dir_arg(tmp_path: Path) -> None:
    """``--cloudtrail-dir=PATH`` is passed through as the raw string."""
    cloudtrail_dir = tmp_path / "ct"
    cloudtrail_dir.mkdir()
    raw_value = str(cloudtrail_dir)

    kwargs = _runner_kwargs({"cloudtrail_dir": raw_value}, config={})

    assert kwargs["cloudtrail_dir"] == str(cloudtrail_dir)
115
+
116
+
117
def test_runner_kwargs_none_when_flag_absent() -> None:
    """An absent source-dir flag yields a ``None`` override, even when the
    config sets a value for it.

    Per the original note, config-filling now lives in resolve_sources
    (covered by tests/test_sources.py), replacing the old _from_config /
    unconfigured tests.
    """
    config_with_cloudtrail = {"loghunter": {"cloudtrail_dir": "/cfg/ct"}}
    kwargs = _runner_kwargs({}, config=config_with_cloudtrail)

    # The CLI seam hands over None for every absent flag regardless of config.
    for source_key in ("cloudtrail_dir", "zeek_dir", "syslog_dir", "pihole_dir"):
        assert kwargs[source_key] is None
128
+
129
+
130
def test_usage_advertises_cloudtrail_dir(capsys) -> None:
    """The usage text printed by ``_print_usage`` must mention ``--cloudtrail-dir``."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "--cloudtrail-dir" in usage_text
136
+
137
+
138
+ # ── Bare source-dir flag (no =value) → actionable CLI error ──────────────────
139
+ #
140
+ # _parse_args records a bare ``--zeek-dir`` (no =value) as
141
+ # parsed["zeek_dir"] = True. Pre-fix, ``Path(True)`` downstream raised a raw
142
+ # TypeError that escaped the CLI error boundary as a traceback. Post-fix,
143
+ # _coerce_source_dir catches the boolean at the seam and raises an actionable
144
+ # ``loghunter:`` ValueError with exit 1.
145
+
146
@pytest.mark.parametrize(
    "flag",
    [
        "--zeek-dir",
        "--syslog-dir",
        "--pihole-dir",
        "--cloudtrail-dir",
    ],
)
def test_bare_source_dir_flag_in_detect_raises_actionable_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
    flag: str,
) -> None:
    """A source-dir flag given without ``=value`` on the detect route exits 1
    with an actionable ``loghunter:`` message instead of a raw TypeError."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = [flag, "--dry-run"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert f"loghunter: {flag} needs a value: {flag}=…" in stderr
    assert "Run 'loghunter --help' for usage." in stderr
164
+
165
+
166
def test_bare_zeek_dir_flag_in_bare_digest_raises_actionable_error(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``digest --zeek-dir`` with no value and no positional exits 1 with the
    same "needs a value" message as the detect route.

    Per the original note, ``--zeek-dir`` is the only source-dir flag in
    _DIGEST_ALLOWED_LONG_FLAGS; --pihole-dir, --syslog-dir and
    --cloudtrail-dir are deliberately excluded and are rejected as
    "unknown digest flag --…" by _validate_digest_flags, keeping the digest
    CLI surface narrow per CODE.md.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["digest", "--zeek-dir"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --zeek-dir needs a value: --zeek-dir=…" in stderr
187
+
188
+
189
def test_bare_value_flag_with_short_form_mentions_short(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When a value-taking flag has a short form, the error names both spellings."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    argv = ["--out"]
    with pytest.raises(SystemExit) as exit_info:
        cli.main(argv)

    assert exit_info.value.code == 1
    stderr = capsys.readouterr().err
    assert "loghunter: --out (-o) needs a value: -o=… or --out=…" in stderr
201
+
202
+
203
+ # The previous two mock-the-seam dns routing tests were DELETED. Their
204
+ # intent (content-sniff routes Zeek dns → zeek_dir, Pi-hole → pihole_dir)
205
+ # lives in tests/test_sources.py:
206
+ # - test_router_dns_pihole_content_under_neutral_name (the key locking
207
+ # content-not-name routing — fixture name does NOT match pihole*.log*)
208
+ # - test_router_dns_zeek_content_routes_to_zeek_dir
209
+ # The end-to-end scope rail (sibling source-dirs stay unloaded across the
210
+ # CLI ↔ runner seam) is locked by tests/test_source_resolution_seam.py.
211
+
212
+
213
def test_config_example_is_valid_toml() -> None:
    """The shipped ``config_example.toml`` parses as TOML and carries the
    expected allowlist paths.

    The file is located relative to the ``loghunter`` package (next to
    ``cli.py``, under ``data/``) rather than the current working directory,
    so the test does not depend on where pytest is invoked from.
    """
    package_dir = Path(cli.__file__).resolve().parent
    path = package_dir / "data" / "config_example.toml"

    # tomllib.load requires a binary file object.
    with path.open("rb") as fh:
        parsed = tomllib.load(fh)

    assert parsed["allowlist"]["domain_patterns"] == ["~/.loghunter/allowlist.d/domains_user.txt"]
    assert parsed["allowlist"]["connection_rules"] == ["~/.loghunter/allowlist.d/connections.txt"]
221
+
222
+
223
+ # ── Stage 4: default_window + --all ───────────────────────────────────────────
224
+
225
+ from datetime import timedelta
226
+
227
+ from loghunter.common.config import parse_window_span
228
+
229
+
230
def test_default_window_in_config_defaults(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no config file, ``default_window`` falls back to ``"1d"``.

    SEARCH_PATHS is blanked, as in the other no-config tests in this module,
    so a config file present on the host cannot change the loaded value.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    config = cfg.load(None)
    assert config["loghunter"]["default_window"] == "1d"
233
+
234
+
235
def test_invalid_default_window_raises_at_load(tmp_path: Path) -> None:
    """A malformed ``default_window`` is rejected when the config is loaded."""
    config_path = tmp_path / "lh.toml"
    toml_text = '[loghunter]\ndefault_window = "1week"\n'
    config_path.write_text(toml_text, encoding="utf-8")

    with pytest.raises(cfg.ConfigError, match="not a valid duration"):
        cfg.load(config_path)
240
+
241
+
242
def test_zero_default_window_raises_at_load(tmp_path: Path) -> None:
    """A zero-length ``default_window`` is rejected when the config is loaded."""
    config_path = tmp_path / "lh.toml"
    toml_text = '[loghunter]\ndefault_window = "0d"\n'
    config_path.write_text(toml_text, encoding="utf-8")

    with pytest.raises(cfg.ConfigError):
        cfg.load(config_path)
247
+
248
+
249
def test_empty_string_default_window_loads_cleanly(tmp_path: Path) -> None:
    """An empty ``default_window`` loads without error and is kept verbatim."""
    config_path = tmp_path / "lh.toml"
    toml_text = '[loghunter]\ndefault_window = ""\n'
    config_path.write_text(toml_text, encoding="utf-8")

    loaded = cfg.load(config_path)

    assert loaded["loghunter"]["default_window"] == ""
254
+
255
+
256
def test_all_keyword_default_window_loads_cleanly(tmp_path: Path) -> None:
    """``default_window = "all"`` loads without error and is kept verbatim."""
    config_path = tmp_path / "lh.toml"
    toml_text = '[loghunter]\ndefault_window = "all"\n'
    config_path.write_text(toml_text, encoding="utf-8")

    loaded = cfg.load(config_path)

    assert loaded["loghunter"]["default_window"] == "all"
261
+
262
+
263
def test_parse_window_span_days() -> None:
    """A ``<n>d`` span parses to a timedelta of that many days."""
    for text, day_count in (("1d", 1), ("7d", 7)):
        assert parse_window_span(text) == timedelta(days=day_count)
266
+
267
+
268
def test_parse_window_span_hours() -> None:
    """A ``<n>h`` span parses to a timedelta of that many hours."""
    for text, hour_count in (("24h", 24), ("12h", 12)):
        assert parse_window_span(text) == timedelta(hours=hour_count)
271
+
272
+
273
def test_parse_window_span_empty_and_all_disable() -> None:
    """``None``, the empty string and ``all`` (either case) all parse to ``None``."""
    for disabling_value in (None, "", "all", "ALL"):
        assert parse_window_span(disabling_value) is None
278
+
279
+
280
def test_parse_window_span_invalid_raises() -> None:
    """Free text, an unknown unit suffix and a negative span each raise ConfigError."""
    for bad_span in ("nonsense", "7days", "-1d"):
        with pytest.raises(cfg.ConfigError):
            parse_window_span(bad_span)
287
+
288
+
289
def test_runner_kwargs_all_with_since_raises() -> None:
    """``--all`` together with ``--since`` is rejected."""
    conflicting = {"all": True, "since": "7d"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
292
+
293
+
294
def test_runner_kwargs_all_with_until_raises() -> None:
    """``--all`` together with ``--until`` is rejected."""
    conflicting = {"all": True, "until": "2026-01-01"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
297
+
298
+
299
def test_runner_kwargs_all_with_days_raises() -> None:
    """``--all`` together with ``--days`` is rejected."""
    conflicting = {"all": True, "days": "3-5"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
302
+
303
+
304
def test_runner_kwargs_all_with_hours_raises() -> None:
    """``--all`` together with ``--hours`` is rejected."""
    conflicting = {"all": True, "hours": "2-6"}
    with pytest.raises(ValueError, match="--all cannot be combined"):
        _runner_kwargs(conflicting, config={})
307
+
308
+
309
def test_runner_kwargs_all_flag_sets_load_all() -> None:
    """A parsed ``all`` flag turns into ``load_all=True``."""
    runner_kwargs = _runner_kwargs({"all": True}, config={})
    assert runner_kwargs["load_all"] is True
312
+
313
+
314
def test_runner_kwargs_no_all_flag_sets_load_all_false() -> None:
    """Without the ``all`` flag, ``load_all`` is ``False``."""
    runner_kwargs = _runner_kwargs({}, config={})
    assert runner_kwargs["load_all"] is False
317
+
318
+
319
+ # ── --yes / -y wiring ─────────────────────────────────────────────────────────
320
+
321
+
322
def test_parse_args_recognizes_long_yes() -> None:
    """``--yes`` parses as a boolean flag for the default (empty) verb."""
    parsed = cli._parse_args(["--yes"], "")
    assert parsed.get("yes") is True
326
+
327
+
328
def test_parse_args_recognizes_short_y() -> None:
    """``-y`` is accepted as the short spelling of ``--yes``."""
    parsed = cli._parse_args(["-y"], "")
    assert parsed.get("yes") is True
332
+
333
+
334
def test_parse_args_rejects_unknown_short_flag() -> None:
    """An unknown short flag raises rather than being silently ignored."""
    argv = ["-x", "PATH"]
    with pytest.raises(ValueError, match="unknown flag -x"):
        cli._parse_args(argv, "")
338
+
339
+
340
def test_parse_args_rejects_unknown_long_flag() -> None:
    """An unknown long flag raises ValueError naming the flag."""
    argv = ["--foo", "PATH"]
    with pytest.raises(ValueError, match="unknown flag --foo"):
        cli._parse_args(argv, "")
343
+
344
+
345
def test_parse_args_captures_path_and_paths() -> None:
    """``path`` holds the first positional and ``paths`` holds all of them."""
    positionals = ["a.log", "b.log"]
    parsed = cli._parse_args(positionals, "digest")
    assert parsed["path"] == "a.log"
    assert parsed["paths"] == ["a.log", "b.log"]
350
+
351
+
352
def test_parse_args_wrong_verb_long_form_lead_spelling() -> None:
    """``digest --detect=…`` reports wrong-verb, leading with the long spelling."""
    expected = r"--detect \(-d\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--detect=all"], "digest")
356
+
357
+
358
def test_parse_args_wrong_verb_short_form_lead_spelling() -> None:
    """``digest -d=…`` reports wrong-verb, leading with the short spelling."""
    expected = r"-d \(--detect\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-d=all"], "digest")
362
+
363
+
364
def test_parse_args_wrong_verb_beats_value_shape_for_bare_short() -> None:
    """For a bare ``digest -d``, the wrong-verb error wins over needs-a-value."""
    expected = r"-d \(--detect\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-d"], "digest")
368
+
369
+
370
def test_parse_args_wrong_verb_beats_value_shape_for_bare_long() -> None:
    """For a bare ``digest --detect``, the wrong-verb error wins over needs-a-value."""
    expected = r"--detect \(-d\) is not valid for digest"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--detect"], "digest")
374
+
375
+
376
def test_parse_args_value_on_bool_raises_long() -> None:
    """Giving ``--verbose`` a value is rejected as "takes no value"."""
    expected = r"--verbose \(-v\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--verbose=1"], "")
379
+
380
+
381
def test_parse_args_value_on_bool_raises_short() -> None:
    """Giving ``-v`` a value is rejected with the same long-form-led message."""
    expected = r"--verbose \(-v\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["-v=1"], "")
384
+
385
+
386
def test_parse_args_bundling_known_shorts_suggests_separation() -> None:
    """Bundling two known short flags (``-vy``) is refused with a bundling message."""
    bundled = ["-vy"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(bundled, "")
389
+
390
+
391
def test_parse_args_bundling_unknown_short_is_plain_unknown() -> None:
    """A bundle containing an unknown short (``-vq``) is reported as an unknown flag."""
    bundled = ["-vq"]
    with pytest.raises(ValueError, match="unknown flag -vq"):
        cli._parse_args(bundled, "")
394
+
395
+
396
def test_parse_args_help_eq_value_is_takes_no_value() -> None:
    """``--help=foo`` does not short-circuit to help; it is rejected as taking no value."""
    expected = r"--help \(-h\) takes no value"
    with pytest.raises(ValueError, match=expected):
        cli._parse_args(["--help=foo"], "")
400
+
401
+
402
def test_parse_args_duplicate_flag_last_wins() -> None:
    """When a value flag is repeated, the last occurrence is the one kept."""
    repeated = ["--out=a", "--out=b"]
    parsed = cli._parse_args(repeated, "")
    assert parsed["out"] == "b"
406
+
407
+
408
+ # ── W3: -vv literal token + verbose-level resolution ─────────────────────────
409
+
410
+
411
def test_parse_args_short_v_sets_verbose_true() -> None:
    """``-v`` sets ``verbose`` and leaves ``verbose_level`` unset."""
    parsed = cli._parse_args(["-v"], "")
    assert parsed.get("verbose") is True
    assert "verbose_level" not in parsed
415
+
416
+
417
def test_parse_args_long_verbose_sets_verbose_true() -> None:
    """``--verbose`` sets ``verbose`` to True."""
    parsed = cli._parse_args(["--verbose"], "")
    assert parsed.get("verbose") is True
420
+
421
+
422
def test_parse_args_literal_vv_sets_verbose_level_two() -> None:
    """``-vv`` is accepted as a literal token and yields ``verbose_level == 2``.

    Regression guard: it must not fall into the bundling refusal (the old
    `pass separately` error).
    """
    parsed = cli._parse_args(["-vv"], "")
    assert parsed.get("verbose_level") == 2
427
+
428
+
429
def test_parse_args_combined_v_and_vv_resolves_to_level_two() -> None:
    """``-v -vv`` together resolve to level 2 through ``_resolve_verbose_level``."""
    argv = ["-v", "-vv"]
    level = cli._resolve_verbose_level(cli._parse_args(argv, ""))
    assert level == 2
433
+
434
+
435
def test_parse_args_vvv_still_rejected_as_bundling() -> None:
    """``-vvv`` is not a registered literal, so it gets the bundling refusal."""
    triple_v = ["-vvv"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(triple_v, "")
440
+
441
+
442
def test_parse_args_vy_still_rejected_as_bundling() -> None:
    """``-vy`` is not the literal ``-vv`` and still gets the bundling refusal."""
    # NOTE(review): same input and expectation as
    # test_parse_args_bundling_known_shorts_suggests_separation.
    mixed_bundle = ["-vy"]
    with pytest.raises(ValueError, match="short flags can't be combined"):
        cli._parse_args(mixed_bundle, "")
446
+
447
+
448
def test_parse_args_vv_wrong_verb_matches_v_error_shape() -> None:
    """Under ``init``, ``-vv`` raises the same wrong-verb error as ``-v``.

    Validation order per the original note: identity → verb-membership →
    value-shape.
    """
    expected = r"-v \(--verbose\) is not valid for init"
    # "-vv" is the case under test; "-v" is the parity check.
    for token in ("-vv", "-v"):
        with pytest.raises(ValueError, match=expected):
            cli._parse_args([token], "init")
456
+
457
+
458
def test_resolve_verbose_level_collapses_states() -> None:
    """No flag → 0; ``-v`` → 1; ``-vv`` → 2; both together → 2."""
    cases = (
        ({}, 0),
        ({"verbose": True}, 1),
        ({"verbose_level": 2}, 2),
        ({"verbose": True, "verbose_level": 2}, 2),
    )
    for parsed, expected_level in cases:
        assert cli._resolve_verbose_level(parsed) == expected_level
464
+
465
+
466
def test_runner_kwargs_yes_flag_sets_skip_confirm() -> None:
    """A parsed ``yes`` flag turns into ``skip_confirm=True``."""
    runner_kwargs = _runner_kwargs({"yes": True}, config={})
    assert runner_kwargs.get("skip_confirm") is True
469
+
470
+
471
def test_runner_kwargs_no_yes_flag_skip_confirm_false() -> None:
    """Without the ``yes`` flag, ``skip_confirm`` is ``False``."""
    runner_kwargs = _runner_kwargs({}, config={})
    assert runner_kwargs.get("skip_confirm") is False
474
+
475
+
476
def test_usage_includes_yes_flag(capsys) -> None:
    """The usage text must advertise both ``--yes`` and ``-y``."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "--yes" in usage_text
    assert "-y" in usage_text
482
+
483
+
484
def test_yes_threads_to_run_export(monkeypatch, tmp_path: Path) -> None:
    """`loghunter export <backend> --yes` must reach run_export with skip_confirm=True."""
    seen_kwargs: dict = {}

    def _record_export_call(*args, **kwargs):
        seen_kwargs.update(kwargs)

    def _fake_load(_=None):
        # Build a fresh dict per call, exactly as the inline lambda did.
        return {
            "export": {"splunk": {"host": "192.0.2.20", "port": 8089,
                                  "query": {"default": {"spl": "x"}}}},
        }

    # Per the original note, _run_export imports run_export from
    # loghunter.exporters inside the function, so patching the attribute on
    # the package is what it picks up.
    monkeypatch.setattr("loghunter.exporters.run_export", _record_export_call)
    monkeypatch.setattr(cfg, "load", _fake_load)

    cli.main(["export", "splunk", "--yes"])

    assert seen_kwargs.get("skip_confirm") is True
502
+
503
+
504
def test_yes_threads_to_runner_run(monkeypatch, tmp_path: Path) -> None:
    """A detector invocation with --yes must reach runner.run with skip_confirm=True.

    SEARCH_PATHS is blanked, as in the other CLI tests in this module, so a
    config file present on the host cannot influence the run.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    captured: dict = {}

    def _fake_run(**kwargs):
        captured.update(kwargs)

    monkeypatch.setattr("loghunter.runner.run", _fake_run)
    # Use a single-detector subcommand path that already takes a zeek_dir.
    cli.main(["beacon", f"--zeek-dir={tmp_path}", "--yes"])
    assert captured.get("skip_confirm") is True
515
+
516
+
517
+ # ── CLI flag rename: --output-dir → --out (user-facing contract) ─────────────
518
+
519
+
520
def test_usage_advertises_out_not_output_dir(capsys) -> None:
    """The usage text names ``--out`` and no longer names ``--output-dir``.

    Runtime rejection of --output-dir is deliberately not tested: per the
    original note, the generic --flag=value parser would still produce an
    inert `output_dir` key, which is harmless dead state. The user-facing
    contract is the help text.
    """
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "--out" in usage_text
    assert "--output-dir" not in usage_text
531
+
532
+
533
+ # ── Thread C — CLI polish for the aws detector ────────────────────────────────
534
+
535
+ # Fix 1: aws subcommand + usage
536
+
537
def test_aws_is_a_single_detector_command() -> None:
    """``aws`` must be listed in ``cli._SINGLE_DETECTOR_COMMANDS``."""
    registered = cli._SINGLE_DETECTOR_COMMANDS
    assert "aws" in registered
540
+
541
+
542
def test_usage_lists_aws_subcommand(capsys) -> None:
    """The usage text must show the ``loghunter aws`` subcommand."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "loghunter aws " in usage_text
546
+
547
+
548
def test_usage_lists_duration_subcommand(capsys) -> None:
    """Regression guard: the usage text must keep showing ``loghunter duration``."""
    cli._print_usage()
    usage_text = capsys.readouterr().out
    assert "loghunter duration " in usage_text
553
+
554
+
555
+ # Fix 2: positional PATH → cloudtrail_dir in single-detector mode
556
+
557
def test_aws_positional_path_routes_to_cloudtrail_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``loghunter aws PATH`` hands the positional to ``cloudtrail_dir`` (not
    ``zeek_dir``) and scopes the run so the sibling source dirs stay ``None``."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    seen_kwargs: dict[str, object] = {}

    def _record_run(**kwargs: object) -> None:
        seen_kwargs.update(kwargs)

    monkeypatch.setattr("loghunter.runner.run", _record_run)
    cloudtrail_file = tmp_path / "cloudtrail_2026.json.log"
    cloudtrail_file.write_text("", encoding="utf-8")

    cli._run_single_detector("aws", [str(cloudtrail_file)])

    assert seen_kwargs["detect"] == "aws"
    # The CLI passes the raw string through; per the original note the
    # resolver owns Path conversion.
    assert seen_kwargs["cloudtrail_dir"] == str(cloudtrail_file)
    assert seen_kwargs["zeek_dir"] is None
    assert seen_kwargs["syslog_dir"] is None
    assert seen_kwargs["pihole_dir"] is None
    # Scope rail: the positional limits the run to the source it was routed to.
    assert seen_kwargs["scope"] == frozenset({"cloudtrail_dir"})
583
+
584
+
585
+ # The four source-dir `_tilde_expands` tests were DELETED. ~-expansion of
586
+ # explicit overrides now happens inside common.sources._resolve_one (the
587
+ # sole site for string→Path conversion), tested directly at
588
+ # tests/test_sources.py:test_resolve_sources_tilde_override_expands.
589
+ #
590
+ # The end-to-end `aws ~/path` CLI test is preserved as a seam-style
591
+ # dry-run test in tests/test_source_resolution_seam.py (stage 5).
592
+
593
+
594
def test_runner_kwargs_out_tilde_expands_and_preserves_trailing_slash(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``--out=~/reports/`` reaches ``be_like_water`` with ``~`` expanded and the
    trailing slash intact (per the original note, be_like_water needs the slash
    to fire its directory-intent gate)."""
    monkeypatch.setenv("HOME", str(tmp_path))

    seen: dict[str, str] = {}

    def _spy_be_like_water(target: str):
        from types import SimpleNamespace

        seen["target"] = target
        return SimpleNamespace(is_file=False, path=Path(target))

    monkeypatch.setattr("loghunter.cli.be_like_water", _spy_be_like_water)

    _runner_kwargs({"out": "~/reports/"}, config={})

    received = seen["target"]
    assert received == f"{tmp_path}/reports/"
    assert seen["target"].endswith("/")
    assert "~" not in seen["target"]
616
+
617
+
618
+ # The end-to-end ``aws ~/path`` CLI test (~-positional routes to
619
+ # cloudtrail_dir AND expands ~) moved to tests/test_source_resolution_seam.py
620
+ # as a real CLI dry-run seam test. ~-expansion now happens inside
621
+ # common.sources._resolve_one, not at the CLI seam.
622
+
623
+
624
+ # Analyze positional → named-detector-source routing.
625
+ #
626
+ # Replaces the deleted wrong-source "hint" scold and its five silence tests
627
+ # (the scold became meaningless once `route_positional_source` does the right
628
+ # thing on its own). The router itself is unit-tested in
629
+ # tests/test_sources.py — this test pins the CLI seam wiring: the positional
630
+ # lands on the detector's REQUIRED_LOGS source and the scope rail keeps
631
+ # siblings unloaded.
632
+
633
def test_analyze_positional_reroutes_to_named_detector_source(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``loghunter --detect=aws PATH`` routes the positional to cloudtrail_dir
    (the detector's REQUIRED_LOGS source) and the scope rail keeps siblings
    None — even with a config that sets zeek_dir."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    captured: dict[str, object] = {}
    monkeypatch.setattr("loghunter.runner.run", lambda **kw: captured.update(kw))
    # The stubbed config sets zeek_dir; the assertions below check that it is
    # not passed on to the runner.
    monkeypatch.setattr(cfg, "load", lambda _=None: {
        "loghunter": {"zeek_dir": str(tmp_path / "should-not-be-loaded")},
    })

    fake_path = tmp_path / "events.json.log"
    fake_path.write_text("", encoding="utf-8")
    # Plain string literal: the flag has no placeholders, so no f-string.
    cli._run_all_detectors(["--detect=aws", str(fake_path)])

    assert captured["cloudtrail_dir"] == str(fake_path)
    assert captured["zeek_dir"] is None
    assert captured["scope"] == frozenset({"cloudtrail_dir"})
654
+
655
+
656
+ # ── top-level KeyboardInterrupt handler ──────────────────────────────────────
657
+ #
658
+ # Two Ctrl-C moments coexist:
659
+ # 1. mid-run (load, detect, digest, export compute) → cli.main()'s new arm
660
+ # prints "Stopped." to stderr and exits 130.
661
+ # 2. at the records-found "Continue? [y/N]" prompt in runner.py → the
662
+ # existing (EOFError, KeyboardInterrupt) handler raises ExportAborted,
663
+ # which cli.main() catches and exits 0 with the "aborted by user"
664
+ # message on stdout. Locking both halves prevents a future refactor
665
+ # from collapsing them.
666
+
667
+
668
+ def _write_tiny_zeek_dir(tmp_path: Path) -> Path:
669
+ """Write a two-row flat Zeek conn.log just rich enough to load.
670
+
671
+ Kept local to this module so test_config_cli stays independent of
672
+ test_runner's fixture helpers.
673
+ """
674
+ import json
675
+ from datetime import datetime, timezone
676
+
677
+ zeek_dir = tmp_path / "zeek"
678
+ zeek_dir.mkdir()
679
+ rows = [
680
+ {
681
+ "ts": datetime(2026, 1, 1, tzinfo=timezone.utc).timestamp(),
682
+ "id.orig_h": "192.0.2.10",
683
+ "id.resp_h": "198.51.100.20",
684
+ "id.resp_p": 443,
685
+ "proto": "tcp",
686
+ },
687
+ {
688
+ "ts": datetime(2026, 1, 5, tzinfo=timezone.utc).timestamp(),
689
+ "id.orig_h": "192.0.2.10",
690
+ "id.resp_h": "198.51.100.20",
691
+ "id.resp_p": 443,
692
+ "proto": "tcp",
693
+ },
694
+ ]
695
+ (zeek_dir / "conn.log").write_text(
696
+ "\n".join(json.dumps(r) for r in rows) + "\n", encoding="utf-8"
697
+ )
698
+ return zeek_dir
699
+
700
+
701
def test_cli_top_level_keyboard_interrupt_exits_cleanly(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A KeyboardInterrupt escaping ``cli._main`` ends in exit code 130.

    stderr here is capsys's captured (non-TTY) stream, so the output must be
    byte-exact "Stopped.\\n": no leading blank line and no traceback.
    Script/log capture has to see that same string both today and after the
    TTY-only blank-line polish.
    """

    def _interrupt(_argv=None):
        raise KeyboardInterrupt

    monkeypatch.setattr(cli, "_main", _interrupt)

    with pytest.raises(SystemExit) as excinfo:
        cli.main([])
    assert excinfo.value.code == 130

    streams = capsys.readouterr()
    assert streams.err == "Stopped.\n"
    for text in (streams.err, streams.out):
        assert "Traceback" not in text
    # Sanity: this is the new top-level path, not the prompt-cancel path.
    for text in (streams.err, streams.out):
        assert "aborted by user" not in text
727
+
728
+
729
def test_cli_top_level_keyboard_interrupt_prepends_blank_line_on_tty(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """When stderr reports a TTY, 'Stopped.' is preceded by a blank line.

    The blank line keeps the terminal's '^C' echo from gluing onto
    'Stopped.' on the same row. The cli is the only place that applies this
    discipline; runner liveness narration handles its own clears.
    """

    def _interrupt(_argv=None):
        raise KeyboardInterrupt

    # capsys has already swapped sys.stderr; make that stand-in claim to be
    # a TTY for the duration of this test.
    monkeypatch.setattr(sys.stderr, "isatty", lambda: True)
    monkeypatch.setattr(cli, "_main", _interrupt)

    with pytest.raises(SystemExit) as excinfo:
        cli.main([])
    assert excinfo.value.code == 130

    streams = capsys.readouterr()
    assert streams.err == "\nStopped.\n"
749
+
750
+
751
def test_cli_keyboard_interrupt_at_confirm_prompt_still_exit_zero(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
    tmp_path: Path,
) -> None:
    """Ctrl-C AT the records-found prompt -> ExportAborted -> exit 0.

    This must NOT take the 'Stopped.'/130 path. The test drives cli.main()
    end-to-end to lock the user-visible behavior. --warn-above is not
    threaded by _runner_kwargs, so it is injected through cfg.load(), the
    same way test_cloudtrail_exporter.py:794 does.
    """
    zeek_dir = _write_tiny_zeek_dir(tmp_path)

    # A fresh dict per call, matching what a real config load would return.
    monkeypatch.setattr(
        cfg,
        "load",
        lambda _path=None: {
            "loghunter": {
                "detect": "beacon",
                "warn_above": 1,
                "default_window": "all",
            }
        },
    )

    def _interrupt_prompt(*_args, **_kwargs):
        raise KeyboardInterrupt

    # Any input() call now behaves as if the user pressed Ctrl-C.
    monkeypatch.setattr("builtins.input", _interrupt_prompt)

    argv = ["beacon", f"--zeek-dir={zeek_dir}"]
    with pytest.raises(SystemExit) as excinfo:
        cli.main(argv)
    assert excinfo.value.code == 0

    streams = capsys.readouterr()
    # ExportAborted prints to stdout via cli.main()'s existing arm.
    assert "aborted by user" in streams.out
    # The new top-level path must NOT have fired here.
    for text in (streams.err, streams.out):
        assert "Stopped." not in text
790
+
791
+
792
+ # ── Single-detector positional routing: syslog (v1 promotion) ────────────────
793
+ #
794
+ # Glenn rev-1 required tests. The syslog detector's REQUIRED_LOGS is now
795
+ # empty (dns shape), so _source_for_single_detector_path falls through to
796
+ # OPTIONAL_LOGS pattern matching — and `syslog.log` matches BOTH `*.log*`
797
+ # AND `syslog*.log*`, which previously routed to the zeek_dir default.
798
+ # That regresses flat-syslog. The fix special-cases syslog: directories
799
+ # default to syslog_dir (preserves /var/log convention), files content-sniff
800
+ # (Zeek-origin → zeek_dir, anything else → syslog_dir).
801
+
802
def test_syslog_positional_flat_file_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A flat RFC 3164 syslog file is content-sniffed and sent to syslog_dir."""
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    auth_log = tmp_path / "auth.log"
    sample_line = "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"
    auth_log.write_text(sample_line, encoding="utf-8")

    cli._run_single_detector("syslog", [str(auth_log)])

    assert seen["detect"] == "syslog"
    assert seen["syslog_dir"] == str(auth_log)
    # Scope rail: the positional routes ONE source; siblings stay None.
    assert seen["zeek_dir"] is None
    assert seen["scope"] == frozenset({"syslog_dir"})
827
+
828
+
829
def test_syslog_positional_directory_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A directory positional keeps the /var/log flat-syslog convention.

    Without the syslog special-case, the directory branch of
    _source_for_single_detector_path would default to zeek_dir, which is
    wrong for the historical syslog flow.
    """
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    syslog_root = tmp_path / "log"
    syslog_root.mkdir()
    member = syslog_root / "auth.log"
    member.write_text("<134>Jun 11 12:00:00 host1 sshd[1234]: ok\n", encoding="utf-8")

    cli._run_single_detector("syslog", [str(syslog_root)])

    assert seen["syslog_dir"] == str(syslog_root)
    assert seen["zeek_dir"] is None
    assert seen["scope"] == frozenset({"syslog_dir"})
857
+
858
+
859
def test_syslog_positional_zeek_tsv_file_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A Zeek-TSV ``syslog.log`` positional is content-sniffed to zeek_dir.

    The filename `syslog.log` matches BOTH OPTIONAL_LOGS patterns, so the
    name alone cannot decide. Disambiguation happens via content sniff
    (sniff_format_detailed), the same machinery the digest verb uses.
    Zeek-origin content goes to zeek_dir.
    """
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    # Zeek TSV header directives followed by a single data row.
    tsv_body = (
        "#separator \\x09\n"
        "#set_separator\t,\n"
        "#empty_field\t(empty)\n"
        "#unset_field\t-\n"
        "#path\tsyslog\n"
        "#fields\tts\tuid\tid.orig_h\tmessage\n"
        "#types\ttime\tstring\taddr\tstring\n"
        "1779750000.000000\tCSL01\t192.0.2.10\thello\n"
    )
    tsv_log = tmp_path / "syslog.log"
    tsv_log.write_text(tsv_body, encoding="utf-8")

    cli._run_single_detector("syslog", [str(tsv_log)])

    assert seen["zeek_dir"] == str(tsv_log)
    assert seen["syslog_dir"] is None
    assert seen["scope"] == frozenset({"zeek_dir"})
894
+
895
+
896
def test_syslog_positional_zeek_ndjson_file_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """The NDJSON Zeek front-end is likewise content-sniffed to zeek_dir."""
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    # One NDJSON record that carries the "_path":"syslog" marker.
    ndjson_record = (
        '{"_path":"syslog","ts":1779750000.0,"uid":"CSL01",'
        '"id.orig_h":"192.0.2.10","id.resp_h":"198.51.100.20",'
        '"id.resp_p":514,"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: ok"}\n'
    )
    ndjson_log = tmp_path / "syslog.log"
    ndjson_log.write_text(ndjson_record, encoding="utf-8")

    cli._run_single_detector("syslog", [str(ndjson_log)])

    assert seen["zeek_dir"] == str(ndjson_log)
    assert seen["syslog_dir"] is None
    assert seen["scope"] == frozenset({"zeek_dir"})
922
+
923
+
924
def test_syslog_positional_unrecognized_file_routes_to_syslog_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Content the sniffer cannot identify falls back to syslog_dir.

    A file that is not recognizably a Zeek syslog.log takes the flat-syslog
    default, mirroring the "directory defaults to flat" convention.
    """
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    unknown_log = tmp_path / "mystery.log"
    unknown_log.write_text("lorem ipsum dolor\nsit amet\n", encoding="utf-8")

    cli._run_single_detector("syslog", [str(unknown_log)])

    assert seen["syslog_dir"] == str(unknown_log)
    assert seen["zeek_dir"] is None
    assert seen["scope"] == frozenset({"syslog_dir"})
946
+
947
+
948
def test_syslog_positional_missing_file_does_not_leak_traceback(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Glenn caution #1: a missing positional must not raise during routing.

    The sniff call inside _source_for_single_detector_path must not leak a
    raw traceback for a missing/unreadable path. Routing degrades to
    syslog_dir, and the runner's own file discovery produces the canonical
    "not found" error downstream.
    """
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Patch runner.run so we exit cleanly before the runner tries to read.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    # Deliberately never created on disk.
    absent = tmp_path / "does-not-exist.log"

    # No OSError must propagate from the routing layer itself.
    cli._run_single_detector("syslog", [str(absent)])

    # Degrades to syslog_dir per the convention.
    assert seen["syslog_dir"] == str(absent)
    assert seen["scope"] == frozenset({"syslog_dir"})
973
+
974
+
975
def test_syslog_positional_zeek_ndjson_without_path_routes_to_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """P1 regression (Glenn bug handoff): NDJSON lacking `_path` is still Zeek.

    A Zeek-NDJSON syslog.log emitted without the `_path` directive must
    content-sniff to Zeek and route to zeek_dir. Pre-fix, the conn field-set
    fallback grabbed it and the positional landed at syslog_dir, leaving
    load_syslog with an empty frame.
    """
    seen: dict[str, object] = {}
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    # Swap the runner for a recorder of its keyword arguments.
    monkeypatch.setattr(
        "loghunter.runner.run", lambda **kwargs: seen.update(kwargs)
    )

    # Note: NO _path directive — exactly the upstream-agent shape that
    # triggered the original misroute.
    pathless_record = (
        '{"ts":1779750000.0,"uid":"CSL01",'
        '"id.orig_h":"192.0.2.10","id.orig_p":41514,'
        '"id.resp_h":"198.51.100.20","id.resp_p":514,'
        '"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: placeholder"}\n'
    )
    ndjson_log = tmp_path / "syslog.log"
    ndjson_log.write_text(pathless_record, encoding="utf-8")

    cli._run_single_detector("syslog", [str(ndjson_log)])

    assert seen["zeek_dir"] == str(ndjson_log)
    assert seen["syslog_dir"] is None
    assert seen["scope"] == frozenset({"zeek_dir"})