loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122):
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
tests/test_sources.py ADDED
@@ -0,0 +1,648 @@
1
+ """Unit tests for loghunter.common.sources.
2
+
3
+ Covers three independent concerns:
4
+
5
+ * ``route_positional_source`` — the ONE detect-path positional → source-dir
6
+ router. Replaces the three previous routers (analyze, single-detector,
7
+ hint scold). Pure-function: takes a Path and a pre-imported detector
8
+ module; uses ``REQUIRED_LOGS`` when present, else content-sniff against
9
+ ``OPTIONAL_LOGS``; degrades gracefully on directory positional or sniff
10
+ ``OSError`` to ``OPTIONAL_LOGS[0]["source"]``.
11
+
12
+ * ``resolve_sources`` — analyze-path resolver. Owns the four-key truth
13
+ table (override / scope / config fallback). The ``None``-contract is
14
+ binding: ``overrides.get(key)`` of ``None`` is "no override," identical
15
+ to an absent key. Explicit-override shell semantics — ``~`` expansion,
16
+ CWD-relative ignoring LH_ROOT, absolute round-trip, ``Path`` round-trip —
17
+ are all asserted directly here because ``_resolve_one`` is the SOLE
18
+ string→Path site after the refactor (CLI hands raw strings through).
19
+
20
+ * ``resolve_digest_source`` — digest resolver. Owns the per-schema
21
+ candidate ladder, wrong-key + XOR + not-configured errors. Error strings
22
+ are byte-preserved from the previous run_digest ladders; this file
23
+ pins each string literal.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ from pathlib import Path
30
+ from typing import Any
31
+
32
+ import pytest
33
+
34
+ from loghunter.common import sources
35
+ from loghunter.common.sources import (
36
+ DigestSource,
37
+ ResolvedSources,
38
+ resolve_digest_source,
39
+ resolve_sources,
40
+ route_positional_source,
41
+ )
42
+
43
+
44
+ # ── route_positional_source ───────────────────────────────────────────────────
45
+
46
+
47
+ class _ReqModule:
48
+ """Detector stand-in carrying REQUIRED_LOGS only."""
49
+
50
+ REQUIRED_LOGS = [{"source": "cloudtrail_dir", "pattern": "*.json*"}]
51
+ OPTIONAL_LOGS: list[dict[str, str]] = []
52
+
53
+
54
+ class _OptModule:
55
+ """Detector stand-in mirroring the dns shape: zeek_dir first, pihole_dir second."""
56
+
57
+ REQUIRED_LOGS: list[dict[str, str]] = []
58
+ OPTIONAL_LOGS = [
59
+ {"source": "zeek_dir", "pattern": "dns*.log*"},
60
+ {"source": "pihole_dir", "pattern": "pihole*.log*"},
61
+ ]
62
+
63
+
64
+ class _SyslogShape:
65
+ """Detector stand-in mirroring the syslog shape: syslog_dir first, zeek_dir second."""
66
+
67
+ REQUIRED_LOGS: list[dict[str, str]] = []
68
+ OPTIONAL_LOGS = [
69
+ {"source": "syslog_dir", "pattern": "*.log*"},
70
+ {"source": "zeek_dir", "pattern": "syslog*.log*"},
71
+ ]
72
+
73
+
74
def test_router_required_logs_wins(tmp_path: Path) -> None:
    """With REQUIRED_LOGS present the router answers with its first source,
    even though the file content is not recognizable as anything."""
    target = tmp_path / "anything.log"
    target.write_text("not even json\n", encoding="utf-8")

    routed = route_positional_source(target, detector_module=_ReqModule)

    assert routed == "cloudtrail_dir"
79
+
80
+
81
def test_router_dns_pihole_content_under_neutral_name(tmp_path: Path) -> None:
    """Pi-hole content stored under a name that cannot match ``pihole*.log*``
    must still route to pihole_dir, so the decision follows content, not name."""
    # "mystery" is intentionally bland: the filename pattern cannot be what matched.
    sample = tmp_path / "mystery.log"
    dnsmasq_line = (
        "Jun 11 12:00:00 host1 dnsmasq[1234]: query[A] example.com from 192.0.2.10\n"
    )
    sample.write_text(dnsmasq_line, encoding="utf-8")

    routed = route_positional_source(sample, detector_module=_OptModule)

    assert routed == "pihole_dir"
91
+
92
+
93
def test_router_dns_zeek_content_routes_to_zeek_dir(tmp_path: Path) -> None:
    """A Zeek dns JSON record under a neutral filename routes to zeek_dir."""
    sample = tmp_path / "mystery.log"
    zeek_record = (
        '{"_path":"dns","ts":1779750000.0,"uid":"CDS01",'
        '"id.orig_h":"192.0.2.10","id.resp_h":"198.51.100.20",'
        '"id.resp_p":53,"proto":"udp","query":"example.com","qtype":1}\n'
    )
    sample.write_text(zeek_record, encoding="utf-8")

    routed = route_positional_source(sample, detector_module=_OptModule)

    assert routed == "zeek_dir"
103
+
104
+
105
def test_router_syslog_zeek_content_routes_to_zeek_dir(tmp_path: Path) -> None:
    """A Zeek TSV file whose header declares ``#path syslog`` routes to zeek_dir."""
    sample = tmp_path / "syslog.log"
    # Zeek TSV header block followed by a single data row.
    tsv_text = (
        "#separator \\x09\n"
        "#set_separator\t,\n"
        "#empty_field\t(empty)\n"
        "#unset_field\t-\n"
        "#path\tsyslog\n"
        "#fields\tts\tuid\tid.orig_h\tmessage\n"
        "#types\ttime\tstring\taddr\tstring\n"
        "1779750000.000000\tCSL01\t192.0.2.10\thello\n"
    )
    sample.write_text(tsv_text, encoding="utf-8")

    routed = route_positional_source(sample, detector_module=_SyslogShape)

    assert routed == "zeek_dir"
120
+
121
+
122
def test_router_syslog_flat_content_routes_to_syslog_dir(tmp_path: Path) -> None:
    """A flat syslog line with a ``<PRI>`` prefix routes to syslog_dir."""
    sample = tmp_path / "auth.log"
    syslog_line = (
        "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"
    )
    sample.write_text(syslog_line, encoding="utf-8")

    routed = route_positional_source(sample, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
129
+
130
+
131
def test_router_directory_falls_back_to_first_optional(tmp_path: Path) -> None:
    """For a directory positional the router returns OPTIONAL_LOGS[0]'s source."""
    directory = tmp_path / "logs"
    directory.mkdir()

    via_dns_shape = route_positional_source(directory, detector_module=_OptModule)
    assert via_dns_shape == "zeek_dir"

    via_syslog_shape = route_positional_source(directory, detector_module=_SyslogShape)
    assert via_syslog_shape == "syslog_dir"
137
+
138
+
139
def test_router_missing_file_degrades_silently(tmp_path: Path) -> None:
    """A path that does not exist must not raise; the router returns the
    first OPTIONAL_LOGS source instead."""
    absent = tmp_path / "does-not-exist.log"

    routed = route_positional_source(absent, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
143
+
144
+
145
def test_router_unrecognized_content_falls_back_to_first_optional(tmp_path: Path) -> None:
    """Content that matches no known log format routes to OPTIONAL_LOGS[0]'s source."""
    sample = tmp_path / "mystery.log"
    sample.write_text("lorem ipsum dolor\nsit amet\n", encoding="utf-8")

    routed = route_positional_source(sample, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
150
+
151
+
152
+ # ── resolve_sources — overrides None-contract + scope truth table ─────────────
153
+
154
+
155
+ def _cfg_all_four() -> dict[str, Any]:
156
+ return {"loghunter": {
157
+ "root": "/tmp/lh-root",
158
+ "zeek_dir": "zeek",
159
+ "syslog_dir": "syslog",
160
+ "pihole_dir": "pihole",
161
+ "cloudtrail_dir": "cloudtrail",
162
+ }}
163
+
164
+
165
def test_resolve_sources_none_overrides_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An override whose value is ``None`` behaves exactly like a missing key.

    All four source keys are passed as ``None``; every one must be filled
    from config (root-joined) instead of being treated as an explicit value.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    all_none = dict.fromkeys(
        ("zeek_dir", "syslog_dir", "pihole_dir", "cloudtrail_dir")
    )
    expected = ResolvedSources(
        zeek_dir=[Path("/tmp/lh-root/zeek")],
        syslog_dir=[Path("/tmp/lh-root/syslog")],
        pihole_dir=[Path("/tmp/lh-root/pihole")],
        cloudtrail_dir=[Path("/tmp/lh-root/cloudtrail")],
    )

    resolved = resolve_sources(_cfg_all_four(), overrides=all_none, scope=None)

    assert resolved == expected
188
+
189
+
190
def test_resolve_sources_empty_overrides_dict_matches_none_overrides(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty overrides dict and an all-``None`` overrides dict resolve identically."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = _cfg_all_four()
    all_none = {
        "zeek_dir": None,
        "syslog_dir": None,
        "pihole_dir": None,
        "cloudtrail_dir": None,
    }

    from_empty = resolve_sources(config, overrides={}, scope=None)
    from_nones = resolve_sources(config, overrides=all_none, scope=None)

    assert from_empty == from_nones
204
+
205
+
206
def test_resolve_sources_empty_string_override_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string override must not suppress the config fallback.

    ``""`` is falsy but not ``None``; a resolver that only checks
    ``is not None`` would treat it as an explicit value. This test pins the
    truthiness-based contract: ``""`` means "no override", so the configured
    ``zeek_dir`` (joined to ``root``) is what comes back.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": ""}, scope=None)

    assert resolved.zeek_dir == [Path("/lh/zeek")]
224
+
225
+
226
def test_resolve_sources_scope_suppresses_unscoped_config_fill(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With ``scope`` limited to syslog_dir, only that key is filled from config;
    the three sibling keys resolve to empty lists."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    only_syslog = frozenset({"syslog_dir"})

    resolved = resolve_sources(_cfg_all_four(), overrides={}, scope=only_syslog)

    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
    for sibling in ("zeek_dir", "pihole_dir", "cloudtrail_dir"):
        assert getattr(resolved, sibling) == []
240
+
241
+
242
def test_resolve_sources_override_outside_scope_still_wins(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An explicit override for a key outside ``scope`` is still honored.

    Scope is syslog_dir only, yet the zeek_dir override is applied; the
    in-scope key is config-filled and the remaining two keys stay empty.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    only_syslog = frozenset({"syslog_dir"})

    resolved = resolve_sources(
        _cfg_all_four(),
        overrides={"zeek_dir": "/explicit/zk"},
        scope=only_syslog,
    )

    assert resolved.zeek_dir == [Path("/explicit/zk")]
    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
    for untouched in ("pihole_dir", "cloudtrail_dir"):
        assert getattr(resolved, untouched) == []
260
+
261
+
262
def test_resolve_sources_override_wins_over_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An explicit zeek_dir override replaces the configured value, while
    unscoped resolution still config-fills the sibling syslog_dir."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    explicit = {"zeek_dir": "/explicit/zk"}

    resolved = resolve_sources(_cfg_all_four(), overrides=explicit, scope=None)

    assert resolved.zeek_dir == [Path("/explicit/zk")]
    # scope=None, so siblings are still populated from config.
    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
274
+
275
+
276
+ # ── resolve_sources — explicit override shell semantics (Glenn req) ──────────
277
+
278
+
279
def test_resolve_sources_tilde_override_expands(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``~``-prefixed override is expanded to the user's home directory and
    is not joined onto the configured root."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    expected = Path("~").expanduser() / "zk"
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "~/zk"}, scope=None)

    assert resolved.zeek_dir == [expected]
292
+
293
+
294
def test_resolve_sources_relative_override_ignores_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative override stays relative (``rel/zk``) and is never prefixed
    with the configured root."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "rel/zk"}, scope=None)

    assert resolved.zeek_dir == [Path("rel/zk")]
    # Explicit negative: the root-joined form is the regression being guarded.
    assert resolved.zeek_dir != [Path("/lh-root/rel/zk")]
309
+
310
+
311
def test_resolve_sources_absolute_override_round_trips() -> None:
    """An absolute string override comes back unchanged as a single ``Path``."""
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "/abs/zk"}, scope=None)

    assert resolved.zeek_dir == [Path("/abs/zk")]
318
+
319
+
320
def test_resolve_sources_path_override_round_trips() -> None:
    """An override supplied as a ``Path`` resolves to the same result as the
    equivalent string override."""
    config = {"loghunter": {"root": "/lh-root"}}
    as_path = Path("/abs/zk")

    resolved = resolve_sources(config, overrides={"zeek_dir": as_path}, scope=None)

    assert resolved.zeek_dir == [Path("/abs/zk")]
329
+
330
+
331
def test_resolve_sources_config_relative_uses_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative value that comes from config (not an override) is joined
    onto the configured ``root``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={}, scope=None)

    assert resolved.zeek_dir == [Path("/lh-root/zeek")]
344
+
345
+
346
def test_resolve_sources_env_lh_root_wins_over_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When ``LOGHUNTER_ROOT`` is set in the environment it takes precedence
    over the config ``root`` key for config-sourced relative values."""
    monkeypatch.setenv("LOGHUNTER_ROOT", "/env-root")
    config = {"loghunter": {"root": "/cfg-root", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={}, scope=None)

    assert resolved.zeek_dir == [Path("/env-root/zeek")]
359
+
360
+
361
+ # ── resolve_digest_source ─────────────────────────────────────────────────────
362
+
363
+
364
@pytest.mark.parametrize("bad", ["pihole_dir", "syslog_dir", "cloudtrail_dir"])
def test_digest_conn_wrong_key_byte_preserved(bad: str) -> None:
    """digest conn rejects every non-zeek_dir override with the exact text.

    Parametrized (rather than looping inside one test) so that each rejected
    key is reported independently; a failure on the first key no longer hides
    the outcome for the remaining keys.
    """
    with pytest.raises(ValueError) as exc:
        resolve_digest_source(
            {"loghunter": {}}, "conn",
            overrides={bad: "/x"},
        )
    assert str(exc.value) == (
        f"digest conn: {bad} is not valid for the conn schema"
    )
375
+
376
+
377
@pytest.mark.parametrize("bad", ["syslog_dir", "cloudtrail_dir"])
def test_digest_dns_wrong_key_byte_preserved(bad: str) -> None:
    """digest dns rejects overrides for keys outside its schema with the exact text.

    Parametrized (rather than looping inside one test) so that each rejected
    key is reported independently; a failure on the first key no longer hides
    the outcome for the remaining key.
    """
    with pytest.raises(ValueError) as exc:
        resolve_digest_source(
            {"loghunter": {}}, "dns",
            overrides={bad: "/x"},
        )
    assert str(exc.value) == (
        f"digest dns: {bad} is not valid for the dns schema"
    )
387
+
388
+
389
@pytest.mark.parametrize("bad", ["pihole_dir", "cloudtrail_dir"])
def test_digest_syslog_wrong_key_byte_preserved(bad: str) -> None:
    """digest syslog rejects overrides for keys outside its schema with the exact text.

    Parametrized (rather than looping inside one test) so that each rejected
    key is reported independently; a failure on the first key no longer hides
    the outcome for the remaining key.
    """
    with pytest.raises(ValueError) as exc:
        resolve_digest_source(
            {"loghunter": {}}, "syslog",
            overrides={bad: "/x"},
        )
    assert str(exc.value) == (
        f"digest syslog: {bad} is not valid for the syslog schema"
    )
399
+
400
+
401
@pytest.mark.parametrize("bad", ["zeek_dir", "pihole_dir", "syslog_dir"])
def test_digest_cloudtrail_wrong_key_byte_preserved(bad: str) -> None:
    """digest cloudtrail rejects every non-cloudtrail_dir override with the exact text.

    Parametrized (rather than looping inside one test) so that each rejected
    key is reported independently; a failure on the first key no longer hides
    the outcome for the remaining keys.
    """
    with pytest.raises(ValueError) as exc:
        resolve_digest_source(
            {"loghunter": {}}, "cloudtrail",
            overrides={bad: "/x"},
        )
    assert str(exc.value) == (
        f"digest cloudtrail: {bad} is not valid for the cloudtrail schema"
    )
411
+
412
+
413
def test_digest_dns_xor_byte_preserved() -> None:
    """Supplying both zeek_dir and pihole_dir overrides to the dns digest
    raises ValueError with the exact pinned message."""
    expected = "digest dns: cannot use both --zeek-dir and --pihole-dir"
    both = {"zeek_dir": "/z", "pihole_dir": "/p"}

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "dns", overrides=both)

    assert str(excinfo.value) == expected
422
+
423
+
424
def test_digest_syslog_xor_byte_preserved() -> None:
    """Supplying both zeek_dir and syslog_dir overrides to the syslog digest
    raises ValueError with the exact pinned message."""
    expected = "digest syslog: cannot use both zeek_dir and syslog_dir"
    both = {"zeek_dir": "/z", "syslog_dir": "/s"}

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "syslog", overrides=both)

    assert str(excinfo.value) == expected
433
+
434
+
435
def test_digest_conn_not_configured_byte_preserved() -> None:
    """With no override and no configured zeek_dir, the conn digest raises
    ValueError with the exact pinned message."""
    expected = (
        "digest: zeek_dir not configured — pass a PATH or set "
        "[loghunter].zeek_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "conn", overrides={})

    assert str(excinfo.value) == expected
442
+
443
+
444
def test_digest_dns_not_configured_byte_preserved() -> None:
    """With no override and neither dns source configured, the dns digest
    raises ValueError with the exact pinned message."""
    expected = (
        "digest dns: zeek_dir or pihole_dir not configured — "
        "pass a PATH, --zeek-dir/--pihole-dir, or set one in config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "dns", overrides={})

    assert str(excinfo.value) == expected
451
+
452
+
453
def test_digest_syslog_not_configured_byte_preserved() -> None:
    """With no override and no syslog source configured, the syslog digest
    raises ValueError with the exact pinned message."""
    expected = (
        "digest syslog: no syslog source configured — pass a PATH, "
        "--zeek-dir, or set [loghunter].syslog_dir / "
        "[loghunter].zeek_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "syslog", overrides={})

    assert str(excinfo.value) == expected
461
+
462
+
463
def test_digest_cloudtrail_not_configured_byte_preserved() -> None:
    """With no override and no configured cloudtrail_dir, the cloudtrail
    digest raises ValueError with the exact pinned message."""
    expected = (
        "digest cloudtrail: cloudtrail_dir not configured — pass a PATH, "
        "--cloudtrail-dir, or set [loghunter].cloudtrail_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "cloudtrail", overrides={})

    assert str(excinfo.value) == expected
470
+
471
+
472
def test_digest_conn_override_wins() -> None:
    """An explicit zeek_dir override resolves the conn digest to that
    directory, with ``feed`` left as ``None``."""
    expected = DigestSource(
        source_key="zeek_dir",
        directory=Path("/explicit/zk"),
        feed=None,
    )

    resolved = resolve_digest_source(
        {"loghunter": {}},
        "conn",
        overrides={"zeek_dir": "/explicit/zk"},
    )

    assert resolved == expected
482
+
483
+
484
def test_digest_dns_zeek_preference_on_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets both zeek_dir and pihole_dir and there is no
    override, the dns digest picks zeek_dir with feed ``"zeek"``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk", "pihole_dir": "/cfg/ph"}}
    expected = DigestSource(
        source_key="zeek_dir", directory=Path("/cfg/zk"), feed="zeek",
    )

    resolved = resolve_digest_source(config, "dns", overrides={})

    assert resolved == expected
498
+
499
+
500
def test_digest_dns_pihole_when_only_pihole_configured(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets only pihole_dir, the dns digest resolves to it with
    feed ``"pihole"``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"pihole_dir": "/cfg/ph"}}
    expected = DigestSource(
        source_key="pihole_dir", directory=Path("/cfg/ph"), feed="pihole",
    )

    resolved = resolve_digest_source(config, "dns", overrides={})

    assert resolved == expected
512
+
513
+
514
def test_digest_syslog_syslog_preference_on_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets both syslog_dir and zeek_dir and there is no
    override, the syslog digest picks syslog_dir with feed ``"syslog"``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk", "syslog_dir": "/cfg/sl"}}
    expected = DigestSource(
        source_key="syslog_dir", directory=Path("/cfg/sl"), feed="syslog",
    )

    resolved = resolve_digest_source(config, "syslog", overrides={})

    assert resolved == expected
528
+
529
+
530
def test_digest_syslog_zeek_when_only_zeek_configured(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets only zeek_dir, the syslog digest resolves to it with
    feed ``"zeek"``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}
    expected = DigestSource(
        source_key="zeek_dir", directory=Path("/cfg/zk"), feed="zeek",
    )

    resolved = resolve_digest_source(config, "syslog", overrides={})

    assert resolved == expected
542
+
543
+
544
def test_digest_cloudtrail_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no override, the cloudtrail digest resolves to the configured
    cloudtrail_dir and leaves ``feed`` as ``None``."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"cloudtrail_dir": "/cfg/ct"}}
    expected = DigestSource(
        source_key="cloudtrail_dir", directory=Path("/cfg/ct"), feed=None,
    )

    resolved = resolve_digest_source(config, "cloudtrail", overrides={})

    assert resolved == expected
556
+
557
+
558
def test_digest_none_overrides_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The None-contract applies to the digest resolver too: overrides whose
    values are all ``None`` behave as if no override were given.

    Both the chosen key and the resolved directory are asserted, matching the
    sibling empty-string tests, so a regression that picks the right key but
    resolves the wrong path is still caught.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    ds = resolve_digest_source(
        {"loghunter": {"syslog_dir": "/cfg/sl"}},
        "syslog",
        overrides={"zeek_dir": None, "syslog_dir": None,
                   "pihole_dir": None, "cloudtrail_dir": None},
    )
    assert ds.source_key == "syslog_dir"
    assert ds.directory == Path("/cfg/sl")
571
+
572
+
573
def test_digest_empty_string_override_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string zeek_dir override on a conn digest must not block the
    config fallback: the configured zeek_dir is what gets resolved."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}

    resolved = resolve_digest_source(config, "conn", overrides={"zeek_dir": ""})

    assert (resolved.source_key, resolved.directory) == ("zeek_dir", Path("/cfg/zk"))
587
+
588
+
589
def test_digest_empty_string_override_does_not_trigger_wrong_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string syslog_dir override on a conn digest counts as "no
    override", so the wrong-key error must not fire and config still fills
    zeek_dir."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}
    blanks = {"zeek_dir": "", "syslog_dir": ""}

    resolved = resolve_digest_source(config, "conn", overrides=blanks)

    assert (resolved.source_key, resolved.directory) == ("zeek_dir", Path("/cfg/zk"))
603
+
604
+
605
def test_digest_override_root_provenance_uses_shell_semantics(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An override into resolve_digest_source resolves through shell semantics
    (no LH_ROOT prefix), and the configured value is not used.

    Two cases are checked. The absolute override shows that the override beats
    the configured zeek_dir. The relative override is the one that actually
    exercises the "no root prefix" property: an absolute path looks the same
    whether or not a root is joined in front of it, so on its own it cannot
    detect a regression that starts applying the root to overrides.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    cfg = {"loghunter": {"root": "/lh-root", "zeek_dir": "should-not-be-used"}}

    ds = resolve_digest_source(
        cfg,
        "conn",
        overrides={"zeek_dir": "/abs/zk"},
    )
    assert ds.directory == Path("/abs/zk")

    ds_rel = resolve_digest_source(
        cfg,
        "conn",
        overrides={"zeek_dir": "rel/zk"},
    )
    # The configured root must not appear anywhere above the resolved path.
    assert Path("/lh-root") not in ds_rel.directory.parents
    assert ds_rel.directory != Path("/lh-root/rel/zk")
617
+
618
+
619
def test_digest_config_relative_uses_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative zeek_dir that comes from config is joined onto the
    configured ``root`` by the digest resolver."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root", "zeek_dir": "zeek"}}

    resolved = resolve_digest_source(config, "conn", overrides={})

    assert resolved.directory == Path("/lh-root/zeek")
630
+
631
+
632
+ # ── three-way drift tripwire for the digest (schema, source_key) keyspace ────
633
+
634
+
635
def test_digest_schema_source_keyspaces_agree() -> None:
    """Drift tripwire: three tables must describe the same set of
    ``(schema, source_key)`` pairs.

    The legal pairs are derived from ``_DIGEST_CANDIDATES``; the key sets of
    ``_DIGEST_FEED`` and ``_DIGEST_PATTERN_AND_EMPTY`` must match them
    exactly, so adding a combination to only some of the tables fails here.
    """
    # Imported lazily: these are private names used only by this tripwire.
    from loghunter.common.sources import _DIGEST_CANDIDATES, _DIGEST_FEED
    from loghunter.runner import _DIGEST_PATTERN_AND_EMPTY

    legal = set()
    for schema, source_keys in _DIGEST_CANDIDATES.items():
        legal.update((schema, source_key) for source_key in source_keys)

    assert set(_DIGEST_FEED) == legal
    assert set(_DIGEST_PATTERN_AND_EMPTY) == legal