loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
tests/test_sources.py
ADDED
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
"""Unit tests for loghunter.common.sources.
|
|
2
|
+
|
|
3
|
+
Covers three independent concerns:
|
|
4
|
+
|
|
5
|
+
* ``route_positional_source`` — the ONE detect-path positional → source-dir
|
|
6
|
+
router. Replaces the three previous routers (analyze, single-detector,
|
|
7
|
+
hint scold). Pure-function: takes a Path and a pre-imported detector
|
|
8
|
+
module; uses ``REQUIRED_LOGS`` when present, else content-sniff against
|
|
9
|
+
``OPTIONAL_LOGS``; degrades gracefully on directory positional or sniff
|
|
10
|
+
``OSError`` to ``OPTIONAL_LOGS[0]["source"]``.
|
|
11
|
+
|
|
12
|
+
* ``resolve_sources`` — analyze-path resolver. Owns the four-key truth
|
|
13
|
+
table (override / scope / config fallback). The ``None``-contract is
|
|
14
|
+
binding: ``overrides.get(key)`` of ``None`` is "no override," identical
|
|
15
|
+
to an absent key. Explicit-override shell semantics — ``~`` expansion,
|
|
16
|
+
CWD-relative ignoring LH_ROOT, absolute round-trip, ``Path`` round-trip —
|
|
17
|
+
are all asserted directly here because ``_resolve_one`` is the SOLE
|
|
18
|
+
string→Path site after the refactor (CLI hands raw strings through).
|
|
19
|
+
|
|
20
|
+
* ``resolve_digest_source`` — digest resolver. Owns the per-schema
|
|
21
|
+
candidate ladder, wrong-key + XOR + not-configured errors. Error strings
|
|
22
|
+
are byte-preserved from the previous run_digest ladders; this file
|
|
23
|
+
pins each string literal.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import os
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import pytest
|
|
33
|
+
|
|
34
|
+
from loghunter.common import sources
|
|
35
|
+
from loghunter.common.sources import (
|
|
36
|
+
DigestSource,
|
|
37
|
+
ResolvedSources,
|
|
38
|
+
resolve_digest_source,
|
|
39
|
+
resolve_sources,
|
|
40
|
+
route_positional_source,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ── route_positional_source ───────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class _ReqModule:
|
|
48
|
+
"""Detector stand-in carrying REQUIRED_LOGS only."""
|
|
49
|
+
|
|
50
|
+
REQUIRED_LOGS = [{"source": "cloudtrail_dir", "pattern": "*.json*"}]
|
|
51
|
+
OPTIONAL_LOGS: list[dict[str, str]] = []
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class _OptModule:
|
|
55
|
+
"""Detector stand-in mirroring the dns shape: zeek_dir first, pihole_dir second."""
|
|
56
|
+
|
|
57
|
+
REQUIRED_LOGS: list[dict[str, str]] = []
|
|
58
|
+
OPTIONAL_LOGS = [
|
|
59
|
+
{"source": "zeek_dir", "pattern": "dns*.log*"},
|
|
60
|
+
{"source": "pihole_dir", "pattern": "pihole*.log*"},
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class _SyslogShape:
|
|
65
|
+
"""Detector stand-in mirroring the syslog shape: syslog_dir first, zeek_dir second."""
|
|
66
|
+
|
|
67
|
+
REQUIRED_LOGS: list[dict[str, str]] = []
|
|
68
|
+
OPTIONAL_LOGS = [
|
|
69
|
+
{"source": "syslog_dir", "pattern": "*.log*"},
|
|
70
|
+
{"source": "zeek_dir", "pattern": "syslog*.log*"},
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_router_required_logs_wins(tmp_path: Path) -> None:
    """A detector with REQUIRED_LOGS routes to its first required source,
    even when the file content is not recognisable as anything."""
    target = tmp_path / "anything.log"
    target.write_text("not even json\n", encoding="utf-8")

    routed = route_positional_source(target, detector_module=_ReqModule)

    assert routed == "cloudtrail_dir"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_router_dns_pihole_content_under_neutral_name(tmp_path: Path) -> None:
    """Pi-hole content stored under a name that does not match ``pihole*.log*``
    must still route to pihole_dir. Locks the fnmatch→content-sniff migration."""
    dnsmasq_line = (
        "Jun 11 12:00:00 host1 dnsmasq[1234]: query[A] example.com from 192.0.2.10\n"
    )
    # The bland "mystery" stem cannot satisfy the pihole*.log* pattern.
    log_file = tmp_path / "mystery.log"
    log_file.write_text(dnsmasq_line, encoding="utf-8")

    routed = route_positional_source(log_file, detector_module=_OptModule)

    assert routed == "pihole_dir"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_router_dns_zeek_content_routes_to_zeek_dir(tmp_path: Path) -> None:
    """Zeek dns JSON content under a neutral file name routes to zeek_dir."""
    record = (
        '{"_path":"dns","ts":1779750000.0,"uid":"CDS01",'
        '"id.orig_h":"192.0.2.10","id.resp_h":"198.51.100.20",'
        '"id.resp_p":53,"proto":"udp","query":"example.com","qtype":1}\n'
    )
    log_file = tmp_path / "mystery.log"
    log_file.write_text(record, encoding="utf-8")

    routed = route_positional_source(log_file, detector_module=_OptModule)

    assert routed == "zeek_dir"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_router_syslog_zeek_content_routes_to_zeek_dir(tmp_path: Path) -> None:
    """A Zeek TSV file whose header carries ``#path syslog`` routes to zeek_dir."""
    tsv_lines = (
        "#separator \\x09\n",
        "#set_separator\t,\n",
        "#empty_field\t(empty)\n",
        "#unset_field\t-\n",
        "#path\tsyslog\n",
        "#fields\tts\tuid\tid.orig_h\tmessage\n",
        "#types\ttime\tstring\taddr\tstring\n",
        "1779750000.000000\tCSL01\t192.0.2.10\thello\n",
    )
    log_file = tmp_path / "syslog.log"
    log_file.write_text("".join(tsv_lines), encoding="utf-8")

    routed = route_positional_source(log_file, detector_module=_SyslogShape)

    assert routed == "zeek_dir"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def test_router_syslog_flat_content_routes_to_syslog_dir(tmp_path: Path) -> None:
    """A flat syslog line (priority-prefixed sshd message) routes to syslog_dir."""
    syslog_line = "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"
    log_file = tmp_path / "auth.log"
    log_file.write_text(syslog_line, encoding="utf-8")

    routed = route_positional_source(log_file, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_router_directory_falls_back_to_first_optional(tmp_path: Path) -> None:
    """A directory positional resolves to the first OPTIONAL_LOGS source of
    whichever detector stand-in is supplied."""
    directory = tmp_path / "logs"
    directory.mkdir()

    expectations = ((_OptModule, "zeek_dir"), (_SyslogShape, "syslog_dir"))
    for module, first_optional in expectations:
        assert route_positional_source(directory, detector_module=module) == first_optional
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_router_missing_file_degrades_silently(tmp_path: Path) -> None:
    """A path that does not exist must not raise; the router answers with the
    first OPTIONAL_LOGS source instead."""
    absent = tmp_path / "does-not-exist.log"

    routed = route_positional_source(absent, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_router_unrecognized_content_falls_back_to_first_optional(tmp_path: Path) -> None:
    """Content no recognizer claims falls through to the first OPTIONAL_LOGS source."""
    log_file = tmp_path / "mystery.log"
    log_file.write_text("lorem ipsum dolor\nsit amet\n", encoding="utf-8")

    routed = route_positional_source(log_file, detector_module=_SyslogShape)

    assert routed == "syslog_dir"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ── resolve_sources — overrides None-contract + scope truth table ─────────────
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _cfg_all_four() -> dict[str, Any]:
|
|
156
|
+
return {"loghunter": {
|
|
157
|
+
"root": "/tmp/lh-root",
|
|
158
|
+
"zeek_dir": "zeek",
|
|
159
|
+
"syslog_dir": "syslog",
|
|
160
|
+
"pihole_dir": "pihole",
|
|
161
|
+
"cloudtrail_dir": "cloudtrail",
|
|
162
|
+
}}
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_resolve_sources_none_overrides_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Glenn req #1: an override whose value is None behaves like a missing key.

    With all four overrides set to None and no scope, every source-dir must be
    filled from config and rooted at the config ``root``. Callers that forward
    their None-default keyword arguments unchanged rely on this.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    source_keys = ("zeek_dir", "syslog_dir", "pihole_dir", "cloudtrail_dir")
    all_none = dict.fromkeys(source_keys)

    resolved = resolve_sources(_cfg_all_four(), overrides=all_none, scope=None)

    expected = ResolvedSources(
        zeek_dir=[Path("/tmp/lh-root/zeek")],
        syslog_dir=[Path("/tmp/lh-root/syslog")],
        pihole_dir=[Path("/tmp/lh-root/pihole")],
        cloudtrail_dir=[Path("/tmp/lh-root/cloudtrail")],
    )
    assert resolved == expected
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_resolve_sources_empty_overrides_dict_matches_none_overrides(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty overrides dict and an all-None overrides dict resolve identically."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = _cfg_all_four()
    all_none = {
        "zeek_dir": None,
        "syslog_dir": None,
        "pihole_dir": None,
        "cloudtrail_dir": None,
    }

    from_empty = resolve_sources(config, overrides={}, scope=None)
    from_nones = resolve_sources(config, overrides=all_none, scope=None)

    assert from_empty == from_nones
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def test_resolve_sources_empty_string_override_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string override must not block the config fallback.

    A bare ``--zeek-dir=`` reaches the resolver as ``""`` rather than None.
    The pre-refactor seam tested truthiness, so ``""`` meant "use config".
    A plain ``is not None`` check would instead treat ``""`` as a real
    override and suppress the configured value; this test pins the old,
    truthiness-based behaviour.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": ""}, scope=None)

    assert resolved.zeek_dir == [Path("/lh/zeek")]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_resolve_sources_scope_suppresses_unscoped_config_fill(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With a scope of just syslog_dir, only syslog_dir is filled from config;
    the other three source-dirs stay empty."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    only_syslog = frozenset({"syslog_dir"})

    resolved = resolve_sources(_cfg_all_four(), overrides={}, scope=only_syslog)

    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
    for sibling in ("zeek_dir", "pihole_dir", "cloudtrail_dir"):
        assert getattr(resolved, sibling) == []
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_resolve_sources_override_outside_scope_still_wins(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An explicit override is honoured even for a key that is not in ``scope``.

    This is what lets ``loghunter syslog PATH --zeek-dir=/x`` widen a run
    whose positional scopes it to syslog_dir.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    only_syslog = frozenset({"syslog_dir"})
    explicit = {"zeek_dir": "/explicit/zk"}

    resolved = resolve_sources(_cfg_all_four(), overrides=explicit, scope=only_syslog)

    assert resolved.zeek_dir == [Path("/explicit/zk")]
    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
    for unscoped in ("pihole_dir", "cloudtrail_dir"):
        assert getattr(resolved, unscoped) == []
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def test_resolve_sources_override_wins_over_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An explicit zeek_dir override replaces the configured zeek_dir while,
    with no scope, sibling keys are still filled from config."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    explicit = {"zeek_dir": "/explicit/zk"}

    resolved = resolve_sources(_cfg_all_four(), overrides=explicit, scope=None)

    assert resolved.zeek_dir == [Path("/explicit/zk")]
    # scope=None leaves the config fallback active for the other keys.
    assert resolved.syslog_dir == [Path("/tmp/lh-root/syslog")]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# ── resolve_sources — explicit override shell semantics (Glenn req) ──────────
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def test_resolve_sources_tilde_override_expands(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``~``-anchored override comes back expanded to the user's home
    directory rather than joined onto the config root."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    expected = Path(str(Path("~").expanduser())) / "zk"
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "~/zk"}, scope=None)

    assert resolved.zeek_dir == [expected]
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_resolve_sources_relative_override_ignores_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative override is returned as-is (CWD-relative) and is never
    prefixed with the configured root — the CLI-vs-config provenance split."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "rel/zk"}, scope=None)

    zeek_dirs = resolved.zeek_dir
    assert zeek_dirs == [Path("rel/zk")]
    # Explicit negative: the root must not have been prepended.
    assert zeek_dirs != [Path("/lh-root/rel/zk")]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def test_resolve_sources_absolute_override_round_trips() -> None:
    """An absolute string override comes back as the same absolute path."""
    config = {"loghunter": {"root": "/lh-root"}}

    resolved = resolve_sources(config, overrides={"zeek_dir": "/abs/zk"}, scope=None)

    assert resolved.zeek_dir == [Path("/abs/zk")]
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def test_resolve_sources_path_override_round_trips() -> None:
    """A ``Path`` override resolves to the same result as its string form."""
    config = {"loghunter": {"root": "/lh-root"}}
    as_path = Path("/abs/zk")

    resolved = resolve_sources(config, overrides={"zeek_dir": as_path}, scope=None)

    assert resolved.zeek_dir == [Path("/abs/zk")]
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def test_resolve_sources_config_relative_uses_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative value that comes from config IS joined onto the configured
    root. Mirrors the root-provenance rail so a drift is visible from this
    file alone."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={}, scope=None)

    assert resolved.zeek_dir == [Path("/lh-root/zeek")]
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def test_resolve_sources_env_lh_root_wins_over_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When LOGHUNTER_ROOT is set in the environment it takes precedence over
    the config ``root`` key for config-sourced relative values."""
    monkeypatch.setenv("LOGHUNTER_ROOT", "/env-root")
    config = {"loghunter": {"root": "/cfg-root", "zeek_dir": "zeek"}}

    resolved = resolve_sources(config, overrides={}, scope=None)

    assert resolved.zeek_dir == [Path("/env-root/zeek")]
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# ── resolve_digest_source ─────────────────────────────────────────────────────
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def test_digest_conn_wrong_key_byte_preserved() -> None:
    """Each override key other than zeek_dir makes the conn digest raise
    ValueError with the exact pinned message."""
    for wrong_key in ("pihole_dir", "syslog_dir", "cloudtrail_dir"):
        expected = f"digest conn: {wrong_key} is not valid for the conn schema"
        with pytest.raises(ValueError) as excinfo:
            resolve_digest_source(
                {"loghunter": {}}, "conn", overrides={wrong_key: "/x"}
            )
        assert str(excinfo.value) == expected
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def test_digest_dns_wrong_key_byte_preserved() -> None:
    """syslog_dir and cloudtrail_dir overrides each make the dns digest raise
    ValueError with the exact pinned message."""
    for wrong_key in ("syslog_dir", "cloudtrail_dir"):
        expected = f"digest dns: {wrong_key} is not valid for the dns schema"
        with pytest.raises(ValueError) as excinfo:
            resolve_digest_source(
                {"loghunter": {}}, "dns", overrides={wrong_key: "/x"}
            )
        assert str(excinfo.value) == expected
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def test_digest_syslog_wrong_key_byte_preserved() -> None:
    """pihole_dir and cloudtrail_dir overrides each make the syslog digest
    raise ValueError with the exact pinned message."""
    for wrong_key in ("pihole_dir", "cloudtrail_dir"):
        expected = f"digest syslog: {wrong_key} is not valid for the syslog schema"
        with pytest.raises(ValueError) as excinfo:
            resolve_digest_source(
                {"loghunter": {}}, "syslog", overrides={wrong_key: "/x"}
            )
        assert str(excinfo.value) == expected
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def test_digest_cloudtrail_wrong_key_byte_preserved() -> None:
    """Each override key other than cloudtrail_dir makes the cloudtrail digest
    raise ValueError with the exact pinned message."""
    for wrong_key in ("zeek_dir", "pihole_dir", "syslog_dir"):
        expected = (
            f"digest cloudtrail: {wrong_key} is not valid for the cloudtrail schema"
        )
        with pytest.raises(ValueError) as excinfo:
            resolve_digest_source(
                {"loghunter": {}}, "cloudtrail", overrides={wrong_key: "/x"}
            )
        assert str(excinfo.value) == expected
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def test_digest_dns_xor_byte_preserved() -> None:
    """Supplying both zeek_dir and pihole_dir overrides to the dns digest
    raises ValueError with the exact pinned message."""
    both = {"zeek_dir": "/z", "pihole_dir": "/p"}
    expected = "digest dns: cannot use both --zeek-dir and --pihole-dir"

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "dns", overrides=both)

    assert str(excinfo.value) == expected
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def test_digest_syslog_xor_byte_preserved() -> None:
    """Supplying both zeek_dir and syslog_dir overrides to the syslog digest
    raises ValueError with the exact pinned message."""
    both = {"zeek_dir": "/z", "syslog_dir": "/s"}
    expected = "digest syslog: cannot use both zeek_dir and syslog_dir"

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "syslog", overrides=both)

    assert str(excinfo.value) == expected
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def test_digest_conn_not_configured_byte_preserved() -> None:
    """With no override and no configured zeek_dir, the conn digest raises
    ValueError with the exact pinned message."""
    expected = (
        "digest: zeek_dir not configured — pass a PATH or set "
        "[loghunter].zeek_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "conn", overrides={})

    assert str(excinfo.value) == expected
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def test_digest_dns_not_configured_byte_preserved() -> None:
    """With no override and neither dns source configured, the dns digest
    raises ValueError with the exact pinned message."""
    expected = (
        "digest dns: zeek_dir or pihole_dir not configured — "
        "pass a PATH, --zeek-dir/--pihole-dir, or set one in config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "dns", overrides={})

    assert str(excinfo.value) == expected
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def test_digest_syslog_not_configured_byte_preserved() -> None:
    """With no override and neither syslog source configured, the syslog
    digest raises ValueError with the exact pinned message."""
    expected = (
        "digest syslog: no syslog source configured — pass a PATH, "
        "--zeek-dir, or set [loghunter].syslog_dir / "
        "[loghunter].zeek_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "syslog", overrides={})

    assert str(excinfo.value) == expected
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def test_digest_cloudtrail_not_configured_byte_preserved() -> None:
    """With no override and no configured cloudtrail_dir, the cloudtrail
    digest raises ValueError with the exact pinned message."""
    expected = (
        "digest cloudtrail: cloudtrail_dir not configured — pass a PATH, "
        "--cloudtrail-dir, or set [loghunter].cloudtrail_dir in your config"
    )

    with pytest.raises(ValueError) as excinfo:
        resolve_digest_source({"loghunter": {}}, "cloudtrail", overrides={})

    assert str(excinfo.value) == expected
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def test_digest_conn_override_wins() -> None:
    """A zeek_dir override on the conn digest yields that directory, keyed as
    zeek_dir, with no feed."""
    explicit = {"zeek_dir": "/explicit/zk"}

    resolved = resolve_digest_source({"loghunter": {}}, "conn", overrides=explicit)

    expected = DigestSource(
        source_key="zeek_dir",
        directory=Path("/explicit/zk"),
        feed=None,
    )
    assert resolved == expected
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def test_digest_dns_zeek_preference_on_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets both zeek_dir and pihole_dir, the dns digest picks
    zeek_dir with the ``zeek`` feed."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk", "pihole_dir": "/cfg/ph"}}

    resolved = resolve_digest_source(config, "dns", overrides={})

    expected = DigestSource(
        source_key="zeek_dir",
        directory=Path("/cfg/zk"),
        feed="zeek",
    )
    assert resolved == expected
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def test_digest_dns_pihole_when_only_pihole_configured(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets only pihole_dir, the dns digest picks pihole_dir with
    the ``pihole`` feed."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"pihole_dir": "/cfg/ph"}}

    resolved = resolve_digest_source(config, "dns", overrides={})

    expected = DigestSource(
        source_key="pihole_dir",
        directory=Path("/cfg/ph"),
        feed="pihole",
    )
    assert resolved == expected
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def test_digest_syslog_syslog_preference_on_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets both syslog_dir and zeek_dir, the syslog digest picks
    syslog_dir with the ``syslog`` feed."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk", "syslog_dir": "/cfg/sl"}}

    resolved = resolve_digest_source(config, "syslog", overrides={})

    expected = DigestSource(
        source_key="syslog_dir",
        directory=Path("/cfg/sl"),
        feed="syslog",
    )
    assert resolved == expected
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def test_digest_syslog_zeek_when_only_zeek_configured(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When config sets only zeek_dir, the syslog digest picks zeek_dir with
    the ``zeek`` feed."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}

    resolved = resolve_digest_source(config, "syslog", overrides={})

    expected = DigestSource(
        source_key="zeek_dir",
        directory=Path("/cfg/zk"),
        feed="zeek",
    )
    assert resolved == expected
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def test_digest_cloudtrail_config_fallback(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no override, the cloudtrail digest takes cloudtrail_dir from
    config and reports no feed."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"cloudtrail_dir": "/cfg/ct"}}

    resolved = resolve_digest_source(config, "cloudtrail", overrides={})

    expected = DigestSource(
        source_key="cloudtrail_dir",
        directory=Path("/cfg/ct"),
        feed=None,
    )
    assert resolved == expected
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def test_digest_none_overrides_treated_as_absent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The None-contract holds for the digest resolver as well: four overrides
    all set to None must leave the config fallback in charge."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    source_keys = ("zeek_dir", "syslog_dir", "pihole_dir", "cloudtrail_dir")
    all_none = dict.fromkeys(source_keys)
    config = {"loghunter": {"syslog_dir": "/cfg/sl"}}

    resolved = resolve_digest_source(config, "syslog", overrides=all_none)

    assert resolved.source_key == "syslog_dir"
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def test_digest_empty_string_override_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string zeek_dir override (a bare ``--zeek-dir=``) must not
    suppress the config fallback in a digest run. Same falsy-vs-None class as
    the analyze resolver's empty-string test."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}

    resolved = resolve_digest_source(config, "conn", overrides={"zeek_dir": ""})

    assert (resolved.source_key, resolved.directory) == ("zeek_dir", Path("/cfg/zk"))
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def test_digest_empty_string_override_does_not_trigger_wrong_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An empty-string syslog_dir override on a conn digest counts as "no
    override", so the wrong-key error must not fire and config still wins."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"zeek_dir": "/cfg/zk"}}
    blanks = {"zeek_dir": "", "syslog_dir": ""}

    resolved = resolve_digest_source(config, "conn", overrides=blanks)

    assert (resolved.source_key, resolved.directory) == ("zeek_dir", Path("/cfg/zk"))
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def test_digest_override_root_provenance_uses_shell_semantics(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An override passed to resolve_digest_source is never rooted under LH_ROOT.

    Two cases are checked. The absolute override must round-trip unchanged.
    The relative override is the one that actually detects re-rooting: an
    absolute path joined onto a root comes back unchanged, so the absolute
    case would pass even if the override were sent through the config root.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root", "zeek_dir": "should-not-be-used"}}

    absolute = resolve_digest_source(
        config, "conn", overrides={"zeek_dir": "/abs/zk"}
    )
    assert absolute.directory == Path("/abs/zk")

    relative = resolve_digest_source(
        config, "conn", overrides={"zeek_dir": "rel/zk"}
    )
    # The override must win over the configured value ...
    assert relative.directory != Path("/lh-root/should-not-be-used")
    # ... and must not have been prefixed with the configured root.
    assert relative.directory != Path("/lh-root/rel/zk")
    assert Path("/lh-root") not in relative.directory.parents
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def test_digest_config_relative_uses_lh_root(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A relative zeek_dir that comes from config is joined onto the configured
    root, matching the analyze resolver."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    config = {"loghunter": {"root": "/lh-root", "zeek_dir": "zeek"}}

    resolved = resolve_digest_source(config, "conn", overrides={})

    assert resolved.directory == Path("/lh-root/zeek")
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
# ── three-way drift tripwire for the digest (schema, source_key) keyspace ────
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def test_digest_schema_source_keyspaces_agree() -> None:
    """Tripwire: three tables describe the legal (schema, source_key) pairs and
    their key sets must be identical.

    ``_DIGEST_CANDIDATES`` is expanded into pairs; ``_DIGEST_FEED`` and
    ``_DIGEST_PATTERN_AND_EMPTY`` must each be keyed by exactly those pairs.
    A combo added to only some of the tables would otherwise surface as a
    KeyError at the ``_DIGEST_PATTERN_AND_EMPTY[(schema, source_key)]`` lookup
    in ``run_digest``.
    """
    from loghunter.common.sources import _DIGEST_CANDIDATES, _DIGEST_FEED
    from loghunter.runner import _DIGEST_PATTERN_AND_EMPTY

    legal_pairs = set()
    for schema, source_keys in _DIGEST_CANDIDATES.items():
        for source_key in source_keys:
            legal_pairs.add((schema, source_key))

    assert set(_DIGEST_FEED) == legal_pairs
    assert set(_DIGEST_PATTERN_AND_EMPTY) == legal_pairs
|