loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
"""Scope seam-crossing tests for the single-ownership source-resolution rail.
|
|
2
|
+
|
|
3
|
+
These tests exercise the REAL CLI ↔ runner path with ``--dry-run`` and a temp
|
|
4
|
+
``--config=<tmp_path>/cfg.toml`` file. They prove the property the old
|
|
5
|
+
``loghunter syslog ./flat.log`` regression tests COULD NOT prove because they
|
|
6
|
+
mocked ``runner.run``: that a positional PATH scoping the run keeps
|
|
7
|
+
sibling source-dirs from configured locations from sneaking in through the
|
|
8
|
+
runner-side config fallback.
|
|
9
|
+
|
|
10
|
+
The user's real ``~/.loghunter/config.toml`` MUST NOT participate — every test
|
|
11
|
+
either points ``--config=`` at a temp file written in ``tmp_path`` OR
|
|
12
|
+
monkeypatches ``cfg.SEARCH_PATHS`` to ``[]`` and ``cfg.load`` to a fixed dict
|
|
13
|
+
(when explicit-PATH config isn't relevant to the assertion).
|
|
14
|
+
|
|
15
|
+
Companion to:
|
|
16
|
+
|
|
17
|
+
- ``tests/test_sources.py`` (unit) — router + resolver primitives.
|
|
18
|
+
- ``tests/test_root_provenance.py`` (programmatic) — ``runner.run`` and
|
|
19
|
+
``run_digest`` config-fallback rail.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import pytest
|
|
27
|
+
|
|
28
|
+
from loghunter import cli, runner
|
|
29
|
+
from loghunter.common import config as cfg
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# A single sshd syslog record with a ``<134>`` priority prefix; written to
# flat log files that the tests pass as a positional PATH.
_FLAT_SYSLOG_LINE = "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"

# A single dnsmasq query record; used as Pi-hole content stored under a
# file name that does not look like a Pi-hole log.
_PIHOLE_LINE = "Jun 11 12:00:00 host1 dnsmasq[1234]: query[A] example.test from 192.0.2.10\n"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _write_cfg(
    tmp_path: Path,
    *,
    zeek_dir: str | None = None,
    syslog_dir: str | None = None,
    pihole_dir: str | None = None,
    cloudtrail_dir: str | None = None,
) -> str:
    """Write a minimal TOML config under ``tmp_path`` and return its path.

    The file is always ``<tmp_path>/cfg.toml`` and always starts with a
    ``[loghunter]`` table containing ``root = ""``. Only the source-dir keys
    explicitly passed (i.e. not ``None``) are written, in the fixed order
    zeek_dir, syslog_dir, pihole_dir, cloudtrail_dir; the rest stay at
    whatever the application's defaults are. Per the original note, the
    shipped defaults set ``zeek_dir=/var/log/zeek`` and
    ``syslog_dir=/var/log``, so tests that need a fully isolated config
    should pass all four keys (or rely on a "scoped-out sibling does not
    appear in output" assertion holding even if a default leaks in).

    Values are emitted as TOML basic strings with backslashes and double
    quotes escaped, so Windows-style paths such as ``C:\\Users\\x`` produce
    valid TOML instead of bogus escape sequences. Control characters are
    not escaped.

    Args:
        tmp_path: Directory in which ``cfg.toml`` is created (overwritten if
            it already exists).
        zeek_dir: Optional value for ``[loghunter].zeek_dir``.
        syslog_dir: Optional value for ``[loghunter].syslog_dir``.
        pihole_dir: Optional value for ``[loghunter].pihole_dir``.
        cloudtrail_dir: Optional value for ``[loghunter].cloudtrail_dir``.

    Returns:
        ``str(tmp_path / "cfg.toml")``.
    """

    def _toml_basic_string(value: str) -> str:
        # Backslash must be escaped first, otherwise the backslashes added
        # for the quotes would themselves be doubled.
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    source_dirs = {
        "zeek_dir": zeek_dir,
        "syslog_dir": syslog_dir,
        "pihole_dir": pihole_dir,
        "cloudtrail_dir": cloudtrail_dir,
    }

    lines = ["[loghunter]", 'root = ""']
    lines.extend(
        f"{key} = {_toml_basic_string(value)}"
        for key, value in source_dirs.items()
        if value is not None
    )

    cfg_path = tmp_path / "cfg.toml"
    cfg_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return str(cfg_path)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ── 1) analyze single-detector: positional scopes; configured sibling stays out
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_syslog_positional_via_real_cli_scopes_out_configured_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter syslog ./flat.log --dry-run`` through the real CLI.

    The temp config sets BOTH ``zeek_dir`` and ``syslog_dir``. The dry-run
    output must show the positional file and must NOT show the configured
    zeek directory.

    Per the original note, this is the test the 2b3a56e "P1 fix" lacked:
    earlier regression tests mocked ``runner.run`` and only asserted the
    kwargs the CLI passed, so they never crossed the seam where the runner
    used to undo the scope by config-filling ``None`` values back in.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_root = tmp_path / "configured_zeek"
    syslog_root = tmp_path / "configured_syslog"
    for directory in (zeek_root, syslog_root):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(zeek_root),
        syslog_dir=str(syslog_root),
    )

    log_file = tmp_path / "flat.log"
    log_file.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = ["syslog", str(log_file), f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    captured = capsys.readouterr().out
    # Positive: the positional file shows up in the dry-run output.
    assert str(log_file) in captured
    # Negative: the configured zeek_dir did not sneak through the seam.
    assert str(zeek_root) not in captured
    # The zeek_dir dry-run line must read "not configured": the scope kept
    # it unset all the way through.
    assert "zeek_dir:" in captured
    rest_of_zeek_line = captured.split("zeek_dir:", 2)[1].split("\n", 1)[0]
    assert "not configured" in rest_of_zeek_line
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_analyze_detect_syslog_positional_scopes_out_configured_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Same seam check as the single-detector test, via the analyze entry point.

    Per the original note, ``loghunter --detect=syslog ./flat.log`` flows
    through ``_run_all_detectors``, a code path separate from
    ``_run_single_detector``; both must honor the positional scope.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_root = tmp_path / "configured_zeek"
    syslog_root = tmp_path / "configured_syslog"
    for directory in (zeek_root, syslog_root):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(zeek_root),
        syslog_dir=str(syslog_root),
    )

    log_file = tmp_path / "flat.log"
    log_file.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = [
        "--detect=syslog",
        str(log_file),
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    captured = capsys.readouterr().out
    # The positional is present; the configured zeek directory is not.
    assert str(log_file) in captured
    assert str(zeek_root) not in captured
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ── 2) runner-level mirror — runner.run with scope, no CLI involved ─────────
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_runner_run_scope_suppresses_unscoped_config_fill(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Call ``runner.run`` directly with a ``syslog_dir``-only scope.

    The in-memory config sets both ``zeek_dir`` and ``syslog_dir``; the
    call overrides ``syslog_dir`` with a flat file, passes
    ``scope=frozenset({"syslog_dir"})`` and ``dry_run=True``. The configured
    zeek directory must be absent from the dry-run output. This locks the
    runner half of the seam without involving the CLI.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    zeek_root = tmp_path / "configured_zeek"
    zeek_root.mkdir()
    log_file = tmp_path / "flat.log"
    log_file.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    # NOTE: the configured syslog directory is only named here, not created.
    loghunter_section = {
        "zeek_dir": str(zeek_root),
        "syslog_dir": str(tmp_path / "configured_syslog"),
    }
    runner.run(
        config={"loghunter": loghunter_section},
        syslog_dir=str(log_file),
        scope=frozenset({"syslog_dir"}),
        dry_run=True,
    )

    captured = capsys.readouterr().out
    assert str(log_file) in captured
    assert str(zeek_root) not in captured
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ── 3) same-source explicit flag + positional MERGE; positional still scopes
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_same_source_flag_and_positional_merge_both_load(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter syslog ./auto.log --syslog-dir=/explicit --dry-run``.

    Expectations:

    - A same-family flag and a positional MERGE: both the positional file
      and the flag's directory appear in the dry-run output. Per the
      original note, this is the sanctioned rail supersession from the
      rev-3 prompt; the old "flag wins" rule (and the BUGS entry it rode
      on) is retired.
    - The positional still scopes the run, so the configured ``zeek_dir``
      stays out of the output.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_root = tmp_path / "configured_zeek"
    flag_dir = tmp_path / "explicit_syslog"
    for directory in (zeek_root, flag_dir):
        directory.mkdir()

    positional = tmp_path / "auto.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")
    config_file = _write_cfg(tmp_path, zeek_dir=str(zeek_root))

    argv = [
        "syslog",
        str(positional),
        f"--syslog-dir={flag_dir}",
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    captured = capsys.readouterr().out
    # Merge: the positional AND the flag value are both present.
    assert str(positional) in captured
    assert str(flag_dir) in captured
    # Scope: the configured zeek_dir stayed out.
    assert str(zeek_root) not in captured
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# ── 4) different-source explicit flag widens — operator widening ────────────
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def test_different_source_flag_alongside_positional_widens_run(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter syslog ./flat.log --zeek-dir=/widen --dry-run``.

    An explicit flag for a DIFFERENT source than the positional must still
    load. Per the original note, this is the resolver's "override wins even
    outside scope" branch: the operator deliberately widening the run.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    extra_zeek = tmp_path / "widen_zeek"
    extra_zeek.mkdir()
    log_file = tmp_path / "flat.log"
    log_file.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")
    # The config names no source dirs; the positional and the explicit flag
    # carry the run.
    config_file = _write_cfg(tmp_path)

    argv = [
        "syslog",
        str(log_file),
        f"--zeek-dir={extra_zeek}",
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    captured = capsys.readouterr().out
    # The positional is routed to syslog_dir.
    assert str(log_file) in captured
    # The explicit flag widens the run to zeek_dir.
    assert str(extra_zeek) in captured
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# ── 5) DNS content-sniff regression (Glenn req #3) ───────────────────────────
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def test_dns_pihole_content_under_neutral_name_routes_pihole_via_real_cli(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A Pi-hole-CONTENT file whose NAME does not match ``pihole*.log*``
    must route to ``pihole_dir`` end-to-end.

    Locks the fnmatch→content-sniff migration at the CLI seam, in addition
    to the router unit test in tests/test_sources.py.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    # Bland name — cannot satisfy pihole*.log*. Content is dnsmasq.
    pihole = tmp_path / "mystery.log"
    pihole.write_text(_PIHOLE_LINE, encoding="utf-8")
    cfg_path = _write_cfg(tmp_path)

    cli._main(["dns", str(pihole), f"--config={cfg_path}", "--dry-run"])

    out = capsys.readouterr().out
    out_lines = out.splitlines()

    # The positional landed on pihole_dir — visible on the dry-run line.
    pihole_lines = [line for line in out_lines if "pihole_dir:" in line]
    assert pihole_lines, out
    assert str(pihole) in pihole_lines[0]

    # And the zeek_dir line says "not configured" — sniff routed pihole, not
    # the historical zeek_dir default. Assert the line exists first so a
    # missing line fails with the captured output instead of an IndexError.
    zeek_lines = [line for line in out_lines if "zeek_dir:" in line]
    assert zeek_lines, out
    assert "not configured" in zeek_lines[0]
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# ── 6) aws ``~`` positional (seam form of the deleted CLI test) ─────────────
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_aws_subcommand_with_tilde_positional_resolves_via_dry_run(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter aws ~/exports/cloudtrail.json.log --dry-run`` — the full
    chain: the router lands the positional on ``cloudtrail_dir`` and the
    resolver ``~``-expands the override.

    Replaces the deleted
    ``test_aws_subcommand_with_tilde_positional_expands_and_routes`` in
    tests/test_config_cli.py — that test mocked ``runner.run`` and proved
    the routing half but not the ``~``-expansion half, because expansion now
    happens inside the resolver, not at the CLI seam.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    # Redirect the home directory to tmp_path. POSIX expanduser() reads HOME;
    # on Windows (Python 3.8+) it reads USERPROFILE instead, so set both to
    # keep "~" away from the real user profile on every platform.
    # NOTE(review): assumes the resolver expands "~" via the standard
    # library's expanduser — confirm against the resolver.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("USERPROFILE", str(tmp_path))

    # The file doesn't have to exist; we only need the dry-run output to
    # show the ~-expanded resolved path.
    cfg_path = _write_cfg(tmp_path)
    cli._main([
        "aws", "~/exports/cloudtrail.json.log",
        f"--config={cfg_path}", "--dry-run",
    ])

    out = capsys.readouterr().out
    expected = str(tmp_path / "exports" / "cloudtrail.json.log")
    assert expected in out
    # Guard the split below: a missing cloudtrail_dir line should fail with
    # the captured output rather than an IndexError.
    assert "cloudtrail_dir:" in out, out
    assert "~" not in out.split("cloudtrail_dir:")[1].split("\n")[0]
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# ── 7) digest seam — single-owner config fallback, no CLI scope, sniff routes
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def test_digest_positional_via_real_cli_routes_and_suppresses_zeek_default(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter digest ./flat.log --dry-run`` through the real CLI.

    The temp config sets BOTH ``zeek_dir`` and ``syslog_dir``. Per the
    original note, the sniff router lands the positional on ``syslog_dir``
    and ``resolve_digest_source`` resolves a single source (syslog); the
    dry-run output must not mention the configured zeek directory.

    Digest has no analyze-style ``scope``; this test covers single-owner
    config fallback, positional self-routing through the real CLI path, and
    the implicit "only one source per schema" property.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_root = tmp_path / "configured_zeek"
    syslog_root = tmp_path / "configured_syslog"
    for directory in (zeek_root, syslog_root):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(zeek_root),
        syslog_dir=str(syslog_root),
    )

    log_file = tmp_path / "flat.log"
    log_file.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = ["digest", str(log_file), f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    captured = capsys.readouterr().out
    # Digest dry-run prints `<source_key>: <directory>`: confirm the run
    # landed on syslog and that the directory IS the positional.
    assert "schema:" in captured
    assert "syslog" in captured
    assert "syslog_dir:" in captured
    assert str(log_file) in captured
    # Negative: the zeek_dir directory does not appear in the output.
    assert str(zeek_root) not in captured
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
# ── 8) empty-string override falls through to config (CR Finding 1) ─────────
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def test_analyze_empty_string_zeek_dir_flag_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter beacon --zeek-dir= --dry-run`` (bare flag, empty value)
    against a configured ``[loghunter].zeek_dir`` must resolve to the
    CONFIGURED directory — NOT silently to None.

    The CLI parser stores ``--zeek-dir=`` as the empty string. The naive
    ``override is not None`` check at the resolver boundary treated ``""``
    as "present," sent it through ``resolve_path("", "")`` → None, and
    suppressed the config fallback — so beacon read "zeek_dir not
    configured" and skipped, even with a perfectly good configured dir.
    The ``_present`` helper restores the pre-refactor truthiness semantics.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    configured_zeek = tmp_path / "configured_zeek"
    configured_zeek.mkdir()
    cfg_path = _write_cfg(tmp_path, zeek_dir=str(configured_zeek))

    cli._main([
        "beacon", "--zeek-dir=", f"--config={cfg_path}", "--dry-run",
    ])

    out = capsys.readouterr().out
    # The configured zeek_dir must appear in the dry-run output.
    assert str(configured_zeek) in out
    # And the "not configured" sentinel must NOT show up for zeek_dir.
    # Assert the line exists first so a missing line fails with the captured
    # output instead of an IndexError.
    zeek_lines = [line for line in out.splitlines() if "zeek_dir:" in line]
    assert zeek_lines, out
    assert "not configured" not in zeek_lines[0]
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def test_digest_empty_string_zeek_dir_flag_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter digest --zeek-dir= --dry-run`` (no positional, empty flag).

    With ``[loghunter].zeek_dir`` configured, the conn card's source must
    resolve to the CONFIGURED directory rather than raising "zeek_dir not
    configured".

    This mirrors the analyze test for the digest resolver. Per the original
    note, the pre-fix bug surfaced as a raise here: ``resolve_digest_source``
    saw an empty string in ``overrides["zeek_dir"]``, treated it as present,
    failed to resolve it (empty string → None), and walked away from the
    candidate ladder.
    """
    # Keep the user's real config and LOGHUNTER_ROOT out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_root = tmp_path / "configured_zeek"
    zeek_root.mkdir()
    config_file = _write_cfg(tmp_path, zeek_dir=str(zeek_root))

    argv = ["digest", "--zeek-dir=", f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    captured = capsys.readouterr().out
    # The configured zeek_dir must appear in the digest dry-run output.
    assert str(zeek_root) in captured
    # And the schema is conn (the bare-digest default), not an error.
    assert "schema:" in captured
    assert "conn" in captured
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def test_digest_wrong_source_flag_error_byte_preserved_via_real_cli(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Placeholder for the CLI-seam form of the digest wrong-source error check.

    The intended scenario is ``loghunter digest <Zeek-conn-file>
    --pihole-dir=/x`` raising the byte-preserved wrong-source error through
    the CLI boundary. The test is skipped unconditionally: it runs no CLI
    code and asserts nothing. The fixtures are kept in the signature so the
    test can be filled in later without changing its interface.
    """
    # Why this cannot run at the CLI seam today (per the original notes):
    #
    # - The CLI rejects --pihole-dir alongside a positional BEFORE
    #   resolution (cli.py:825 guard), so only the bare-digest form (no
    #   positional) could reach the resolver with the wrong-key combination.
    # - But --pihole-dir is not in _DIGEST_ALLOWED_LONG_FLAGS, so the digest
    #   CLI cannot pass it at all. The digest CLI's narrow flag surface
    #   intentionally hides three of the four source-dir flags — that's a
    #   separate CODE.md rail.
    #
    # Skip before doing any setup: the former env patching, directory
    # creation and config write had no effect on a test that never ran.
    pytest.skip(
        "digest CLI exposes only --zeek-dir; wrong-source error strings "
        "are locked at the resolver layer in tests/test_sources.py."
    )
|