loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,471 @@
1
+ """Scope seam-crossing tests for the single-ownership source-resolution rail.
2
+
3
+ These tests exercise the REAL CLI ↔ runner path with ``--dry-run`` and a temp
4
+ ``--config=<tmp_path>/cfg.toml`` file. They prove the property the old
5
+ ``loghunter syslog ./flat.log`` regression tests COULD NOT prove because they
6
+ mocked ``runner.run``: that a positional PATH scoping the run keeps
7
+ sibling source-dirs from configured locations from sneaking in through the
8
+ runner-side config fallback.
9
+
10
+ The user's real ``~/.loghunter/config.toml`` MUST NOT participate — every test
11
+ either points ``--config=`` at a temp file written in ``tmp_path`` OR
12
+ monkeypatches ``cfg.SEARCH_PATHS`` to ``[]`` and ``cfg.load`` to a fixed dict
13
+ (when explicit-PATH config isn't relevant to the assertion).
14
+
15
+ Companion to:
16
+
17
+ - ``tests/test_sources.py`` (unit) — router + resolver primitives.
18
+ - ``tests/test_root_provenance.py`` (programmatic) — ``runner.run`` and
19
+ ``run_digest`` config-fallback rail.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from pathlib import Path
25
+
26
+ import pytest
27
+
28
+ from loghunter import cli, runner
29
+ from loghunter.common import config as cfg
30
+
31
+
32
+ # ── helpers ──────────────────────────────────────────────────────────────────
33
+
34
+
35
# Single-line fixture payloads written into temp log files by the tests below.

# One sshd "Accepted publickey" record carrying a ``<134>`` priority prefix;
# used as the body of the flat syslog positional file.
_FLAT_SYSLOG_LINE = "<134>Jun 11 12:00:00 host1 sshd[1234]: Accepted publickey for user\n"

# One dnsmasq ``query[A]`` record; used as Pi-hole-style content stored under a
# file name that does not look like a Pi-hole log.
_PIHOLE_LINE = "Jun 11 12:00:00 host1 dnsmasq[1234]: query[A] example.test from 192.0.2.10\n"
42
+
43
+
44
+ def _write_cfg(
45
+ tmp_path: Path,
46
+ *,
47
+ zeek_dir: str | None = None,
48
+ syslog_dir: str | None = None,
49
+ pihole_dir: str | None = None,
50
+ cloudtrail_dir: str | None = None,
51
+ ) -> str:
52
+ """Write a minimal TOML config under tmp_path and return its absolute path.
53
+
54
+ Only the keys explicitly passed are written — the rest stay at default
55
+ (which means whatever ``_DEFAULTS`` has). The shipped defaults set
56
+ ``zeek_dir=/var/log/zeek`` and ``syslog_dir=/var/log``; tests that need
57
+ a fully-isolated config write all four keys (or rely on the seam test's
58
+ "scoped-out sibling does not appear in output" assertion holding even
59
+ if a default leaks in elsewhere).
60
+ """
61
+ lines = ["[loghunter]", 'root = ""']
62
+ if zeek_dir is not None:
63
+ lines.append(f'zeek_dir = "{zeek_dir}"')
64
+ if syslog_dir is not None:
65
+ lines.append(f'syslog_dir = "{syslog_dir}"')
66
+ if pihole_dir is not None:
67
+ lines.append(f'pihole_dir = "{pihole_dir}"')
68
+ if cloudtrail_dir is not None:
69
+ lines.append(f'cloudtrail_dir = "{cloudtrail_dir}"')
70
+ cfg_path = tmp_path / "cfg.toml"
71
+ cfg_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
72
+ return str(cfg_path)
73
+
74
+
75
+ # ── 1) analyze single-detector: positional scopes; configured sibling stays out
76
+
77
+
78
def test_syslog_positional_via_real_cli_scopes_out_configured_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter syslog <flat.log> --dry-run`` through the real CLI.

    The temp config sets BOTH ``zeek_dir`` and ``syslog_dir``; the positional
    file must scope the run so the configured ``zeek_dir`` never shows up in
    the dry-run output.

    Nothing between ``cli._main`` and the runner is mocked here. The earlier
    regression tests for the 2b3a56e "P1 fix" mocked ``runner.run`` and only
    checked the kwargs the CLI passed, so they never crossed the seam where
    the runner used to undo the scope by config-filling ``None`` back.
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    configured_zeek = tmp_path / "configured_zeek"
    configured_syslog = tmp_path / "configured_syslog"
    for directory in (configured_zeek, configured_syslog):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(configured_zeek),
        syslog_dir=str(configured_syslog),
    )

    positional = tmp_path / "flat.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = ["syslog", str(positional), f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    stdout = capsys.readouterr().out
    # Positive: the positional file is what the run reports loading.
    assert str(positional) in stdout
    # Negative: the configured zeek_dir did not leak through the seam.
    assert str(configured_zeek) not in stdout
    # The zeek_dir dry-run line itself must read "not configured", i.e. the
    # value stayed None all the way through.
    assert "zeek_dir:" in stdout
    after_label = stdout.split("zeek_dir:")[1]
    rest_of_zeek_line = after_label.split("\n")[0]
    assert "not configured" in rest_of_zeek_line
113
+
114
+
115
def test_analyze_detect_syslog_positional_scopes_out_configured_zeek_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Analyze-entry-point twin of the single-detector seam test.

    ``loghunter --detect=syslog ./flat.log`` flows through
    ``_run_all_detectors``, a separate code path from
    ``_run_single_detector``. Both must honor the positional's scope, so the
    configured ``zeek_dir`` must be absent from the dry-run output.
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    configured_zeek = tmp_path / "configured_zeek"
    configured_syslog = tmp_path / "configured_syslog"
    for directory in (configured_zeek, configured_syslog):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(configured_zeek),
        syslog_dir=str(configured_syslog),
    )

    positional = tmp_path / "flat.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = [
        "--detect=syslog",
        str(positional),
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    stdout = capsys.readouterr().out
    assert str(positional) in stdout
    assert str(configured_zeek) not in stdout
144
+
145
+
146
+ # ── 2) runner-level mirror — runner.run with scope, no CLI involved ─────────
147
+
148
+
149
def test_runner_run_scope_suppresses_unscoped_config_fill(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Runner-only half of the seam: call ``runner.run`` directly, no CLI.

    The config dict sets both ``zeek_dir`` and ``syslog_dir``, while the call
    passes ``syslog_dir=<file>``, ``scope=frozenset({"syslog_dir"})`` and
    ``dry_run=True``. The configured ``zeek_dir`` must be absent from the
    dry-run output — the same property the CLI tests prove through the full
    path.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    configured_zeek = tmp_path / "configured_zeek"
    configured_zeek.mkdir()
    positional = tmp_path / "flat.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    # The configured syslog directory is only named here; it is never created.
    run_config = {
        "loghunter": {
            "zeek_dir": str(configured_zeek),
            "syslog_dir": str(tmp_path / "configured_syslog"),
        }
    }
    runner.run(
        config=run_config,
        syslog_dir=str(positional),
        scope=frozenset({"syslog_dir"}),
        dry_run=True,
    )

    stdout = capsys.readouterr().out
    assert str(positional) in stdout
    assert str(configured_zeek) not in stdout
176
+
177
+
178
+ # ── 3) same-source explicit flag + positional MERGE; positional still scopes
179
+
180
+
181
def test_same_source_flag_and_positional_merge_both_load(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter syslog ./auto.log --syslog-dir=/explicit --dry-run``.

    - Same-family flag + positional MERGE: BOTH the positional file AND the
      flag's directory contribute to syslog_dir and both load. This is the
      sanctioned rail supersession from the rev-3 prompt; the old "flag
      wins" rule (and the BUGS entry it rode on) is retired.
    - The positional still scopes the run (configured zeek_dir stays
      unloaded).
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    configured_zeek = tmp_path / "configured_zeek"
    explicit_syslog = tmp_path / "explicit_syslog"
    for directory in (configured_zeek, explicit_syslog):
        directory.mkdir()
    positional = tmp_path / "auto.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")
    config_file = _write_cfg(tmp_path, zeek_dir=str(configured_zeek))

    argv = [
        "syslog",
        str(positional),
        f"--syslog-dir={explicit_syslog}",
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    stdout = capsys.readouterr().out
    # MERGE: the positional and the flag value are both reported.
    for merged_source in (positional, explicit_syslog):
        assert str(merged_source) in stdout
    # Scope: the configured zeek_dir stayed out.
    assert str(configured_zeek) not in stdout
218
+
219
+
220
+ # ── 4) different-source explicit flag widens — operator widening ────────────
221
+
222
+
223
def test_different_source_flag_alongside_positional_widens_run(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter syslog ./flat.log --zeek-dir=/widen --dry-run``.

    An explicit DIFFERENT-source flag still loads: the resolver's "override
    wins even outside scope" branch is the operator deliberately widening
    the run beyond what the positional scoped.
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    explicit_zeek = tmp_path / "widen_zeek"
    explicit_zeek.mkdir()
    positional = tmp_path / "flat.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")
    # The config carries no source dirs — the explicit flags carry the run.
    config_file = _write_cfg(tmp_path)

    argv = [
        "syslog",
        str(positional),
        f"--zeek-dir={explicit_zeek}",
        f"--config={config_file}",
        "--dry-run",
    ]
    cli._main(argv)

    stdout = capsys.readouterr().out
    # positional -> syslog_dir
    assert str(positional) in stdout
    # explicit flag widens the run to zeek_dir
    assert str(explicit_zeek) in stdout
251
+
252
+
253
+ # ── 5) DNS content-sniff regression (Glenn req #3) ───────────────────────────
254
+
255
+
256
def test_dns_pihole_content_under_neutral_name_routes_pihole_via_real_cli(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A Pi-hole-CONTENT file whose NAME does NOT match ``pihole*.log*``
    routes to pihole_dir end-to-end.

    Locks the fnmatch→content-sniff migration at the CLI seam, in addition
    to the router unit test in tests/test_sources.py.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    # Bland name — cannot satisfy pihole*.log*. Content is dnsmasq.
    pihole = tmp_path / "mystery.log"
    pihole.write_text(_PIHOLE_LINE, encoding="utf-8")
    cfg_path = _write_cfg(tmp_path)

    cli._main(["dns", str(pihole), f"--config={cfg_path}", "--dry-run"])

    out = capsys.readouterr().out
    out_lines = out.splitlines()

    # The positional landed on pihole_dir — visible on the dry-run line.
    pihole_lines = [line for line in out_lines if "pihole_dir:" in line]
    assert pihole_lines, out
    assert str(pihole) in pihole_lines[0]

    # And the zeek_dir line says "not configured" — sniff routed pihole, not
    # the historical zeek_dir default. Assert the line exists first so a
    # missing label fails with the captured output instead of an IndexError.
    zeek_lines = [line for line in out_lines if "zeek_dir:" in line]
    assert zeek_lines, out
    assert "not configured" in zeek_lines[0]
288
+
289
+
290
+ # ── 6) aws ``~`` positional (seam form of the deleted CLI test) ─────────────
291
+
292
+
293
def test_aws_subcommand_with_tilde_positional_resolves_via_dry_run(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter aws ~/exports/cloudtrail.json.log --dry-run`` — the full
    chain: router lands the positional on cloudtrail_dir, the resolver
    ``~``-expands the override.

    Replaces the deleted ``test_aws_subcommand_with_tilde_positional_expands_and_routes``
    in tests/test_config_cli.py — that test mocked ``runner.run`` and proved
    the routing half but not the ``~``-expansion half because expansion now
    happens inside the resolver, not the CLI seam.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    # Redirect the home directory to tmp_path. POSIX ``expanduser`` reads
    # HOME; on Windows Python reads USERPROFILE instead, so set both to keep
    # the expected path deterministic on either platform.
    # NOTE(review): assumes the resolver expands ``~`` via the stdlib
    # ``expanduser`` — confirm.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("USERPROFILE", str(tmp_path))

    # The file doesn't have to exist; we only need the dry-run output to
    # show the ~-expanded resolved path.
    cfg_path = _write_cfg(tmp_path)
    cli._main([
        "aws", "~/exports/cloudtrail.json.log",
        f"--config={cfg_path}", "--dry-run",
    ])

    out = capsys.readouterr().out
    expected = str(tmp_path / "exports" / "cloudtrail.json.log")
    assert expected in out
    # Assert the label exists before splitting on it, so a missing line
    # fails with the captured output rather than a bare IndexError.
    assert "cloudtrail_dir:" in out, out
    cloudtrail_rest = out.split("cloudtrail_dir:")[1].split("\n")[0]
    assert "~" not in cloudtrail_rest
323
+
324
+
325
+ # ── 7) digest seam — single-owner config fallback, no CLI scope, sniff routes
326
+
327
+
328
def test_digest_positional_via_real_cli_routes_and_suppresses_zeek_default(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Run ``loghunter digest ./flat.log --dry-run`` through the real CLI.

    The temp config sets BOTH zeek_dir AND syslog_dir. The sniff router
    lands the positional on syslog_dir, ``resolve_digest_source`` resolves a
    single source (syslog), and the dry-run output does NOT mention the
    configured zeek_dir directory.

    Digest has no analyze-style ``scope``; this test proves single-owner
    config fallback, positional self-routing through the real CLI path, and
    the implicit "only one source per schema" property.
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    configured_zeek = tmp_path / "configured_zeek"
    configured_syslog = tmp_path / "configured_syslog"
    for directory in (configured_zeek, configured_syslog):
        directory.mkdir()
    config_file = _write_cfg(
        tmp_path,
        zeek_dir=str(configured_zeek),
        syslog_dir=str(configured_syslog),
    )

    positional = tmp_path / "flat.log"
    positional.write_text(_FLAT_SYSLOG_LINE, encoding="utf-8")

    argv = ["digest", str(positional), f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    stdout = capsys.readouterr().out
    # Digest dry-run prints `<source_key>: <directory>` — confirm the run
    # landed on syslog and the directory IS the positional.
    assert "schema:" in stdout
    assert "syslog" in stdout
    assert "syslog_dir:" in stdout
    assert str(positional) in stdout
    # Negative: the configured zeek_dir directory does not appear.
    assert str(configured_zeek) not in stdout
364
+
365
+
366
+ # ── 8) empty-string override falls through to config (CR Finding 1) ─────────
367
+
368
+
369
def test_analyze_empty_string_zeek_dir_flag_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``loghunter beacon --zeek-dir= --dry-run`` (bare flag, empty value)
    against a configured ``[loghunter].zeek_dir`` must resolve to the
    CONFIGURED directory — NOT silently to None.

    The CLI parser stores ``--zeek-dir=`` as the empty string. The naive
    ``override is not None`` check at the resolver boundary treated ``""``
    as "present," sent it through ``resolve_path("", "")`` → None, and
    suppressed the config fallback — so beacon read "zeek_dir not
    configured" and skipped, even with a perfectly good configured dir.
    The ``_present`` helper restores the pre-refactor truthiness semantics.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    configured_zeek = tmp_path / "configured_zeek"
    configured_zeek.mkdir()
    cfg_path = _write_cfg(tmp_path, zeek_dir=str(configured_zeek))

    cli._main([
        "beacon", "--zeek-dir=", f"--config={cfg_path}", "--dry-run",
    ])

    out = capsys.readouterr().out
    # The configured zeek_dir must appear in the dry-run output.
    assert str(configured_zeek) in out
    # And the "not configured" sentinel must NOT show up for zeek_dir.
    # Assert the line exists first so a missing label fails with the
    # captured output instead of a context-free IndexError.
    zeek_lines = [line for line in out.splitlines() if "zeek_dir:" in line]
    assert zeek_lines, out
    assert "not configured" not in zeek_lines[0]
404
+
405
+
406
def test_digest_empty_string_zeek_dir_flag_falls_through_to_config(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Digest twin of the analyze empty-``--zeek-dir=`` test.

    ``loghunter digest --zeek-dir= --dry-run`` (bare digest, empty flag)
    against a configured ``[loghunter].zeek_dir`` must resolve the conn
    card's source to the CONFIGURED directory — NOT raise "zeek_dir not
    configured".

    The pre-fix bug surfaced as a raise here because
    ``resolve_digest_source`` saw an empty string in
    ``overrides["zeek_dir"]``, treated it as present, then failed to resolve
    it (empty string → None) and walked away from the candidate ladder.
    """
    # Keep ambient configuration out of the run.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])

    zeek_from_config = tmp_path / "configured_zeek"
    zeek_from_config.mkdir()
    config_file = _write_cfg(tmp_path, zeek_dir=str(zeek_from_config))

    argv = ["digest", "--zeek-dir=", f"--config={config_file}", "--dry-run"]
    cli._main(argv)

    stdout = capsys.readouterr().out
    # The configured zeek_dir must appear in the digest dry-run output.
    assert str(zeek_from_config) in stdout
    # And the schema is conn (the bare-digest default), not an error.
    assert "schema:" in stdout
    assert "conn" in stdout
437
+
438
+
439
def test_digest_wrong_source_flag_error_byte_preserved_via_real_cli(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Placeholder for the CLI-seam lock on the digest wrong-source error.

    The intended scenario is ``loghunter digest <Zeek-conn-file>
    --pihole-dir=/x`` raising the byte-preserved wrong-source error through
    the CLI boundary, complementing the resolver-level locks in
    tests/test_sources.py. It cannot be expressed through the CLI today, so
    this test is always skipped and makes no assertions.
    """
    # Why this is skipped rather than implemented:
    #
    # - The CLI rejects --pihole-dir alongside a positional BEFORE resolution
    #   (cli.py:825 guard), so the positional form never reaches the
    #   resolver.
    # - The bare-digest form (no positional) would let the resolver see the
    #   wrong-key combination, but --pihole-dir is not in
    #   _DIGEST_ALLOWED_LONG_FLAGS today: the digest CLI's narrow flag
    #   surface intentionally hides three of the four source-dir flags —
    #   that's a separate CODE.md rail.
    #
    # Skip before doing any setup; fixture work here would be dead code.
    pytest.skip(
        "digest CLI exposes only --zeek-dir; wrong-source error strings "
        "are locked at the resolver layer in tests/test_sources.py."
    )