loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,707 @@
1
+ """Coverage for the loghunter init wizard.
2
+
3
+ Sections:
4
+ - Upsert matrix: section-bound transform inside the [loghunter] span.
5
+ - R1 root non-clobber: re-init preserves an existing root (including "").
6
+ - R5 _toml_str: literal/basic split, control-char rejection.
7
+ - Profiler: families, size, fresh buckets, bounded cap, no-data, perm-tolerant.
8
+ - Flow tests: drive the real _run_init with isolated HOME and monkeypatched
9
+ candidate-path constants — no test reaches the developer's /var/log.
10
+ - Verbatim line discipline: exact dialogue strings, no traceback leakage.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import tomllib
16
+ from datetime import datetime, timedelta
17
+ from pathlib import Path
18
+
19
+ import pytest
20
+
21
+ # Init wizard helpers moved from loghunter.cli to loghunter.cli_init (a
22
+ # CLI-internal split — first-run UX remains CLI-layer ownership). This module
23
+ # is rebound to the alias ``cli`` so the existing tests keep their
24
+ # ``cli._foo(...)`` / ``monkeypatch.setattr(cli, "_FOO", …)`` shape unchanged.
25
+ from loghunter import cli_init as cli
26
+
27
+
28
+ # ── Test fixtures ──────────────────────────────────────────────────────────────
29
+
30
# Shipped example config, used as the "fresh" starting text by the upsert tests.
# It is located next to the imported package (loghunter/data/) rather than via
# a path relative to the current working directory, so the module imports
# cleanly no matter where pytest is launched from.
EXAMPLE_TEXT = (
    Path(cli.__file__).resolve().parent / "data" / "config_example.toml"
).read_text(encoding="utf-8")
33
+
34
+
35
def _isolated_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path:
    """Redirect the user's home directory to ``tmp_path``.

    Returns ``tmp_path / ".loghunter"`` so callers can assert on what was
    written there.

    ``HOME`` is what POSIX home resolution reads. ``USERPROFILE`` is set too
    because on Windows ``os.path.expanduser`` / ``Path.home()`` consult it
    rather than ``HOME``; without it the wizard could reach the real profile.
    """
    fake_home = str(tmp_path)
    for env_var in ("HOME", "USERPROFILE"):
        monkeypatch.setenv(env_var, fake_home)
    return tmp_path / ".loghunter"
39
+
40
+
41
def _stage_inputs(monkeypatch: pytest.MonkeyPatch, answers: list[str]) -> None:
    """Replace ``builtins.input`` with a stub that replays ``answers`` in order.

    Any prompt arguments are ignored. Once the answers run out, the next call
    raises ``StopIteration``.
    """
    pending = iter(answers)

    def _scripted_input(*_args, **_kwargs):
        return next(pending)

    monkeypatch.setattr("builtins.input", _scripted_input)
45
+
46
+
47
def _stub_candidates(
    monkeypatch: pytest.MonkeyPatch,
    *,
    zeek: tuple[str, ...] = (),
    pihole: tuple[tuple[str, str], ...] = (),
    syslog: str = "/nonexistent-syslog-dir",
) -> None:
    """Override the wizard's module-level probe constants for one test.

    With the defaults the Zeek and Pi-hole candidate tuples are empty and the
    syslog candidate is a path that is not expected to exist.
    """
    overrides = (
        ("_ZEEK_CANDIDATES", zeek),
        ("_PIHOLE_CANDIDATES", pihole),
        ("_SYSLOG_CANDIDATE", syslog),
    )
    for constant_name, replacement in overrides:
        monkeypatch.setattr(cli, constant_name, replacement)
58
+
59
+
60
+ # ════════════════════════════════════════════════════════════════════════════
61
+ # 1–11. Upsert matrix — section-bound transform
62
+ # ════════════════════════════════════════════════════════════════════════════
63
+
64
+
65
def test_upsert_fresh_from_example_provided_active_rewrite() -> None:
    """Fresh example + provided value: ``zeek_dir`` parses to the new value."""
    updated = cli._upsert_loghunter_key(
        EXAMPLE_TEXT, "zeek_dir", "/opt/zeek/logs", fresh=True,
    )
    section = tomllib.loads(updated)["loghunter"]
    assert section["zeek_dir"] == "/opt/zeek/logs"
    # Exactly one line may begin with the key, so nothing was duplicated.
    assert updated.count('\nzeek_dir') == 1
73
+
74
+
75
def test_upsert_fresh_from_example_skipped_active_gets_commented() -> None:
    """Fresh example + skipped (``None``): key is absent when parsed, present as a comment."""
    updated = cli._upsert_loghunter_key(EXAMPLE_TEXT, "zeek_dir", None, fresh=True)
    section = tomllib.loads(updated)["loghunter"]
    assert "zeek_dir" not in section
    assert "# zeek_dir" in updated
82
+
83
+
84
def test_upsert_fresh_from_example_skipped_already_commented_noop() -> None:
    """Skipping a key the example already ships commented changes nothing."""
    # pihole_dir is commented out in the shipped example, so skipping it
    # must return the text unchanged.
    updated = cli._upsert_loghunter_key(EXAMPLE_TEXT, "pihole_dir", None, fresh=True)
    assert updated == EXAMPLE_TEXT
90
+
91
+
92
def test_upsert_existing_active_key_updated() -> None:
    """An active key in an existing config takes the new value; its neighbour stays."""
    before = "[loghunter]\nzeek_dir = \"/x\"\nsyslog_dir = \"/var/log\"\n"
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    assert tomllib.loads(after)["loghunter"]["zeek_dir"] == "/y"
    # The unrelated syslog_dir line must still be present verbatim.
    assert 'syslog_dir = "/var/log"' in after
99
+
100
+
101
def test_upsert_existing_commented_key_uncommented() -> None:
    """A key present only as a comment parses to the provided value after upsert."""
    before = '[loghunter]\n# zeek_dir = "/x"\nsyslog_dir = "/var/log"\n'
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    section = tomllib.loads(after)["loghunter"]
    assert section["zeek_dir"] == "/y"
106
+
107
+
108
def test_upsert_existing_without_key_inserted_inside_span() -> None:
    """A missing key is added under ``[loghunter]``, ahead of the next table."""
    before = "[loghunter]\nsyslog_dir = \"/var/log\"\n[allowlist]\n"
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    assert tomllib.loads(after)["loghunter"]["zeek_dir"] == "/y"
    # Everything before the [allowlist] header must already contain the key.
    text_before_allowlist, _, _ = after.partition("[allowlist]")
    assert "zeek_dir" in text_before_allowlist
116
+
117
+
118
def test_upsert_existing_full_file_outside_span_byte_identical() -> None:
    """Text from ``[allowlist]`` onward is identical before and after the upsert."""
    trailing_stanzas = (
        "[allowlist]\ndomain_patterns = [\"~/x.txt\"]\n"
        "\n[export.splunk]\nhost = \"192.0.2.20\"\n"
        "\n[detectors.beacon]\nthreshold = 0.99\n"
        "\n# narrative comment about something\n"
    )
    before = "[loghunter]\nzeek_dir = \"/x\"\n\n" + trailing_stanzas
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    # Compare the tails starting at the first table after [loghunter].
    marker = "[allowlist]"
    tail_before = before[before.index(marker):]
    tail_after = after[after.index(marker):]
    assert tail_before == tail_after
131
+
132
+
133
def test_upsert_existing_skipped_strict_noop() -> None:
    """Skipping (``None``) against an existing config returns the input unchanged."""
    before = "[loghunter]\nzeek_dir = \"/x\"\n[allowlist]\n"
    after = cli._upsert_loghunter_key(before, "zeek_dir", None, fresh=False)
    assert after == before
137
+
138
+
139
def test_upsert_section_bound_token_in_another_stanza_active() -> None:
    """Same-named keys inside ``[export.cloudtrail]`` must never be matched."""
    before = (
        "[loghunter]\nzeek_dir = \"/x\"\n"
        "\n[export.cloudtrail]\n"
        "zeek_dir = \"/sneaky-active\"\n"
        "# zeek_dir = \"/sneaky-comment\"\n"
        "root = \"/sneaky-root\"\n"
    )
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    after = cli._upsert_loghunter_key(after, "root", "/new", fresh=False)

    # The foreign stanza must come through both upserts unchanged.
    marker = "[export.cloudtrail]"
    assert before[before.index(marker):] == after[after.index(marker):]

    # Both keys landed in [loghunter].
    section = tomllib.loads(after)["loghunter"]
    assert section["zeek_dir"] == "/y"
    assert section["root"] == "/new"
158
+
159
+
160
def test_upsert_section_bound_subtable_boundary() -> None:
    """``[loghunter.foo]`` closes the span, so its own ``zeek_dir`` is left alone."""
    before = (
        "[loghunter]\n"
        "[loghunter.foo]\n"
        "zeek_dir = \"/sub\"\n"
    )
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    # The sub-table's value survives.
    assert 'zeek_dir = "/sub"' in after
    # The new key sits between the main header and the sub-table header.
    start = after.index("[loghunter]\n")
    stop = after.index("[loghunter.foo]")
    assert "zeek_dir = '/y'" in after[start:stop]
175
+
176
+
177
def test_init_writes_bak_on_existing_config_update(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Re-running init over an existing config leaves the old text in a ``.bak``."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    original = "[loghunter]\nroot = \"/data/lh\"\nzeek_dir = \"/x\"\n"
    cfg_path = config_dir / "config.toml"
    cfg_path.write_text(original, encoding="utf-8")

    # No candidates are detected, so every prompt gets a plain Enter:
    # Zeek skip, Pi-hole skip, syslog skip, gate proceed, root.
    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, [""] * 5)

    cli._run_init([])

    backup_path = cfg_path.with_suffix(".toml.bak")
    assert backup_path.read_text(encoding="utf-8") == original
195
+
196
+
197
+ # ════════════════════════════════════════════════════════════════════════════
198
+ # 12–15. R1 root non-clobber
199
+ # ════════════════════════════════════════════════════════════════════════════
200
+
201
+
202
def _run_init_with_no_sources(
    monkeypatch: pytest.MonkeyPatch, root_input: str = "",
) -> None:
    """Run the wizard with nothing detected; ``root_input`` answers the last prompt.

    The first four answers are plain Enter (three source skips plus the gate).
    """
    _stub_candidates(monkeypatch)
    scripted_answers = [""] * 4 + [root_input]
    _stage_inputs(monkeypatch, scripted_answers)
    cli._run_init([])
209
+
210
+
211
def test_root_non_clobber_existing_value_preserved_on_enter(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Pressing Enter at the root prompt keeps the root already in the config."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    config_file.write_text("[loghunter]\nroot = \"/data/lh\"\n", encoding="utf-8")

    _run_init_with_no_sources(monkeypatch)

    settings = tomllib.loads(config_file.read_text(encoding="utf-8"))["loghunter"]
    assert settings["root"] == "/data/lh"
222
+
223
+
224
def test_root_non_clobber_missing_root_uses_live_default(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """With no ``root`` in the config, Enter writes ``~/.loghunter``."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    config_file.write_text("[loghunter]\nzeek_dir = \"/x\"\n", encoding="utf-8")

    _run_init_with_no_sources(monkeypatch)

    settings = tomllib.loads(config_file.read_text(encoding="utf-8"))["loghunter"]
    assert settings["root"] == "~/.loghunter"
235
+
236
+
237
def test_root_non_clobber_existing_empty_preserved(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """An explicit `root = ""` is kept when re-init gets Enter — the user chose CWD."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    config_file.write_text("[loghunter]\nroot = \"\"\n", encoding="utf-8")

    _run_init_with_no_sources(monkeypatch)

    settings = tomllib.loads(config_file.read_text(encoding="utf-8"))["loghunter"]
    assert settings["root"] == ""
249
+
250
+
251
def test_root_typed_value_replaces_existing(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A root typed at the prompt replaces the one stored in the config."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    config_file.write_text("[loghunter]\nroot = \"/data/lh\"\n", encoding="utf-8")

    _run_init_with_no_sources(monkeypatch, root_input="/new/root")

    settings = tomllib.loads(config_file.read_text(encoding="utf-8"))["loghunter"]
    assert settings["root"] == "/new/root"
262
+
263
+
264
+ # ════════════════════════════════════════════════════════════════════════════
265
+ # 16–21. R5 _toml_str
266
+ # ════════════════════════════════════════════════════════════════════════════
267
+
268
+
269
@pytest.mark.parametrize("value", [
    "/var/log/zeek",
    "/var/log/My Logs",  # embedded space
    "/var/log/o'brien",  # single quote, so the basic form is needed
    "C:\\Logs",  # backslash
    '/var/log/"weird"',  # double quote
])
def test_toml_str_roundtrips(value: str) -> None:
    """Whatever ``_toml_str`` renders must parse back to the same string."""
    toml_literal = cli._toml_str(value)
    document = tomllib.loads(f"x = {toml_literal}")
    assert document["x"] == value
280
+
281
+
282
def test_toml_str_single_quote_uses_basic_form() -> None:
    """A value containing ``'`` is rendered starting with a double quote."""
    rendered = cli._toml_str("/var/log/o'brien")
    assert rendered.startswith('"')
284
+
285
+
286
def test_toml_str_no_special_uses_literal_form() -> None:
    """A plain path is rendered wrapped in single quotes."""
    rendered = cli._toml_str("/var/log/zeek")
    assert rendered == "'/var/log/zeek'"
288
+
289
+
290
def test_toml_str_rejects_control_char() -> None:
    """A newline in the value raises ``ValueError`` mentioning a control character."""
    value_with_newline = "/var/log/\n"
    with pytest.raises(ValueError, match="control character"):
        cli._toml_str(value_with_newline)
293
+
294
+
295
+ # ════════════════════════════════════════════════════════════════════════════
296
+ # 22–27. Profiler
297
+ # ════════════════════════════════════════════════════════════════════════════
298
+
299
+
300
def _make_file(path: Path, *, size: int = 8, mtime: float | None = None) -> None:
    """Create ``path`` (and missing parents) holding ``size`` bytes of ``b"x"``.

    When ``mtime`` is given, both the access and modification times are set to it.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(b"x" * size)
    if mtime is None:
        return
    import os
    os.utime(path, (mtime, mtime))
306
+
307
+
308
def test_profile_zeek_logs_two_families(tmp_path: Path) -> None:
    """Two log families are reported joined with ``+``."""
    for filename in ("conn.log", "dns.log"):
        _make_file(tmp_path / filename)
    profile = cli._profile_dir(str(tmp_path), cli._ZEEK_GLOBS, logs_label=None)
    assert profile is not None
    assert profile["logs"] == "conn + dns"
314
+
315
+
316
def test_profile_zeek_logs_three_families(tmp_path: Path) -> None:
    """Three log families are reported as a comma-separated list."""
    for filename in ("conn.log", "dns.log", "ssl.log"):
        _make_file(tmp_path / filename)
    profile = cli._profile_dir(str(tmp_path), cli._ZEEK_GLOBS, logs_label=None)
    assert profile is not None
    assert profile["logs"] == "conn, dns, ssl"
323
+
324
+
325
def test_profile_human_bytes_kb(tmp_path: Path) -> None:
    """A single 12 KiB file is summarised as ``~12 KB``."""
    twelve_kib = 12 * 1024
    _make_file(tmp_path / "conn.log", size=twelve_kib)
    profile = cli._profile_dir(str(tmp_path), ("conn*.log*",), logs_label=None)
    assert profile is not None
    assert profile["size_str"] == "~12 KB"
330
+
331
+
332
def test_profile_human_bytes_mb_and_gb() -> None:
    """``_human_bytes`` renders MiB and GiB magnitudes as ``MB`` / ``GB``."""
    mib = 1024 ** 2
    gib = 1024 ** 3
    assert cli._human_bytes(340 * mib) == "~340 MB"
    assert cli._human_bytes(6 * gib) == "~6 GB"
335
+
336
+
337
@pytest.mark.parametrize("delta,expected", [
    (timedelta(minutes=30), "updated just now"),
    (timedelta(hours=12), "fresh today"),
    (timedelta(days=3), "active this week"),
    (timedelta(days=10), "last activity ~10 days ago"),
    (timedelta(days=45), "but it looks stale — nothing new in ~6 weeks"),
    (timedelta(days=75), "but it looks stale — nothing new in ~2 months"),
])
def test_fresh_bucket_boundaries(delta: timedelta, expected: str) -> None:
    """Each sample age maps to its expected freshness phrase."""
    phrase = cli._fresh_bucket(delta)
    assert phrase == expected
347
+
348
+
349
def test_profile_bounded_cap(tmp_path: Path) -> None:
    """More matching files than ``_PROFILE_FILE_CAP`` sets the ``bounded`` flag."""
    # Create 50 files beyond the cap, all matching conn*.log*.
    file_count = cli._PROFILE_FILE_CAP + 50
    for index in range(file_count):
        _make_file(tmp_path / f"conn.{index}.log")
    profile = cli._profile_dir(str(tmp_path), ("conn*.log*",), logs_label=None)
    assert profile is not None
    assert profile["bounded"] is True
357
+
358
+
359
def test_profile_no_data_returns_none(tmp_path: Path) -> None:
    """An existing directory with no matching files profiles as ``None``."""
    profile = cli._profile_dir(str(tmp_path), ("conn*.log*",), logs_label=None)
    assert profile is None
362
+
363
+
364
def test_profile_dir_missing_returns_none(tmp_path: Path) -> None:
    """A directory that does not exist profiles as ``None``."""
    absent_dir = str(tmp_path / "missing")
    profile = cli._profile_dir(absent_dir, ("conn*.log*",), logs_label=None)
    assert profile is None
368
+
369
+
370
def test_profile_permission_error_silently_handled(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``PermissionError`` from ``stat`` on the only match yields ``None``."""
    _make_file(tmp_path / "conn.log", size=8)

    original_stat = Path.stat

    def _stat_denied_for_conn(self, *args, **kwargs):
        # Only paths whose name starts with "conn" fail; others stat normally.
        if not self.name.startswith("conn"):
            return original_stat(self, *args, **kwargs)
        raise PermissionError("simulated")

    monkeypatch.setattr(Path, "stat", _stat_denied_for_conn)

    # The failing file is the only candidate, so the no-data result is expected.
    profile = cli._profile_dir(str(tmp_path), ("conn*.log*",), logs_label=None)
    assert profile is None
385
+
386
+
387
def test_detect_zeek_permission_error_continues(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A candidate whose glob raises ``PermissionError`` is passed over: the
    next candidate is returned instead of the error propagating."""
    unreadable = tmp_path / "bad-zeek"
    readable = tmp_path / "good-zeek"
    for directory in (unreadable, readable):
        directory.mkdir()
    _make_file(readable / "conn.log")

    monkeypatch.setattr(cli, "_ZEEK_CANDIDATES", (str(unreadable), str(readable)))

    original_glob = Path.glob

    def _glob_denied_for_unreadable(self, pattern, *args, **kwargs):
        # Only the first candidate fails; every other path globs normally.
        if self == unreadable:
            raise PermissionError("simulated")
        return original_glob(self, pattern, *args, **kwargs)

    monkeypatch.setattr(Path, "glob", _glob_denied_for_unreadable)

    assert cli._detect_zeek() == str(readable)
408
+
409
+
410
+ # ════════════════════════════════════════════════════════════════════════════
411
+ # 28–34. Flow tests — drive _run_init end-to-end
412
+ # ════════════════════════════════════════════════════════════════════════════
413
+
414
+
415
def _setup_zeek_dir(tmp_path: Path) -> str:
    """Create ``fake-zeek`` holding ``conn.log`` and ``dns.log``; return its path."""
    zeek_dir = tmp_path / "fake-zeek"
    zeek_dir.mkdir()
    for log_name in ("conn.log", "dns.log"):
        _make_file(zeek_dir / log_name)
    return str(zeek_dir)
421
+
422
+
423
def _setup_pihole(tmp_path: Path) -> tuple[str, tuple[tuple[str, str], ...]]:
    """Create ``fake-pihole/pihole.log``.

    Returns the directory path plus a one-entry candidates tuple of
    ``(log file path, directory path)``.
    """
    pihole_dir = tmp_path / "fake-pihole"
    pihole_dir.mkdir()
    log_file = pihole_dir / "pihole.log"
    _make_file(log_file)
    candidates = ((str(log_file), str(pihole_dir)),)
    return (str(pihole_dir), candidates)
428
+
429
+
430
def _setup_syslog(tmp_path: Path) -> str:
    """Create ``fake-var-log`` holding ``messages.log``; return the directory path."""
    syslog_dir = tmp_path / "fake-var-log"
    syslog_dir.mkdir()
    _make_file(syslog_dir / "messages.log")
    return str(syslog_dir)
435
+
436
+
437
def test_flow_all_found_all_accepted_root_enter(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """All three sources detected and accepted with Enter, root left at Enter."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    zeek_dir = _setup_zeek_dir(tmp_path)
    pihole_dir, pihole_candidates = _setup_pihole(tmp_path)
    syslog_dir = _setup_syslog(tmp_path)
    _stub_candidates(
        monkeypatch, zeek=(zeek_dir,), pihole=pihole_candidates, syslog=syslog_dir,
    )
    _stage_inputs(monkeypatch, [""] * 4)

    cli._run_init([])

    written = (config_dir / "config.toml").read_text(encoding="utf-8")
    settings = tomllib.loads(written)["loghunter"]
    assert settings["root"] == "~/.loghunter"
    assert settings["zeek_dir"] == zeek_dir
    assert settings["pihole_dir"] == pihole_dir
    assert settings["syslog_dir"] == syslog_dir
454
+
455
+
456
def test_flow_typed_pihole_path(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A path typed at the Pi-hole prompt is what gets written, despite a detection."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    zeek_dir = _setup_zeek_dir(tmp_path)
    _, pihole_candidates = _setup_pihole(tmp_path)
    syslog_dir = _setup_syslog(tmp_path)
    _stub_candidates(
        monkeypatch, zeek=(zeek_dir,), pihole=pihole_candidates, syslog=syslog_dir,
    )
    _stage_inputs(monkeypatch, ["", "/custom/pihole", "", ""])

    cli._run_init([])

    written = (config_dir / "config.toml").read_text(encoding="utf-8")
    assert tomllib.loads(written)["loghunter"]["pihole_dir"] == "/custom/pihole"
469
+
470
+
471
def test_flow_pihole_not_found_typed_path(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """With no Pi-hole candidates, a typed path is written as ``pihole_dir``."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    zeek_dir = _setup_zeek_dir(tmp_path)
    syslog_dir = _setup_syslog(tmp_path)
    _stub_candidates(monkeypatch, zeek=(zeek_dir,), pihole=(), syslog=syslog_dir)
    _stage_inputs(monkeypatch, ["", "/somewhere/pihole", "", ""])

    cli._run_init([])

    written = (config_dir / "config.toml").read_text(encoding="utf-8")
    assert tomllib.loads(written)["loghunter"]["pihole_dir"] == "/somewhere/pihole"
483
+
484
+
485
def test_flow_all_skipped_gate_redo(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Skipping everything, choosing redo at the gate, then accepting everything
    must write all three detected directories."""
    home = _isolated_home(monkeypatch, tmp_path)
    zeek = _setup_zeek_dir(tmp_path)
    pihole_dir, pihole_candidates = _setup_pihole(tmp_path)
    syslog = _setup_syslog(tmp_path)
    _stub_candidates(monkeypatch, zeek=(zeek,), pihole=pihole_candidates, syslog=syslog)
    # First pass: skip all three (s, s, s). Gate: r → redo. Second pass: Enter
    # all three to accept. Then root Enter.
    _stage_inputs(monkeypatch, ["s", "s", "s", "r", "", "", "", ""])

    cli._run_init([])
    parsed = tomllib.loads((home / "config.toml").read_text(encoding="utf-8"))
    assert parsed["loghunter"]["zeek_dir"] == zeek
    # Pin the exact detected directory (as the all-accepted flow test does)
    # rather than merely "not empty", so a wrong path cannot slip through.
    assert parsed["loghunter"]["pihole_dir"] == pihole_dir
    assert parsed["loghunter"]["syslog_dir"] == syslog
502
+
503
+
504
def test_flow_all_skipped_gate_enter_proceed(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Skipping all three sources and pressing Enter at the gate writes no source keys."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    zeek_dir = _setup_zeek_dir(tmp_path)
    _, pihole_candidates = _setup_pihole(tmp_path)
    syslog_dir = _setup_syslog(tmp_path)
    _stub_candidates(
        monkeypatch, zeek=(zeek_dir,), pihole=pihole_candidates, syslog=syslog_dir,
    )
    _stage_inputs(monkeypatch, ["s", "s", "s", "", ""])

    cli._run_init([])

    written = (config_dir / "config.toml").read_text(encoding="utf-8")
    settings = tomllib.loads(written)["loghunter"]
    # On a fresh-from-example config every skipped source ends up commented
    # out, so the parsed table has none of the three keys.
    assert "zeek_dir" not in settings
    assert "pihole_dir" not in settings
    assert "syslog_dir" not in settings
521
+
522
+
523
def test_flow_reinit_preserves_custom_root_and_other_stanzas(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Re-init keeps the custom root and the detectors stanza, and writes a ``.bak``."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    existing = (
        "[loghunter]\nroot = \"/data/lh\"\nzeek_dir = \"/old/zeek\"\n"
        "\n[detectors.beacon]\nthreshold = 0.99\n"
    )
    config_file.write_text(existing, encoding="utf-8")

    zeek_dir = _setup_zeek_dir(tmp_path)
    _, pihole_candidates = _setup_pihole(tmp_path)
    syslog_dir = _setup_syslog(tmp_path)
    _stub_candidates(
        monkeypatch, zeek=(zeek_dir,), pihole=pihole_candidates, syslog=syslog_dir,
    )
    _stage_inputs(monkeypatch, [""] * 4)

    cli._run_init([])

    rewritten = config_file.read_text(encoding="utf-8")
    assert tomllib.loads(rewritten)["loghunter"]["root"] == "/data/lh"
    # The unmanaged stanza is still there, text unchanged.
    assert "[detectors.beacon]\nthreshold = 0.99" in rewritten
    # The backup carries the pre-init text.
    backup_file = config_dir / "config.toml.bak"
    assert backup_file.read_text(encoding="utf-8") == existing
549
+
550
+
551
def test_flow_reinit_with_empty_root_preserved(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Full re-init flow with nothing detected keeps an explicit empty root."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    config_file = config_dir / "config.toml"
    config_file.write_text("[loghunter]\nroot = \"\"\n", encoding="utf-8")
    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, [""] * 5)

    cli._run_init([])

    settings = tomllib.loads(config_file.read_text(encoding="utf-8"))["loghunter"]
    assert settings["root"] == ""
565
+
566
+
567
+ # ════════════════════════════════════════════════════════════════════════════
568
+ # 35. Verbatim line discipline
569
+ # ════════════════════════════════════════════════════════════════════════════
570
+
571
+
572
def test_verbatim_zeek_not_found_block(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, capsys: pytest.CaptureFixture[str],
) -> None:
    """The Zeek-not-found dialogue lines appear verbatim on stdout."""
    _isolated_home(monkeypatch, tmp_path)
    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, [""] * 5)
    cli._run_init([])
    printed = capsys.readouterr().out
    assert "Didn't find Zeek. You might like it: https://zeek.org" in printed
    assert "If it's just hiding, tell me where." in printed
    assert "[Enter = skip · type a path]" in printed
583
+
584
+
585
def test_verbatim_gate_and_confirm_blocks(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, capsys: pytest.CaptureFixture[str],
) -> None:
    """The gate and confirm dialogue lines appear verbatim, and the confirm
    block has one blank line between the data line and the docs line."""
    _isolated_home(monkeypatch, tmp_path)
    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, ["", "", "", "", ""])
    cli._run_init([])
    out = capsys.readouterr().out
    assert "You should provide at least one: Zeek, Pi-hole, or syslog." in out
    assert "Or you can point loghunter at individual files. Up to you." in out
    assert "[r = redo · Enter = skip]" in out
    assert "Done — settings written to ~/.loghunter/config.toml." in out
    assert "(none — pass files on the command line)" in out
    assert "data: ~/.loghunter" in out
    assert "Good hunting!" in out

    # Confirm block has exactly one blank line between data line and docs URL.
    confirm_tail = out[out.index("Done — settings written"):]
    lines = confirm_tail.splitlines()
    # Locate the data line by its content, not its exact indent width, and use
    # a default so a missing line fails with a message instead of an opaque
    # StopIteration.
    data_line_idx = next(
        (i for i, line in enumerate(lines) if line.lstrip().startswith("data:")),
        None,
    )
    assert data_line_idx is not None, "confirm block has no 'data:' line"
    assert len(lines) > data_line_idx + 2, "confirm block ends right after the data line"
    assert lines[data_line_idx + 1] == ""
    assert lines[data_line_idx + 2].startswith("LogHunter documentation lives here:")
608
+
609
+
610
def test_verbatim_zeek_no_data_found_single_line(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Rev-2 no-data reduced form: `Found Zeek at {path}. Use this?` on ONE line."""
    # The unused ``monkeypatch`` fixture was dropped: nothing here is patched,
    # and the Pi-hole sibling test takes only ``capsys`` as well.
    cli._print_zeek_found("/some/zeek", None)
    out = capsys.readouterr().out
    assert "Found Zeek at /some/zeek. Use this?" in out
    # The phrase must not be split across two lines.
    assert "Found Zeek at /some/zeek.\nUse this?" not in out
617
+ # The phrase must not be split across two lines.
618
+ assert "Found Zeek at /some/zeek.\nUse this?" not in out
619
+
620
+
621
def test_verbatim_pihole_no_data_found_single_line(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """With no profile, the Pi-hole found prompt is printed on a single line."""
    cli._print_pihole_found("/some/pihole", None)
    printed = capsys.readouterr().out
    assert "Found Pi-hole at /some/pihole. Use this?" in printed
    # Guard against the sentence being broken after the path.
    assert "Found Pi-hole at /some/pihole.\nUse this?" not in printed
628
+
629
+
630
def test_verbatim_zeek_profiled_keeps_two_line_form(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """With a profile, the found line and the summary-plus-question line both
    appear — a guard so the no-data single-line form does not swallow the
    richer output."""
    profile = {
        "logs": "conn + dns",
        "size_str": "~12 KB",
        "fresh_str": "fresh today",
        "bounded": False,
        "size_bytes": 12_288,
    }
    cli._print_zeek_found("/some/zeek", profile)
    printed = capsys.readouterr().out
    assert "Found Zeek at /some/zeek." in printed
    assert "conn + dns, ~12 KB, fresh today. Use this?" in printed
643
+
644
+
645
+ # ════════════════════════════════════════════════════════════════════════════
646
+ # CR regression: upsert duplicate-key + .bak byte preservation
647
+ # ════════════════════════════════════════════════════════════════════════════
648
+
649
+
650
def test_upsert_active_wins_over_preceding_commented_sample() -> None:
    """When a commented sample precedes an active value, the active line is the
    one rewritten; uncommenting the sample would create a duplicate key."""
    before = (
        "[loghunter]\n"
        "# zeek_dir = \"/default\"\n"
        "zeek_dir = \"/custom\"\n"
    )
    after = cli._upsert_loghunter_key(before, "zeek_dir", "/y", fresh=False)
    # The active line carries the new value.
    assert 'zeek_dir = \'/y\'' in after
    # The commented sample is still there, unchanged.
    assert '# zeek_dir = "/default"' in after
    # The result parses, which it would not with a duplicated key.
    section = tomllib.loads(after)["loghunter"]
    assert section["zeek_dir"] == "/y"
666
+
667
+
668
def test_bak_byte_identical_for_crlf_existing_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A CRLF config is backed up byte-for-byte, and the unmanaged stanza keeps
    its CRLF line endings in the rewritten file."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    cfg_path = config_dir / "config.toml"
    # Windows line endings throughout, plus a stanza the wizard does not manage.
    original_bytes = (
        b"[loghunter]\r\n"
        b"root = \"/data/lh\"\r\n"
        b"\r\n"
        b"[detectors.beacon]\r\n"
        b"threshold = 0.99\r\n"
    )
    cfg_path.write_bytes(original_bytes)

    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, [""] * 5)
    cli._run_init([])

    backup_path = cfg_path.with_suffix(".toml.bak")
    assert backup_path.read_bytes() == original_bytes
    # The detectors stanza must still end its lines with CRLF.
    assert b"[detectors.beacon]\r\nthreshold = 0.99\r\n" in cfg_path.read_bytes()
695
+
696
+
697
def test_no_traceback_on_corrupt_existing_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """An unparseable existing config raises ``ValueError`` with the init prefix."""
    config_dir = _isolated_home(monkeypatch, tmp_path)
    config_dir.mkdir(parents=True)
    corrupt_text = "not = valid = toml = at = all"
    (config_dir / "config.toml").write_text(corrupt_text, encoding="utf-8")
    _stub_candidates(monkeypatch)
    _stage_inputs(monkeypatch, [""])

    with pytest.raises(ValueError, match="loghunter init: existing config"):
        cli._run_init([])