loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,1040 @@
1
+ """Stage 3 fan-out behavior for ``loghunter digest`` — schema-agnostic tests.
2
+
3
+ The per-schema digest test files own single-path CLI routing; this file owns
4
+ the cross-schema fan-out contract: N positionals digested independently,
5
+ per-path outcomes (rendered / empty / error) tallied to a three-way exit
6
+ code, and a shared ``--out`` target receiving concatenated cards.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import io
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import pytest
17
+
18
+ import loghunter.cli as cli
19
+ import loghunter.runner as runner
20
+
21
+
22
+ # ─── Fixtures — single representative line per schema ───────────────────────
23
+
24
+ _ZEEK_NDJSON_CONN_LINE = (
25
+ '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10", "id.resp_h": "198.51.100.20",'
26
+ ' "id.resp_p": 443, "proto": "tcp", "duration": 1.23}\n'
27
+ )
28
+ _ZEEK_DNS_NDJSON_LINE = (
29
+ '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10", "query": "example.test"}\n'
30
+ )
31
+ _PIHOLE_LINE = (
32
+ "Jun 1 12:00:00 piholehost dnsmasq[123]: query[A] example.test from 192.0.2.10\n"
33
+ )
34
+ _SYSLOG_LINE = (
35
+ "<13>Jun 1 12:00:00 examplehost sshd[1234]: Accepted publickey for placeholder\n"
36
+ )
37
+ _BLOB_LINE = (
38
+ "totally-unrecognized-application-banner xyzzy 42 frobnicate\n"
39
+ )
40
+
41
+
42
def _stub_config(monkeypatch, cfg_dict: dict | None = None) -> None:
    """Replace ``cli.cfg.load`` with a stub that ignores its argument.

    The stub returns ``cfg_dict`` when it is truthy; otherwise it returns a
    minimal ``{"loghunter": {}}`` mapping.
    """

    def _fake_load(_p):
        # Evaluated per call, so the fallback dict is fresh each time.
        return cfg_dict or {"loghunter": {}}

    monkeypatch.setattr(cli.cfg, "load", _fake_load)
44
+
45
+
46
def _spy_run_digest_calls(monkeypatch) -> list[dict[str, Any]]:
    """Swap ``runner.run_digest`` for a recorder and return its call log.

    Each invocation of the patched function appends its keyword arguments
    (as a dict) to the returned list; the recorder itself returns ``None``.
    """
    recorded: list[dict[str, Any]] = []

    def _record(**kwargs) -> None:
        recorded.append(kwargs)

    monkeypatch.setattr(runner, "run_digest", _record)
    return recorded
51
+
52
+
53
+ # ─── Fan-out: multiple positionals digested in order ────────────────────────
54
+
55
+
56
def test_digest_three_mixed_positionals_render_in_argv_order(
    tmp_path: Path, monkeypatch,
) -> None:
    """Fan-out over a conn, a dns and a syslog file.

    ``run_digest`` must be invoked once per positional, in argv order, with
    each path delivered through the source-dir kwarg asserted below
    (``zeek_dir`` for conn and dns, ``syslog_dir`` for syslog).
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    conn_path = tmp_path / "conn.log"
    dns_path = tmp_path / "dns.log"
    syslog_path = tmp_path / "syslog.log"
    for target, line in (
        (conn_path, _ZEEK_NDJSON_CONN_LINE),
        (dns_path, _ZEEK_DNS_NDJSON_LINE),
        (syslog_path, _SYSLOG_LINE),
    ):
        target.write_text(line, encoding="utf-8")

    argv = ["digest", str(conn_path), str(dns_path), str(syslog_path)]
    exit_code = cli._main(argv)

    assert exit_code == 0
    schemas = [call["schema"] for call in recorded]
    assert schemas == ["conn", "dns", "syslog"]
    # The schema check above guarantees exactly three recorded calls.
    conn_call, dns_call, syslog_call = recorded
    assert conn_call["zeek_dir"] == str(conn_path)
    assert dns_call["zeek_dir"] == str(dns_path)
    assert syslog_call["syslog_dir"] == str(syslog_path)
78
+
79
+
80
def test_digest_pihole_positional_routes_to_pihole_dir_in_fanout(
    tmp_path: Path, monkeypatch,
) -> None:
    """A Zeek DNS file and a dnsmasq/Pi-hole file in one fan-out.

    Both are digested under the ``dns`` schema, but the Zeek file arrives via
    ``zeek_dir`` while the Pi-hole file arrives via ``pihole_dir``.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    zeek_path = tmp_path / "zeek_dns.log"
    pihole_path = tmp_path / "pihole.log"
    zeek_path.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")
    pihole_path.write_text(_PIHOLE_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(zeek_path), str(pihole_path)])

    assert exit_code == 0
    assert len(recorded) == 2
    zeek_call, pihole_call = recorded
    assert zeek_call["schema"] == "dns"
    assert zeek_call["zeek_dir"] == str(zeek_path)
    assert pihole_call["schema"] == "dns"
    assert pihole_call["pihole_dir"] == str(pihole_path)
99
+
100
+
101
+ # ─── Three-way exit policy ───────────────────────────────────────────────────
102
+
103
+
104
def test_digest_mixed_valid_empty_missing_renders_and_exits_zero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """One valid, one empty and one missing positional in a single fan-out.

    Only the valid file reaches ``run_digest``; the empty file is reported on
    stdout, the missing path on stderr, and the exit code is 0 because at
    least one path rendered.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    valid_path = tmp_path / "conn.log"
    valid_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    empty_path = tmp_path / "empty.log"
    empty_path.write_text("", encoding="utf-8")
    # Deliberately left uncreated so the CLI sees a non-existent path.
    missing_path = tmp_path / "missing.log"

    argv = ["digest", str(valid_path), str(empty_path), str(missing_path)]
    exit_code = cli._main(argv)
    streams = capsys.readouterr()

    assert exit_code == 0
    assert len(recorded) == 1
    assert recorded[0]["schema"] == "conn"
    assert "empty.log is empty. Nothing to do!" in streams.out
    assert "digest: path not found" in streams.err
126
+
127
+
128
def test_digest_all_empty_exits_zero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Two empty files and nothing else.

    Each produces a "Nothing to do!" line on stdout, ``run_digest`` is never
    called, and the exit code is 0 — an empty input is not a failure.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    empties = [tmp_path / "a.log", tmp_path / "b.log"]
    for empty_file in empties:
        empty_file.write_text("", encoding="utf-8")

    exit_code = cli._main(["digest", str(empties[0]), str(empties[1])])
    streams = capsys.readouterr()

    assert exit_code == 0
    assert recorded == []
    assert streams.out.count("Nothing to do!") == 2
147
+
148
+
149
def test_digest_all_error_exits_nonzero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Every positional is a missing path → stderr error and exit 1.

    Directory positionals behave differently in multi-path mode (they are
    skipped without a message — covered by
    test_digest_multipath_directory_is_silently_skipped), so this test uses
    only non-existent paths to pin the missing-path error behaviour.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    # Never created on disk.
    second_missing = tmp_path / "also_missing.log"

    exit_code = cli._main(["digest", "/no/such/file.log", str(second_missing)])
    streams = capsys.readouterr()

    assert exit_code == 1
    assert recorded == []
    assert "path not found" in streams.err
169
+
170
+
171
def test_digest_mixed_empty_and_error_no_render_exits_nonzero(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """An empty file plus a missing path, with nothing rendered → exit 1.

    The empty file still gets its stdout notice and the missing path its
    stderr error; the presence of a real error drives the non-zero exit.
    """
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    empty_path = tmp_path / "e.log"
    empty_path.write_text("", encoding="utf-8")

    exit_code = cli._main(["digest", str(empty_path), "/no/such/file.log"])
    streams = capsys.readouterr()

    assert exit_code == 1
    assert "Nothing to do!" in streams.out
    assert "path not found" in streams.err
187
+
188
+
189
+ # ─── Directory positionals: silent skip in fan-out, error on lone ───────────
190
+ #
191
+ # A directory positional in shell-expanded multi-path fan-out should not
192
+ # interleave error noise between cards. The v1 contract for a lone-directory
193
+ # positional (single positional, hits a directory) stays — actionable stderr
194
+ # message and exit 1. Other per-path errors (missing path, sniff failure)
195
+ # continue to surface in fan-out — only directories get the silent treatment.
196
+
197
+
198
def test_digest_multipath_directory_is_silently_skipped(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A directory among several positionals is skipped without comment.

    The sibling conn file is still routed to ``run_digest``, the exit code is
    0, and stderr mentions neither the directory message nor the directory
    path.
    """
    _stub_config(monkeypatch)
    recorded = _spy_run_digest_calls(monkeypatch)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    directory = tmp_path / "subdir"
    directory.mkdir()

    exit_code = cli._main(["digest", str(conn_path), str(directory)])
    streams = capsys.readouterr()

    # At least one path rendered, so the run counts as a success.
    assert exit_code == 0
    # Only the conn file was handed to the runner.
    assert len(recorded) == 1
    # The directory must leave no trace on stderr.
    assert "must be a file, not a directory" not in streams.err
    assert str(directory) not in streams.err
219
+
220
+
221
def test_digest_multipath_all_directories_exits_zero_silently(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Multi-path fan-out where every positional is a directory: no output to
    stdout or stderr, exit 0 (consistent with 'silent skip directories').

    rendered=0, errored=0 → exit-code policy returns 0 via the
    ``errored == 0`` branch in cli.py.
    """
    _stub_config(monkeypatch)
    calls = _spy_run_digest_calls(monkeypatch)

    # One statement per line instead of ``d = ...; d.mkdir()`` pairs.
    directories = [tmp_path / name for name in ("a", "b", "c")]
    for directory in directories:
        directory.mkdir()

    rc = cli._main(["digest", *(str(directory) for directory in directories)])

    captured = capsys.readouterr()
    assert rc == 0
    assert calls == []
    assert captured.out == ""
    assert captured.err == ""
244
+
245
+
246
def test_digest_lone_directory_positional_still_errors(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A directory as the only positional keeps the v1 behaviour.

    The CLI exits 1 and prints a stderr message that names the directory and
    states that a file is required.
    """
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    directory = tmp_path / "logs"
    directory.mkdir()

    exit_code = cli._main(["digest", str(directory)])
    streams = capsys.readouterr()

    assert exit_code == 1
    assert "must be a file, not a directory" in streams.err
    assert str(directory) in streams.err
264
+
265
+
266
def test_digest_multipath_non_directory_errors_still_surface(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """The silent skip is reserved for directories.

    A missing path next to a valid conn file still reports "path not found"
    on stderr, while the exit code stays 0 because the conn file rendered.
    """
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_path), "/no/such/file.log"])
    streams = capsys.readouterr()

    # The conn file rendered, so the overall result is success.
    assert exit_code == 0
    assert "path not found" in streams.err
282
+
283
+
284
+ # ─── Real-route regression: notice-shape pathless NDJSON → blob ─────────────
285
+ #
286
+ # Reproduces the original incident: a notice.log-shaped pathless Zeek NDJSON
287
+ # (id.orig_h plus native src) historically reached the conn summariser via
288
+ # the field-set fallback and crashed it with the Grouper-not-1-dimensional
289
+ # pandas error. The collision guard now rejects the false claim at sniff,
290
+ # the orchestrator drops to the blob floor, and the real summariser is
291
+ # never invoked — so the defence-in-depth net never fires either.
292
+ # Unmocked end-to-end: this test fails if a future change accidentally
293
+ # bypasses the guard, even if the recognizer unit tests still pass.
294
+
295
+
296
def test_digest_notice_no_path_routes_to_blob_with_no_breadcrumb(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """End-to-end (runner not mocked): a notice-shaped NDJSON record renders
    as a blob card and leaves stderr free of summariser-failure output."""
    _stub_config(monkeypatch)

    notice_record = (
        '{"ts": 1779750000.0, "uid": "Cxxxxxx",'
        ' "id.orig_h": "192.0.2.10", "id.orig_p": 41514,'
        ' "id.resp_h": "198.51.100.20", "id.resp_p": 443, "proto": "tcp",'
        ' "src": "192.0.2.10", "dst": "198.51.100.20",'
        ' "note": "Placeholder::Note", "msg": "placeholder message"}\n'
    )
    notice_path = tmp_path / "notice.log"
    notice_path.write_text(notice_record, encoding="utf-8")

    exit_code = cli._main(["digest", str(notice_path)])
    streams = capsys.readouterr()

    assert exit_code == 0
    # stdout carries the blob card: its headline plus the source file name.
    for expected in ("Unrecognized source", "notice.log"):
        assert expected in streams.out
    # stderr must show none of the fallback breadcrumb, the raw pandas error
    # text, the exception class name, or a traceback.
    for forbidden in (
        "summariser failed",
        "Grouper for 'src'",
        "ValueError",
        "Traceback",
    ):
        assert forbidden not in streams.err
326
+
327
+
328
+ # ─── Blob fallback on summariser raise (item 2) ─────────────────────────────
329
+ #
330
+ # Defence-in-depth for a recognised-schema summariser raising on a pathological
331
+ # frame (e.g. duplicate `src` column → pandas Grouper failure). The narrow
332
+ # try/except in run_digest catches Exception (NOT BaseException), is silent on
333
+ # stderr by default and emits a one-line breadcrumb under --verbose, and
334
+ # always falls back to a blob card for THE SAME file on THE SAME stream.
335
+ # Sibling fan-out iterations continue to render.
336
+
337
+
338
def test_digest_summariser_failure_falls_back_to_blob(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A raising summariser on a conn file yields a blob card, quietly.

    ``loghunter.digest.get_summarizer`` is patched to hand back a callable
    that always raises ``RuntimeError``. Without ``--verbose`` the run must
    still exit 0, print the blob card on stdout, and keep the breadcrumb, the
    raw exception text and any traceback off stderr.

    Patching the summariser lookup avoids having to craft a physical fixture
    that drives a real summariser into failure.
    """
    _stub_config(monkeypatch)

    # Genuine conn NDJSON, so the induced summariser failure is the only
    # fault in this run.
    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    def _always_raises(*_a, **_kw):
        raise RuntimeError("induced summariser failure")

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer",
        lambda _schema_name: _always_raises,
    )

    exit_code = cli._main(["digest", str(conn_path)])
    streams = capsys.readouterr()

    # The blob card counts as a successful render.
    assert exit_code == 0
    # Quiet by default: no breadcrumb, no exception text, no traceback.
    for forbidden in (
        "summariser failed",
        "RuntimeError: induced summariser failure",
        "Traceback",
    ):
        assert forbidden not in streams.err
    # Blob card on stdout: headline plus the source name on the identity line.
    assert "Unrecognized source" in streams.out
    assert "conn.log" in streams.out
380
+
381
+
382
def test_digest_summariser_failure_breadcrumb_shown_under_verbose(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """With ``--verbose`` the summariser-failure breadcrumb reaches stderr.

    The summariser lookup is patched to return an always-raising callable.
    The run must exit 0, render the blob card on stdout, and print the
    breadcrumb (failure marker, exception text, file name) on stderr —
    but never a traceback.
    """
    _stub_config(monkeypatch)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    def _always_raises(*_a, **_kw):
        raise RuntimeError("induced summariser failure")

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer",
        lambda _schema_name: _always_raises,
    )

    exit_code = cli._main(["digest", "--verbose", str(conn_path)])
    streams = capsys.readouterr()

    assert exit_code == 0
    # Verbose mode surfaces the breadcrumb in full.
    for expected in (
        "summariser failed",
        "RuntimeError: induced summariser failure",
        "conn.log",
    ):
        assert expected in streams.err
    # A breadcrumb is not a stack trace.
    assert "Traceback" not in streams.err
    # The blob card is rendered regardless of verbosity.
    assert "Unrecognized source" in streams.out
    assert "conn.log" in streams.out
415
+
416
+
417
def test_digest_summariser_failure_does_not_abort_sibling_paths(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A summariser failure on one path does not stop the fan-out.

    Two conn files are digested; the summariser raises for the first
    invocation only. The first file must degrade to a blob card, the second
    must render a normal conn card, and stderr must stay quiet without
    ``--verbose``.
    """
    _stub_config(monkeypatch)

    first_path = tmp_path / "a.log"
    second_path = tmp_path / "b.log"
    for conn_file in (first_path, second_path):
        conn_file.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    # Capture the genuine lookup before patching so every invocation after
    # the first can delegate to the real schema summariser.
    from loghunter import digest as _digest_pkg
    original_get = _digest_pkg.get_summarizer
    invocations = 0

    def _flaky_get(schema_name: str):
        def _summarise(*args, **kwargs):
            nonlocal invocations
            invocations += 1
            if invocations == 1:
                raise RuntimeError("induced summariser failure")
            return original_get(schema_name)(*args, **kwargs)
        return _summarise

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer", _flaky_get,
    )

    exit_code = cli._main(["digest", str(first_path), str(second_path)])
    streams = capsys.readouterr()

    assert exit_code == 0
    # Default verbosity: the fallback leaves nothing on stderr.
    assert "summariser failed" not in streams.err
    assert "Traceback" not in streams.err
    # The second file produced a real conn card ("conn ·" identity line).
    assert "conn ·" in streams.out
    # The first file produced a blob card, identified by its headline.
    assert "Unrecognized source" in streams.out
463
+
464
+
465
def test_digest_runner_value_error_does_not_abort_loop(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A ValueError raised inside run_digest for one path is caught and
    tallied; subsequent valid paths still render."""
    _stub_config(monkeypatch)

    # Holds the ``zeek_dir`` kwarg of each call: the CLI passes paths as
    # strings, and ``.get`` yields None when the kwarg is absent — so the
    # element type is ``str | None``, not ``Path``.
    calls: list[str | None] = []

    def flaky_run_digest(**kwargs):
        # First call (conn) raises; second call (dns) succeeds.
        calls.append(kwargs.get("zeek_dir"))
        if len(calls) == 1:
            raise ValueError("induced parser failure")

    monkeypatch.setattr(runner, "run_digest", flaky_run_digest)

    conn = tmp_path / "conn.log"
    conn.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    dns = tmp_path / "dns.log"
    dns.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")

    rc = cli._main(["digest", str(conn), str(dns)])

    captured = capsys.readouterr()
    assert rc == 0  # ≥1 rendered (the dns path)
    assert len(calls) == 2
    assert "induced parser failure" in captured.err
494
+
495
+
496
+ # ─── Shared --out concatenation ──────────────────────────────────────────────
497
+
498
+
499
def test_digest_out_directory_writes_single_timestamped_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """Two valid paths with ``--out=<dir>/`` share one output file.

    The directory must end up with exactly one ``digest_*.txt`` file whose
    name embeds no input basename, both ``run_digest`` calls must receive the
    same stream object, and the file body must hold the cards in argv order.
    """
    _stub_config(monkeypatch)

    handed_streams: list[Any] = []

    def fake_run_digest(**kwargs):
        # Stand-in for rendering: tag the stream with the schema name.
        target = kwargs.get("stream")
        handed_streams.append(target)
        target.write(f"[card {kwargs['schema']}]\n")

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    syslog_path = tmp_path / "sl.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    syslog_path.write_text(_SYSLOG_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    argv = ["digest", str(conn_path), str(syslog_path), f"--out={out_dir}/"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    produced = sorted(out_dir.iterdir())
    assert len(produced) == 1
    report = produced[0]
    assert report.name.startswith("digest_")
    assert report.suffix == ".txt"
    # The file name is not derived from either input's basename.
    assert "conn" not in report.name
    assert "sl" not in report.name
    # Both cards were written through one shared text stream.
    assert handed_streams[0] is handed_streams[1]
    assert report.read_text(encoding="utf-8") == "[card conn]\n[card syslog]\n"
534
+
535
+
536
def test_digest_out_explicit_file_honors_path(
    tmp_path: Path, monkeypatch,
) -> None:
    """``--out=<file>`` with two paths writes both cards to that exact file."""
    _stub_config(monkeypatch)

    def fake_run_digest(**kwargs):
        card = f"[card {kwargs['schema']}]\n"
        kwargs["stream"].write(card)

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    dns_path = tmp_path / "dns.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    dns_path.write_text(_ZEEK_DNS_NDJSON_LINE, encoding="utf-8")
    report_path = tmp_path / "my_report.txt"

    argv = ["digest", str(conn_path), str(dns_path), f"--out={report_path}"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    written = report_path.read_text(encoding="utf-8")
    assert written == "[card conn]\n[card dns]\n"
557
+
558
+
559
def test_digest_single_positional_with_out_directory_uses_same_naming(
    tmp_path: Path, monkeypatch,
) -> None:
    """A single positional with ``--out=<dir>/`` gets the same file naming.

    Exactly one ``digest_*`` file appears in the directory and it contains
    the single card.
    """
    _stub_config(monkeypatch)

    def fake_run_digest(**kw):
        # Hand back the write() result, as the stream call itself does.
        return kw["stream"].write(f"[card {kw['schema']}]\n")

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn_path = tmp_path / "conn.log"
    conn_path.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    exit_code = cli._main(["digest", str(conn_path), f"--out={out_dir}/"])

    assert exit_code == 0
    produced = sorted(out_dir.iterdir())
    assert len(produced) == 1
    assert produced[0].name.startswith("digest_")
    assert produced[0].read_text(encoding="utf-8") == "[card conn]\n"
579
+
580
+
581
+ # ─── Lazy stream — no file created when nothing renders ─────────────────────
582
+
583
+
584
def test_digest_out_directory_with_all_empty_creates_no_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """Only empty inputs plus ``--out=<dir>/`` → exit 0 and no output file.

    The output directory may be absent or present-but-empty; either is
    accepted as evidence that no file was opened.
    """
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    empties = [tmp_path / "a.log", tmp_path / "b.log"]
    for empty_file in empties:
        empty_file.write_text("", encoding="utf-8")
    out_dir = tmp_path / "out"

    argv = ["digest", str(empties[0]), str(empties[1]), f"--out={out_dir}/"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    leftovers = list(out_dir.iterdir()) if out_dir.exists() else []
    assert leftovers == []
602
+
603
+
604
def test_digest_out_directory_with_all_error_creates_no_file(
    tmp_path: Path, monkeypatch,
) -> None:
    """Only missing paths plus ``--out=<dir>/`` → exit 1 and no output file."""
    _stub_config(monkeypatch)
    _spy_run_digest_calls(monkeypatch)

    out_dir = tmp_path / "out"
    argv = ["digest", "/no/such.log", "/also/missing.log", f"--out={out_dir}/"]

    exit_code = cli._main(argv)

    assert exit_code == 1
    leftovers = list(out_dir.iterdir()) if out_dir.exists() else []
    assert leftovers == []
617
+
618
+
619
+ # ─── Dry-run sidesteps --out ────────────────────────────────────────────────
620
+
621
+
622
def test_digest_dry_run_with_out_creates_no_file(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """`digest <path> --dry-run --out=<dir>/` → no file materialises.

    ``runner.run_digest`` is replaced by a spy that only records the
    ``stream`` kwarg it receives, so this test pins what the CLI hands to the
    runner under ``--dry-run``; the real runner's dry-run branch is not
    exercised here.
    """
    _stub_config(monkeypatch)

    seen_streams: list[Any] = []

    def fake_run_digest(**kwargs):
        # Record the stream and do nothing else: the spy never writes, so any
        # file found in out_dir would have been created by the CLI itself.
        seen_streams.append(kwargs.get("stream"))

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    conn = tmp_path / "conn.log"
    conn.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    out_dir = tmp_path / "out"

    rc = cli._main(["digest", str(conn), "--dry-run", f"--out={out_dir}/"])

    assert rc == 0
    # Dry-run must not have opened a file in out_dir.
    assert not out_dir.exists() or list(out_dir.iterdir()) == []
    # Fail with a clear message instead of an IndexError if the runner was
    # never reached.
    assert seen_streams, "run_digest was never called"
    # The stream handed to the runner is exactly sys.stdout — not None and
    # not a file opened for --out.
    assert seen_streams[0] is sys.stdout
652
+
653
+
654
+ # ─── Bare ``digest`` (no positional) still uses config-driven flow ──────────
655
+
656
+
657
def test_digest_bare_no_positional_resolves_output_via_kwargs(
    tmp_path: Path, monkeypatch,
) -> None:
    """``digest`` with no positional takes the config-driven route.

    With ``zeek_dir`` supplied by config and ``--out=<dir>/`` on the command
    line, the single ``run_digest`` call must carry ``output_dir`` equal to
    that directory, no ``stream`` (absent or ``None``), and the ``conn``
    schema.
    """
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    out_dir = tmp_path / "report"
    _stub_config(monkeypatch, {"loghunter": {"zeek_dir": str(zeek_root)}})

    seen_kwargs: dict[str, Any] = {}

    def fake_run_digest(**kw) -> None:
        seen_kwargs.update(kw)

    monkeypatch.setattr(runner, "run_digest", fake_run_digest)

    exit_code = cli._main(["digest", f"--out={out_dir}/"])

    assert exit_code == 0
    # Output location travels as a kwarg on this route.
    assert seen_kwargs.get("output_dir") == out_dir
    # No stream is threaded through by the CLI here.
    assert seen_kwargs.get("stream") is None
    assert seen_kwargs.get("schema") == "conn"
677
+
678
+
679
+ # ─── Detect-path regression: parsed["paths"] does not bleed into detector ──
680
+
681
+
682
def test_detect_path_unaffected_by_new_paths_key(
    tmp_path: Path, monkeypatch,
) -> None:
    """Regression guard for the detector path after ``parsed["paths"]`` landed.

    Running the ``beacon`` detector with one positional file must still hand
    that path to ``runner.run`` as ``zeek_dir`` with ``detect == "beacon"``.
    """
    _stub_config(monkeypatch)

    run_kwargs: dict[str, Any] = {}

    def _record_run(**kwargs):
        # Stand-in for runner.run: remember the kwargs, return None.
        run_kwargs.update(kwargs)

    monkeypatch.setattr(runner, "run", _record_run)

    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    positional = str(conn_log)

    cli._main(["beacon", positional])

    # The positional lands on the detector's required source key (zeek_dir).
    assert run_kwargs.get("zeek_dir") == positional
    assert run_kwargs.get("detect") == "beacon"
699
+
700
+
701
+ # ─── Source-dir flags rejected in fan-out ──────────────────────────────────
702
+
703
+
704
def test_digest_source_dir_flag_rejected_with_positional(
    tmp_path: Path, monkeypatch,
) -> None:
    """Every source-dir flag is refused when a positional path is present.

    Two different ``ValueError`` messages are expected:

    * ``--zeek-dir`` is still an advertised digest flag, so it trips the
      positional guard ("not valid alongside").
    * ``--pihole-dir``, ``--syslog-dir`` and ``--cloudtrail-dir`` are not in
      digest's allowed set and produce the wrong-verb error ("is not valid
      for digest").
    """
    _stub_config(monkeypatch)
    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    positional = str(conn_log)

    with pytest.raises(ValueError, match="--zeek-dir is not valid alongside"):
        cli._main(["digest", positional, "--zeek-dir=/x"])

    wrong_verb_flags = ("--pihole-dir", "--syslog-dir", "--cloudtrail-dir")
    for flag in wrong_verb_flags:
        expected_error = f"{flag} is not valid for digest"
        with pytest.raises(ValueError, match=expected_error):
            cli._main(["digest", positional, f"{flag}=/x"])
724
+
725
+
726
def test_digest_unrecognized_single_file_still_routes_to_blob(
    tmp_path: Path, monkeypatch,
) -> None:
    """Unrecognised content must still reach ``run_digest`` as a blob.

    The blob route belongs to the CLI sniff layer rather than ``run_digest``,
    so a single-file Zeek bypass inside ``run_digest`` must not open a way
    around it. A garbage file therefore has to produce exactly one runner
    call with ``schema == "blob"`` and ``blob_path`` set to that file.
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    junk_file = tmp_path / "garbage.dat"
    junk_file.write_text(_BLOB_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(junk_file)])

    assert exit_code == 0
    assert len(digest_calls) == 1
    only_call = digest_calls[0]
    assert only_call["schema"] == "blob"
    assert only_call["blob_path"] == junk_file
744
+
745
+
746
def test_digest_pruned_source_dir_flags_rejected_without_positional(
    monkeypatch,
) -> None:
    """Pruned source-dir flags fail for a bare ``digest`` as well.

    ``--pihole-dir``, ``--syslog-dir`` and ``--cloudtrail-dir`` are outside
    digest's allowed flag set (only ``--zeek-dir`` is meaningful when the
    schema is always conn), so each must raise the wrong-verb ``ValueError``
    even when no positional path is supplied.
    """
    _stub_config(monkeypatch)

    wrong_verb_flags = ("--pihole-dir", "--syslog-dir", "--cloudtrail-dir")
    for flag in wrong_verb_flags:
        expected_error = f"{flag} is not valid for digest"
        with pytest.raises(ValueError, match=expected_error):
            cli._main(["digest", f"{flag}=/x"])
757
+
758
+
759
+ # ─── Zeek syslog.log v1 promotion — fan-out routing + kwargs xor ladder ─────
760
+
761
# One newline-terminated Zeek NDJSON record carrying `"_path":"syslog"`;
# written to disk by the syslog fan-out routing tests.
_ZEEK_NDJSON_SYSLOG_LINE = (
    '{"_path":"syslog","ts":1779750000.0,"uid":"CSL01","id.orig_h":"192.0.2.10",'
    '"id.resp_h":"198.51.100.20","id.resp_p":514,"proto":"udp",'
    '"facility":"DAEMON","severity":"INFO",'
    '"message":"Jun 11 12:00:00 host1 sshd[1234]: ok"}'
    "\n"
)
767
+
768
+
769
def test_digest_zeek_syslog_positional_routes_to_zeek_dir(
    tmp_path: Path, monkeypatch,
) -> None:
    """A Zeek-origin ``syslog.log`` positional is routed through ``zeek_dir``.

    The file holds a Zeek NDJSON syslog record, so the single runner call
    must report ``schema == "syslog"`` with ``zeek_dir`` set to the file and
    ``syslog_dir`` left as ``None`` — the same origin split already used to
    tell Zeek dns apart from Pi-hole.
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    syslog_file = tmp_path / "syslog.log"
    syslog_file.write_text(_ZEEK_NDJSON_SYSLOG_LINE, encoding="utf-8")
    positional = str(syslog_file)

    exit_code = cli._main(["digest", positional])

    assert exit_code == 0
    assert len(digest_calls) == 1
    only_call = digest_calls[0]
    assert only_call["schema"] == "syslog"
    assert only_call["zeek_dir"] == positional
    assert only_call["syslog_dir"] is None
788
+
789
+
790
def test_digest_flat_syslog_positional_still_routes_to_syslog_dir(
    tmp_path: Path, monkeypatch,
) -> None:
    """Flat rsyslog input keeps its historical ``syslog_dir`` routing.

    Counterpart to the Zeek-origin case: the single runner call must report
    ``schema == "syslog"`` with ``syslog_dir`` set to the file and
    ``zeek_dir`` left as ``None``.
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    flat_file = tmp_path / "syslog"
    flat_file.write_text(_SYSLOG_LINE, encoding="utf-8")
    positional = str(flat_file)

    exit_code = cli._main(["digest", positional])

    assert exit_code == 0
    assert len(digest_calls) == 1
    only_call = digest_calls[0]
    assert only_call["schema"] == "syslog"
    assert only_call["syslog_dir"] == positional
    assert only_call["zeek_dir"] is None
808
+
809
+
810
+ # The three CLI-layer ``_digest_runner_kwargs`` ladder tests were DELETED.
811
+ # After the single-ownership refactor, ``_digest_runner_kwargs`` does NOT
812
+ # resolve source dirs — it passes raw strings (or None) to ``run_digest``,
813
+ # which calls ``common.sources.resolve_digest_source``. The ladder + XOR +
814
+ # config-preference logic now lives there:
815
+ #
816
+ # syslog_dir > zeek_dir fallback → tests/test_sources.py:
817
+ # test_digest_syslog_syslog_preference_on_config_fallback
818
+ # test_digest_syslog_zeek_when_only_zeek_configured
819
+ # syslog XOR (zeek_dir + syslog_dir) → tests/test_sources.py:
820
+ # test_digest_syslog_xor_byte_preserved
821
+
822
+
823
def test_digest_zeek_syslog_without_path_renders_syslog_card(
    tmp_path: Path, monkeypatch,
) -> None:
    """P1 regression: Zeek syslog NDJSON lacking ``_path`` stays a syslog card.

    The record has no ``_path`` key, yet it must sniff as
    ``schema="syslog", origin="zeek"`` and be routed via ``zeek_dir`` instead
    of dropping into the conn fallback (which, before the fix, rendered a
    conn card or crashed).
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    # Same shape as a Zeek syslog record, deliberately without "_path".
    record_without_path = (
        '{"ts":1779750000.0,"uid":"CSL01","id.orig_h":"192.0.2.10",'
        '"id.orig_p":41514,"id.resp_h":"198.51.100.20","id.resp_p":514,'
        '"proto":"udp","facility":"DAEMON","severity":"INFO",'
        '"message":"Jun 11 12:00:00 host1 sshd[1234]: placeholder"}'
        "\n"
    )
    syslog_file = tmp_path / "syslog.log"
    syslog_file.write_text(record_without_path, encoding="utf-8")
    positional = str(syslog_file)

    exit_code = cli._main(["digest", positional])

    assert exit_code == 0
    assert len(digest_calls) == 1
    only_call = digest_calls[0]
    assert only_call["schema"] == "syslog"
    assert only_call["zeek_dir"] == positional
    assert only_call["syslog_dir"] is None
850
+
851
+
852
+ # ─── Inter-card separator matrix ────────────────────────────────────────────
853
+ #
854
+ # A 40-col "─" * 40 rule separates adjacent RENDERED cards on a multi-card
855
+ # run; single-card runs draw no rule at all. Render-commit placement:
856
+ # `run_digest` / `_render_blob_for_path` emit the rule immediately before
857
+ # `handler.render_*(card)`, so a separator only ever precedes a card that
858
+ # reaches its render call. Skipped/empty/errored paths never trigger a rule.
859
+
860
# The separator rule the inter-card tests search for: forty "─" characters.
_INTER_CARD_RULE = "─" * 40

# One newline-terminated Zeek NDJSON record with `"_path": "dns"`.
_ZEEK_NDJSON_DNS_LINE = (
    '{"_path": "dns", "ts": 1779750000.0,'
    ' "id.orig_h": "192.0.2.10", "id.resp_h": "198.51.100.20",'
    ' "id.resp_p": 53, "proto": "udp",'
    ' "query": "example.test", "qtype": 1, "rcode": 0}'
    "\n"
)
866
+
867
+
868
def test_inter_card_separator_single_card_run_draws_no_rule(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A digest of a single positional must not print the separator rule."""
    _stub_config(monkeypatch)
    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log)])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert _INTER_CARD_RULE not in stdout
881
+
882
+
883
def test_inter_card_separator_two_schema_cards_get_one_rule_between(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """Two rendered schema cards are split by exactly one rule.

    Besides the count, the output must open with the first card's identity
    line (``a.log``) rather than a rule, and must not close with one.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(first_log), str(second_log)])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert stdout.count(_INTER_CARD_RULE) == 1
    # No leading rule: line one is the first card's identity line.
    first_line = stdout.splitlines()[0]
    assert first_line == "a.log"
    # No trailing rule once the final newlines are stripped.
    trimmed = stdout.rstrip("\n")
    assert not trimmed.endswith(_INTER_CARD_RULE)
903
+
904
+
905
def test_inter_card_separator_skipped_path_does_not_get_rule(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """An empty file between two valid logs adds no extra separator.

    Three positionals are passed (valid, empty, valid). The empty one is
    expected to be skipped before any render-commit, so the output must
    contain exactly one rule. Only the rule count is asserted here; its
    position in the output is not checked.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    empty_log = tmp_path / "empty.log"
    empty_log.write_text("", encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    positionals = [str(first_log), str(empty_log), str(second_log)]
    exit_code = cli._main(["digest", *positionals])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    # A single rule in total: the skipped path never triggered one.
    assert stdout.count(_INTER_CARD_RULE) == 1
925
+
926
+
927
def test_inter_card_separator_schema_to_blob_top_level(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A schema card followed by a sniff-floor blob card yields one rule."""
    _stub_config(monkeypatch)
    conn_log = tmp_path / "a.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    # Plain text that matches no known schema.
    mystery_file = tmp_path / "mystery.txt"
    mystery_text = "alpha beta gamma\ndelta epsilon\n" * 50
    mystery_file.write_text(mystery_text, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log), str(mystery_file)])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert stdout.count(_INTER_CARD_RULE) == 1
942
+
943
+
944
def test_inter_card_separator_schema_to_internal_blob_fallback(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A summariser failure that falls back to a blob still yields one rule.

    The second summariser invocation is forced to raise, so the first card
    renders normally and the second takes the blob fallback. Exactly one
    separator must appear: ``_render_blob_for_path`` owns the rule on that
    arm and ``run_digest`` must not emit a second one.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    from loghunter import digest as _digest_pkg
    original_get = _digest_pkg.get_summarizer
    summariser_calls = 0

    def _flaky_get(schema_name: str):
        # Replacement for get_summarizer: the returned callable counts every
        # summariser invocation and raises on the second one.
        def _summarise_or_fail(*args, **kwargs):
            nonlocal summariser_calls
            summariser_calls += 1
            if summariser_calls == 2:
                raise RuntimeError("induced summariser failure")
            real_summariser = original_get(schema_name)
            return real_summariser(*args, **kwargs)

        return _summarise_or_fail

    monkeypatch.setattr("loghunter.digest.get_summarizer", _flaky_get)

    exit_code = cli._main(["digest", str(first_log), str(second_log)])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert stdout.count(_INTER_CARD_RULE) == 1
977
+
978
+
979
+ # ─── Loader-progress suppression on multi-file fan-out ──────────────────────
980
+
981
+
982
def test_digest_single_positional_keeps_loader_progress(
    tmp_path: Path, monkeypatch,
) -> None:
    """A single-positional digest keeps the loader progress bar enabled.

    Nothing is rendered beneath the bar in this case, and a large log is
    precisely when feedback helps, so the one ``run_digest`` call must be
    given ``show_progress=True``.
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    conn_log = tmp_path / "conn.log"
    conn_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(conn_log)])

    assert exit_code == 0
    assert len(digest_calls) == 1
    only_call = digest_calls[0]
    assert only_call["show_progress"] is True
999
+
1000
+
1001
def test_digest_multi_positional_suppresses_loader_progress(
    tmp_path: Path, monkeypatch,
) -> None:
    """Multi-positional fan-out turns the loader bar off for every card.

    The loader's ``leave=True`` bar would otherwise interleave between a
    rendered card and the next card's separator. Suppression is batch-wide,
    so both ``run_digest`` calls — not only the second — must be given
    ``show_progress=False``.
    """
    _stub_config(monkeypatch)
    digest_calls = _spy_run_digest_calls(monkeypatch)

    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")

    exit_code = cli._main(["digest", str(first_log), str(second_log)])

    assert exit_code == 0
    assert len(digest_calls) == 2
    for call_kwargs in digest_calls:
        assert call_kwargs["show_progress"] is False
1021
+
1022
+
1023
def test_inter_card_separator_out_concatenation_matches_stdout_fanout(
    tmp_path: Path, monkeypatch,
) -> None:
    """Cards concatenated into an ``--out`` file get the stdout separators.

    Two rendered cards written to one output file must be divided by exactly
    one rule, and the file must not end with a rule.
    """
    _stub_config(monkeypatch)
    first_log = tmp_path / "a.log"
    first_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    second_log = tmp_path / "b.log"
    second_log.write_text(_ZEEK_NDJSON_CONN_LINE, encoding="utf-8")
    report_file = tmp_path / "digest.txt"

    argv = ["digest", str(first_log), str(second_log), f"--out={report_file}"]
    exit_code = cli._main(argv)

    assert exit_code == 0
    report_text = report_file.read_text(encoding="utf-8")
    assert report_text.count(_INTER_CARD_RULE) == 1
    trimmed = report_text.rstrip("\n")
    assert not trimmed.endswith(_INTER_CARD_RULE)