loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
tests/test_display.py ADDED
@@ -0,0 +1,370 @@
1
+ """Tests for the liveness primitive in loghunter.common.display."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ import threading
7
+ import time
8
+
9
+ import pytest
10
+
11
+ from loghunter.common import display as display_mod
12
+ from loghunter.common.display import (
13
+ _SPINNER_FRAMES,
14
+ _color_enabled,
15
+ _stream_isatty,
16
+ liveness,
17
+ progress,
18
+ )
19
+
20
+
21
class _FakeStream:
    """Thread-safe in-memory substitute for ``sys.stderr`` in liveness tests.

    Offers ``isatty()``, ``write()`` and ``flush()``, plus an ``output``
    property that returns everything written so far as a single string.
    A lock guards the chunk list so the spinner thread and the test body
    never observe a half-updated buffer.
    """

    def __init__(self, tty: bool) -> None:
        self._lock = threading.Lock()
        self._chunks: list[str] = []
        self._tty = tty

    def isatty(self) -> bool:
        """Report the TTY flag chosen at construction."""
        return self._tty

    def write(self, s: str) -> int:
        """Record ``s`` and return its length, like a text stream would."""
        written = len(s)
        with self._lock:
            self._chunks.append(s)
        return written

    def flush(self) -> None:  # pragma: no cover - no-op
        """Do nothing; writes land in the chunk list immediately."""
        return None

    @property
    def output(self) -> str:
        """Everything written so far, concatenated in write order."""
        with self._lock:
            snapshot = list(self._chunks)
        return "".join(snapshot)
49
+
50
+
51
def _has_any_frame(text: str) -> bool:
    """Return True when ``text`` contains at least one spinner frame."""
    for frame in _SPINNER_FRAMES:
        if frame in text:
            return True
    return False
53
+
54
+
55
def _poll_until_drew(stream: _FakeStream, budget_s: float = 0.5) -> bool:
    """Poll the fake's buffer until a \\r appears, signalling _drew flipped.

    Args:
        stream: Object whose ``output`` attribute is the text captured so far.
        budget_s: Maximum time to keep polling, in seconds.

    Returns:
        True as soon as a carriage return is seen in ``stream.output``;
        False if none has appeared once the budget is spent.
    """
    deadline = time.monotonic() + budget_s
    while True:
        # Inspect the buffer BEFORE consulting the clock: this guarantees at
        # least one check (even with a zero budget) and one final check after
        # the last sleep, so a frame drawn during that sleep is not missed.
        if "\r" in stream.output:
            return True
        if time.monotonic() >= deadline:
            return False
        time.sleep(0.005)
63
+
64
+
65
+ # ── Test 1 ──────────────────────────────────────────────────────────────────
66
+
67
+
68
def test_non_tty_silent_enter_seal_writes_record_only(monkeypatch):
    """Off a TTY, the sealed record line is the only thing written."""
    stderr_stub = _FakeStream(tty=False)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    with liveness("running thing") as handle:
        handle.seal("done")

    captured = stderr_stub.output
    assert captured == "done\n"
74
+
75
+
76
+ # ── Test 2 ──────────────────────────────────────────────────────────────────
77
+
78
+
79
def test_non_tty_exception_writes_nothing(monkeypatch):
    """Off a TTY, an exception raised inside the block leaves stderr empty."""
    stderr_stub = _FakeStream(tty=False)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    with pytest.raises(RuntimeError), liveness("running thing"):
        raise RuntimeError("boom")

    captured = stderr_stub.output
    assert captured == ""
86
+
87
+
88
+ # ── Test 3 — the byte-exact one ─────────────────────────────────────────────
89
+
90
+
91
def test_seal_before_delay_is_byte_exact(monkeypatch):
    """Sealing long before the delay elapses emits the record and nothing else."""
    stderr_stub = _FakeStream(tty=True)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    # A 10 s delay dwarfs the time needed to seal immediately, so the
    # expectation is that no spinner frame is drawn before the seal.
    with liveness("running thing", delay=10.0) as handle:
        handle.seal("done")

    # Byte-exact: the record line only — no \r, no padding, no frame chars.
    captured = stderr_stub.output
    assert captured == "done\n"
100
+
101
+
102
+ # ── Test 4 ──────────────────────────────────────────────────────────────────
103
+
104
+
105
def test_exception_after_drew_clears_no_record(monkeypatch):
    """After a frame was drawn, an exception leaves \\r output but no record."""
    stderr_stub = _FakeStream(tty=True)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    with pytest.raises(RuntimeError), liveness("running thing", delay=0.0):
        drew = _poll_until_drew(stderr_stub)
        assert drew, (
            "spinner thread did not draw within budget — _drew=True "
            "branch not exercised"
        )
        raise RuntimeError("boom")

    # At least one carriage return reached the stream.
    assert "\r" in stderr_stub.output
    # Nothing claiming success was written.
    assert "done" not in stderr_stub.output
    # No newline-terminated record: only \r-redrawn content is expected.
    assert "\n" not in stderr_stub.output
122
+
123
+
124
+ # ── Test 5 ──────────────────────────────────────────────────────────────────
125
+
126
+
127
def test_keyboard_interrupt_propagates(monkeypatch):
    """KeyboardInterrupt escapes the context manager and no record is sealed."""
    stderr_stub = _FakeStream(tty=True)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    # Waiting for a frame is unnecessary: the contract under test is only
    # that KeyboardInterrupt (a BaseException, not an Exception) is not
    # swallowed by the context manager.
    with pytest.raises(KeyboardInterrupt), liveness("running thing", delay=0.0):
        raise KeyboardInterrupt

    # No false seal.
    assert "done" not in stderr_stub.output
138
+
139
+
140
+ # ── Test 6 ──────────────────────────────────────────────────────────────────
141
+
142
+
143
def test_seal_is_idempotent(monkeypatch):
    """Sealing twice with the same record writes that record exactly once."""
    stderr_stub = _FakeStream(tty=False)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    with liveness("running thing") as handle:
        handle.seal("done")
        handle.seal("done")

    captured = stderr_stub.output
    assert captured == "done\n"
150
+
151
+
152
+ # ── Bonus: a spinner that actually drew, then sealed, clears once ──────────
153
+
154
+
155
def test_seal_after_drew_clears_then_writes_record(monkeypatch):
    """A spinner that drew and was then sealed ends with one record line."""
    stderr_stub = _FakeStream(tty=True)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    with liveness("running thing", delay=0.0) as handle:
        drew = _poll_until_drew(stderr_stub)
        assert drew, (
            "spinner thread did not draw within budget"
        )
        handle.seal("done")

    captured = stderr_stub.output
    # At least one spinner frame character was drawn.
    assert _has_any_frame(captured)
    # The sealed record is the LAST thing in the buffer.
    assert captured.endswith("done\n")
    # And it appears exactly once.
    assert captured.count("done\n") == 1
170
+
171
+
172
+ # ── progress() helper ───────────────────────────────────────────────────────
173
+
174
+
175
class _TqdmSpy:
    """Callable stand-in for ``tqdm`` that doubles as the bar it returns.

    Patched over `loghunter.common.display.tqdm` so progress() tests can
    tell whether tqdm was constructed at all, inspect the keyword arguments
    of each construction, and read the counter that `update()` accumulates.
    Iterating the spy is treated as a regression and fails immediately.
    """

    def __init__(self) -> None:
        self.closed = False
        self.n = 0
        self.calls: list[dict] = []

    def __call__(self, iterable=None, **kwargs):
        """Log the keyword arguments of a construction; return the spy."""
        self.calls.append(kwargs)
        return self

    def __iter__(self):
        # progress() is expected to advance the bar with update(1) from its
        # OWN generator rather than iterating the tqdm object. Should a
        # regression hand back the bar itself (the `loaded X: 0.00 lines`
        # orphaned-counter bug), fail LOUDLY at this point instead of with
        # a confusing AttributeError further downstream.
        raise AssertionError(
            "progress() iterated the tqdm bar directly — it must wrap it in its "
            "own counting generator (iter(gen) is gen) so the count survives "
            "parser re-iteration"
        )

    def update(self, k: int = 1) -> None:
        """Advance the recorded counter by ``k``."""
        self.n = self.n + k

    def close(self) -> None:
        """Mark the bar as closed."""
        self.closed = True
209
+
210
+
211
def test_progress_disabled_returns_bare_iterable_no_tqdm(monkeypatch):
    """With show_progress=False the items pass through and tqdm is untouched."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    tty_stub = _FakeStream(tty=True)
    source = [1, 2, 3]

    wrapped = progress(
        source,
        desc="loaded x.log",
        show_progress=False,
        stream=tty_stub,
    )
    collected = list(wrapped)

    assert collected == [1, 2, 3]
    # tqdm is NEVER constructed when progress is disabled.
    assert bar_spy.calls == []
220
+
221
+
222
def test_progress_non_tty_returns_bare_iterable_no_tqdm(monkeypatch):
    """Off a TTY the items pass through and tqdm is never constructed."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    pipe_stub = _FakeStream(tty=False)
    source = ["a", "b"]

    wrapped = progress(
        source,
        desc="loaded x.log",
        show_progress=True,
        stream=pipe_stub,
    )
    collected = list(wrapped)

    assert collected == ["a", "b"]
    # tqdm is NEVER constructed off a TTY.
    assert bar_spy.calls == []
231
+
232
+
233
def test_progress_tty_constructs_tqdm_with_pinned_format(monkeypatch):
    """On a TTY, tqdm is built once with the pinned kwargs, driven, and closed."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    tty_stub = _FakeStream(tty=True)

    wrapped = progress(
        ["a"],
        desc="loaded x.log",
        show_progress=True,
        unit=" lines",
        stream=tty_stub,
    )
    collected = list(wrapped)

    assert collected == ["a"]
    assert len(bar_spy.calls) == 1
    ctor_kwargs = bar_spy.calls[0]
    # The bar_format is pinned byte-for-byte for unit=" lines".
    assert ctor_kwargs["bar_format"] == "{desc}: {n_fmt} lines [{elapsed}]"
    assert ctor_kwargs["desc"] == "loaded x.log"
    assert ctor_kwargs["unit"] == " lines"
    assert ctor_kwargs["leave"] is True
    assert ctor_kwargs["unit_scale"] is True
    assert ctor_kwargs["mininterval"] == 0.5
    assert ctor_kwargs["file"] is tty_stub
    # Consuming the result advanced the bar once and closed it.
    assert bar_spy.n == 1
    assert bar_spy.closed is True
256
+
257
+
258
def test_progress_count_survives_two_phase_reiteration(monkeypatch):
    """Regression for `loaded X: 0.00 lines`.

    A sniff-then-resume consumer takes one item from the progress result,
    then chains that prefix back in front of the same object and drains it.
    Every item must be seen once and the bar's counter must reach the true
    total. Only `tqdm` is swapped for a spy; progress() itself is the real one.
    """
    import itertools

    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    tty_stub = _FakeStream(tty=True)
    items = list(range(10))

    it = progress(
        items,
        desc="loaded conn.log",
        show_progress=True,
        unit=" lines",
        stream=tty_stub,
    )
    # Phase 1: pull a single item and stop (the sniff).
    prefix: list[int] = list(itertools.islice(it, 1))
    # Phase 2: put the prefix back and drain the remainder from the SAME object.
    rest = [*itertools.chain(prefix, it)]

    assert rest == items, "every item observed once, in order, across re-iteration"
    assert bar_spy.n == 10, "counter is the TRUE total, not orphaned at the sniff break"
    assert bar_spy.closed is True
284
+
285
+
286
def test_progress_unit_parameterization(monkeypatch):
    """A non-default unit such as " events" is threaded into bar_format."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    tty_stub = _FakeStream(tty=True)

    wrapped = progress(
        ["a"],
        desc="loaded x",
        show_progress=True,
        unit=" events",
        stream=tty_stub,
    )
    list(wrapped)

    pinned_format = bar_spy.calls[0]["bar_format"]
    assert pinned_format == "{desc}: {n_fmt} events [{elapsed}]"
294
+
295
+
296
def test_progress_ignores_no_color(monkeypatch):
    """NO_COLOR is a color preference and must not suppress the progress bar."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    monkeypatch.setenv("NO_COLOR", "1")
    tty_stub = _FakeStream(tty=True)

    wrapped = progress(
        ["a"],
        desc="loaded x",
        show_progress=True,
        stream=tty_stub,
    )
    list(wrapped)

    assert len(bar_spy.calls) == 1
305
+
306
+
307
def test_progress_ignores_term_dumb(monkeypatch):
    """TERM=dumb signals color policy only; the bar is still constructed."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    monkeypatch.setenv("TERM", "dumb")
    tty_stub = _FakeStream(tty=True)

    wrapped = progress(
        ["a"],
        desc="loaded x",
        show_progress=True,
        stream=tty_stub,
    )
    list(wrapped)

    assert len(bar_spy.calls) == 1
315
+
316
+
317
def test_progress_defaults_to_sys_stderr(monkeypatch):
    """Without an explicit stream=, the bar is bound to the current sys.stderr."""
    bar_spy = _TqdmSpy()
    monkeypatch.setattr(display_mod, "tqdm", bar_spy)
    stderr_stub = _FakeStream(tty=True)
    monkeypatch.setattr(sys, "stderr", stderr_stub)

    wrapped = progress(["a"], desc="loaded x", show_progress=True)
    list(wrapped)

    assert len(bar_spy.calls) == 1
    ctor_kwargs = bar_spy.calls[0]
    assert ctor_kwargs["file"] is stderr_stub
326
+
327
+
328
+ # ── _stream_isatty factoring regression ─────────────────────────────────────
329
+
330
+
331
def test_stream_isatty_handles_missing_attr():
    """A stream object lacking isatty() yields False instead of crashing."""

    class _NoIsatty:
        pass

    probe = _stream_isatty(_NoIsatty())
    assert probe is False
338
+
339
+
340
def test_stream_isatty_handles_raising_isatty():
    """An isatty() that raises is reported as False, not propagated."""

    class _RaisingIsatty:
        def isatty(self):
            raise OSError("nope")

    probe = _stream_isatty(_RaisingIsatty())
    assert probe is False
348
+
349
+
350
def test_stream_isatty_true_and_false():
    """The probe mirrors the stream's own isatty() answer in both directions."""
    on_tty = _stream_isatty(_FakeStream(tty=True))
    assert on_tty is True
    off_tty = _stream_isatty(_FakeStream(tty=False))
    assert off_tty is False
353
+
354
+
355
def test_color_enabled_still_layers_no_color_and_term_dumb(monkeypatch):
    """NO_COLOR and TERM=dumb each disable color even when the stream is a TTY."""
    tty_stub = _FakeStream(tty=True)
    pipe_stub = _FakeStream(tty=False)

    # Baseline: neither variable set, so the TTY probe alone decides.
    for var in ("NO_COLOR", "TERM"):
        monkeypatch.delenv(var, raising=False)
    assert _color_enabled(tty_stub) is True
    assert _color_enabled(pipe_stub) is False

    # NO_COLOR set: color is off despite the TTY.
    monkeypatch.setenv("NO_COLOR", "1")
    assert _color_enabled(tty_stub) is False

    # NO_COLOR cleared, TERM=dumb: color is off despite the TTY.
    monkeypatch.delenv("NO_COLOR")
    monkeypatch.setenv("TERM", "dumb")
    assert _color_enabled(tty_stub) is False