loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
tests/test_cli_help.py ADDED
@@ -0,0 +1,342 @@
1
+ """Per-command help, side-effect-light help short-circuit, and a few other
2
+ parser-surface invariants that don't fit the verb-specific suites.
3
+
4
+ Key promises locked here:
5
+ - ``loghunter <verb> --help`` / ``-h`` renders that verb's own generated help.
6
+ - Help fires BEFORE config load, output-registry lookup, sniff dispatch, or
7
+ init wizard entry.
8
+ - ``--help=anything`` and ``-h=anything`` are NOT help — they raise the
9
+ strict-parser "takes no value" error.
10
+ - ``loghunter conn.log`` (a real file in CWD) resolves as a path, not as an
11
+ unknown command.
12
+ - ``--output=FORMAT`` validates via the registered output handler list.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import sys
18
+ from pathlib import Path
19
+
20
+ import pytest
21
+
22
+ from loghunter import cli
23
+ from loghunter.common import config as cfg
24
+
25
+
26
+ # ── per-command help renders from the spec ───────────────────────────────────
27
+
28
+
29
@pytest.mark.parametrize("verb", [
    "", "beacon", "dns", "syslog", "scan", "duration", "aws",
    "digest", "export", "init",
])
def test_render_verb_help_lists_verb_allowed_flags(verb: str) -> None:
    """Check that a verb's rendered help advertises exactly its allowed flags.

    For every spec in ``cli._FLAG_LIST``:

    - if the spec's key is in the verb's ``allowed`` set, its long spelling
      must appear in the rendered help, and so must ``-<short>`` when the
      spec has a short spelling;
    - otherwise its long spelling must NOT appear.
    """
    rendered = cli._render_verb_help(verb)
    allowed = cli._VERBS[verb].allowed
    for spec in cli._FLAG_LIST:
        if spec.key not in allowed:
            assert spec.long not in rendered, (
                f"{spec.long} should NOT be in {verb!r} help"
            )
            continue
        assert spec.long in rendered, (
            f"{spec.long} should be in {verb!r} help"
        )
        if spec.short:
            # Carries a message like its siblings so a failure names the
            # missing short spelling and the verb.
            assert f"-{spec.short}" in rendered, (
                f"-{spec.short} should be in {verb!r} help"
            )
49
+
50
+
51
def test_render_verb_help_blob_path_never_appears() -> None:
    """No verb's rendered help may mention the internal ``blob_path`` key.

    Neither the key itself nor a ``--blob-path`` / ``-blob-path`` flag
    spelling may show up for any verb in ``cli._VERBS``
    ([[feedback-cli-surface-discipline]]).
    """
    forbidden = ("blob_path", "--blob-path", "-blob-path")
    for verb in cli._VERBS:
        help_text = cli._render_verb_help(verb)
        for needle in forbidden:
            assert needle not in help_text
60
+
61
+
62
def test_init_help_only_lists_help(capsys: pytest.CaptureFixture[str]) -> None:
    """The rendered ``init`` help shows ``--help`` and no other spec flag.

    Every spec in ``cli._FLAG_LIST`` whose key is not ``help`` must have its
    long spelling absent from the rendered text.
    """
    help_text = cli._render_verb_help("init")
    assert "--help" in help_text
    non_help_specs = (s for s in cli._FLAG_LIST if s.key != "help")
    for other in non_help_specs:
        assert other.long not in help_text
70
+
71
+
72
+ # ── side-effect-light help: no config load, no sniff, no wizard ──────────────
73
+
74
+
75
def test_verb_help_does_not_load_config(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Help requests must return 0 without ever calling ``cfg.load``.

    ``cfg.load`` is replaced with a stub that raises ``RuntimeError``, so any
    help invocation that reaches config loading fails the test by raising
    out of ``cli._main``.
    """
    def _exploding_load(_path=None):  # pragma: no cover — would only run on failure
        raise RuntimeError("config must not load during help")

    monkeypatch.setattr(cfg, "load", _exploding_load)
    help_invocations = (
        ["beacon", "--help"], ["beacon", "-h"],
        ["digest", "--help"], ["digest", "-h"],
        ["export", "--help"], ["init", "--help"],
        ["--help"], ["-h"],
    )
    for argv in help_invocations:
        rc = cli._main(argv)
        # Name the invocation: a bare ``rc == 0`` failure would not say which
        # of the cases broke.
        assert rc == 0, f"{argv!r} should return 0 from the help short-circuit"
        # Drain the captured output before the next invocation.
        capsys.readouterr()
93
+
94
+
95
def test_init_help_does_not_start_wizard(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``init -h`` returns 0, prints the init usage, and never calls run_init.

    ``loghunter.cli_init.run_init`` is swapped for a spy that records any
    call; the spy must stay untouched.
    """
    wizard_calls: list[bool] = []

    def _spy_run_init():  # pragma: no cover — would only run on failure
        wizard_calls.append(True)

    monkeypatch.setattr("loghunter.cli_init.run_init", _spy_run_init)

    exit_code = cli._main(["init", "-h"])

    assert exit_code == 0
    assert not wizard_calls
    stdout_text = capsys.readouterr().out
    assert "Usage: loghunter init" in stdout_text
113
+
114
+
115
def test_digest_help_does_not_sniff(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
    tmp_path: Path,
) -> None:
    """``digest <path> --help`` returns 0 without sniffing the positional.

    ``loghunter.common.loader.sniff_format_detailed`` is replaced with a spy
    that records the call and raises; a real file is passed as the positional
    and the spy must not fire.
    """
    sniff_calls: list[bool] = []

    def _spy_sniff(_path):  # pragma: no cover — would only run on failure
        sniff_calls.append(True)
        raise RuntimeError("sniff must not run during help")

    monkeypatch.setattr(
        "loghunter.common.loader.sniff_format_detailed", _spy_sniff,
    )
    log_file = tmp_path / "anything.log"
    log_file.write_text("placeholder\n", encoding="utf-8")

    exit_code = cli._main(["digest", str(log_file), "--help"])

    assert exit_code == 0
    assert not sniff_calls
138
+
139
+
140
def test_help_with_output_bogus_short_circuits_before_registry(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """``beacon --help --output=bogus`` returns 0 without a handler lookup.

    ``loghunter.cli.get_handler`` is replaced with a stub that raises, so
    reaching the output registry while serving help would fail the test.
    """
    def _exploding_get_handler(_name):  # pragma: no cover
        raise RuntimeError("registry must not run during help")

    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.setattr("loghunter.cli.get_handler", _exploding_get_handler)

    exit_code = cli._main(["beacon", "--help", "--output=bogus"])

    assert exit_code == 0
156
+
157
+
158
+ # ── --help=anything / -h=anything are NOT help ───────────────────────────────
159
+
160
+
161
def test_help_with_value_raises_takes_no_value(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``--help=foo`` exits with the "takes no value" error, not with help.

    ``cli.main`` must raise ``SystemExit`` and stderr must carry the
    value-on-bool message.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    with pytest.raises(SystemExit):
        cli.main(["--help=foo"])
    streams = capsys.readouterr()
    assert "--help (-h) takes no value" in streams.err
172
+
173
+
174
def test_short_help_with_value_raises_takes_no_value(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``-h=foo`` exits with the same "takes no value" error as ``--help=foo``."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    with pytest.raises(SystemExit):
        cli.main(["-h=foo"])
    streams = capsys.readouterr()
    assert "--help (-h) takes no value" in streams.err
183
+
184
+
185
+ # ── loghunter conn.log (bare filename in CWD) ────────────────────────────────
186
+
187
+
188
def test_bare_filename_in_cwd_routes_as_analyze_path(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A bare ``conn.log`` that exists in the CWD is passed on as ``zeek_dir``.

    ``loghunter.runner.run`` is replaced with a recorder; after
    ``cli._main(["conn.log"])`` the recorded keyword arguments must contain
    ``zeek_dir == "conn.log"``.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    monkeypatch.chdir(tmp_path)

    seen_kwargs: dict[str, object] = {}

    def fake_run(**kwargs: object) -> None:
        for key, value in kwargs.items():
            seen_kwargs[key] = value

    monkeypatch.setattr("loghunter.runner.run", fake_run)

    conn_log = tmp_path / "conn.log"
    conn_log.write_text("", encoding="utf-8")

    cli._main(["conn.log"])

    # The CLI hands over the raw string; converting it to a Path is the
    # resolver's job.
    assert seen_kwargs.get("zeek_dir") == "conn.log"
210
+
211
+
212
+ # ── --output=FORMAT validation via the registry ──────────────────────────────
213
+
214
+
215
def test_unknown_output_format_raises_with_available_list(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
    tmp_path: Path,
) -> None:
    """``--output=bogus`` exits 1 and lists the available formats on stderr.

    The error must name the rejected format, include an "Available formats:"
    line, and mention each built-in handler (text, json, csv, html).
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    conn_log = tmp_path / "zeek" / "conn.log"
    conn_log.parent.mkdir()
    conn_log.write_text("", encoding="utf-8")

    with pytest.raises(SystemExit) as exc:
        cli.main([f"--zeek-dir={conn_log.parent}", "--output=bogus"])

    assert exc.value.code == 1
    stderr_text = capsys.readouterr().err
    assert "Unknown output format 'bogus'." in stderr_text
    assert "Available formats:" in stderr_text
    # The built-in handlers must all be mentioned.
    builtin_formats = ("text", "json", "csv", "html")
    for name in builtin_formats:
        assert name in stderr_text
237
+
238
+
239
def test_digest_unknown_output_format_uses_same_registry_error(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
    tmp_path: Path,
) -> None:
    """``digest <path> --output=bogus`` reports 'Unknown output format'.

    The registry check is expected to win over digest's text-only rail, so
    the error is 'Unknown output format', not 'currently supports only
    text'. The exit code must be 1.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    log_file = tmp_path / "x.log"
    log_file.write_text("", encoding="utf-8")

    with pytest.raises(SystemExit) as exc:
        cli.main(["digest", str(log_file), "--output=bogus"])

    assert exc.value.code == 1
    streams = capsys.readouterr()
    assert "Unknown output format 'bogus'." in streams.err
257
+
258
+
259
+ # ── export positionals come from the parser ──────────────────────────────────
260
+
261
+
262
def test_export_positionals_come_from_parser(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``export splunk q1 q2`` forwards the backend and both query names.

    ``loghunter.exporters.run_export`` is replaced with a recorder and
    ``cfg.load`` with a stub returning a splunk export config; the recorded
    call must carry ``backend == "splunk"`` and
    ``query_names == ["q1", "q2"]``.
    """
    seen_kwargs: dict = {}

    def fake_run_export(**kwargs):
        seen_kwargs.update(kwargs)

    def fake_load(_=None):
        return {
            "export": {"splunk": {"host": "192.0.2.20", "port": 8089,
                                  "query": {"q1": {"spl": "x"}, "q2": {"spl": "y"}}}},
        }

    monkeypatch.setattr("loghunter.exporters.run_export", fake_run_export)
    monkeypatch.setattr(cfg, "load", fake_load)

    cli.main(["export", "splunk", "q1", "q2"])

    assert seen_kwargs["backend"] == "splunk"
    assert seen_kwargs["query_names"] == ["q1", "q2"]
282
+
283
+
284
+ # ── digest combination guard (preserved) ─────────────────────────────────────
285
+
286
+
287
def test_digest_path_plus_zeek_dir_still_rejected(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """``digest PATH --zeek-dir=…`` raises ``ValueError`` from ``cli._main``.

    The message must match "--zeek-dir is not valid alongside". Only the
    positional-plus-flag combination is exercised here; bare digest with
    ``--zeek-dir`` is the config-driven path and is not covered.
    """
    def fake_load(_=None):
        return {"loghunter": {}}

    monkeypatch.setattr(cfg, "load", fake_load)
    log_file = tmp_path / "x.log"
    log_file.write_text("", encoding="utf-8")
    argv = ["digest", str(log_file), "--zeek-dir=/x"]
    with pytest.raises(ValueError, match="--zeek-dir is not valid alongside"):
        cli._main(argv)
299
+
300
+
301
+ # ── bare short-form value flag mentions both spellings ───────────────────────
302
+
303
+
304
def test_bare_short_value_flag_short_lead_message(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A bare ``-o`` exits 1 with an error showing both value spellings.

    stderr must contain the message that names ``-o=…`` and ``--out=…``.
    """
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    with pytest.raises(SystemExit) as exc:
        cli.main(["-o"])
    assert exc.value.code == 1
    streams = capsys.readouterr()
    assert "loghunter: --out (-o) needs a value: -o=… or --out=…" in streams.err
316
+
317
+
318
+ # ── --detect/-d on single-detector verbs raises wrong-verb ───────────────────
319
+
320
+
321
def test_detect_on_single_detector_verb_raises_wrong_verb(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``beacon --detect=all`` exits 1 with the wrong-verb error on stderr."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    with pytest.raises(SystemExit) as exc:
        cli.main(["beacon", "--detect=all"])
    assert exc.value.code == 1
    streams = capsys.readouterr()
    assert "--detect (-d) is not valid for beacon" in streams.err
331
+
332
+
333
def test_short_detect_on_single_detector_verb_raises_wrong_verb(
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``beacon -d=all`` exits 1 with the short-spelling-first wrong-verb error."""
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [])
    with pytest.raises(SystemExit) as exc:
        cli.main(["beacon", "-d=all"])
    assert exc.value.code == 1
    streams = capsys.readouterr()
    assert "-d (--detect) is not valid for beacon" in streams.err