loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,351 @@
1
+ """Tests for the Splunk exporter framework.
2
+
3
+ No live Splunk connection — SDK is mocked where needed.
4
+ All IP addresses use RFC 5737 documentation space (192.0.2.x).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone, timedelta
10
+ from pathlib import Path
11
+
12
+ import pytest
13
+
14
+ from loghunter.cli import _resolve_timeframe
15
+ from loghunter.common import config as cfg
16
+ from loghunter.common.paths import effective_root
17
+ from loghunter.exporters import (
18
+ _auto_filename,
19
+ _normalize_end_of_day_until,
20
+ _resolve_output_path,
21
+ _resolve_queries,
22
+ )
23
+ from loghunter.exporters.splunk import _build_hour_windows, _get_credentials, fetch, write
24
+
25
+
26
+ # ── --days full pipeline: local midnight → 24 chunks ─────────────────────────
27
+
28
+
29
def test_days_flag_local_midnight_gives_24_chunks() -> None:
    """--days=1-1 resolved against a UTC-5 "now" yields 24 chunks from local midnight.

    Regression guard: the scenario is a user in UTC-5 running the command at
    15:00 local time.  If the day boundary were derived from UTC midnight
    instead of local midnight, the window would still contain 24 chunks but
    would begin at 19:00 local rather than hour 0.
    """
    utc_minus_five = timezone(timedelta(hours=-5))
    frozen_now = datetime(2026, 5, 31, 15, 0, 0, tzinfo=utc_minus_five)

    start, raw_end = _resolve_timeframe({"days": "1-1"}, now=frozen_now)
    chunks = _build_hour_windows(start, _normalize_end_of_day_until(raw_end))

    assert len(chunks) == 24
    # The opening chunk has to begin at local hour 0, not a UTC-shifted hour.
    assert chunks[0][0].hour == 0
45
+
46
+ # ── _normalize_end_of_day_until ───────────────────────────────────────────────
47
+
48
+
49
def test_normalize_end_of_day_eod() -> None:
    """An `until` of 23:59:59 is rolled forward to the following midnight."""
    last_second_of_day = datetime(2026, 5, 30, 23, 59, 59)
    following_midnight = datetime(2026, 5, 31, 0, 0, 0)
    assert _normalize_end_of_day_until(last_second_of_day) == following_midnight
54
+
55
+
56
def test_normalize_end_of_day_midnight() -> None:
    """A value that already sits on a midnight boundary comes back unchanged."""
    on_boundary = datetime(2026, 5, 31, 0, 0, 0)
    assert _normalize_end_of_day_until(on_boundary) == on_boundary
61
+
62
+
63
def test_normalize_end_of_day_midday_59() -> None:
    """xx:59:59 outside hour 23 must be left alone (the --hours edge case)."""
    # 14:59:59 has the same minute/second pattern as end-of-day but hour != 23.
    midday_edge = datetime(2026, 5, 30, 14, 59, 59)
    assert _normalize_end_of_day_until(midday_edge) == midday_edge
68
+
69
+
70
def test_end_of_day_until_gives_24_chunks() -> None:
    """Normalizing a 23:59:59 `until` turns a 23-chunk day into a 24-chunk day."""
    day_start = datetime(2026, 5, 29, 0, 0, 0)
    # This is the shape --days produces: the final second of the day.
    day_end_raw = datetime(2026, 5, 29, 23, 59, 59)

    chunks_before = _build_hour_windows(day_start, day_end_raw)
    assert len(chunks_before) == 23  # without fix

    day_end_fixed = _normalize_end_of_day_until(day_end_raw)
    chunks_after = _build_hour_windows(day_start, day_end_fixed)
    assert len(chunks_after) == 24  # with fix
77
+
78
+
79
+ # ── _build_hour_windows ───────────────────────────────────────────────────────
80
+
81
+
82
def test_build_hour_windows_single_day() -> None:
    """A midnight-to-midnight span yields 24 windows; first and last are pinned.

    The first window is expected to be 00:00-01:00 on the start day and the
    last 23:00-00:00 ending exactly on `until`.
    """
    since = datetime(2026, 5, 29, 0, 0, 0)
    until = datetime(2026, 5, 30, 0, 0, 0)  # 24 hours later, on midnight boundary
    windows = _build_hour_windows(since, until)
    assert len(windows) == 24
    assert windows[0] == (datetime(2026, 5, 29, 0, 0, 0), datetime(2026, 5, 29, 1, 0, 0))
    assert windows[-1] == (datetime(2026, 5, 29, 23, 0, 0), datetime(2026, 5, 30, 0, 0, 0))
89
+
90
+
91
def test_build_hour_windows_multi_day() -> None:
    """A seven-day midnight-to-midnight span yields 7 * 24 = 168 windows."""
    since = datetime(2026, 5, 23, 0, 0, 0)
    until = datetime(2026, 5, 30, 0, 0, 0)  # 7 days later
    windows = _build_hour_windows(since, until)
    assert len(windows) == 168  # 7 * 24
96
+
97
+
98
def test_build_hour_windows_partial() -> None:
    """An off-the-hour `since` is floored and only whole-hour chunks are produced.

    With since=09:30 and until=14:00 the test expects five windows, each
    exactly one hour long and aligned to whole hours, the first starting at
    09:00 and the last ending at 14:00.
    """
    # since is not on an hour boundary — floored to 09:00
    # until is on an hour boundary — 14:00 unchanged
    since = datetime(2026, 5, 30, 9, 30, 0)
    until = datetime(2026, 5, 30, 14, 0, 0)
    windows = _build_hour_windows(since, until)
    # floor(09:30) = 09:00, floor(14:00) = 14:00 → 5 complete hours
    assert len(windows) == 5
    for start, end in windows:
        # Every chunk is exactly one hour long ...
        assert (end - start).total_seconds() == 3600
        # ... and both edges sit on whole-hour marks (no partial-hour chunks).
        assert start.minute == 0 and start.second == 0 and start.microsecond == 0
        assert end.minute == 0 and end.second == 0 and end.microsecond == 0
    # First chunk starts at the floored hour
    assert windows[0][0].hour == 9
    assert windows[0][0].minute == 0
    # Last chunk ends at 14:00
    assert windows[-1][1].hour == 14
    assert windows[-1][1].minute == 0
119
+
120
+
121
+ # ── write ─────────────────────────────────────────────────────────────────────
122
+
123
+
124
def test_write_output(tmp_path: Path) -> None:
    """write() emits one line per row, ordered by `_time`, without PRI prefixes.

    Rows are supplied out of chronological order; two of them carry a syslog
    `<N>` priority prefix and one does not.
    """
    newest = {
        "_time": "2026-05-30T01:00:00.000+00:00",
        "_raw": "<34>May 30 01:00:00 192.0.2.10 kernel: boot message",
    }
    oldest = {
        "_time": "2026-05-29T23:00:00.000+00:00",
        "_raw": "May 29 23:00:00 192.0.2.11 sshd: no PRI prefix here",
    }
    middle = {
        "_time": "2026-05-30T00:00:00.000+00:00",
        "_raw": "<5>May 30 00:00:00 192.0.2.10 nginx: another line",
    }
    destination = tmp_path / "output.log"

    written, _ = write([newest, oldest, middle], destination, verbose=False)

    assert written == 3
    out_lines = destination.read_text(encoding="utf-8").splitlines()
    assert len(out_lines) == 3
    first, second, third = out_lines

    # Output is ordered by ascending _time.
    assert "May 29 23:00:00" in first
    assert "May 30 00:00:00" in second
    assert "May 30 01:00:00" in third

    # Where a PRI prefix was present it has been removed.
    assert not second.startswith("<")
    assert not third.startswith("<")

    # The row that never had a PRI prefix keeps its message text.
    assert "no PRI prefix here" in first
157
+
158
+
159
def test_write_creates_parent_directories(tmp_path: Path) -> None:
    """write() succeeds when the target's parent directories do not exist yet."""
    single_row = {
        "_time": "2026-05-30T01:00:00.000+00:00",
        "_raw": "May 30 01:00:00 192.0.2.10 kernel: boot",
    }
    nested_target = tmp_path / "a" / "b" / "out.log"

    written, _ = write([single_row], nested_target, verbose=False)

    assert written == 1
    assert nested_target.exists()
165
+
166
+
167
+ # ── credentials ──────────────────────────────────────────────────────────────
168
+
169
+
170
def test_get_credentials_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """With an empty config, credentials come from the LOGHUNTER_SPLUNK_* env vars."""
    expected = {
        "LOGHUNTER_SPLUNK_USER": "testuser",
        "LOGHUNTER_SPLUNK_PASS": "testpass",
    }
    for var_name, var_value in expected.items():
        monkeypatch.setenv(var_name, var_value)

    user, passwd = _get_credentials({})

    assert user == "testuser"
    assert passwd == "testpass"
176
+
177
+
178
def test_get_credentials_missing(monkeypatch: pytest.MonkeyPatch) -> None:
    """No env vars and an empty config raise ValueError naming the missing credentials."""
    for var_name in ("LOGHUNTER_SPLUNK_USER", "LOGHUNTER_SPLUNK_PASS"):
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv(var_name, raising=False)

    with pytest.raises(ValueError, match="Splunk credentials not found"):
        _get_credentials({})
183
+
184
+
185
+ # ── fetch SDK guard ───────────────────────────────────────────────────────────
186
+
187
+
188
def test_fetch_no_sdk(monkeypatch: pytest.MonkeyPatch) -> None:
    """fetch() raises ValueError about the missing SDK when `splunk_client` is None."""
    import loghunter.exporters.splunk as splunk_module

    # Simulate the optional SDK import having failed.
    monkeypatch.setattr(splunk_module, "splunk_client", None)

    query = {"spl": "search *"}
    connection = {"host": "192.0.2.20", "port": 8089, "username": "u", "password": "p"}
    window_start = datetime(2026, 5, 29, 0, 0, 0)
    window_end = datetime(2026, 5, 30, 0, 0, 0)

    with pytest.raises(ValueError, match="splunk-sdk not installed"):
        splunk_module.fetch(query, connection, window_start, window_end, False)
202
+
203
+
204
def test_fetch_formats_splunk_auth_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """A login failure from the client surfaces as a ValueError with a usable hint.

    The stand-in client raises a locally defined `AuthenticationError` from
    `connect`; the resulting message must mention the failed login and the
    LOGHUNTER_SPLUNK_USER environment variable.
    """
    import loghunter.exporters.splunk as splunk_module

    class AuthenticationError(Exception):
        pass

    class FakeClient:
        @staticmethod
        def connect(**_kwargs):
            raise AuthenticationError("Login failed")

    monkeypatch.setattr(splunk_module, "splunk_client", FakeClient)

    query = {"spl": "search *"}
    connection = {"host": "192.0.2.20", "port": 8089, "username": "u", "password": "p"}
    window_start = datetime(2026, 5, 29, 0, 0, 0)
    window_end = datetime(2026, 5, 29, 1, 0, 0)

    with pytest.raises(ValueError) as exc_info:
        splunk_module.fetch(query, connection, window_start, window_end, False)

    message = str(exc_info.value)
    assert "Splunk login failed" in message
    assert "LOGHUNTER_SPLUNK_USER" in message
231
+
232
+
233
def test_fetch_formats_splunk_connection_error(monkeypatch: pytest.MonkeyPatch) -> None:
    """An OSError from the client's connect() becomes a ValueError naming host:port."""
    import loghunter.exporters.splunk as splunk_module

    class FakeClient:
        @staticmethod
        def connect(**_kwargs):
            raise OSError("connection refused")

    monkeypatch.setattr(splunk_module, "splunk_client", FakeClient)

    query = {"spl": "search *"}
    connection = {"host": "192.0.2.20", "port": 8089, "username": "u", "password": "p"}
    window_start = datetime(2026, 5, 29, 0, 0, 0)
    window_end = datetime(2026, 5, 29, 1, 0, 0)

    with pytest.raises(ValueError) as exc_info:
        splunk_module.fetch(query, connection, window_start, window_end, False)

    message = str(exc_info.value)
    assert "Could not connect to Splunk management API" in message
    assert "192.0.2.20:8089" in message
257
+
258
+
259
def test_default_splunk_export_dir_is_global_default(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A user-defined query with no output dir lands under the global export_dir.

    No Splunk query is shipped, so the user must define one.  With the config
    search path pointed at a file that does not exist, the loaded defaults are
    expected to resolve a minimal query to ~/.loghunter/exports/<basename>
    (tier 4 of the cascade, auto-segmented per source).
    """
    # Point the config search at a non-existent file so only defaults load.
    monkeypatch.setattr(cfg, "SEARCH_PATHS", [tmp_path / "missing.toml"])
    config = cfg.load(config_file=None)

    minimal_query = {"output_basename": "syslog"}  # user-defined query — minimum shape
    window_start = datetime(2026, 5, 30, 0, 0, 0)
    window_end = datetime(2026, 5, 31, 0, 0, 0)

    resolved = _resolve_output_path(
        minimal_query,
        None,
        window_start,
        window_end,
        "default",
        backend_config=config["export"]["splunk"],
        loghunter_config=config["loghunter"],
        root=effective_root(config),
    )

    expected_dir = Path("~/.loghunter/exports/syslog").expanduser()
    assert resolved.parent == expected_dir
280
+
281
+
282
+ # ── query resolution ──────────────────────────────────────────────────────────
283
+
284
+
285
+ def _make_config(queries: dict) -> dict:
286
+ return {"export": {"splunk": {"host": "192.0.2.20", "port": 8089, "query": queries}}}
287
+
288
+
289
def test_query_resolution_default() -> None:
    """With no names requested, a lone query called "default" is selected."""
    only_query = {"spl": "search *"}
    resolved = _resolve_queries(_make_config({"default": only_query}), "splunk", [])
    assert resolved == [("default", {"spl": "search *"})]
293
+
294
+
295
def test_query_resolution_single() -> None:
    """With no names requested, the only configured query is selected whatever its name."""
    only_query = {"spl": "search index=main"}
    resolved = _resolve_queries(_make_config({"myquery": only_query}), "splunk", [])
    assert resolved == [("myquery", {"spl": "search index=main"})]
299
+
300
+
301
def test_query_resolution_ambiguous() -> None:
    """Two queries and no explicit choice raise ValueError listing both names."""
    two_queries = {"alpha": {"spl": "search a"}, "beta": {"spl": "search b"}}
    config = _make_config(two_queries)

    with pytest.raises(ValueError) as exc_info:
        _resolve_queries(config, "splunk", [])

    message = str(exc_info.value)
    for query_name in ("alpha", "beta"):
        assert query_name in message
308
+
309
+
310
def test_query_resolution_explicit() -> None:
    """Naming one of several configured queries selects exactly that query."""
    two_queries = {"alpha": {"spl": "search a"}, "beta": {"spl": "search b"}}
    resolved = _resolve_queries(_make_config(two_queries), "splunk", ["beta"])
    assert resolved == [("beta", {"spl": "search b"})]
314
+
315
+
316
def test_query_resolution_missing() -> None:
    """Requesting a query name that is not configured raises ValueError naming it."""
    config = _make_config({"alpha": {"spl": "search a"}})
    requested = ["noexist"]
    with pytest.raises(ValueError, match="noexist"):
        _resolve_queries(config, "splunk", requested)
320
+
321
+
322
+ # ── output path resolution ────────────────────────────────────────────────────
323
+
324
+
325
def test_output_autoname_single_day(tmp_path: Path) -> None:
    """An existing directory passed as a string gets an auto-generated 1-day filename.

    cli_out is a string here; because tmp_path exists it is treated as a
    directory (the "Step 2 DIRECTORY verdict").
    """
    window_start = datetime(2026, 5, 30, 0, 0, 0)
    window_end = datetime(2026, 5, 31, 0, 0, 0)  # exactly 1 day

    resolved = _resolve_output_path(
        {"output_basename": "syslog"}, str(tmp_path), window_start, window_end, "default"
    )

    assert resolved.name == "syslog_20260530_1d.log"
    assert resolved.parent == tmp_path
333
+
334
+
335
def test_output_autoname_multi_day(tmp_path: Path) -> None:
    """A seven-day window into an existing directory is auto-named with a `_7d` suffix."""
    window_start = datetime(2026, 5, 24, 0, 0, 0)
    window_end = datetime(2026, 5, 31, 0, 0, 0)  # exactly 7 days

    resolved = _resolve_output_path(
        {"output_basename": "syslog"}, str(tmp_path), window_start, window_end, "default"
    )

    assert resolved.name == "syslog_20260524_7d.log"
    assert resolved.parent == tmp_path
342
+
343
+
344
def test_output_explicit_path(tmp_path: Path) -> None:
    """A non-existent path without a trailing slash is used verbatim as the file.

    This is the "Step 3 FILE verdict" of the output-path resolution.
    """
    target_file = tmp_path / "myfile.log"
    # Precondition: the path must not exist, otherwise a different step applies.
    assert not target_file.exists()

    window_start = datetime(2026, 5, 30, 0, 0, 0)
    window_end = datetime(2026, 5, 31, 0, 0, 0)
    resolved = _resolve_output_path({}, str(target_file), window_start, window_end, "default")

    assert resolved == target_file