loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,491 @@
1
+ """Tests for the duration detector.
2
+
3
+ All IP addresses use RFC 5737 documentation space:
4
+ 192.0.2.x, 198.51.100.x, 203.0.113.x
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import io
10
+ import unittest
11
+ from datetime import datetime, timezone
12
+
13
+ import pandas as pd
14
+
15
+ from loghunter.common.finding import DetectorContext, Finding, RunSummary, Severity
16
+ from loghunter.detectors.duration import (
17
+ DETECTOR_NAME,
18
+ STATUS,
19
+ _duration_str,
20
+ run,
21
+ )
22
+ from loghunter.outputs.text import TextHandler
23
+ from loghunter.runner import discover_detectors
24
+
25
+
26
+ # ── Helpers ───────────────────────────────────────────────────────────────────
27
+
28
+ _NOW = datetime(2026, 5, 30, tzinfo=timezone.utc)
29
+ _WINDOW = (_NOW, _NOW)
30
+
31
+
32
def _ctx(
    df: pd.DataFrame | None,
    cfg: dict | None = None,
) -> DetectorContext:
    """Wrap a connection DataFrame in a ``DetectorContext`` for ``run()``.

    Args:
        df: Connection records to expose under the ``"conn*.log*"`` key, or
            ``None`` to build a context whose ``logs`` mapping is empty.
        cfg: Detector configuration; a falsy value becomes an empty dict.

    Returns:
        A context with no allowlist and the module-level ``_WINDOW`` as its
        data window.
    """
    conn_logs = {}
    if df is not None:
        conn_logs["conn*.log*"] = df
    detector_config = cfg if cfg else {}
    return DetectorContext(
        logs=conn_logs,
        config=detector_config,
        allowlist=None,
        data_window=_WINDOW,
    )
43
+
44
+
45
def _conn_row(
    src: str = "192.0.2.10",
    dst: str = "198.51.100.20",
    port: int = 443,
    proto: str = "tcp",
    duration: float = 7200.0,
    ts: float = 1_779_750_000.0,
    **kwargs,
) -> dict:
    """Return one connection record as a plain dict.

    The six named fields always come first, in signature order; any extra
    keyword arguments (for example ``bytes`` or ``conn_state``) are appended
    as additional columns.
    """
    core_fields = dict(
        src=src, dst=dst, port=port, proto=proto,
        duration=duration, ts=ts,
    )
    return {**core_fields, **kwargs}
60
+
61
+
62
def _minimal_finding() -> Finding:
    """Return a hand-built MEDIUM duration ``Finding`` for renderer tests.

    The finding is constructed directly rather than via ``run()``; the byte
    and timestamp evidence fields are ``None`` and ``conn_states`` is empty.
    """
    evidence = {
        "src": "192.0.2.10",
        "dst": "198.51.100.20",
        "port": 443,
        "proto": "tcp",
        "max_duration_seconds": 7200.0,
        "max_duration_str": "2h 0m",
        "connection_count": 1,
        "total_bytes": None,
        "avg_bytes_per_second": None,
        "conn_states": [],
        "first_seen": None,
        "last_seen": None,
    }
    return Finding(
        detector="duration",
        severity=Severity.MEDIUM,
        title="192.0.2.10 → 198.51.100.20:443/tcp",
        description="A long-lived connection.",
        evidence=evidence,
        next_steps=["Review the connection."],
        ts_generated=_NOW,
        data_window=_WINDOW,
    )
86
+
87
+
88
+ # ── Tests ─────────────────────────────────────────────────────────────────────
89
+
90
class DurationDetectorTests(unittest.TestCase):
    """Tests for the duration detector's ``run()`` and its text rendering.

    Detector tests feed small DataFrames built from ``_conn_row`` through
    ``run(_ctx(df))`` and assert on the returned findings. Renderer tests
    pass findings through ``TextHandler`` (via ``_render``) and assert on
    the captured text.
    """

    # ── Shared helpers ────────────────────────────────────────────────────────

    @staticmethod
    def _render(
        findings: list,
        verbose_level: int = 0,
        record_counts: dict | None = None,
    ) -> str:
        """Render *findings* with a ``TextHandler`` and return the output text.

        Args:
            findings: Findings handed to ``TextHandler.write``.
            verbose_level: Forwarded to the ``TextHandler`` constructor.
            record_counts: ``RunSummary.record_counts``; defaults to ``{}``.

        Returns:
            Everything the handler wrote between ``begin`` and ``end``.
        """
        summary = RunSummary(
            data_window=_WINDOW,
            record_counts={} if record_counts is None else record_counts,
            data_size_bytes=0,
            detectors_run=["duration"],
            detectors_skipped={},
        )
        stream = io.StringIO()
        handler = TextHandler(stream=stream, verbose_level=verbose_level)
        handler.begin(summary)
        handler.write(findings)
        handler.end()
        return stream.getvalue()

    # ── Discovery ─────────────────────────────────────────────────────────────

    def test_duration_is_available_in_discover_detectors(self) -> None:
        detectors = discover_detectors()
        self.assertIn("duration", detectors)
        self.assertEqual(getattr(detectors["duration"], "STATUS", None), "available")

    def test_detector_name_and_status_constants(self) -> None:
        self.assertEqual(DETECTOR_NAME, "duration")
        self.assertEqual(STATUS, "available")

    # ── Empty / missing input ─────────────────────────────────────────────────

    def test_run_returns_empty_when_no_conn_key(self) -> None:
        self.assertEqual(run(_ctx(None)), [])

    def test_run_returns_empty_on_empty_dataframe(self) -> None:
        empty = pd.DataFrame(columns=["src", "dst", "port", "proto", "ts", "duration"])
        self.assertEqual(run(_ctx(empty)), [])

    def test_run_returns_empty_when_duration_column_absent(self) -> None:
        df = pd.DataFrame([{"src": "192.0.2.10", "dst": "198.51.100.20",
                            "port": 443, "proto": "tcp", "ts": 1_779_750_000.0}])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_below_threshold(self) -> None:
        df = pd.DataFrame([_conn_row(duration=299.0)])
        self.assertEqual(run(_ctx(df, {"min_duration_seconds": 300})), [])

    def test_run_returns_empty_when_all_nan(self) -> None:
        df = pd.DataFrame([_conn_row(duration=float("nan"))])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_zero(self) -> None:
        df = pd.DataFrame([_conn_row(duration=0.0)])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_negative(self) -> None:
        df = pd.DataFrame([_conn_row(duration=-10.0)])
        self.assertEqual(run(_ctx(df)), [])

    # ── Core detection ────────────────────────────────────────────────────────

    def test_medium_severity_at_7200s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.MEDIUM)

    def test_high_severity_at_14400s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=14400.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.HIGH)

    def test_high_severity_above_14400s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=86400.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.HIGH)

    def test_low_severity_at_301s_emitted(self) -> None:
        df = pd.DataFrame([_conn_row(duration=301.0)])
        findings = run(_ctx(df, {"min_duration_seconds": 300}))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.LOW)

    def test_exactly_at_threshold_is_detected(self) -> None:
        df = pd.DataFrame([_conn_row(duration=1800.0)])
        # 1800s is LOW (< 7200) — under W6 the result set is verbosity-invariant,
        # so LOW always emits; the text handler is responsible for hiding LOW at
        # level 0.
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)

    def test_just_below_threshold_not_detected(self) -> None:
        df = pd.DataFrame([_conn_row(duration=1799.9)])
        self.assertEqual(run(_ctx(df)), [])

    def test_multiple_findings_sorted_descending_by_max_duration(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration=7200.0),
            _conn_row(src="192.0.2.11", duration=14400.0),
            _conn_row(src="192.0.2.12", duration=9000.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 3)
        durations = [f.evidence["max_duration_seconds"] for f in findings]
        self.assertEqual(durations, sorted(durations, reverse=True))
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    def test_zero_duration_excluded_even_if_column_present(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration=0.0),
            _conn_row(src="192.0.2.11", duration=7200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    def test_non_numeric_duration_values_tolerated(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration="bad"),
            _conn_row(src="192.0.2.11", duration=7200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    # ── Grouping behavior ─────────────────────────────────────────────────────

    def test_grouping_collapses_same_flow(self) -> None:
        # Three rows for the same (src, dst, port, proto) → one finding
        df = pd.DataFrame([
            _conn_row(duration=7200.0, ts=1_779_750_000.0),
            _conn_row(duration=9000.0, ts=1_779_750_100.0),
            _conn_row(duration=7800.0, ts=1_779_750_200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["connection_count"], 3)
        self.assertEqual(findings[0].evidence["max_duration_seconds"], 9000.0)

    def test_floor_row_excluded_from_group(self) -> None:
        # One row below the floor; only the two above it count
        df = pd.DataFrame([
            _conn_row(duration=7200.0),
            _conn_row(duration=9000.0),
            _conn_row(duration=500.0),   # below default 1800s floor
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["connection_count"], 2)
        self.assertEqual(findings[0].evidence["max_duration_seconds"], 9000.0)

    def test_two_flows_produce_two_findings(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", dst="198.51.100.1", port=443, duration=7200.0),
            _conn_row(src="192.0.2.10", dst="198.51.100.2", port=443, duration=14400.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 2)

    # ── LOW severity: result-set verbosity invariance (W6) ────────────────────

    def test_low_always_emitted_result_set_invariant(self) -> None:
        """W6: duration.run() emits LOW findings regardless of verbosity. The
        result set is invariant across verbose levels; the text handler is the
        sole authority on hiding LOW at level 0 (W2 pipeline step 1)."""
        # 2000s is LOW (< 7200) but above the 1800s floor.
        df = pd.DataFrame([_conn_row(duration=2000.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.LOW)

    # ── Evidence fields ───────────────────────────────────────────────────────

    def test_max_duration_seconds_is_rounded_float(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.123456)])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["max_duration_seconds"], 7200.1)

    def test_max_duration_str_present_and_non_empty(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsInstance(f.evidence["max_duration_str"], str)
        self.assertTrue(f.evidence["max_duration_str"])

    def test_src_dst_port_proto_present(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["src"], "192.0.2.10")
        self.assertEqual(f.evidence["dst"], "198.51.100.20")
        self.assertEqual(f.evidence["port"], 443)
        self.assertEqual(f.evidence["proto"], "tcp")

    def test_avg_bps_none_when_bytes_null(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["avg_bytes_per_second"])

    def test_avg_bps_computed_when_bytes_present(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0, bytes=720000)])
        f = run(_ctx(df))[0]
        self.assertIsNotNone(f.evidence["avg_bytes_per_second"])
        self.assertAlmostEqual(f.evidence["avg_bytes_per_second"], 100.0, places=1)

    def test_avg_bps_from_max_duration_row(self) -> None:
        # Row 1: max duration 9000s, bytes 90000 → bps 10.0
        # Row 2: shorter duration 7200s, bytes 720000 (higher bytes, shorter duration)
        # avg_bps must use the max-duration row: 90000 / 9000 = 10.0
        df = pd.DataFrame([
            _conn_row(duration=9000.0, bytes=90000),
            _conn_row(duration=7200.0, bytes=720000),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertAlmostEqual(findings[0].evidence["avg_bytes_per_second"], 10.0, places=1)

    def test_avg_bps_none_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("bytes", df.columns)
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["avg_bytes_per_second"])

    def test_total_bytes_none_when_all_null(self) -> None:
        df = pd.DataFrame([
            _conn_row(duration=7200.0, bytes=None),
            _conn_row(duration=7800.0, bytes=float("nan")),
        ])
        findings = run(_ctx(df))
        self.assertIsNone(findings[0].evidence["total_bytes"])

    def test_total_bytes_none_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("bytes", df.columns)
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["total_bytes"])

    def test_conn_states_when_single_state_present(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0, conn_state="SF")])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["conn_states"], ["SF"])

    def test_conn_states_distinct_sorted(self) -> None:
        # Repeated and null states — expect sorted unique non-null list
        df = pd.DataFrame([
            _conn_row(duration=8000.0, conn_state="SF"),
            _conn_row(duration=7800.0, conn_state="RSTO"),
            _conn_row(duration=7200.0, conn_state="SF"),    # duplicate
            _conn_row(duration=7500.0, conn_state=None),    # null, excluded
        ])
        findings = run(_ctx(df))
        self.assertEqual(findings[0].evidence["conn_states"], ["RSTO", "SF"])

    def test_conn_states_empty_list_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("conn_state", df.columns)
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["conn_states"], [])

    # ── Finding contract ──────────────────────────────────────────────────────

    def test_title_contains_src_and_dst_port(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        f = run(_ctx(df))[0]
        self.assertIn("192.0.2.10", f.title)
        self.assertIn("198.51.100.20", f.title)
        self.assertIn("443", f.title)

    def test_title_does_not_contain_duration_value(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertNotIn("7200", f.title)
        self.assertNotIn("2h", f.title)

    def test_detector_field_is_duration(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertEqual(f.detector, "duration")

    def test_next_steps_non_empty(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsInstance(f.next_steps, list)
        self.assertGreater(len(f.next_steps), 0)

    # ── _duration_str helper ──────────────────────────────────────────────────

    def test_duration_str_seconds(self) -> None:
        self.assertEqual(_duration_str(47.0), "47s")

    def test_duration_str_minutes(self) -> None:
        self.assertEqual(_duration_str(872.0), "14m 32s")

    def test_duration_str_hours(self) -> None:
        self.assertEqual(_duration_str(15780.0), "4h 23m")

    def test_duration_str_days(self) -> None:
        self.assertEqual(_duration_str(93600.0), "1d 2h")

    # ── Text renderer ─────────────────────────────────────────────────────────

    def test_render_duration_group_no_exception(self) -> None:
        output = self._render(
            [_minimal_finding()],
            record_counts={"conn*.log*": 1},
        )
        self.assertTrue(output)

    def test_render_output_contains_key_tokens(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        output = self._render(run(_ctx(df)))
        self.assertIn("192.0.2.10", output)
        self.assertIn("198.51.100.20", output)
        self.assertIn("2h 0m", output)

    def test_verbose_mode_emits_evidence_and_next_steps(self) -> None:
        output = self._render([_minimal_finding()], verbose_level=1)
        self.assertIn("evidence:", output)
        self.assertIn("next steps:", output)
        self.assertIn("data window:", output)

    def test_render_conns_column_pluralization(self) -> None:
        # Two rows with the same flow tuple → grouped → "2 conns"
        df = pd.DataFrame([
            _conn_row(duration=7200.0, ts=1_779_750_000.0),
            _conn_row(duration=7800.0, ts=1_779_750_100.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        # Assert the count together with the plural noun; a bare "conns" check
        # would pass even if the rendered count were wrong.
        self.assertIn("2 conns", self._render(findings))

    def test_render_single_conn_uses_singular(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        output = self._render(run(_ctx(df)))
        # A plain substring check for "1 conn" also matches "1 conns" and
        # "11 conn…", so it cannot catch a pluralization bug. Require that the
        # count is exactly 1 and that the noun is not followed by "s".
        self.assertRegex(output, r"(?<!\d)1 conn(?!s)")

    def test_arrow_alignment_across_multiple_findings(self) -> None:
        """All → arrows must appear at the same column offset."""
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", dst="198.51.100.1", port=443, duration=14400.0),
            _conn_row(src="192.0.2.200", dst="203.0.113.5", port=22, duration=7200.0),
            _conn_row(src="192.0.2.1", dst="198.51.100.200", port=9997, duration=7201.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 3)

        output_lines = self._render(findings).splitlines()
        # Finding rows are the lines whose first non-blank character is "[".
        finding_lines = [line for line in output_lines if line.lstrip().startswith("[")]
        arrow_positions = [line.index("→") for line in finding_lines if "→" in line]
        self.assertEqual(len(arrow_positions), 3)
        self.assertEqual(len(set(arrow_positions)), 1, (
            f"→ arrows not aligned — positions: {arrow_positions}"
        ))
488
+
489
+
490
+ if __name__ == "__main__":
491
+ unittest.main()
@@ -0,0 +1,153 @@
1
+ """Export orchestrator — new [export.<backend>] config shape coverage.
2
+
3
+ Glenn's amendment: the previous draft missed the fetch seam where
4
+ ``run_export`` reads ``config[resolved_backend]`` at lines 155 and 165. A
5
+ stub-backend test that drives the actual ``run_export`` exposes this — it
6
+ KeyErrors today if any site still reads the top-level key.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import sys
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import pytest
17
+
18
+ from loghunter import exporters
19
+ from loghunter.exporters import run_export
20
+
21
+
22
+ # ── backend selection reads config["export"][name], not top-level ────────────
23
+
24
+
25
def test_backend_selection_reads_from_export_namespace(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A top-level ``[splunk]`` stanza must not auto-select the splunk backend.

    Only ``[export.splunk]`` counts under the new config shape, so with
    ``backend=None`` and nothing under ``export`` the call must raise
    ``ValueError`` matching "No export backend configured".
    """
    # WRONG shape on purpose — this stanza sits at the top level, not under
    # "export", and therefore must NOT activate.
    misplaced_splunk = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "x"}},
    }
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "splunk": misplaced_splunk,
    }
    window_start = datetime(2026, 6, 1)
    window_end = datetime(2026, 6, 2)
    with pytest.raises(ValueError, match=r"No export backend configured"):
        run_export(
            config=config,
            backend=None,
            query_names=[],
            since=window_start,
            until=window_end,
            out=None,
            verbose=False,
        )
42
+
43
+
44
def test_backend_selection_from_export_namespace_succeeds(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """With ``[export.splunk]`` present, ``backend=None`` selects splunk without raising."""
    from loghunter.exporters import splunk as splunk_module

    splunk_stanza = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "x"}},
    }
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": splunk_stanza},
    }

    # Stub fetch / write so no real Splunk call happens.
    def _fake_fetch(*args, **kwargs):
        return ([], {"units": 0, "unit_label": "chunks"})

    def _fake_write(rows, outpath, verbose):
        return (0, {"bytes": 0, "paths": [outpath]})

    monkeypatch.setattr(splunk_module, "fetch", _fake_fetch)
    monkeypatch.setattr(splunk_module, "write", _fake_write)

    # Should auto-select splunk and not raise.
    run_export(
        config=config,
        backend=None,
        query_names=[],
        since=datetime(2026, 6, 1),
        until=datetime(2026, 6, 2),
        out=None,
        verbose=False,
    )
65
+
66
+
67
+ # ── run_export fetch-seam — stub backend, verify what gets passed in ────────
68
+
69
+
70
class _StubBackend:
    """Module-shaped stand-in for an export backend.

    Provides the four duck-typed callables ``run_export`` needs
    (``is_configured``, ``summary_descriptor``, ``fetch``, ``write``).
    ``captured`` records the arguments that ``fetch`` and ``write`` were
    called with so a test can inspect them afterwards.
    """

    # Class-level on purpose: the callables are static, so they record onto
    # the class. Tests rebind this to a fresh dict before driving run_export.
    captured: dict[str, Any] = {}

    @staticmethod
    def is_configured(backend_cfg: dict) -> bool:
        """Report whether the stanza has a non-blank ``host``."""
        host = backend_cfg.get("host", "")
        return host.strip() != ""

    @staticmethod
    def summary_descriptor(backend_cfg: dict) -> str:
        """Return the stanza's ``host``, or ``""`` when it is missing."""
        return backend_cfg.get("host", "")

    @staticmethod
    def fetch(query_config, backend_config, since, until, verbose, *, skip_confirm=False):
        """Record the configs received and return an empty result set."""
        # Capture the backend_config the orchestrator hands us — this is the
        # seam Glenn's amendment guards. Pre-fix, this would have arrived as
        # config["splunk"] (top-level) and the new clean-break config has no
        # such key → KeyError or empty dict.
        _StubBackend.captured.update(
            backend_config=backend_config,
            query_config=query_config,
        )
        no_rows = []
        stats = {"units": 0, "unit_label": "chunks"}
        return (no_rows, stats)

    @staticmethod
    def write(rows, outpath, verbose):
        """Record the destination path and report zero rows / zero bytes."""
        _StubBackend.captured["outpath"] = outpath
        report = {"bytes": 0, "paths": [outpath]}
        return 0, report
98
+
99
+
100
def test_run_export_fetch_receives_export_namespace_backend_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Drive the real ``run_export`` against ``_StubBackend``.

    The ``backend_config`` handed to ``fetch`` must be the ``[export.splunk]``
    stanza, never the decoy stanza stored under the top-level ``"splunk"`` key.
    """
    _StubBackend.captured = {}

    def _load_stub(name):
        return _StubBackend

    # Route backend loading to the stub under the existing "splunk" slot: the
    # orchestrator resolves a name from _KNOWN_BACKENDS, then calls
    # is_configured / fetch / write on whatever _load_backend returns.
    monkeypatch.setattr(exporters, "_load_backend", _load_stub)
    monkeypatch.setattr(exporters, "_KNOWN_BACKENDS", ("splunk",))

    export_stanza = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "search *", "output_basename": "syslog"}},
    }
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": export_stanza},
        # Decoy: top-level key with junk. Pre-fix code would have read THIS.
        "splunk": {"host": "BOGUS-do-not-use", "query": {}},
    }
    run_export(
        config=config,
        backend="splunk",
        query_names=[],
        since=datetime(2026, 6, 1),
        until=datetime(2026, 6, 2),
        out=None,
        verbose=False,
    )

    received_host = _StubBackend.captured["backend_config"].get("host")
    assert received_host == "192.0.2.20"
    assert received_host != "BOGUS-do-not-use"
131
+
132
+
133
+ # ── Splunk no-query under [export.splunk] → actionable ValueError ────────────
134
+
135
+
136
def test_splunk_no_query_under_export_namespace_raises_actionable(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """``[export.splunk]`` with a host but no query stanza must fail helpfully.

    No default query is shipped, so the ``ValueError`` message has to point
    the user at ``[export.splunk.query.<name>]``.
    """
    # NO query.* under the splunk stanza — a bare `loghunter export` must
    # surface an actionable error.
    host_only_stanza = {"host": "192.0.2.20", "port": 8089}
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": host_only_stanza},
    }
    with pytest.raises(ValueError) as exc_info:
        run_export(
            config=config,
            backend=None,
            query_names=[],
            since=datetime(2026, 6, 1),
            until=datetime(2026, 6, 2),
            out=None,
            verbose=False,
        )
    assert "[export.splunk.query." in str(exc_info.value)