loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,455 @@
1
+ """Focused tests for the scan detector migration.
2
+
3
+ All IP addresses use RFC 5737 documentation space:
4
+ 192.0.2.x, 198.51.100.x, 203.0.113.x
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import io
10
+ import sys
11
+ import time
12
+ import unittest
13
+ from datetime import datetime, timezone
14
+
15
+ import pandas as pd
16
+
17
+ from loghunter.common.finding import DetectorContext, Finding, RunSummary, Severity
18
+ from loghunter.detectors.scan import (
19
+ DETECTOR_NAME,
20
+ STATUS,
21
+ _classify_direction,
22
+ _make_finding,
23
+ _zone_of,
24
+ run,
25
+ )
26
+ from loghunter.outputs.text import TextHandler
27
+ from loghunter.runner import discover_detectors
28
+
29
+
30
+ # ── Helpers ───────────────────────────────────────────────────────────────────
31
+
32
+ _NOW = datetime(2026, 5, 30, tzinfo=timezone.utc)
33
+ _WINDOW = (_NOW, _NOW)
34
+
35
+ # Matches the shipped [loghunter].home_net default. Used here only so a
36
+ # helper-built context behaves identically to a runner-built context for the
37
+ # detector's RFC 5737 doc-space traffic fixtures — those addresses are outside
38
+ # RFC1918 and read as external→external, matching pre-refactor behavior.
39
+ _RFC1918_HOME_NET = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
40
+
41
+
42
def _ctx(
    df: pd.DataFrame | None,
    cfg: dict | None = None,
    home_net: list[str] | None = None,
) -> DetectorContext:
    """Assemble the DetectorContext handed to the scan detector in tests.

    Args:
        df: Connection-log frame registered under the ``conn*.log*`` key;
            ``None`` produces a context with no logs at all.
        cfg: Detector config overrides; a falsy value becomes ``{}``.
        home_net: ``None`` selects the RFC1918 default (same as the runner
            supply path with no operator override). An explicit ``[]`` is
            passed through untouched so a test can exercise the scan
            module's standalone-callable fallback constant. Tests that need
            a specific zone layout pass their own list.

    Returns:
        A DetectorContext with no allowlist and the fixed test data window.
    """
    # Only substitute the default for None — an empty list is meaningful.
    if home_net is None:
        effective_home_net = _RFC1918_HOME_NET
    else:
        effective_home_net = home_net

    conn_logs = {}
    if df is not None:
        conn_logs["conn*.log*"] = df

    return DetectorContext(
        logs=conn_logs,
        config=cfg if cfg else {},
        allowlist=None,
        data_window=_WINDOW,
        home_net=effective_home_net,
    )
62
+
63
+
64
def _base_row(scan_type: str, src: str = "192.0.2.1") -> dict:
    """Return a minimal classified result dict suitable for ``_make_finding``.

    Fields that only apply to one scan type are filled for that type and set
    to ``None`` otherwise: ``dst`` for vertical, ``port`` for horizontal, and
    the three temporal fields for slow.
    """
    is_slow = scan_type == "slow"

    # Type-specific values, resolved up front so the literal below stays flat.
    target_host = "198.51.100.1" if scan_type == "vertical" else None
    target_port = 22 if scan_type == "horizontal" else None
    buckets = 5 if is_slow else None
    spread = 2.5 if is_slow else None
    peak_ports = 4 if is_slow else None

    return {
        "scan_type": scan_type,
        "src": src,
        "dst": target_host,
        "port": target_port,
        "distinct_ports": 20,
        "distinct_hosts": 20,
        "total_conns": 100,
        "scan_state_ratio": 0.70,
        "top_states": "S0, REJ",
        "direction": "internal→external",
        "pattern_tag": "confirmed_scan",
        "pattern_notes": "Strong scanner signature.",
        "window_start": "2026-05-30 00:00:00",
        "window_secs": 3600,
        "active_buckets": buckets,
        "temporal_spread_score": spread,
        "max_ports_in_bucket": peak_ports,
        "_severity": Severity.HIGH,
    }
86
+
87
+
88
def _conn_df(
    src: str,
    dst: str | None,
    ports: list[int],
    dsts: list[str] | None,
    conn_state: str,
    proto: str = "tcp",
    base_ts: float = 1_748_563_200.0,
    spacing: float = 60.0,
) -> pd.DataFrame:
    """Build a canonical-schema DataFrame for detector input.

    Two fixture shapes are supported:

    * horizontal (``dsts`` given) — one port (``ports[0]``), one row per host
      in ``dsts``; ``dst`` is ignored.
    * vertical (``dsts`` is ``None``) — one host (``dst``), one row per port
      in ``ports``.

    Args:
        src: Source address written to every row.
        dst: Destination address for the vertical shape.
        ports: Ports to probe; only the first is used for the horizontal shape.
        dsts: Destination addresses for the horizontal shape, or ``None``.
        conn_state: Connection state written to every row.
        proto: Protocol written to every row.
        base_ts: Timestamp of the first row.
        spacing: Seconds added per subsequent row.

    Returns:
        A DataFrame with columns ``src, dst, port, proto, ts, conn_state``
        (no columns at all when there are no rows).

    Raises:
        IndexError: If ``dsts`` is given but ``ports`` is empty.
    """
    if dsts is not None:
        # horizontal — one port, many hosts
        port = ports[0]
        targets = [(host, port) for host in dsts]
    else:
        # vertical — one host, many ports
        targets = [(dst, p) for p in ports]

    # Single row template shared by both shapes; row i is offset i * spacing.
    rows = [
        {
            "src": src,
            "dst": target_dst,
            "port": target_port,
            "proto": proto,
            "ts": base_ts + i * spacing,
            "conn_state": conn_state,
        }
        for i, (target_dst, target_port) in enumerate(targets)
    ]
    return pd.DataFrame(rows)
124
+
125
+
126
+ # ── Tests ─────────────────────────────────────────────────────────────────────
127
+
128
class ScanDetectorTests(unittest.TestCase):
    """Focused checks for the scan detector.

    Covers detector discovery, empty and malformed input, the vertical,
    horizontal, block and slow scan paths through ``run``, Finding
    construction via ``_make_finding``, and rendering through ``TextHandler``.
    """

    # ── Shared helpers ────────────────────────────────────────────────────────

    @staticmethod
    def _of_type(findings: list, scan_type: str) -> list:
        """Return the findings whose evidence carries the given scan_type."""
        return [f for f in findings if f.evidence["scan_type"] == scan_type]

    @staticmethod
    def _render(findings: list, record_counts: dict, verbose_level: int) -> str:
        """Run findings through a TextHandler and return everything it wrote."""
        summary = RunSummary(
            data_window=_WINDOW,
            record_counts=record_counts,
            data_size_bytes=0,
            detectors_run=["scan"],
            detectors_skipped={},
        )
        stream = io.StringIO()
        handler = TextHandler(stream=stream, verbose_level=verbose_level)
        handler.begin(summary)
        handler.write(findings)
        handler.end()
        return stream.getvalue()

    # ── Discovery ─────────────────────────────────────────────────────────────

    def test_scan_is_available_in_discover_detectors(self) -> None:
        detectors = discover_detectors()
        self.assertIn("scan", detectors)
        self.assertEqual(getattr(detectors["scan"], "STATUS", None), "available")

    def test_detector_name_constant(self) -> None:
        self.assertEqual(DETECTOR_NAME, "scan")
        self.assertEqual(STATUS, "available")

    # ── Empty / no input ──────────────────────────────────────────────────────

    def test_run_returns_empty_on_no_logs(self) -> None:
        self.assertEqual(run(_ctx(None)), [])

    def test_run_returns_empty_on_empty_dataframe(self) -> None:
        empty = pd.DataFrame(columns=["src", "dst", "port", "proto", "ts", "conn_state"])
        self.assertEqual(run(_ctx(empty)), [])

    def test_icmp_rows_excluded(self) -> None:
        df = _conn_df("192.0.2.1", "198.51.100.1",
                      ports=list(range(1, 30)), dsts=None,
                      conn_state="S0", proto="icmp")
        self.assertEqual(run(_ctx(df)), [])

    def test_malformed_but_loadable_rows_do_not_crash(self) -> None:
        """Missing nullable-ish fields and string ports should be normalized safely."""
        # No proto / conn_state columns, a None dst on the first row, and
        # ports supplied as strings.
        rows = [
            {
                "src": "192.0.2.1",
                "dst": None if i == 0 else "198.51.100.1",
                "port": str(port),
                "ts": 1_748_563_200.0 + i,
            }
            for i, port in enumerate(range(1, 25))
        ]
        findings = run(_ctx(pd.DataFrame(rows), {"vertical_threshold": 15}))

        self.assertIsInstance(findings, list)

    def test_missing_port_column_returns_no_findings(self) -> None:
        rows = [
            {
                "src": "192.0.2.1",
                "dst": "198.51.100.1",
                "proto": "tcp",
                "ts": 1_748_563_200.0 + i,
                "conn_state": "S0",
            }
            for i in range(20)
        ]
        self.assertEqual(run(_ctx(pd.DataFrame(rows))), [])

    def test_missing_ts_column_returns_no_findings(self) -> None:
        rows = [
            {
                "src": "192.0.2.1",
                "dst": "198.51.100.1",
                "port": port,
                "proto": "tcp",
                "conn_state": "S0",
            }
            for port in range(1, 25)
        ]
        self.assertEqual(run(_ctx(pd.DataFrame(rows))), [])

    # ── Vertical scan ─────────────────────────────────────────────────────────

    def test_vertical_scan_detected(self) -> None:
        ports = list(range(1, 26))  # 25 distinct ports > threshold 15
        df = _conn_df(
            src="192.0.2.1",
            dst="198.51.100.1",
            ports=ports,
            dsts=None,
            conn_state="S0",
            spacing=30.0,
        )
        findings = run(_ctx(df, {"vertical_threshold": 15}))
        scan_findings = self._of_type(findings, "vertical")
        # assertGreaterEqual reports the actual count when the check fails.
        self.assertGreaterEqual(
            len(scan_findings), 1, "Expected at least one vertical finding"
        )
        self.assertIn(scan_findings[0].severity, (Severity.HIGH, Severity.MEDIUM))

    def test_vertical_below_threshold_not_flagged(self) -> None:
        ports = list(range(1, 10))  # 9 ports < threshold 15
        df = _conn_df("192.0.2.1", "198.51.100.1",
                      ports=ports, dsts=None, conn_state="S0")
        findings = run(_ctx(df, {"vertical_threshold": 15}))
        self.assertEqual(self._of_type(findings, "vertical"), [])

    # ── Horizontal scan ───────────────────────────────────────────────────────

    def test_horizontal_scan_detected(self) -> None:
        dsts = [f"198.51.100.{i}" for i in range(1, 31)]  # 30 distinct hosts
        df = _conn_df(
            src="192.0.2.1",
            dst=None,
            ports=[22],
            dsts=dsts,
            conn_state="REJ",
            spacing=10.0,
        )
        findings = run(_ctx(df, {"horizontal_threshold": 15}))
        horiz = self._of_type(findings, "horizontal")
        self.assertGreaterEqual(
            len(horiz), 1, "Expected at least one horizontal finding"
        )
        self.assertEqual(horiz[0].evidence["port"], 22)

    # ── Block scan ────────────────────────────────────────────────────────────

    def test_block_scan_detected(self) -> None:
        # 25 ports × 25 hosts, all S0 → scan_state_ratio = 1.0
        rows = []
        base_ts = 1_748_563_200.0
        for i, port in enumerate(range(1, 26)):
            for j, host_n in enumerate(range(1, 26)):
                rows.append({
                    "src": "192.0.2.1",
                    "dst": f"198.51.100.{host_n}",
                    "port": port,
                    "proto": "tcp",
                    # One second per connection, ports outer / hosts inner.
                    "ts": base_ts + (i * 25 + j),
                    "conn_state": "S0",
                })
        df = pd.DataFrame(rows)
        findings = run(_ctx(df, {
            "block_port_threshold": 20,
            "block_host_threshold": 20,
            "block_state_min": 0.30,
        }))
        block = self._of_type(findings, "block")
        self.assertGreaterEqual(len(block), 1, "Expected at least one block finding")

    # ── Slow scan ─────────────────────────────────────────────────────────────

    def test_slow_scan_detected(self) -> None:
        # 10 ports spread across 5 time buckets (two per bucket), all S0
        bucket_secs = 3600.0
        base_ts = 1_748_563_200.0
        rows = []
        for bucket in range(5):
            for port in range(bucket * 2 + 1, bucket * 2 + 3):  # 2 ports per bucket
                rows.append({
                    "src": "192.0.2.1",
                    "dst": "198.51.100.1",
                    "port": port,
                    "proto": "tcp",
                    "ts": base_ts + bucket * bucket_secs + 60,
                    "conn_state": "S0",
                })
        df = pd.DataFrame(rows)
        findings = run(_ctx(df, {
            "slow_min_ports": 8,
            "slow_min_buckets": 4,
            "slow_state_min": 0.30,
            "window_secs": int(bucket_secs),
            "vertical_threshold": 15,
        }))
        slow = self._of_type(findings, "slow")
        self.assertGreaterEqual(len(slow), 1, "Expected at least one slow scan finding")

    # ── Finding construction ──────────────────────────────────────────────────

    def test_make_finding_vertical_title(self) -> None:
        # Title is flow/entity only — metrics belong in evidence, not title.
        row = _base_row("vertical")
        f = _make_finding(row, _WINDOW)
        self.assertIn("192.0.2.1", f.title)
        self.assertIn("198.51.100.1", f.title)
        self.assertNotIn("ports", f.title)
        self.assertEqual(f.detector, "scan")
        self.assertEqual(f.severity, Severity.HIGH)
        # Metrics are in evidence
        self.assertIn("distinct_ports", f.evidence)

    def test_make_finding_horizontal_title(self) -> None:
        row = _base_row("horizontal")
        f = _make_finding(row, _WINDOW)
        self.assertIn("*:22", f.title)
        self.assertNotIn("hosts", f.title)
        self.assertIn("distinct_hosts", f.evidence)

    def test_make_finding_block_title(self) -> None:
        row = _base_row("block")
        f = _make_finding(row, _WINDOW)
        self.assertIn("→ *", f.title)
        self.assertNotIn("×", f.title)
        self.assertIn("distinct_ports", f.evidence)
        self.assertIn("distinct_hosts", f.evidence)

    def test_make_finding_slow_title(self) -> None:
        row = _base_row("slow")
        f = _make_finding(row, _WINDOW)
        self.assertIn("slow scan", f.title)
        self.assertNotIn("windows", f.title)
        # Slow evidence includes temporal fields
        self.assertIn("temporal_spread_score", f.evidence)
        self.assertIn("active_buckets", f.evidence)

    def test_make_finding_evidence_fields_present(self) -> None:
        for scan_type in ("vertical", "horizontal", "block"):
            row = _base_row(scan_type)
            f = _make_finding(row, _WINDOW)
            for field in ("scan_type", "src", "scan_state_ratio", "pattern_tag"):
                # subTest labels each (scan_type, field) combination separately.
                with self.subTest(scan_type=scan_type, field=field):
                    self.assertIn(
                        field, f.evidence, f"Missing {field} in {scan_type} evidence"
                    )

    # ── Text renderer ─────────────────────────────────────────────────────────

    def test_text_renderer_scan_group(self) -> None:
        """Render a mixed set of scan findings; verify key tokens and no exceptions."""
        findings = [
            _make_finding(_base_row("vertical"), _WINDOW),
            _make_finding(_base_row("horizontal"), _WINDOW),
        ]
        output = self._render(
            findings, record_counts={"conn*.log*": 1000}, verbose_level=0
        )

        self.assertIn("ratio=", output)
        self.assertIn("ports", output)
        self.assertIn("hosts", output)
        self.assertIn("vertical", output)
        self.assertIn("horizontal", output)

    def test_text_renderer_verbose_scan_group(self) -> None:
        """Verbose mode emits description, evidence, and next steps."""
        finding = _make_finding(_base_row("vertical"), _WINDOW)
        output = self._render([finding], record_counts={}, verbose_level=1)

        self.assertIn("evidence:", output)
        self.assertIn("next steps:", output)
        self.assertIn("data window:", output)
382
+
383
+
384
+ # ── Zone-label seam tests ─────────────────────────────────────────────────────
385
+ #
386
+ # Sample IPs use RFC 5737 documentation space throughout. Internal/external is
387
+ # defined by a test-specific home_net (e.g. 192.0.2.0/24) so 192.0.2.x reads
388
+ # as internal and 198.51.100.x / 203.0.113.x read as external — no RFC1918
389
+ # addresses appear in any traffic fixture.
390
+
391
+
392
def test_zone_of_returns_internal_for_home_net_ip() -> None:
    """An address inside the supplied home_net range is labelled ``internal``."""
    home_net = ["192.0.2.0/24"]
    zone = _zone_of("192.0.2.10", home_net)
    assert zone == "internal"
394
+
395
+
396
def test_zone_of_returns_external_for_outside_ip() -> None:
    """An address outside the supplied home_net range is labelled ``external``."""
    home_net = ["192.0.2.0/24"]
    zone = _zone_of("198.51.100.10", home_net)
    assert zone == "external"
398
+
399
+
400
def test_zone_of_returns_external_for_unparseable_ip() -> None:
    """A string that is not an IP address is expected to be labelled ``external``."""
    home_net = ["192.0.2.0/24"]
    zone = _zone_of("not-an-ip", home_net)
    assert zone == "external"
402
+
403
+
404
def test_classify_direction_produces_four_byte_identical_strings() -> None:
    """Two-zone case MUST yield exactly the legacy four direction strings.

    Walks every internal/external pairing against a doc-space home_net and
    checks both zone labels plus the rendered direction string, so evidence
    output stays byte-identical to the pre-refactor reports.
    """
    home_net = ["192.0.2.0/24"]
    inside_a, inside_b = "192.0.2.5", "192.0.2.6"
    outside_a, outside_b = "198.51.100.5", "198.51.100.6"

    # (src, dst, expected (src_zone, dst_zone, rendered))
    expectations = (
        (inside_a, inside_b, ("internal", "internal", "internal→internal")),
        (inside_a, outside_b, ("internal", "external", "internal→external")),
        (outside_a, inside_b, ("external", "internal", "external→internal")),
        (outside_a, "203.0.113.6", ("external", "external", "external→external")),
    )
    for src, dst, expected in expectations:
        exp_src_zone, exp_dst_zone, exp_rendered = expected
        result = _classify_direction(src, dst, home_net)
        src_zone, dst_zone, rendered = result
        assert src_zone == exp_src_zone, (src, dst, src_zone)
        assert dst_zone == exp_dst_zone, (src, dst, dst_zone)
        assert rendered == exp_rendered, (src, dst, rendered)
423
+
424
+
425
def test_run_falls_back_to_default_home_net_when_context_empty() -> None:
    """Empty context.home_net activates scan's standalone-callable fallback.

    The fixture is doc-space only (no RFC1918 addresses). With the RFC1918
    fallback in effect every flow should classify as ``external→external``,
    and seeing that value in evidence shows the fallback path ran and the
    ``direction`` column was populated — without putting private addresses
    in the fixture. (A future test that needs an internal-side result through
    the empty-context path should monkeypatch _DEFAULT_HOME_NET to a
    doc-space range instead.)
    """
    scanner = "198.51.100.10"
    targets = ["203.0.113." + str(octet) for octet in range(10, 35)]
    frame = _conn_df(scanner, None, [22], targets, conn_state="S0")

    # Built directly rather than via _ctx so home_net is the empty list.
    empty_home_ctx = DetectorContext(
        logs={"conn*.log*": frame},
        config={},
        allowlist=None,
        data_window=_WINDOW,
        home_net=[],
    )

    findings = run(empty_home_ctx)
    assert findings, "fallback should keep run() functional with empty context.home_net"
    for finding in findings:
        direction = finding.evidence.get("direction")
        assert direction == "external→external", (
            "RFC1918 fallback should classify doc-space src→doc-space dst as external→external"
        )
452
+
453
+
454
+ if __name__ == "__main__":
455
+ unittest.main()
@@ -0,0 +1,50 @@
1
+ """Config search path — ``./loghunter.conf`` is GONE.
2
+
3
+ Clean-break: no project-local config; user must use --config or one of the
4
+ two remaining tiers (~/.loghunter/config.toml, /etc/loghunter/config.toml).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ import pytest
12
+
13
+ from loghunter.common import config as cfg
14
+
15
+
16
def test_search_paths_does_not_include_project_local_loghunter_conf() -> None:
    """SEARCH_PATHS must not carry ./loghunter.conf — the clean-break drop."""
    paths_str = list(map(str, cfg.SEARCH_PATHS))
    # Any entry ending in the old project-local filename is a regression.
    offenders = [p for p in paths_str if p.endswith("loghunter.conf")]
    assert not offenders, f"./loghunter.conf is back in SEARCH_PATHS: {paths_str}"
23
+
24
+
25
def test_search_paths_carries_user_and_system_only() -> None:
    """SEARCH_PATHS holds the user-dir and /etc tiers and nothing else."""
    rendered = list(map(str, cfg.SEARCH_PATHS))

    # User dir (expanded) and /etc both present.
    user_tier_hits = [p for p in rendered if p.endswith(".loghunter/config.toml")]
    assert user_tier_hits
    assert "/etc/loghunter/config.toml" in rendered

    # Exactly two tiers — keep the precedence list tight.
    assert len(rendered) == 2
32
+
33
+
34
def test_stray_loghunter_conf_in_cwd_is_not_picked_up(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A ./loghunter.conf sitting in the CWD must be ignored by cfg.load().

    Writes a sentinel ``zeek_dir`` into a stray project-local file, then
    loads config with no explicit file and asserts the sentinel never shows
    up in the result.
    """
    sentinel_toml = '[loghunter]\nzeek_dir = "/should-never-load"\n'

    monkeypatch.chdir(tmp_path)
    (tmp_path / "loghunter.conf").write_text(sentinel_toml, encoding="utf-8")

    # Point the two remaining search paths at nonexistent locations so cfg.load
    # falls back to _DEFAULTS rather than picking up the stray.
    missing_tiers = [tmp_path / "no-user-config", tmp_path / "no-etc-config"]
    monkeypatch.setattr(cfg, "SEARCH_PATHS", missing_tiers)

    loaded = cfg.load(config_file=None)

    # Defaults shipped, not the sentinel.
    loaded_zeek_dir = loaded["loghunter"].get("zeek_dir")
    assert loaded_zeek_dir != "/should-never-load"