loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""Focused tests for the scan detector migration.
|
|
2
|
+
|
|
3
|
+
All IP addresses use RFC 5737 documentation space:
|
|
4
|
+
192.0.2.x, 198.51.100.x, 203.0.113.x
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
import unittest
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from loghunter.common.finding import DetectorContext, Finding, RunSummary, Severity
|
|
18
|
+
from loghunter.detectors.scan import (
|
|
19
|
+
DETECTOR_NAME,
|
|
20
|
+
STATUS,
|
|
21
|
+
_classify_direction,
|
|
22
|
+
_make_finding,
|
|
23
|
+
_zone_of,
|
|
24
|
+
run,
|
|
25
|
+
)
|
|
26
|
+
from loghunter.outputs.text import TextHandler
|
|
27
|
+
from loghunter.runner import discover_detectors
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────

# Fixed, timezone-aware reference instant. Every helper-built context and
# RunSummary in this module uses the degenerate span (_NOW, _NOW) as its
# data window.
# NOTE(review): the traffic fixtures in this module default to
# base_ts=1_748_563_200.0, i.e. 2025-05-30T00:00:00Z — one year earlier than
# _NOW. Confirm the detector never filters rows by data_window.
_NOW = datetime(2026, 5, 30, tzinfo=timezone.utc)
_WINDOW = (_NOW, _NOW)

# Matches the shipped [loghunter].home_net default. Used here only so a
# helper-built context behaves identically to a runner-built context for the
# detector's RFC 5737 doc-space traffic fixtures — those addresses are outside
# RFC1918 and read as external→external, matching pre-refactor behavior.
_RFC1918_HOME_NET = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _ctx(
    df: pd.DataFrame | None,
    cfg: dict | None = None,
    home_net: list[str] | None = None,
) -> DetectorContext:
    """Assemble the DetectorContext handed to ``run`` in the scan tests.

    Args:
        df: Connection frame registered under the ``"conn*.log*"`` key, or
            ``None`` to build a context with no logs at all.
        cfg: Detector configuration; a falsy value becomes ``{}``.
        home_net: ``None`` selects the RFC1918 default (``_RFC1918_HOME_NET``);
            any other value — including an intentionally empty list — is
            passed through untouched so a test can supply its own zone layout
            or exercise the scan module's fallback.

    Returns:
        A context with ``allowlist=None`` and ``data_window=_WINDOW``.
    """
    effective_home_net = _RFC1918_HOME_NET if home_net is None else home_net
    conn_logs = {} if df is None else {"conn*.log*": df}
    return DetectorContext(
        logs=conn_logs,
        config=cfg or {},
        allowlist=None,
        data_window=_WINDOW,
        home_net=effective_home_net,
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _base_row(scan_type: str, src: str = "192.0.2.1") -> dict:
    """Return a minimal classified-result dict to feed ``_make_finding``.

    Fields that only apply to one scan type are ``None`` for the others:
    ``dst`` is set for ``"vertical"``, ``port`` for ``"horizontal"``, and the
    three temporal fields for ``"slow"``. Key order is kept stable.
    """
    is_vertical = scan_type == "vertical"
    is_horizontal = scan_type == "horizontal"
    is_slow = scan_type == "slow"
    return {
        "scan_type": scan_type,
        "src": src,
        "dst": "198.51.100.1" if is_vertical else None,
        "port": 22 if is_horizontal else None,
        "distinct_ports": 20,
        "distinct_hosts": 20,
        "total_conns": 100,
        "scan_state_ratio": 0.70,
        "top_states": "S0, REJ",
        "direction": "internal→external",
        "pattern_tag": "confirmed_scan",
        "pattern_notes": "Strong scanner signature.",
        "window_start": "2026-05-30 00:00:00",
        "window_secs": 3600,
        "active_buckets": 5 if is_slow else None,
        "temporal_spread_score": 2.5 if is_slow else None,
        "max_ports_in_bucket": 4 if is_slow else None,
        "_severity": Severity.HIGH,
    }
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _conn_df(
    src: str,
    dst: str | None,
    ports: list[int],
    dsts: list[str] | None,
    conn_state: str,
    proto: str = "tcp",
    base_ts: float = 1_748_563_200.0,
    spacing: float = 60.0,
) -> pd.DataFrame:
    """Build a canonical-schema connection DataFrame for detector input.

    Two layouts are supported, selected by ``dsts``:

    * ``dsts`` given — horizontal: one row per destination host, all on
      ``ports[0]`` (``dst`` is ignored).
    * ``dsts`` is ``None`` — vertical: one row per port, all to ``dst``.

    Args:
        src: Source address shared by every row.
        dst: Destination address for the vertical layout.
        ports: Ports to probe; only the first is used in the horizontal layout.
        dsts: Destination addresses for the horizontal layout, or ``None``.
        conn_state: Connection state written to every row.
        proto: Protocol written to every row.
        base_ts: Timestamp of the first row.
        spacing: Seconds added per subsequent row.

    Returns:
        A frame with columns ``src, dst, port, proto, ts, conn_state`` (or an
        empty frame when there is nothing to emit).

    Raises:
        IndexError: horizontal layout requested with an empty ``ports``.
    """
    if dsts is not None:
        # horizontal — one port, many hosts
        port = ports[0]
        targets = [(host, port) for host in dsts]
    else:
        # vertical — one host, many ports
        targets = [(dst, port) for port in ports]

    # Both layouts share the same row shape; only the (dst, port) pairing differs.
    return pd.DataFrame([
        {
            "src": src,
            "dst": target_host,
            "port": target_port,
            "proto": proto,
            "ts": base_ts + i * spacing,
            "conn_state": conn_state,
        }
        for i, (target_host, target_port) in enumerate(targets)
    ])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# ── Tests ─────────────────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
class ScanDetectorTests(unittest.TestCase):
    """Focused checks for the scan detector: discovery, input handling,
    the four scan types, finding construction and text rendering."""

    @staticmethod
    def _of_type(findings: list, scan_type: str) -> list:
        """Return the findings whose evidence ``scan_type`` equals *scan_type*."""
        return [f for f in findings if f.evidence["scan_type"] == scan_type]

    # ── Discovery ─────────────────────────────────────────────────────────────

    def test_scan_is_available_in_discover_detectors(self) -> None:
        """The scan module is discoverable and advertises STATUS 'available'."""
        registry = discover_detectors()
        self.assertIn("scan", registry)
        self.assertEqual(getattr(registry["scan"], "STATUS", None), "available")

    def test_detector_name_constant(self) -> None:
        """Module-level identity constants carry the expected values."""
        self.assertEqual(DETECTOR_NAME, "scan")
        self.assertEqual(STATUS, "available")

    # ── Empty / no input ──────────────────────────────────────────────────────

    def test_run_returns_empty_on_no_logs(self) -> None:
        """A context with no logs yields no findings."""
        self.assertEqual(run(_ctx(None)), [])

    def test_run_returns_empty_on_empty_dataframe(self) -> None:
        """A zero-row frame with the canonical columns yields no findings."""
        empty = pd.DataFrame(columns=["src", "dst", "port", "proto", "ts", "conn_state"])
        self.assertEqual(run(_ctx(empty)), [])

    def test_icmp_rows_excluded(self) -> None:
        """A vertical-looking burst of ICMP rows produces no findings."""
        df = _conn_df(
            "192.0.2.1",
            "198.51.100.1",
            ports=list(range(1, 30)),
            dsts=None,
            conn_state="S0",
            proto="icmp",
        )
        self.assertEqual(run(_ctx(df)), [])

    def test_malformed_but_loadable_rows_do_not_crash(self) -> None:
        """Missing nullable-ish fields and string ports should be normalized safely."""
        # No proto/conn_state columns, string-typed ports, and a null dst on
        # the first row — the detector only has to survive and return a list.
        rows = [
            {
                "src": "192.0.2.1",
                "dst": None if i == 0 else "198.51.100.1",
                "port": str(port),
                "ts": 1_748_563_200.0 + i,
            }
            for i, port in enumerate(range(1, 25))
        ]
        findings = run(_ctx(pd.DataFrame(rows), {"vertical_threshold": 15}))

        self.assertIsInstance(findings, list)

    def test_missing_port_column_returns_no_findings(self) -> None:
        """Input without a ``port`` column yields no findings."""
        rows = [
            {
                "src": "192.0.2.1",
                "dst": "198.51.100.1",
                "proto": "tcp",
                "ts": 1_748_563_200.0 + i,
                "conn_state": "S0",
            }
            for i in range(20)
        ]
        self.assertEqual(run(_ctx(pd.DataFrame(rows))), [])

    def test_missing_ts_column_returns_no_findings(self) -> None:
        """Input without a ``ts`` column yields no findings."""
        rows = [
            {
                "src": "192.0.2.1",
                "dst": "198.51.100.1",
                "port": port,
                "proto": "tcp",
                "conn_state": "S0",
            }
            for port in range(1, 25)
        ]
        self.assertEqual(run(_ctx(pd.DataFrame(rows))), [])

    # ── Vertical scan ─────────────────────────────────────────────────────────

    def test_vertical_scan_detected(self) -> None:
        """One source hitting 25 ports on one host is flagged as vertical."""
        ports = list(range(1, 26))  # 25 distinct ports > threshold 15
        df = _conn_df(
            src="192.0.2.1",
            dst="198.51.100.1",
            ports=ports,
            dsts=None,
            conn_state="S0",
            spacing=30.0,
        )
        findings = run(_ctx(df, {"vertical_threshold": 15}))
        vertical = self._of_type(findings, "vertical")
        self.assertGreaterEqual(len(vertical), 1, "Expected at least one vertical finding")
        self.assertIn(vertical[0].severity, (Severity.HIGH, Severity.MEDIUM))

    def test_vertical_below_threshold_not_flagged(self) -> None:
        """Nine ports against a threshold of 15 produce no vertical finding."""
        ports = list(range(1, 10))  # 9 ports < threshold 15
        df = _conn_df("192.0.2.1", "198.51.100.1",
                      ports=ports, dsts=None, conn_state="S0")
        findings = run(_ctx(df, {"vertical_threshold": 15}))
        self.assertEqual(self._of_type(findings, "vertical"), [])

    # ── Horizontal scan ───────────────────────────────────────────────────────

    def test_horizontal_scan_detected(self) -> None:
        """One source hitting port 22 on 30 hosts is flagged as horizontal."""
        dsts = [f"198.51.100.{i}" for i in range(1, 31)]  # 30 distinct hosts
        df = _conn_df(
            src="192.0.2.1",
            dst=None,
            ports=[22],
            dsts=dsts,
            conn_state="REJ",
            spacing=10.0,
        )
        findings = run(_ctx(df, {"horizontal_threshold": 15}))
        horizontal = self._of_type(findings, "horizontal")
        self.assertGreaterEqual(len(horizontal), 1, "Expected at least one horizontal finding")
        self.assertEqual(horizontal[0].evidence["port"], 22)

    # ── Block scan ────────────────────────────────────────────────────────────

    def test_block_scan_detected(self) -> None:
        """A 25×25 port/host grid of S0 connections is flagged as a block scan."""
        # 25 ports × 25 hosts, all S0 → scan_state_ratio = 1.0
        base_ts = 1_748_563_200.0
        rows = [
            {
                "src": "192.0.2.1",
                "dst": f"198.51.100.{host_n}",
                "port": port,
                "proto": "tcp",
                "ts": base_ts + (i * 25 + j),  # one connection per second
                "conn_state": "S0",
            }
            for i, port in enumerate(range(1, 26))
            for j, host_n in enumerate(range(1, 26))
        ]
        findings = run(_ctx(pd.DataFrame(rows), {
            "block_port_threshold": 20,
            "block_host_threshold": 20,
            "block_state_min": 0.30,
        }))
        block = self._of_type(findings, "block")
        self.assertGreaterEqual(len(block), 1, "Expected at least one block finding")

    # ── Slow scan ─────────────────────────────────────────────────────────────

    def test_slow_scan_detected(self) -> None:
        """Ports trickled across several hourly buckets are flagged as slow."""
        # 10 ports spread across 5 time buckets (two per bucket), all S0
        bucket_secs = 3600.0
        base_ts = 1_748_563_200.0
        rows = [
            {
                "src": "192.0.2.1",
                "dst": "198.51.100.1",
                "port": port,
                "proto": "tcp",
                "ts": base_ts + bucket * bucket_secs + 60,
                "conn_state": "S0",
            }
            for bucket in range(5)
            for port in range(bucket * 2 + 1, bucket * 2 + 3)  # 2 ports per bucket
        ]
        findings = run(_ctx(pd.DataFrame(rows), {
            "slow_min_ports": 8,
            "slow_min_buckets": 4,
            "slow_state_min": 0.30,
            "window_secs": int(bucket_secs),
            "vertical_threshold": 15,
        }))
        slow = self._of_type(findings, "slow")
        self.assertGreaterEqual(len(slow), 1, "Expected at least one slow scan finding")

    # ── Finding construction ──────────────────────────────────────────────────

    def test_make_finding_vertical_title(self) -> None:
        """Vertical title names src and dst; metrics live in evidence."""
        # Title is flow/entity only — metrics belong in evidence, not title.
        finding = _make_finding(_base_row("vertical"), _WINDOW)
        self.assertIn("192.0.2.1", finding.title)
        self.assertIn("198.51.100.1", finding.title)
        self.assertNotIn("ports", finding.title)
        self.assertEqual(finding.detector, "scan")
        self.assertEqual(finding.severity, Severity.HIGH)
        # Metrics are in evidence
        self.assertIn("distinct_ports", finding.evidence)

    def test_make_finding_horizontal_title(self) -> None:
        """Horizontal title shows the wildcard-host/port form, not host counts."""
        finding = _make_finding(_base_row("horizontal"), _WINDOW)
        self.assertIn("*:22", finding.title)
        self.assertNotIn("hosts", finding.title)
        self.assertIn("distinct_hosts", finding.evidence)

    def test_make_finding_block_title(self) -> None:
        """Block title shows a wildcard destination and no ports×hosts metric."""
        finding = _make_finding(_base_row("block"), _WINDOW)
        self.assertIn("→ *", finding.title)
        self.assertNotIn("×", finding.title)
        self.assertIn("distinct_ports", finding.evidence)
        self.assertIn("distinct_hosts", finding.evidence)

    def test_make_finding_slow_title(self) -> None:
        """Slow title says 'slow scan'; temporal metrics live in evidence."""
        finding = _make_finding(_base_row("slow"), _WINDOW)
        self.assertIn("slow scan", finding.title)
        self.assertNotIn("windows", finding.title)
        # Slow evidence includes temporal fields
        self.assertIn("temporal_spread_score", finding.evidence)
        self.assertIn("active_buckets", finding.evidence)

    def test_make_finding_evidence_fields_present(self) -> None:
        """Core evidence fields are present for vertical, horizontal and block."""
        required = ("scan_type", "src", "scan_state_ratio", "pattern_tag")
        for scan_type in ("vertical", "horizontal", "block"):
            # subTest keeps one scan type's failure from masking the others.
            with self.subTest(scan_type=scan_type):
                finding = _make_finding(_base_row(scan_type), _WINDOW)
                for field in required:
                    self.assertIn(
                        field, finding.evidence,
                        f"Missing {field} in {scan_type} evidence",
                    )

    # ── Text renderer ─────────────────────────────────────────────────────────

    def test_text_renderer_scan_group(self) -> None:
        """Render a mixed set of scan findings; verify key tokens and no exceptions."""
        findings = [
            _make_finding(_base_row("vertical"), _WINDOW),
            _make_finding(_base_row("horizontal"), _WINDOW),
        ]
        summary = RunSummary(
            data_window=_WINDOW,
            record_counts={"conn*.log*": 1000},
            data_size_bytes=0,
            detectors_run=["scan"],
            detectors_skipped={},
        )
        stream = io.StringIO()
        handler = TextHandler(stream=stream, verbose_level=0)
        handler.begin(summary)
        handler.write(findings)
        handler.end()

        output = stream.getvalue()
        for token in ("ratio=", "ports", "hosts", "vertical", "horizontal"):
            self.assertIn(token, output)

    def test_text_renderer_verbose_scan_group(self) -> None:
        """Verbose mode emits description, evidence, and next steps."""
        finding = _make_finding(_base_row("vertical"), _WINDOW)
        summary = RunSummary(
            data_window=_WINDOW,
            record_counts={},
            data_size_bytes=0,
            detectors_run=["scan"],
            detectors_skipped={},
        )
        stream = io.StringIO()
        handler = TextHandler(stream=stream, verbose_level=1)
        handler.begin(summary)
        handler.write([finding])
        handler.end()

        output = stream.getvalue()
        for token in ("evidence:", "next steps:", "data window:"):
            self.assertIn(token, output)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# ── Zone-label seam tests ─────────────────────────────────────────────────────
|
|
385
|
+
#
|
|
386
|
+
# Sample IPs use RFC 5737 documentation space throughout. Internal/external is
|
|
387
|
+
# defined by a test-specific home_net (e.g. 192.0.2.0/24) so 192.0.2.x reads
|
|
388
|
+
# as internal and 198.51.100.x / 203.0.113.x read as external — no RFC1918
|
|
389
|
+
# addresses appear in any traffic fixture.
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def test_zone_of_returns_internal_for_home_net_ip() -> None:
    """An address inside the supplied home_net CIDR is labelled internal."""
    zone = _zone_of("192.0.2.10", ["192.0.2.0/24"])
    assert zone == "internal"
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def test_zone_of_returns_external_for_outside_ip() -> None:
    """An address outside the supplied home_net CIDR is labelled external."""
    zone = _zone_of("198.51.100.10", ["192.0.2.0/24"])
    assert zone == "external"
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def test_zone_of_returns_external_for_unparseable_ip() -> None:
    """A string that is not an IP address is labelled external."""
    zone = _zone_of("not-an-ip", ["192.0.2.0/24"])
    assert zone == "external"
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def test_classify_direction_produces_four_byte_identical_strings() -> None:
    """Two-zone case MUST yield exactly the legacy four direction strings.

    Checks every internal/external pairing against a doc-space home_net and
    asserts both zone labels and the rendered ``src→dst`` string, so evidence
    output stays byte-identical to the pre-refactor reports.
    """
    home_net = ["192.0.2.0/24"]
    inside_a, inside_b = "192.0.2.5", "192.0.2.6"
    outside_a, outside_b = "198.51.100.5", "198.51.100.6"
    # (src, dst, expected src_zone, expected dst_zone, expected rendered)
    cases = (
        (inside_a, inside_b, "internal", "internal", "internal→internal"),
        (inside_a, outside_b, "internal", "external", "internal→external"),
        (outside_a, inside_b, "external", "internal", "external→internal"),
        (outside_a, "203.0.113.6", "external", "external", "external→external"),
    )
    for src, dst, want_src_zone, want_dst_zone, want_rendered in cases:
        got_src_zone, got_dst_zone, got_rendered = _classify_direction(src, dst, home_net)
        assert got_src_zone == want_src_zone, (src, dst, got_src_zone)
        assert got_dst_zone == want_dst_zone, (src, dst, got_dst_zone)
        assert got_rendered == want_rendered, (src, dst, got_rendered)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def test_run_falls_back_to_default_home_net_when_context_empty() -> None:
    """Empty context.home_net activates scan's standalone-callable fallback.

    The fixture is a horizontal sweep between doc-space addresses only (no
    RFC1918). The context is built directly with ``home_net=[]``; the test
    then requires that ``run`` still returns findings and that each carries
    ``direction == "external→external"`` in its evidence — which is what an
    RFC1918 fallback yields for doc-space traffic. (A future test needing to
    prove an internal-side classification through the empty-context path
    should monkeypatch _DEFAULT_HOME_NET to a doc-space range instead.)
    """
    scanner = "198.51.100.10"
    targets = [f"203.0.113.{i}" for i in range(10, 35)]
    sweep = _conn_df(scanner, None, [22], targets, conn_state="S0")
    empty_home_net_ctx = DetectorContext(
        logs={"conn*.log*": sweep},
        config={},
        allowlist=None,
        data_window=_WINDOW,
        home_net=[],
    )

    findings = run(empty_home_net_ctx)

    assert findings, "fallback should keep run() functional with empty context.home_net"
    for finding in findings:
        direction = finding.evidence.get("direction")
        assert direction == "external→external", (
            "RFC1918 fallback should classify doc-space src→doc-space dst as external→external"
        )
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Config search path — ``./loghunter.conf`` is GONE.
|
|
2
|
+
|
|
3
|
+
Clean-break: no project-local config; user must use --config or one of the
|
|
4
|
+
two remaining tiers (~/.loghunter/config.toml, /etc/loghunter/config.toml).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
from loghunter.common import config as cfg
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_search_paths_does_not_include_project_local_loghunter_conf() -> None:
    """SEARCH_PATHS must not carry ./loghunter.conf — the clean-break drop."""
    paths_str = [str(p) for p in cfg.SEARCH_PATHS]
    # Any entry ending in "loghunter.conf" means the project-local tier is back.
    offenders = [p for p in paths_str if p.endswith("loghunter.conf")]
    assert not offenders, (
        f"./loghunter.conf is back in SEARCH_PATHS: {paths_str}"
    )
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_search_paths_carries_user_and_system_only() -> None:
    """SEARCH_PATHS holds exactly the user-level and /etc config tiers.

    Entries are normalised with ``Path.as_posix()`` before comparison so the
    forward-slash literals below match regardless of the platform's native
    path separator.
    """
    posix_paths = [Path(p).as_posix() for p in cfg.SEARCH_PATHS]
    # User dir (expanded) and /etc both present.
    assert any(p.endswith(".loghunter/config.toml") for p in posix_paths)
    assert "/etc/loghunter/config.toml" in posix_paths
    # Exactly two tiers — keep the precedence list tight.
    assert len(posix_paths) == 2
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_stray_loghunter_conf_in_cwd_is_not_picked_up(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A ./loghunter.conf in the working directory must be ignored.

    Writes a sentinel ``zeek_dir`` into ``loghunter.conf`` inside a temporary
    CWD, redirects both search tiers to nonexistent paths, and checks that
    ``cfg.load(config_file=None)`` does not surface the sentinel.
    """
    sentinel = "/should-never-load"
    monkeypatch.chdir(tmp_path)
    (tmp_path / "loghunter.conf").write_text(
        '[loghunter]\nzeek_dir = "/should-never-load"\n', encoding="utf-8",
    )
    # Point the two remaining search paths at nonexistent locations so cfg.load
    # falls back to _DEFAULTS rather than picking up the stray.
    missing_tiers = [tmp_path / "no-user-config", tmp_path / "no-etc-config"]
    monkeypatch.setattr(cfg, "SEARCH_PATHS", missing_tiers)

    loaded = cfg.load(config_file=None)

    # Defaults shipped, not the sentinel.
    assert loaded["loghunter"].get("zeek_dir") != sentinel
|