loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
"""Tests for the syslog anomaly detector.
|
|
2
|
+
|
|
3
|
+
All IP addresses use RFC 5737 documentation space (hostnames are generic placeholders):
|
|
4
|
+
192.0.2.x, 198.51.100.x, 203.0.113.x
|
|
5
|
+
No real network data appears anywhere in this file.
|
|
6
|
+
|
|
7
|
+
Strategy: run() tests monkeypatch _run_drain3 to inject pre-labelled template
|
|
8
|
+
columns, so test outcomes are independent of drain3 clustering behaviour.
|
|
9
|
+
The drain3 function itself has its own smoke test.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import unittest
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from unittest.mock import patch
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from loghunter.common.finding import DetectorContext, Finding, Severity
|
|
21
|
+
from loghunter.detectors.syslog import (
|
|
22
|
+
DETECTOR_NAME,
|
|
23
|
+
STATUS,
|
|
24
|
+
_run_drain3,
|
|
25
|
+
run,
|
|
26
|
+
)
|
|
27
|
+
from loghunter.outputs.text import Section as _Section
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _flat_section(findings: list[Finding]) -> list[_Section]:
    """Bundle *findings* into one untitled section (the W2 renderer contract shape).

    Returns a one-element list whose section holds a copy of *findings*
    together with their count.
    """
    members = list(findings)
    only_section = _Section(None, members, len(findings))
    return [only_section]
|
|
33
|
+
|
|
34
|
+
# Reference instant for the fixtures: used as ts_generated on hand-built
# findings and, via the degenerate pair _WINDOW, as every context's data_window.
_NOW = datetime(2026, 5, 30, tzinfo=timezone.utc)
_WINDOW = (_NOW, _NOW)

# Fixed unix epoch used across fixtures (2025-05-30 00:00:00 UTC).
# NOTE: this is exactly one year earlier than _NOW above; the row fixtures only
# add offsets to it.
_BASE_TS = 1_748_563_200.0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
def _make_df(rows: list[dict]) -> pd.DataFrame:
|
|
44
|
+
return pd.DataFrame(rows, columns=["ts", "host", "raw", "message"])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _ctx(df: pd.DataFrame, cfg: dict | None = None) -> DetectorContext:
    """Build a detector context that carries *df* under the ``*.log*`` pattern key.

    A missing or empty *cfg* becomes an empty config dict; no allowlist is
    supplied and the shared ``_WINDOW`` is used as the data window.
    """
    detector_config = {} if not cfg else cfg
    frames = {"*.log*": df}
    return DetectorContext(
        logs=frames,
        config=detector_config,
        allowlist=None,
        data_window=_WINDOW,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _common_row(i: int, ts_offset: float = 0.0) -> dict:
    """Build row *i* of the high-frequency common template.

    Rows are spaced 60 seconds apart starting at ``_BASE_TS + ts_offset``; the
    index also fills the minute field of the raw line.
    """
    timestamp = _BASE_TS + ts_offset + i * 60.0
    raw_line = f"<30>May 30 12:{i:02d}:00 192.0.2.1 sshd[*]: Accepted publickey for admin"
    return dict(
        ts=timestamp,
        host="192.0.2.1",
        raw=raw_line,
        message="sshd[*]: Accepted publickey for admin",
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _patched_drain3(template_id_col: list[int], template_str_col: list[str]):
|
|
67
|
+
"""Return a mock for _run_drain3 that injects pre-set template columns."""
|
|
68
|
+
def _mock(df: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
|
|
69
|
+
df = df.copy()
|
|
70
|
+
df["template_id"] = template_id_col
|
|
71
|
+
df["template_str"] = template_str_col
|
|
72
|
+
return df
|
|
73
|
+
return _mock
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ── Tests ─────────────────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
class SyslogDetectorTests(unittest.TestCase):
    """Tests for the syslog detector's ``run`` entry point and its text rendering.

    ``run`` tests replace ``_run_drain3`` with a fake that injects fixed
    template labels, so their outcomes do not depend on drain3 clustering.
    ``_run_drain3`` itself is covered by a separate smoke test.
    """

    # ── Shared fixtures ───────────────────────────────────────────────────────

    def _run_with_rare_rows(
        self,
        rare_rows: list[dict],
        rare_ids: list[int],
        rare_templates: list[str],
    ) -> list[Finding]:
        """Run the detector over 50 common rows followed by *rare_rows*.

        ``_run_drain3`` is patched so the common rows carry template id 1 and
        each rare row carries the matching entry of *rare_ids* and
        *rare_templates*. A fresh config dict is built on every call.
        """
        rows = [_common_row(i) for i in range(50)]
        rows.extend(rare_rows)
        df = _make_df(rows)

        ids = [1] * 50 + list(rare_ids)
        strs = ["sshd[*]: Accepted publickey for admin"] * 50 + list(rare_templates)

        with patch("loghunter.detectors.syslog._run_drain3", _patched_drain3(ids, strs)):
            return run(_ctx(df, {"max_count": 1, "rarity_pct": 10}))

    @staticmethod
    def _failed_password_finding(next_steps: list[str]) -> Finding:
        """Build the MEDIUM rare-template finding shared by the renderer tests."""
        return Finding(
            detector="syslog",
            severity=Severity.MEDIUM,
            title="May 30 14:23:01 router sshd[100]: Failed password for root",
            description="Rare template",
            evidence={
                "host": "router", "template_id": 47,
                "template_str": "sshd[*]: Failed password for <*>",
                "count": 1, "threshold": 3,
            },
            next_steps=next_steps,
            ts_generated=_NOW,
            data_window=_WINDOW,
        )

    # ── Constants ─────────────────────────────────────────────────────────────

    def test_status_and_name_constants(self) -> None:
        """The module advertises itself as the available ``syslog`` detector."""
        self.assertEqual(STATUS, "available")
        self.assertEqual(DETECTOR_NAME, "syslog")

    # ── Empty input ───────────────────────────────────────────────────────────

    def test_run_returns_empty_on_empty_dataframe(self) -> None:
        """An empty frame yields no findings."""
        empty = _make_df([])
        self.assertEqual(run(_ctx(empty)), [])

    def test_run_returns_empty_when_logs_key_absent(self) -> None:
        """A context with no log frames at all yields no findings."""
        ctx = DetectorContext(
            logs={},
            config={},
            allowlist=None,
            data_window=_WINDOW,
        )
        self.assertEqual(run(ctx), [])

    # ── Anomalous findings ────────────────────────────────────────────────────

    def test_run_returns_medium_findings_for_anomalous_rows(self) -> None:
        """One rare template (count=1) among 50 common rows → one MEDIUM finding."""
        # template_id=1 for 50 common rows, template_id=2 for the rare row
        findings = self._run_with_rare_rows(
            [{
                "ts": _BASE_TS + 3600.0,
                "host": "192.0.2.1",
                "raw": "<30>May 30 13:00:00 192.0.2.1 kernel: RARE_SENTINEL xyzzy_anomaly",
                "message": "kernel: RARE_SENTINEL xyzzy_anomaly",
            }],
            rare_ids=[2],
            rare_templates=["kernel: RARE_SENTINEL <*>"],
        )

        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].detector, "syslog")
        self.assertEqual(findings[0].severity, Severity.MEDIUM)

    def test_medium_finding_evidence_fields(self) -> None:
        """Evidence contains host, template_id (int), template_str, count, threshold."""
        findings = self._run_with_rare_rows(
            [{
                "ts": _BASE_TS + 3600.0,
                "host": "192.0.2.2",
                "raw": "<30>May 30 13:00:00 192.0.2.2 cron[*]: Evidence fields test",
                "message": "cron[*]: Evidence fields test",
            }],
            rare_ids=[99],
            rare_templates=["cron[*]: Evidence <*> test"],
        )

        self.assertEqual(len(findings), 1)
        ev = findings[0].evidence
        self.assertEqual(ev["host"], "192.0.2.2")
        self.assertIsInstance(ev["template_id"], int)
        self.assertIsInstance(ev["count"], int)
        self.assertIsInstance(ev["threshold"], int)
        self.assertIn("template_str", ev)

    # ── Reboot suppression ────────────────────────────────────────────────────

    def test_synthetic_reboot_finding_and_suppression(self) -> None:
        """Post-reboot anomalous events are suppressed; a synthetic INFO finding appears."""
        # Reboot signal row — matches is_reboot_signal() via "kernel: Linux version "
        reboot_row = {
            "ts": _BASE_TS,
            "host": "192.0.2.1",
            "raw": "<30>May 30 00:00:00 192.0.2.1 kernel: Linux version 5.15.0",
            "message": "kernel: Linux version 5.15.0",
        }

        # Anomalous post-reboot event, within 300s suppress window
        post_reboot_row = {
            "ts": _BASE_TS + 100.0,
            "host": "192.0.2.1",
            "raw": "<30>May 30 00:01:40 192.0.2.1 kernel: POST_REBOOT_ANOMALY_SENTINEL",
            "message": "kernel: POST_REBOOT_ANOMALY_SENTINEL",
        }

        # 50 common (id=1), reboot signal (id=2, count=1 → anomalous),
        # post-reboot (id=3, count=1 → anomalous)
        findings = self._run_with_rare_rows(
            [reboot_row, post_reboot_row],
            rare_ids=[2, 3],
            rare_templates=[
                "kernel: Linux version <*>",
                "kernel: POST_REBOOT_ANOMALY_SENTINEL",
            ],
        )

        info_findings = [f for f in findings if f.severity == Severity.INFO]
        medium_findings = [f for f in findings if f.severity == Severity.MEDIUM]

        # Synthetic reboot INFO finding present
        self.assertGreaterEqual(
            len(info_findings), 1, "Expected at least one INFO (reboot) finding",
        )
        reboot_titles = [f.title.lower() for f in info_findings]
        self.assertTrue(
            any("reboot" in t for t in reboot_titles),
            f"Expected 'reboot' in at least one INFO finding title, got: {reboot_titles}",
        )

        # Post-reboot anomalous event (template_id=3) is suppressed — must not appear as MEDIUM
        medium_template_ids = [f.evidence.get("template_id") for f in medium_findings]
        self.assertNotIn(
            3, medium_template_ids,
            "Post-reboot anomalous event (template_id=3) should be suppressed, not a MEDIUM finding",
        )

    # ── Text renderer ─────────────────────────────────────────────────────────

    def test_text_renderer_syslog_group(self) -> None:
        """Default syslog output keeps template/count details behind verbose."""
        from loghunter.outputs.text import TextHandler

        medium_f = self._failed_password_finding(next_steps=[])
        info_f = Finding(
            detector="syslog",
            severity=Severity.INFO,
            title="*** host1 rebooted ***",
            description="Reboot detected",
            evidence={
                "host": "host1",
                "reboot_ts": "2026-05-30T02:14:00+00:00",
                "suppressed_window_seconds": 300,
            },
            next_steps=[],
            ts_generated=_NOW,
            data_window=_WINDOW,
        )

        handler = TextHandler(verbose_level=0)
        rendered = handler._render_syslog_group(_flat_section([medium_f, info_f]))

        medium_out = rendered[0]
        self.assertNotIn("\n", medium_out)
        self.assertIn("May 30 14:23:01 router sshd[100]", medium_out)
        self.assertNotIn("count=", medium_out)
        self.assertNotIn("thresh=", medium_out)
        self.assertNotIn("template_id", medium_out)
        self.assertNotIn("template:", medium_out)

        # INFO: single line with "reboot @"
        info_out = rendered[1]
        self.assertNotIn("\n", info_out)
        self.assertIn("reboot @", info_out)

        # Pass the rendered line as the failure message so a prefix mismatch
        # shows what was actually produced.
        self.assertTrue(medium_out.startswith("[M] May 30"), medium_out)
        self.assertTrue(info_out.startswith("[I] host1 "), info_out)

    def test_text_renderer_syslog_verbose_shows_template_details(self) -> None:
        """Verbose syslog output includes rarity and drain3 template internals."""
        from loghunter.outputs.text import TextHandler

        medium_f = self._failed_password_finding(
            next_steps=["Review surrounding log context for this host"],
        )

        rendered = "\n".join(
            TextHandler(verbose_level=1)._render_syslog_group(_flat_section([medium_f]))
        )

        self.assertIn("May 30 14:23:01 router sshd[100]", rendered)
        # W3 curated subset for syslog (template): template_str, host, count, threshold.
        # template_id is internal — only surfaced under -vv (debug tail).
        self.assertIn("template_str: sshd[*]: Failed password for <*>", rendered)
        self.assertIn("count: 1", rendered)
        self.assertIn("threshold: 3", rendered)
        self.assertIn("Rare template", rendered)
        self.assertIn("next steps:", rendered)

    # ── drain3 smoke test ─────────────────────────────────────────────────────

    def test_run_drain3_adds_columns(self) -> None:
        """_run_drain3 must add non-null template_id and template_str columns."""
        rows = [
            {"ts": _BASE_TS + i, "host": "192.0.2.1",
             "raw": f"line {i}", "message": msg}
            for i, msg in enumerate([
                "sshd[*]: session opened for user admin",
                "sshd[*]: session opened for user root",
                "sshd[*]: session closed for user admin",
                "kernel: device eth0 entered promiscuous mode",
                "kernel: device eth1 entered promiscuous mode",
            ])
        ]
        df = _make_df(rows)
        result = _run_drain3(df, sim_thresh=0.5, depth=4, parametrize_numeric=True)

        self.assertIn("template_id", result.columns)
        self.assertIn("template_str", result.columns)
        self.assertFalse(result["template_id"].isna().any(), "template_id should have no nulls")
        self.assertFalse(result["template_str"].isna().any(), "template_str should have no nulls")
        self.assertEqual(len(result), len(df))
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ── Fidelity-aware v1: dns-shape REQUIRES_ONE_OF_OPTIONAL contract ──────────
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _zeek_syslog_row(i: int, ts_offset: float = 0.0) -> dict:
    """Build row *i* of a Zeek-shaped syslog frame, one row per minute.

    On top of ts/host/raw/message the row carries ``program`` and the extended
    ``facility``/``severity`` pair. The detector is intended to be
    source-blind, so the extended pair should ride along without being read.
    """
    row: dict = {}
    row["ts"] = _BASE_TS + ts_offset + i * 60.0
    row["host"] = "192.0.2.1"
    row["program"] = "sshd"
    row["raw"] = f"Jun 11 12:{i:02d}:00 host1 sshd[1234]: Accepted publickey for user"
    row["message"] = "sshd[*]: Accepted publickey for user"
    row["facility"] = "DAEMON"
    row["severity"] = "INFO"
    return row
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _ctx_zeek_only(df: pd.DataFrame, cfg: dict | None = None) -> DetectorContext:
    """Build a context whose only frame sits under the ``syslog*.log*`` pattern key.

    A missing or empty *cfg* becomes an empty config dict.
    """
    detector_config = {} if not cfg else cfg
    frames = {"syslog*.log*": df}
    return DetectorContext(
        logs=frames,
        config=detector_config,
        allowlist=None,
        data_window=_WINDOW,
    )
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _ctx_both(
    flat_df: pd.DataFrame, zeek_df: pd.DataFrame, cfg: dict | None = None,
) -> DetectorContext:
    """Build a context with both pattern keys populated (the concat path).

    *flat_df* is stored under ``*.log*`` and *zeek_df* under ``syslog*.log*``,
    in that insertion order. A missing or empty *cfg* becomes an empty dict.
    """
    detector_config = {} if not cfg else cfg
    frames = {"*.log*": flat_df}
    frames["syslog*.log*"] = zeek_df
    return DetectorContext(
        logs=frames,
        config=detector_config,
        allowlist=None,
        data_window=_WINDOW,
    )
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def test_detector_module_exposes_dns_shape_optional_contract() -> None:
    """Pin the module's log contract: nothing required, two optional feeds, one-of gate on."""
    import loghunter.detectors.syslog as mod

    assert mod.REQUIRED_LOGS == []

    declared_feeds = set()
    for entry in mod.OPTIONAL_LOGS:
        declared_feeds.add((entry["source"], entry["pattern"]))
    expected_feeds = {("syslog_dir", "*.log*"), ("zeek_dir", "syslog*.log*")}
    assert declared_feeds == expected_feeds

    assert mod.REQUIRES_ONE_OF_OPTIONAL is True

    expected_reason = (
        "syslog — no syslog source found "
        "(need syslog_dir files or zeek_dir syslog.log)"
    )
    assert mod.REQUIRES_ONE_OF_OPTIONAL_REASON == expected_reason
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def test_run_returns_empty_when_no_source_frames_present() -> None:
    """With neither pattern key present, ``run`` returns no findings.

    REQUIRES_ONE_OF_OPTIONAL is enforced upstream by the runner; this checks
    that the detector itself degrades cleanly when handed no frames.
    """
    no_frames = DetectorContext(logs={}, config={}, allowlist=None, data_window=_WINDOW)
    findings = run(no_frames)
    assert findings == []
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def test_run_zeek_only_context_produces_findings_and_is_source_blind() -> None:
    """A Zeek-keyed frame carrying facility/severity still drives detection.

    Source-blindness rail: the frame includes the extended columns, and the
    detector is expected to produce its finding without depending on them.
    """
    rare_row = {
        "ts": _BASE_TS + 30 * 60.0,
        "host": "192.0.2.1",
        "program": "kernel",
        "raw": "Jun 11 12:30:00 host1 kernel: rare placeholder event",
        "message": "kernel: rare placeholder event",
        "facility": "KERN",
        "severity": "ERR",
    }
    frame = pd.DataFrame([_zeek_syslog_row(i) for i in range(20)] + [rare_row])

    # Inject a stable template-id split: 20 commons + 1 rare.
    fake_drain3 = _patched_drain3([1] * 20 + [2], ["common"] * 20 + ["rare"])

    with patch("loghunter.detectors.syslog._run_drain3", fake_drain3):
        findings = run(_ctx_zeek_only(frame))

    # One rare → at least one finding (drain3 patched to a known split).
    medium_findings = [f for f in findings if f.severity == Severity.MEDIUM]
    assert medium_findings
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def test_run_concats_both_frames_in_order(monkeypatch) -> None:
    """With both pattern keys populated, drain3 sees one frame of all rows.

    ``run`` is expected to concatenate the flat and Zeek frames before calling
    drain3 (the cited precedent is
    detectors/dns.py:_run_zeek_path-and-pihole-enrichment). The check is that
    drain3 is called once with a row count equal to the sum of the inputs,
    i.e. nothing is de-duplicated or dropped on union.
    """
    flat_rows = [_common_row(i) for i in range(5)]
    zeek_rows = [_zeek_syslog_row(i, ts_offset=10_000.0) for i in range(3)]
    flat_df = pd.DataFrame(flat_rows)
    zeek_df = pd.DataFrame(zeek_rows)

    frame_sizes: list[int] = []

    def _recording_drain3(df, *args, **kwargs):
        # Record the size of each frame handed to drain3, then label every
        # row with the same template so nothing is rare.
        row_count = len(df)
        frame_sizes.append(row_count)
        labelled = df.copy()
        labelled["template_id"] = [1] * len(labelled)
        labelled["template_str"] = ["common"] * len(labelled)
        return labelled

    monkeypatch.setattr("loghunter.detectors.syslog._run_drain3", _recording_drain3)
    run(_ctx_both(flat_df, zeek_df))

    expected_total = len(flat_rows) + len(zeek_rows)
    assert frame_sizes == [expected_total]
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def test_run_source_blind_no_facility_severity_required(monkeypatch) -> None:
    """A frame with no facility/severity columns still runs under the Zeek key.

    The rows are flat-shaped (only the rare row adds ``program``), so a
    non-empty result shows the detector does not require the extended columns.
    """
    rare_row = {
        "ts": _BASE_TS + 30 * 60.0,
        "host": "192.0.2.1",
        "program": "kernel",
        "raw": "Jun 11 12:30:00 host1 kernel: rare placeholder",
        "message": "kernel: rare placeholder",
    }
    frame = pd.DataFrame([_common_row(i) for i in range(20)] + [rare_row])

    fake_drain3 = _patched_drain3([1] * 20 + [2], ["common"] * 20 + ["rare"])
    monkeypatch.setattr("loghunter.detectors.syslog._run_drain3", fake_drain3)

    findings = run(_ctx_zeek_only(frame))
    assert findings, "detector must run on a minimal-5 frame keyed at the Zeek pattern"
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
# NOTE: unittest.main() only collects the TestCase class in this module. The
# module-level test_* functions (some take pytest's ``monkeypatch`` fixture)
# are not run this way and need pytest.
if __name__ == "__main__":
    unittest.main()
|