loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""Tests for the duration detector.
|
|
2
|
+
|
|
3
|
+
All IP addresses use RFC 5737 documentation space:
|
|
4
|
+
192.0.2.x, 198.51.100.x, 203.0.113.x
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import unittest
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from loghunter.common.finding import DetectorContext, Finding, RunSummary, Severity
|
|
16
|
+
from loghunter.detectors.duration import (
|
|
17
|
+
DETECTOR_NAME,
|
|
18
|
+
STATUS,
|
|
19
|
+
_duration_str,
|
|
20
|
+
run,
|
|
21
|
+
)
|
|
22
|
+
from loghunter.outputs.text import TextHandler
|
|
23
|
+
from loghunter.runner import discover_detectors
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
_NOW = datetime(2026, 5, 30, tzinfo=timezone.utc)
|
|
29
|
+
_WINDOW = (_NOW, _NOW)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _ctx(
    df: pd.DataFrame | None,
    cfg: dict | None = None,
) -> DetectorContext:
    """Wrap an optional conn-log frame in a ``DetectorContext``.

    Args:
        df: Frame to register under the ``"conn*.log*"`` key. When ``None``
            the context's ``logs`` mapping is left empty.
        cfg: Detector configuration. Any falsy value (``None`` or ``{}``)
            is replaced with a fresh empty dict.

    Returns:
        A context with no allowlist whose data window is ``_WINDOW``.
    """
    logs: dict = {}
    if df is not None:
        logs["conn*.log*"] = df

    config = cfg if cfg else {}
    return DetectorContext(
        logs=logs,
        config=config,
        allowlist=None,
        data_window=_WINDOW,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _conn_row(
|
|
46
|
+
src: str = "192.0.2.10",
|
|
47
|
+
dst: str = "198.51.100.20",
|
|
48
|
+
port: int = 443,
|
|
49
|
+
proto: str = "tcp",
|
|
50
|
+
duration: float = 7200.0,
|
|
51
|
+
ts: float = 1_779_750_000.0,
|
|
52
|
+
**kwargs,
|
|
53
|
+
) -> dict:
|
|
54
|
+
row = {
|
|
55
|
+
"src": src, "dst": dst, "port": port, "proto": proto,
|
|
56
|
+
"duration": duration, "ts": ts,
|
|
57
|
+
}
|
|
58
|
+
row.update(kwargs)
|
|
59
|
+
return row
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _minimal_finding() -> Finding:
    """Return a hand-built MEDIUM ``duration`` finding.

    The finding is constructed directly rather than via ``run`` so renderer
    tests can feed the text handler a known, fixed evidence payload.
    """
    # Flow identity first, then the per-flow metrics; key order is kept
    # exactly as the evidence dict is expected to be laid out.
    flow = {
        "src": "192.0.2.10",
        "dst": "198.51.100.20",
        "port": 443,
        "proto": "tcp",
    }
    metrics = {
        "max_duration_seconds": 7200.0,
        "max_duration_str": "2h 0m",
        "connection_count": 1,
        "total_bytes": None,
        "avg_bytes_per_second": None,
        "conn_states": [],
        "first_seen": None,
        "last_seen": None,
    }
    return Finding(
        detector="duration",
        severity=Severity.MEDIUM,
        title="192.0.2.10 → 198.51.100.20:443/tcp",
        description="A long-lived connection.",
        evidence={**flow, **metrics},
        next_steps=["Review the connection."],
        ts_generated=_NOW,
        data_window=_WINDOW,
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ── Tests ─────────────────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
class DurationDetectorTests(unittest.TestCase):
    """Tests for the ``duration`` detector and its text rendering.

    Sections: detector discovery, empty/degenerate input, severity tiers and
    thresholds, per-flow grouping, evidence fields, the ``Finding`` contract,
    the ``_duration_str`` helper, and ``TextHandler`` output.
    """

    # ── Shared rendering helper ───────────────────────────────────────────────

    @staticmethod
    def _render(
        findings: list[Finding],
        *,
        verbose_level: int = 0,
        record_counts: dict[str, int] | None = None,
    ) -> str:
        """Run *findings* through a ``TextHandler`` and return the text.

        Args:
            findings: Findings passed to ``handler.write``.
            verbose_level: Forwarded to ``TextHandler``.
            record_counts: ``RunSummary.record_counts``; defaults to ``{}``.

        Returns:
            Everything the handler wrote between ``begin`` and ``end``.
        """
        summary = RunSummary(
            data_window=_WINDOW,
            record_counts={} if record_counts is None else record_counts,
            data_size_bytes=0,
            detectors_run=["duration"],
            detectors_skipped={},
        )
        stream = io.StringIO()
        handler = TextHandler(stream=stream, verbose_level=verbose_level)
        handler.begin(summary)
        handler.write(findings)
        handler.end()
        return stream.getvalue()

    # ── Discovery ─────────────────────────────────────────────────────────────

    def test_duration_is_available_in_discover_detectors(self) -> None:
        detectors = discover_detectors()
        self.assertIn("duration", detectors)
        self.assertEqual(getattr(detectors["duration"], "STATUS", None), "available")

    def test_detector_name_and_status_constants(self) -> None:
        self.assertEqual(DETECTOR_NAME, "duration")
        self.assertEqual(STATUS, "available")

    # ── Empty / missing input ─────────────────────────────────────────────────

    def test_run_returns_empty_when_no_conn_key(self) -> None:
        self.assertEqual(run(_ctx(None)), [])

    def test_run_returns_empty_on_empty_dataframe(self) -> None:
        empty = pd.DataFrame(columns=["src", "dst", "port", "proto", "ts", "duration"])
        self.assertEqual(run(_ctx(empty)), [])

    def test_run_returns_empty_when_duration_column_absent(self) -> None:
        df = pd.DataFrame([{"src": "192.0.2.10", "dst": "198.51.100.20",
                            "port": 443, "proto": "tcp", "ts": 1_779_750_000.0}])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_below_threshold(self) -> None:
        df = pd.DataFrame([_conn_row(duration=299.0)])
        self.assertEqual(run(_ctx(df, {"min_duration_seconds": 300})), [])

    def test_run_returns_empty_when_all_nan(self) -> None:
        df = pd.DataFrame([_conn_row(duration=float("nan"))])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_zero(self) -> None:
        df = pd.DataFrame([_conn_row(duration=0.0)])
        self.assertEqual(run(_ctx(df)), [])

    def test_run_returns_empty_when_all_negative(self) -> None:
        df = pd.DataFrame([_conn_row(duration=-10.0)])
        self.assertEqual(run(_ctx(df)), [])

    # ── Core detection ────────────────────────────────────────────────────────

    def test_medium_severity_at_7200s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.MEDIUM)

    def test_high_severity_at_14400s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=14400.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.HIGH)

    def test_high_severity_above_14400s(self) -> None:
        df = pd.DataFrame([_conn_row(duration=86400.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.HIGH)

    def test_low_severity_at_301s_emitted(self) -> None:
        df = pd.DataFrame([_conn_row(duration=301.0)])
        findings = run(_ctx(df, {"min_duration_seconds": 300}))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.LOW)

    def test_exactly_at_threshold_is_detected(self) -> None:
        df = pd.DataFrame([_conn_row(duration=1800.0)])
        # 1800s is LOW (< 7200) — under W6 the result set is verbosity-invariant,
        # so LOW always emits; the text handler is responsible for hiding LOW at
        # level 0.
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)

    def test_just_below_threshold_not_detected(self) -> None:
        df = pd.DataFrame([_conn_row(duration=1799.9)])
        self.assertEqual(run(_ctx(df)), [])

    def test_multiple_findings_sorted_descending_by_max_duration(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration=7200.0),
            _conn_row(src="192.0.2.11", duration=14400.0),
            _conn_row(src="192.0.2.12", duration=9000.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 3)
        durations = [f.evidence["max_duration_seconds"] for f in findings]
        self.assertEqual(durations, sorted(durations, reverse=True))
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    def test_zero_duration_excluded_even_if_column_present(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration=0.0),
            _conn_row(src="192.0.2.11", duration=7200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    def test_non_numeric_duration_values_tolerated(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", duration="bad"),
            _conn_row(src="192.0.2.11", duration=7200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["src"], "192.0.2.11")

    # ── Grouping behavior ─────────────────────────────────────────────────────

    def test_grouping_collapses_same_flow(self) -> None:
        # Three rows for the same (src, dst, port, proto) → one finding
        df = pd.DataFrame([
            _conn_row(duration=7200.0, ts=1_779_750_000.0),
            _conn_row(duration=9000.0, ts=1_779_750_100.0),
            _conn_row(duration=7800.0, ts=1_779_750_200.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["connection_count"], 3)
        self.assertEqual(findings[0].evidence["max_duration_seconds"], 9000.0)

    def test_floor_row_excluded_from_group(self) -> None:
        # One row below the floor; only the two above it count
        df = pd.DataFrame([
            _conn_row(duration=7200.0),
            _conn_row(duration=9000.0),
            _conn_row(duration=500.0),   # below default 1800s floor
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].evidence["connection_count"], 2)
        self.assertEqual(findings[0].evidence["max_duration_seconds"], 9000.0)

    def test_two_flows_produce_two_findings(self) -> None:
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10", dst="198.51.100.1", port=443, duration=7200.0),
            _conn_row(src="192.0.2.10", dst="198.51.100.2", port=443, duration=14400.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 2)

    # ── LOW severity: result-set verbosity invariance (W6) ────────────────────

    def test_low_always_emitted_result_set_invariant(self) -> None:
        """W6: duration.run() emits LOW findings regardless of verbosity. The
        result set is invariant across verbose levels; the text handler is the
        sole authority on hiding LOW at level 0 (W2 pipeline step 1)."""
        # 2000s is LOW (< 7200) but above the 1800s floor.
        df = pd.DataFrame([_conn_row(duration=2000.0)])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, Severity.LOW)

    # ── Evidence fields ───────────────────────────────────────────────────────

    def test_max_duration_seconds_is_rounded_float(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.123456)])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["max_duration_seconds"], 7200.1)

    def test_max_duration_str_present_and_non_empty(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsInstance(f.evidence["max_duration_str"], str)
        self.assertTrue(f.evidence["max_duration_str"])

    def test_src_dst_port_proto_present(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["src"], "192.0.2.10")
        self.assertEqual(f.evidence["dst"], "198.51.100.20")
        self.assertEqual(f.evidence["port"], 443)
        self.assertEqual(f.evidence["proto"], "tcp")

    def test_avg_bps_none_when_bytes_null(self) -> None:
        # NOTE(review): despite the name, this row has no ``bytes`` column at
        # all, so it exercises the same path as
        # test_avg_bps_none_when_column_absent. Consider passing bytes=None
        # once the detector's null-bytes behaviour is confirmed.
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["avg_bytes_per_second"])

    def test_avg_bps_computed_when_bytes_present(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0, bytes=720000)])
        f = run(_ctx(df))[0]
        self.assertIsNotNone(f.evidence["avg_bytes_per_second"])
        self.assertAlmostEqual(f.evidence["avg_bytes_per_second"], 100.0, places=1)

    def test_avg_bps_from_max_duration_row(self) -> None:
        # Row 1: max duration 9000s, bytes 90000 → bps 10.0
        # Row 2: shorter duration 7200s, bytes 720000 (higher bytes, shorter duration)
        # avg_bps must use the max-duration row: 90000 / 9000 = 10.0
        df = pd.DataFrame([
            _conn_row(duration=9000.0, bytes=90000),
            _conn_row(duration=7200.0, bytes=720000),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        self.assertAlmostEqual(findings[0].evidence["avg_bytes_per_second"], 10.0, places=1)

    def test_avg_bps_none_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("bytes", df.columns)
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["avg_bytes_per_second"])

    def test_total_bytes_none_when_all_null(self) -> None:
        df = pd.DataFrame([
            _conn_row(duration=7200.0, bytes=None),
            _conn_row(duration=7800.0, bytes=float("nan")),
        ])
        findings = run(_ctx(df))
        self.assertIsNone(findings[0].evidence["total_bytes"])

    def test_total_bytes_none_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("bytes", df.columns)
        f = run(_ctx(df))[0]
        self.assertIsNone(f.evidence["total_bytes"])

    def test_conn_states_when_single_state_present(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0, conn_state="SF")])
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["conn_states"], ["SF"])

    def test_conn_states_distinct_sorted(self) -> None:
        # Repeated and null states — expect sorted unique non-null list
        df = pd.DataFrame([
            _conn_row(duration=8000.0, conn_state="SF"),
            _conn_row(duration=7800.0, conn_state="RSTO"),
            _conn_row(duration=7200.0, conn_state="SF"),    # duplicate
            _conn_row(duration=7500.0, conn_state=None),    # null, excluded
        ])
        findings = run(_ctx(df))
        self.assertEqual(findings[0].evidence["conn_states"], ["RSTO", "SF"])

    def test_conn_states_empty_list_when_column_absent(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        self.assertNotIn("conn_state", df.columns)
        f = run(_ctx(df))[0]
        self.assertEqual(f.evidence["conn_states"], [])

    # ── Finding contract ──────────────────────────────────────────────────────

    def test_title_contains_src_and_dst_port(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        f = run(_ctx(df))[0]
        self.assertIn("192.0.2.10", f.title)
        self.assertIn("198.51.100.20", f.title)
        self.assertIn("443", f.title)

    def test_title_does_not_contain_duration_value(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertNotIn("7200", f.title)
        self.assertNotIn("2h", f.title)

    def test_detector_field_is_duration(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertEqual(f.detector, "duration")

    def test_next_steps_non_empty(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        f = run(_ctx(df))[0]
        self.assertIsInstance(f.next_steps, list)
        self.assertGreater(len(f.next_steps), 0)

    # ── _duration_str helper ──────────────────────────────────────────────────

    def test_duration_str_seconds(self) -> None:
        self.assertEqual(_duration_str(47.0), "47s")

    def test_duration_str_minutes(self) -> None:
        self.assertEqual(_duration_str(872.0), "14m 32s")

    def test_duration_str_hours(self) -> None:
        self.assertEqual(_duration_str(15780.0), "4h 23m")

    def test_duration_str_days(self) -> None:
        self.assertEqual(_duration_str(93600.0), "1d 2h")

    # ── Text renderer ─────────────────────────────────────────────────────────

    def test_render_duration_group_no_exception(self) -> None:
        output = self._render(
            [_minimal_finding()],
            record_counts={"conn*.log*": 1},
        )
        # Rendering must complete and produce some text.
        self.assertTrue(output)

    def test_render_output_contains_key_tokens(self) -> None:
        df = pd.DataFrame([_conn_row(
            src="192.0.2.10", dst="198.51.100.20", port=443, proto="tcp", duration=7200.0
        )])
        output = self._render(run(_ctx(df)))
        self.assertIn("192.0.2.10", output)
        self.assertIn("198.51.100.20", output)
        self.assertIn("2h 0m", output)

    def test_verbose_mode_emits_evidence_and_next_steps(self) -> None:
        output = self._render([_minimal_finding()], verbose_level=1)
        self.assertIn("evidence:", output)
        self.assertIn("next steps:", output)
        self.assertIn("data window:", output)

    def test_render_conns_column_pluralization(self) -> None:
        # Two rows with the same flow tuple → grouped → "2 conns"
        df = pd.DataFrame([
            _conn_row(duration=7200.0, ts=1_779_750_000.0),
            _conn_row(duration=7800.0, ts=1_779_750_100.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 1)
        # Assert the count together with the plural noun, as the comment
        # above states, rather than the bare word "conns".
        self.assertIn("2 conns", self._render(findings))

    def test_render_single_conn_uses_singular(self) -> None:
        df = pd.DataFrame([_conn_row(duration=7200.0)])
        output = self._render(run(_ctx(df)))
        self.assertIn("1 conn", output)
        # "1 conn" is a substring of "1 conns", so the positive check alone
        # cannot distinguish singular from plural; rule the plural out.
        self.assertNotIn("1 conns", output)

    def test_arrow_alignment_across_multiple_findings(self) -> None:
        """All → arrows must appear at the same column offset."""
        df = pd.DataFrame([
            _conn_row(src="192.0.2.10",  dst="198.51.100.1",   port=443,  duration=14400.0),
            _conn_row(src="192.0.2.200", dst="203.0.113.5",    port=22,   duration=7200.0),
            _conn_row(src="192.0.2.1",   dst="198.51.100.200", port=9997, duration=7201.0),
        ])
        findings = run(_ctx(df))
        self.assertEqual(len(findings), 3)

        output_lines = self._render(findings).splitlines()
        # Finding rows are the lines whose first non-blank character is "[".
        finding_lines = [line for line in output_lines if line.lstrip().startswith("[")]
        arrow_positions = [line.index("→") for line in finding_lines if "→" in line]
        self.assertEqual(len(arrow_positions), 3)
        self.assertEqual(len(set(arrow_positions)), 1, (
            f"→ arrows not aligned — positions: {arrow_positions}"
        ))
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Export orchestrator — new [export.<backend>] config shape coverage.
|
|
2
|
+
|
|
3
|
+
Glenn's amendment: the previous draft missed the fetch seam where
|
|
4
|
+
``run_export`` reads ``config[resolved_backend]`` at lines 155 and 165. A
|
|
5
|
+
stub-backend test that drives the actual ``run_export`` exposes this — it
|
|
6
|
+
KeyErrors today if any site still reads the top-level key.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
from loghunter import exporters
|
|
19
|
+
from loghunter.exporters import run_export
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ── backend selection reads config["export"][name], not top-level ────────────
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_backend_selection_reads_from_export_namespace(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """A top-level ``[splunk]`` table must not auto-select the splunk backend.

    Only ``[export.splunk]`` counts under the clean-break config shape, so
    with no ``export`` table present ``run_export`` has to refuse with
    "No export backend configured".
    """
    # WRONG shape — top-level [splunk]. Must NOT activate.
    legacy_splunk = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "x"}},
    }
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "splunk": legacy_splunk,
    }
    window = {"since": datetime(2026, 6, 1), "until": datetime(2026, 6, 2)}

    with pytest.raises(ValueError, match=r"No export backend configured"):
        run_export(
            config=config,
            backend=None,
            query_names=[],
            out=None,
            verbose=False,
            **window,
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_backend_selection_from_export_namespace_succeeds(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """With ``[export.splunk]`` present, ``run_export`` picks splunk unaided.

    ``backend=None`` leaves the choice to the orchestrator; the test passes
    when the call returns without raising.
    """
    from loghunter.exporters import splunk as splunk_module

    def _fake_fetch(*a, **kw):
        # Accept any call shape and report an empty result set.
        return ([], {"units": 0, "unit_label": "chunks"})

    def _fake_write(rows, outpath, verbose):
        # Pretend zero rows were written to the requested path.
        return (0, {"bytes": 0, "paths": [outpath]})

    # Stub fetch / write so no real Splunk call happens.
    monkeypatch.setattr(splunk_module, "fetch", _fake_fetch)
    monkeypatch.setattr(splunk_module, "write", _fake_write)

    splunk_stanza = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "x"}},
    }
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": splunk_stanza},
    }

    # Should auto-select splunk and not raise.
    run_export(
        config=config,
        backend=None,
        query_names=[],
        since=datetime(2026, 6, 1),
        until=datetime(2026, 6, 2),
        out=None,
        verbose=False,
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ── run_export fetch-seam — stub backend, verify what gets passed in ────────
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class _StubBackend:
|
|
71
|
+
"""Module-shaped stub: exposes the four duck-typed callables run_export
|
|
72
|
+
needs. captured = the kwargs each was called with."""
|
|
73
|
+
|
|
74
|
+
captured: dict[str, Any] = {}
|
|
75
|
+
|
|
76
|
+
@staticmethod
|
|
77
|
+
def is_configured(backend_cfg: dict) -> bool:
|
|
78
|
+
return bool(backend_cfg.get("host", "").strip())
|
|
79
|
+
|
|
80
|
+
@staticmethod
|
|
81
|
+
def summary_descriptor(backend_cfg: dict) -> str:
|
|
82
|
+
return backend_cfg.get("host", "")
|
|
83
|
+
|
|
84
|
+
@staticmethod
|
|
85
|
+
def fetch(query_config, backend_config, since, until, verbose, *, skip_confirm=False):
|
|
86
|
+
# Capture the backend_config the orchestrator hands us — this is the
|
|
87
|
+
# seam Glenn's amendment guards. Pre-fix, this would have arrived as
|
|
88
|
+
# config["splunk"] (top-level) and the new clean-break config has no
|
|
89
|
+
# such key → KeyError or empty dict.
|
|
90
|
+
_StubBackend.captured["backend_config"] = backend_config
|
|
91
|
+
_StubBackend.captured["query_config"] = query_config
|
|
92
|
+
return ([], {"units": 0, "unit_label": "chunks"})
|
|
93
|
+
|
|
94
|
+
@staticmethod
|
|
95
|
+
def write(rows, outpath, verbose):
|
|
96
|
+
_StubBackend.captured["outpath"] = outpath
|
|
97
|
+
return 0, {"bytes": 0, "paths": [outpath]}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_run_export_fetch_receives_export_namespace_backend_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Drive the real ``run_export`` and inspect what reaches ``fetch``.

    The backend config handed to ``fetch`` must be the ``[export.splunk]``
    stanza, not the decoy stored under the top-level ``"splunk"`` key.
    """
    _StubBackend.captured = {}

    # Route every backend lookup to the stub and restrict the known backends
    # to the single "splunk" slot, so run_export exercises is_configured /
    # fetch / write on _StubBackend.
    monkeypatch.setattr(exporters, "_load_backend", lambda name: _StubBackend)
    monkeypatch.setattr(exporters, "_KNOWN_BACKENDS", ("splunk",))

    real_stanza = {
        "host": "192.0.2.20",
        "port": 8089,
        "query": {"default": {"spl": "search *", "output_basename": "syslog"}},
    }
    # Decoy: top-level key with junk. Pre-fix code would have read THIS.
    decoy_stanza = {"host": "BOGUS-do-not-use", "query": {}}
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": real_stanza},
        "splunk": decoy_stanza,
    }

    run_export(
        config=config,
        backend="splunk",
        query_names=[],
        since=datetime(2026, 6, 1),
        until=datetime(2026, 6, 2),
        out=None,
        verbose=False,
    )

    received_host = _StubBackend.captured["backend_config"].get("host")
    assert received_host == "192.0.2.20"
    assert received_host != "BOGUS-do-not-use"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ── Splunk no-query under [export.splunk] → actionable ValueError ────────────
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_splunk_no_query_under_export_namespace_raises_actionable(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """``[export.splunk]`` with a host but no query must fail helpfully.

    There is no shipped default query, so the ``ValueError`` text has to
    point at ``[export.splunk.query.<name>]``.
    """
    # NO query.* — bare loghunter export must surface an actionable error.
    splunk_stanza = {"host": "192.0.2.20", "port": 8089}
    config = {
        "loghunter": {"export_dir": str(tmp_path)},
        "export": {"splunk": splunk_stanza},
    }

    # ``match`` is searched against str(exception); the brackets and dots are
    # escaped so the pattern matches the literal "[export.splunk.query.".
    with pytest.raises(ValueError, match=r"\[export\.splunk\.query\."):
        run_export(
            config=config,
            backend=None,
            query_names=[],
            since=datetime(2026, 6, 1),
            until=datetime(2026, 6, 2),
            out=None,
            verbose=False,
        )
|