loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
"""Scan detector — port scan detection from Zeek conn.log.
|
|
2
|
+
|
|
3
|
+
Detects vertical (one→many ports), horizontal (one→many hosts), block
|
|
4
|
+
(many ports AND many hosts), and slow (temporally spread) port scanning.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import ipaddress
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from loghunter.common.finding import DetectorContext, Finding, MethodTag, Severity
|
|
16
|
+
|
|
17
|
+
# Identification constants for this detector module (presumably read by the
# runner's detector discovery — confirm against loghunter.runner).
DETECTOR_NAME = "scan"
STATUS = "available"

# Log inputs this detector requires: conn logs from the "zeek_dir" source,
# selected by file-name pattern.
REQUIRED_LOGS = [
    {"source": "zeek_dir", "pattern": "conn*.log*"},
]

OPTIONAL_LOGS: list[dict] = []

# Default tunables. The _detect_* functions in this module read these keys
# from the cfg dict they are given.
DEFAULT_CONFIG = {
    "vertical_threshold": 15,    # min distinct ports per (src, dst) within one window
    "horizontal_threshold": 15,  # min distinct dst hosts per (src, port) within one window
    "block_port_threshold": 20,  # block scan: min distinct ports per src per time bucket
    "block_host_threshold": 20,  # block scan: min distinct hosts per src per time bucket
    "block_state_min": 0.30,     # block scan: min fraction of SCAN_STATES connections
    "slow_state_min": 0.30,      # slow scan: min fraction of SCAN_STATES connections
    "window_secs": 3600,         # sliding-window length / time-bucket width, in seconds
    "slow_min_ports": 8,         # slow scan: min distinct ports across all buckets
    "slow_min_buckets": 4,       # slow scan: min distinct time buckets with activity
}

DETECTOR_METHOD = MethodTag("pattern", named=False)

# ── Domain-knowledge constants ────────────────────────────────────────────────

# conn_state values counted as "scan-indicative" when computing scan_state_ratio.
SCAN_STATES = {'S0', 'REJ', 'RSTO', 'RSTR', 'SH', 'OTH'}

# Ports that _classify treats as BitTorrent peer / tracker traffic.
BITTORRENT_PORTS_PEER = {6881, 6882, 6883, 6884, 6885, 6886, 6887, 6888, 6889,
                         51413, 51414}
BITTORRENT_PORTS_TRACKER = {6969, 2710}

# IoT/smart device discovery ports — multicast/broadcast, structurally produce
# high S0/OTH rates that are not scanning
IOT_DISCOVERY_PORTS = {
    5353, # mDNS
    1900, # SSDP/UPnP
    5355, # LLMNR
    137, # NetBIOS Name Service
    138, # NetBIOS Datagram
}

# IoT multicast/broadcast destination ranges — connections to these are never scans
# (matched as string prefixes of the dst address in _prefilter).
IOT_MULTICAST_PREFIXES = ('224.', '239.', '255.255.255.255', 'ff0', 'ff1', 'ff2')

# Ports that _classify reports as unassigned/reserved ("dark_traffic") when
# nearly all connections are in scan-indicative states.
DARK_PORTS = {0, 1, 2, 3, 4, 6, 8}

# Human-readable labels per scan_type. Note there is no entry for 'slow'.
SCAN_TYPE_DESCRIPTIONS = {
    'vertical' : 'Port scan (one host, many ports)',
    'horizontal': 'Network sweep (many hosts, one port)',
    'block' : 'Block scan (many hosts AND many ports)',
}

# Human-readable explanations of conn_state codes.
STATE_EXPLANATIONS = {
    'S0' : 'SYN sent, no response (filtered/firewalled)',
    'REJ' : 'Port closed (RST received)',
    'RSTO': 'Connection reset by originator',
    'RSTR': 'Connection reset by responder',
    'SF' : 'Normal established+closed connection',
    'SH' : 'Half-open scan (SYN+FIN)',
    'OTH' : 'No SYN observed',
}


# ── Zone-label seam ───────────────────────────────────────────────────────────
#
# Standalone-callable fallback: when run() is invoked with a DetectorContext
# whose home_net is empty (e.g. from a notebook), this RFC1918 list is used.
# The runner is the normal supply path — it resolves [loghunter].home_net and
# passes it on every DetectorContext.
_DEFAULT_HOME_NET = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _zone_of(ip: str, home_net: list[str]) -> str:
|
|
90
|
+
"""Return the zone label for an IP given the operator's home_net.
|
|
91
|
+
|
|
92
|
+
Today returns "internal" or "external". The function body is the seam:
|
|
93
|
+
adding a third zone (e.g. "dmz") is a single new if-check inside this
|
|
94
|
+
function — signature and callers do not change. Zones are descriptive
|
|
95
|
+
labels only; there is no trust-rank or numeric ordering at this stage.
|
|
96
|
+
"""
|
|
97
|
+
try:
|
|
98
|
+
addr = ipaddress.ip_address(ip)
|
|
99
|
+
except ValueError:
|
|
100
|
+
return "external"
|
|
101
|
+
if any(addr in ipaddress.ip_network(n, strict=False) for n in home_net):
|
|
102
|
+
return "internal"
|
|
103
|
+
return "external"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _classify_direction(src: str, dst: str, home_net: list[str]) -> tuple[str, str, str]:
    """Return ``(src_zone, dst_zone, rendered)`` for one flow.

    ``rendered`` is simply the two zone labels joined by an arrow, so the
    two-zone case yields exactly "internal→internal", "internal→external",
    "external→internal" or "external→external". Because the string is built
    from the labels rather than from a fixed 2×2 table, any additional zone
    label produced by ``_zone_of`` shows up here with no change to this
    function.
    """
    zones = (_zone_of(src, home_net), _zone_of(dst, home_net))
    rendered = "→".join(zones)
    return zones[0], zones[1], rendered
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _prefilter(df: pd.DataFrame, home_net: list[str]) -> pd.DataFrame:
|
|
122
|
+
"""Drop ICMP, IPv6 link-local, and IoT multicast rows; add direction columns.
|
|
123
|
+
|
|
124
|
+
Normalizes expected columns to safe types first so downstream detection code
|
|
125
|
+
does not crash on malformed-but-loadable conn logs. Rows with missing or
|
|
126
|
+
unparseable values simply never meet scan thresholds and produce no findings.
|
|
127
|
+
|
|
128
|
+
Adds two columns: ``direction`` (rendered string for evidence display) and
|
|
129
|
+
``src_zone`` (raw zone label, used by structural checks that would otherwise
|
|
130
|
+
have to string-parse the rendered direction).
|
|
131
|
+
"""
|
|
132
|
+
df = df.copy()
|
|
133
|
+
|
|
134
|
+
# Ensure required string columns exist and contain no None/NaN values.
|
|
135
|
+
for col in ("src", "dst", "proto", "conn_state"):
|
|
136
|
+
if col not in df.columns:
|
|
137
|
+
df[col] = ""
|
|
138
|
+
else:
|
|
139
|
+
df[col] = df[col].fillna("").astype(str)
|
|
140
|
+
|
|
141
|
+
# Port and timestamp must be numeric for every scan mode. Malformed rows are
|
|
142
|
+
# dropped here instead of letting lower-level pandas operations raise KeyError.
|
|
143
|
+
for col in ("port", "ts"):
|
|
144
|
+
if col not in df.columns:
|
|
145
|
+
df[col] = np.nan
|
|
146
|
+
else:
|
|
147
|
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
|
148
|
+
|
|
149
|
+
df = df[df["port"].notna() & df["ts"].notna()]
|
|
150
|
+
if df.empty:
|
|
151
|
+
return df
|
|
152
|
+
|
|
153
|
+
df = df[df['proto'] != 'icmp']
|
|
154
|
+
if df.empty:
|
|
155
|
+
return df
|
|
156
|
+
df = df[~(df['src'].str.startswith('fe80:') | df['dst'].str.startswith('fe80:'))]
|
|
157
|
+
if df.empty:
|
|
158
|
+
return df
|
|
159
|
+
df = df[~df['dst'].map(lambda ip: any(ip.startswith(p) for p in IOT_MULTICAST_PREFIXES))]
|
|
160
|
+
if df.empty:
|
|
161
|
+
return df
|
|
162
|
+
df = df.copy() # break view chain before column assignment
|
|
163
|
+
triples = [_classify_direction(s, d, home_net) for s, d in zip(df['src'], df['dst'])]
|
|
164
|
+
df['src_zone'] = [t[0] for t in triples]
|
|
165
|
+
df['direction'] = [t[2] for t in triples]
|
|
166
|
+
return df
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _detect_vertical(df: pd.DataFrame, cfg: dict) -> list[dict]:
|
|
170
|
+
"""Vertical scan: one src → many distinct ports on one dst."""
|
|
171
|
+
threshold = cfg['vertical_threshold']
|
|
172
|
+
window_secs = cfg['window_secs']
|
|
173
|
+
|
|
174
|
+
global_counts = (
|
|
175
|
+
df.groupby(['src', 'dst'])['port']
|
|
176
|
+
.nunique()
|
|
177
|
+
.reset_index(name='global_distinct_ports')
|
|
178
|
+
)
|
|
179
|
+
candidates = global_counts[global_counts['global_distinct_ports'] >= threshold]
|
|
180
|
+
|
|
181
|
+
if len(candidates) == 0:
|
|
182
|
+
return []
|
|
183
|
+
|
|
184
|
+
cand_keys = candidates[['src', 'dst']]
|
|
185
|
+
df_cands = df.merge(cand_keys, on=['src', 'dst'])
|
|
186
|
+
|
|
187
|
+
results = []
|
|
188
|
+
for (src, dst), grp in df_cands.groupby(['src', 'dst']):
|
|
189
|
+
grp = grp.sort_values('ts')
|
|
190
|
+
ts_arr = grp['ts'].values # already float epoch seconds
|
|
191
|
+
port_arr = grp['port'].values
|
|
192
|
+
state_arr = grp['conn_state'].values
|
|
193
|
+
|
|
194
|
+
port_counts = {}
|
|
195
|
+
max_ports_in_window = 0
|
|
196
|
+
best_window_start = ts_arr[0]
|
|
197
|
+
left = 0
|
|
198
|
+
|
|
199
|
+
for right in range(len(ts_arr)):
|
|
200
|
+
p = port_arr[right]
|
|
201
|
+
if p is not None and not (isinstance(p, float) and np.isnan(p)):
|
|
202
|
+
port_counts[p] = port_counts.get(p, 0) + 1
|
|
203
|
+
while ts_arr[right] - ts_arr[left] > window_secs:
|
|
204
|
+
lp = port_arr[left]
|
|
205
|
+
if lp is not None and not (isinstance(lp, float) and np.isnan(lp)):
|
|
206
|
+
port_counts[lp] -= 1
|
|
207
|
+
if port_counts[lp] == 0:
|
|
208
|
+
del port_counts[lp]
|
|
209
|
+
left += 1
|
|
210
|
+
n = len(port_counts)
|
|
211
|
+
if n > max_ports_in_window:
|
|
212
|
+
max_ports_in_window = n
|
|
213
|
+
best_window_start = ts_arr[left]
|
|
214
|
+
|
|
215
|
+
if max_ports_in_window < threshold:
|
|
216
|
+
continue
|
|
217
|
+
|
|
218
|
+
state_counts = pd.Series(state_arr).value_counts()
|
|
219
|
+
total_conns = len(state_arr)
|
|
220
|
+
scan_state_count = sum(state_counts.get(s, 0) for s in SCAN_STATES)
|
|
221
|
+
scan_state_ratio = scan_state_count / total_conns
|
|
222
|
+
|
|
223
|
+
port_series = pd.Series(port_arr).dropna()
|
|
224
|
+
port_buckets = pd.cut(port_series, bins=[0, 1023, 49151, 65535],
|
|
225
|
+
labels=['well-known', 'registered', 'ephemeral'])
|
|
226
|
+
counts_arr = (port_buckets.value_counts().values + 1).astype(float)
|
|
227
|
+
probs = counts_arr / counts_arr.sum()
|
|
228
|
+
port_range_entropy = round(float(-np.sum(probs * np.log(probs))), 3)
|
|
229
|
+
|
|
230
|
+
results.append({
|
|
231
|
+
'scan_type' : 'vertical',
|
|
232
|
+
'src' : src,
|
|
233
|
+
'dst' : dst,
|
|
234
|
+
'port' : None,
|
|
235
|
+
'port_class' : None,
|
|
236
|
+
'distinct_ports' : max_ports_in_window,
|
|
237
|
+
'distinct_hosts' : 1,
|
|
238
|
+
'total_conns' : total_conns,
|
|
239
|
+
'scan_state_ratio' : round(scan_state_ratio, 3),
|
|
240
|
+
'top_states' : ', '.join(state_counts.head(3).index.tolist()),
|
|
241
|
+
'port_range_entropy' : port_range_entropy,
|
|
242
|
+
'window_start' : datetime.fromtimestamp(
|
|
243
|
+
best_window_start, tz=timezone.utc
|
|
244
|
+
).strftime('%Y-%m-%d %H:%M:%S'),
|
|
245
|
+
'window_secs' : window_secs,
|
|
246
|
+
'direction' : grp['direction'].iloc[0],
|
|
247
|
+
})
|
|
248
|
+
|
|
249
|
+
return results
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _detect_horizontal(df: pd.DataFrame, cfg: dict) -> list[dict]:
|
|
253
|
+
"""Horizontal scan: one src → same port across many distinct hosts."""
|
|
254
|
+
threshold = cfg['horizontal_threshold']
|
|
255
|
+
window_secs = cfg['window_secs']
|
|
256
|
+
|
|
257
|
+
df_tcp_udp = df[df['port'].notna()].copy()
|
|
258
|
+
|
|
259
|
+
global_counts = (
|
|
260
|
+
df_tcp_udp.groupby(['src', 'port'])['dst']
|
|
261
|
+
.nunique()
|
|
262
|
+
.reset_index(name='global_distinct_hosts')
|
|
263
|
+
)
|
|
264
|
+
candidates = global_counts[global_counts['global_distinct_hosts'] >= threshold]
|
|
265
|
+
|
|
266
|
+
if len(candidates) == 0:
|
|
267
|
+
return []
|
|
268
|
+
|
|
269
|
+
cand_keys = candidates[['src', 'port']]
|
|
270
|
+
df_cands = df_tcp_udp.merge(cand_keys, on=['src', 'port'])
|
|
271
|
+
|
|
272
|
+
results = []
|
|
273
|
+
for (src, port), grp in df_cands.groupby(['src', 'port']):
|
|
274
|
+
grp = grp.sort_values('ts')
|
|
275
|
+
ts_arr = grp['ts'].values # already float epoch seconds
|
|
276
|
+
host_arr = grp['dst'].values
|
|
277
|
+
state_arr = grp['conn_state'].values
|
|
278
|
+
|
|
279
|
+
host_counts = {}
|
|
280
|
+
max_hosts_in_window = 0
|
|
281
|
+
best_window_start = ts_arr[0]
|
|
282
|
+
left = 0
|
|
283
|
+
|
|
284
|
+
for right in range(len(ts_arr)):
|
|
285
|
+
h = host_arr[right]
|
|
286
|
+
if h is not None:
|
|
287
|
+
host_counts[h] = host_counts.get(h, 0) + 1
|
|
288
|
+
while ts_arr[right] - ts_arr[left] > window_secs:
|
|
289
|
+
lh = host_arr[left]
|
|
290
|
+
if lh is not None:
|
|
291
|
+
host_counts[lh] -= 1
|
|
292
|
+
if host_counts[lh] == 0:
|
|
293
|
+
del host_counts[lh]
|
|
294
|
+
left += 1
|
|
295
|
+
n = len(host_counts)
|
|
296
|
+
if n > max_hosts_in_window:
|
|
297
|
+
max_hosts_in_window = n
|
|
298
|
+
best_window_start = ts_arr[left]
|
|
299
|
+
|
|
300
|
+
if max_hosts_in_window < threshold:
|
|
301
|
+
continue
|
|
302
|
+
|
|
303
|
+
state_counts = pd.Series(state_arr).value_counts()
|
|
304
|
+
total_conns = len(state_arr)
|
|
305
|
+
scan_state_ratio = sum(state_counts.get(s, 0) for s in SCAN_STATES) / total_conns
|
|
306
|
+
velocity = max_hosts_in_window / max(ts_arr[-1] - ts_arr[0], 1)
|
|
307
|
+
|
|
308
|
+
port_int = int(port)
|
|
309
|
+
if port_int <= 1023:
|
|
310
|
+
port_class = 'well-known'
|
|
311
|
+
elif port_int <= 49151:
|
|
312
|
+
port_class = 'registered'
|
|
313
|
+
else:
|
|
314
|
+
port_class = 'ephemeral'
|
|
315
|
+
|
|
316
|
+
results.append({
|
|
317
|
+
'scan_type' : 'horizontal',
|
|
318
|
+
'src' : src,
|
|
319
|
+
'dst' : None,
|
|
320
|
+
'port' : port_int,
|
|
321
|
+
'port_class' : port_class,
|
|
322
|
+
'distinct_ports' : 1,
|
|
323
|
+
'distinct_hosts' : max_hosts_in_window,
|
|
324
|
+
'total_conns' : total_conns,
|
|
325
|
+
'scan_state_ratio' : round(scan_state_ratio, 3),
|
|
326
|
+
'top_states' : ', '.join(state_counts.head(3).index.tolist()),
|
|
327
|
+
'velocity_hosts_per_sec' : round(velocity, 4),
|
|
328
|
+
'window_start' : datetime.fromtimestamp(
|
|
329
|
+
best_window_start, tz=timezone.utc
|
|
330
|
+
).strftime('%Y-%m-%d %H:%M:%S'),
|
|
331
|
+
'window_secs' : window_secs,
|
|
332
|
+
'direction' : grp['direction'].iloc[0],
|
|
333
|
+
})
|
|
334
|
+
|
|
335
|
+
return results
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _detect_block(df: pd.DataFrame, cfg: dict) -> list[dict]:
    """Block scan: one src → many ports AND many hosts within a time window.

    Rows are assigned to fixed time buckets of ``window_secs`` seconds
    (``ts // window_secs``), not a sliding window. A source is reported when a
    single bucket meets the port threshold, the host threshold and the minimum
    scan-state ratio; only its widest bucket (ports × hosts) is kept.

    Args:
        df: Pre-filtered connection rows with ``src``, ``dst``, ``port``,
            ``ts``, ``conn_state`` and ``direction`` columns.
        cfg: Detector configuration; reads ``block_port_threshold``,
            ``block_host_threshold``, ``block_state_min`` and ``window_secs``.

    Returns:
        At most one record per source, as produced by
        ``DataFrame.to_dict('records')``. Besides the common fields, each
        record carries ``ports_well_known``, ``ports_registered`` and
        ``ports_ephemeral`` connection counts for the reported bucket.
    """
    port_threshold = cfg['block_port_threshold']
    host_threshold = cfg['block_host_threshold']
    scan_state_ratio_min = cfg['block_state_min']
    window_secs = cfg['window_secs']

    df_w = df[df['port'].notna()].copy()
    df_w['time_bucket'] = (df_w['ts'] // window_secs).astype(int)
    df_w['is_scan_state'] = df_w['conn_state'].isin(SCAN_STATES)

    # Per-source totals over the whole data set, used as a gate before the
    # per-bucket aggregation.
    global_agg = df_w.groupby('src').agg(
        global_distinct_ports=('port', 'nunique'),
        global_distinct_hosts=('dst', 'nunique'),
        scan_state_ratio=('is_scan_state', 'mean'),
    ).reset_index()

    # NOTE(review): the port/host gates are implied by the per-bucket test, but
    # the ratio gate is not — a source whose overall ratio is below the minimum
    # is excluded even if one bucket would qualify. Confirm this is intended.
    candidates = global_agg[
        (global_agg['global_distinct_ports'] >= port_threshold) &
        (global_agg['global_distinct_hosts'] >= host_threshold) &
        (global_agg['scan_state_ratio'] >= scan_state_ratio_min)
    ]

    if len(candidates) == 0:
        return []

    df_cands = df_w[df_w['src'].isin(candidates['src'])]
    # One row per (src, time_bucket). The order of the named aggregations fixes
    # the column order of the emitted records.
    bucket_agg = df_cands.groupby(['src', 'time_bucket']).agg(
        distinct_ports=('port', 'nunique'),
        distinct_hosts=('dst', 'nunique'),
        total_conns=('port', 'count'),
        scan_state_ratio=('is_scan_state', 'mean'),
        # Up to three most frequent conn_state values, comma-separated.
        top_states=('conn_state',
                    lambda x: ', '.join(x.value_counts().head(3).index.tolist())),
        direction=('direction', 'first'),
        # These three count connections (rows) per port range, not distinct ports.
        ports_well_known=('port', lambda x: (x <= 1023).sum()),
        ports_registered=('port', lambda x: ((x > 1023) & (x <= 49151)).sum()),
        ports_ephemeral=('port', lambda x: (x > 49151).sum()),
        # Earliest timestamp actually seen in the bucket, not the bucket boundary.
        window_start_ts=('ts', 'min'),
    ).reset_index()

    findings = bucket_agg[
        (bucket_agg['distinct_ports'] >= port_threshold) &
        (bucket_agg['distinct_hosts'] >= host_threshold) &
        (bucket_agg['scan_state_ratio'] >= scan_state_ratio_min)
    ].copy()

    if len(findings) == 0:
        return []

    findings['scan_type'] = 'block'
    findings['dst'] = None
    findings['port'] = None
    findings['port_class'] = None
    findings['window_secs'] = window_secs
    findings['window_start'] = findings['window_start_ts'].map(
        lambda ts: datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    )
    findings['scan_state_ratio'] = findings['scan_state_ratio'].round(3)
    # Temporary ranking key used to pick each source's widest bucket.
    findings['breadth_score'] = findings['distinct_ports'] * findings['distinct_hosts']

    # Keep the highest-breadth bucket per source, then drop helper columns.
    findings = (
        findings
        .sort_values('breadth_score', ascending=False)
        .drop_duplicates(subset=['src'], keep='first')
        .drop(columns=['time_bucket', 'window_start_ts', 'breadth_score'])
        .reset_index(drop=True)
    )

    return findings.to_dict('records')
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _detect_slow(df: pd.DataFrame, cfg: dict) -> list[dict]:
|
|
411
|
+
"""Slow scan: port diversity spread across many time buckets to evade per-window thresholds."""
|
|
412
|
+
min_ports = cfg['slow_min_ports']
|
|
413
|
+
min_buckets = cfg['slow_min_buckets']
|
|
414
|
+
state_min = cfg['slow_state_min']
|
|
415
|
+
bucket_secs = cfg['window_secs']
|
|
416
|
+
vert_threshold = cfg['vertical_threshold']
|
|
417
|
+
|
|
418
|
+
df_w = df[df['port'].notna()].copy()
|
|
419
|
+
df_w['time_bucket'] = (df_w['ts'] // bucket_secs).astype(int)
|
|
420
|
+
|
|
421
|
+
def is_iot_discovery(grp: pd.DataFrame) -> bool:
|
|
422
|
+
port_counts = grp['port'].value_counts()
|
|
423
|
+
top_ports = set(port_counts.head(3).index.tolist())
|
|
424
|
+
if top_ports.issubset(IOT_DISCOVERY_PORTS | {53, 443, 80}):
|
|
425
|
+
ext_conns = grp[grp['src_zone'] != 'internal'].shape[0]
|
|
426
|
+
if ext_conns / len(grp) < 0.1:
|
|
427
|
+
return True
|
|
428
|
+
return False
|
|
429
|
+
|
|
430
|
+
results = []
|
|
431
|
+
|
|
432
|
+
for src, grp in df_w.groupby('src'):
|
|
433
|
+
n_buckets = grp['time_bucket'].nunique()
|
|
434
|
+
if n_buckets < min_buckets:
|
|
435
|
+
continue
|
|
436
|
+
|
|
437
|
+
total_unique_ports = grp['port'].nunique()
|
|
438
|
+
if total_unique_ports < min_ports:
|
|
439
|
+
continue
|
|
440
|
+
|
|
441
|
+
max_ports_in_bucket = grp.groupby('time_bucket')['port'].nunique().max()
|
|
442
|
+
|
|
443
|
+
if max_ports_in_bucket >= vert_threshold:
|
|
444
|
+
continue
|
|
445
|
+
|
|
446
|
+
spread_score = round(total_unique_ports / max(max_ports_in_bucket, 1), 2)
|
|
447
|
+
state_counts = grp['conn_state'].value_counts()
|
|
448
|
+
scan_state_ratio = sum(state_counts.get(s, 0) for s in SCAN_STATES) / len(grp)
|
|
449
|
+
|
|
450
|
+
if scan_state_ratio < state_min:
|
|
451
|
+
continue
|
|
452
|
+
|
|
453
|
+
iot_flag = is_iot_discovery(grp)
|
|
454
|
+
|
|
455
|
+
if iot_flag:
|
|
456
|
+
pattern_tag = 'iot_discovery'
|
|
457
|
+
pattern_notes = (
|
|
458
|
+
f"Traffic pattern consistent with IoT device discovery (mDNS/SSDP/UPnP). "
|
|
459
|
+
f"High temporal spread from repeated network attach/detach cycles rather "
|
|
460
|
+
f"than deliberate scanning. Add source to allowlist to suppress if known."
|
|
461
|
+
)
|
|
462
|
+
elif scan_state_ratio >= 0.60:
|
|
463
|
+
pattern_tag = 'slow_scan'
|
|
464
|
+
pattern_notes = (
|
|
465
|
+
f"Temporal spread score {spread_score:.2f} with {scan_state_ratio:.1%} "
|
|
466
|
+
f"scan-indicative states across {n_buckets} time windows. "
|
|
467
|
+
f"Activity deliberately paced below per-window detection threshold. "
|
|
468
|
+
f"Strong slow scan signature."
|
|
469
|
+
)
|
|
470
|
+
else:
|
|
471
|
+
pattern_tag = 'slow_scan_candidate'
|
|
472
|
+
pattern_notes = (
|
|
473
|
+
f"Temporal spread score {spread_score:.2f} with {scan_state_ratio:.1%} "
|
|
474
|
+
f"scan-indicative states across {n_buckets} time windows. "
|
|
475
|
+
f"Moderate confidence — review destination IPs and ports."
|
|
476
|
+
)
|
|
477
|
+
|
|
478
|
+
results.append({
|
|
479
|
+
'scan_type' : 'slow',
|
|
480
|
+
'src' : src,
|
|
481
|
+
'dst' : None,
|
|
482
|
+
'port' : None,
|
|
483
|
+
'port_class' : None,
|
|
484
|
+
'distinct_ports' : total_unique_ports,
|
|
485
|
+
'distinct_hosts' : grp['dst'].nunique(),
|
|
486
|
+
'max_ports_in_bucket' : int(max_ports_in_bucket),
|
|
487
|
+
'active_buckets' : n_buckets,
|
|
488
|
+
'temporal_spread_score' : spread_score,
|
|
489
|
+
'total_conns' : len(grp),
|
|
490
|
+
'scan_state_ratio' : round(scan_state_ratio, 3),
|
|
491
|
+
'top_states' : ', '.join(state_counts.head(3).index.tolist()),
|
|
492
|
+
'window_start' : datetime.fromtimestamp(
|
|
493
|
+
float(grp['ts'].min()), tz=timezone.utc
|
|
494
|
+
).strftime('%Y-%m-%d %H:%M:%S'),
|
|
495
|
+
'window_secs' : bucket_secs,
|
|
496
|
+
'direction' : grp['direction'].iloc[0],
|
|
497
|
+
'pattern_tag' : pattern_tag,
|
|
498
|
+
'pattern_notes' : pattern_notes,
|
|
499
|
+
})
|
|
500
|
+
|
|
501
|
+
return results
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def _classify(row: dict) -> tuple[str, str]:
|
|
505
|
+
"""Return (pattern_tag, explanation) for a finding dict."""
|
|
506
|
+
port = row.get('port')
|
|
507
|
+
ratio = row['scan_state_ratio']
|
|
508
|
+
hosts = row.get('distinct_hosts') or 0
|
|
509
|
+
ports = row.get('distinct_ports') or 0
|
|
510
|
+
scan_type = row['scan_type']
|
|
511
|
+
|
|
512
|
+
if scan_type == 'slow':
|
|
513
|
+
return (row.get('pattern_tag', 'slow_scan_candidate'),
|
|
514
|
+
row.get('pattern_notes', ''))
|
|
515
|
+
|
|
516
|
+
if port in IOT_DISCOVERY_PORTS and ratio < 0.40:
|
|
517
|
+
return ('iot_discovery',
|
|
518
|
+
f"Port {port} is an IoT/device discovery port (mDNS/SSDP/UPnP/NetBIOS). "
|
|
519
|
+
f"High host counts on this port are normal for device discovery protocols. "
|
|
520
|
+
f"Not a port scan. Add source to allowlist to suppress.")
|
|
521
|
+
|
|
522
|
+
if port in BITTORRENT_PORTS_PEER and ratio >= 0.50:
|
|
523
|
+
return ('bittorrent',
|
|
524
|
+
f"BitTorrent peer connections on port {port} — {hosts} peers contacted, "
|
|
525
|
+
f"{ratio:.1%} failed connections (normal for BT peer discovery). "
|
|
526
|
+
f"If this host shouldn't run BitTorrent, investigate.")
|
|
527
|
+
|
|
528
|
+
if port in BITTORRENT_PORTS_TRACKER and ratio >= 0.15:
|
|
529
|
+
return ('bittorrent',
|
|
530
|
+
f"BitTorrent tracker traffic on port {port} — {hosts} trackers contacted, "
|
|
531
|
+
f"{ratio:.1%} failed connections (normal for tracker announce/scrape). "
|
|
532
|
+
f"If this host shouldn't run BitTorrent, investigate.")
|
|
533
|
+
|
|
534
|
+
if port == 53 and ratio < 0.05 and hosts >= 15:
|
|
535
|
+
return ('dns_resolver',
|
|
536
|
+
f"DNS recursive resolution — {hosts} external resolvers on port 53, "
|
|
537
|
+
f"{ratio:.1%} failed. This is a DNS server or resolver, not a scanner. "
|
|
538
|
+
f"Add source to allowlist to suppress.")
|
|
539
|
+
|
|
540
|
+
if port == 443 and ratio < 0.10 and hosts >= 15:
|
|
541
|
+
return ('https_browsing',
|
|
542
|
+
f"HTTPS to {hosts} external hosts, {ratio:.1%} failed — consistent with "
|
|
543
|
+
f"normal web browsing or cloud service traffic. "
|
|
544
|
+
f"Add source to allowlist to suppress.")
|
|
545
|
+
|
|
546
|
+
if port == 80 and ratio < 0.10 and hosts >= 15:
|
|
547
|
+
return ('http_browsing',
|
|
548
|
+
f"HTTP to {hosts} external hosts, {ratio:.1%} failed — consistent with "
|
|
549
|
+
f"normal web traffic.")
|
|
550
|
+
|
|
551
|
+
if port == 443 and 0.10 <= ratio < 0.50 and hosts >= 20:
|
|
552
|
+
return ('streaming_blocked',
|
|
553
|
+
f"{hosts} HTTPS destinations, {ratio:.1%} failed. On a media/streaming "
|
|
554
|
+
f"device this pattern is consistent with DNS-level blocking (Pi-hole, "
|
|
555
|
+
f"NextDNS) causing direct connection fallback attempts. "
|
|
556
|
+
f"Add source to allowlist to suppress.")
|
|
557
|
+
|
|
558
|
+
if port in DARK_PORTS and ratio >= 0.90:
|
|
559
|
+
return ('dark_traffic',
|
|
560
|
+
f"Port {port} is unassigned/reserved — likely a Zeek encoding artifact "
|
|
561
|
+
f"(e.g. ICMP type/code) or internet background radiation. "
|
|
562
|
+
f"Check proto field in conn.log.")
|
|
563
|
+
|
|
564
|
+
if scan_type == 'vertical' and ratio >= 0.60 and ports >= 1000:
|
|
565
|
+
return ('confirmed_scan',
|
|
566
|
+
f"Full port range scan — {ports} distinct ports on single target "
|
|
567
|
+
f"with {ratio:.1%} scan-indicative states. Strong scanner signature.")
|
|
568
|
+
|
|
569
|
+
if ratio >= 0.60:
|
|
570
|
+
return ('confirmed_scan',
|
|
571
|
+
f"{ratio:.1%} scan-indicative states "
|
|
572
|
+
f"({'ports' if scan_type == 'vertical' else 'hosts'}: {max(ports, hosts)}). "
|
|
573
|
+
f"Strong scanner signature.")
|
|
574
|
+
|
|
575
|
+
return ('unknown', '')
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _to_severity(row: dict) -> Severity:
    """Map a classified finding dict to a Severity level.

    The decision uses ``scan_state_ratio``, the breadth (the larger of
    ``distinct_ports`` and ``distinct_hosts``, with missing values counted as
    0) and ``pattern_tag``:

    * benign-pattern tags are always LOW;
    * slow-scan rows are LOW unless tagged ``'slow_scan'``, which is HIGH at a
      ratio of 0.60 or more and MEDIUM otherwise;
    * everything else is graded by ratio, with breadth lowering the ratio
      needed for HIGH (0.30 at breadth ≥ 50) and MEDIUM (0.10 at breadth ≥ 25).
    """
    ratio = row['scan_state_ratio']
    breadth = max(row.get('distinct_ports') or 0, row.get('distinct_hosts') or 0)
    tag = row['pattern_tag']

    benign_tags = ('dns_resolver', 'https_browsing', 'http_browsing',
                   'iot_discovery', 'dark_traffic')
    if tag in benign_tags:
        return Severity.LOW

    if row.get('scan_type') == 'slow':
        if tag != 'slow_scan':
            return Severity.LOW
        if ratio >= 0.60:
            return Severity.HIGH
        return Severity.MEDIUM

    if ratio >= 0.60 or (ratio >= 0.30 and breadth >= 50):
        return Severity.HIGH
    if ratio >= 0.20 or (ratio >= 0.10 and breadth >= 25):
        return Severity.MEDIUM
    return Severity.LOW
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _make_finding(row: dict, data_window: tuple) -> Finding:
    """Construct a Finding from a classified result dict.

    Args:
        row: Classified result. ``scan_type``, ``src`` and ``_severity`` are
            required keys; every other key is read with ``.get`` and may be
            absent.
        data_window: Passed through unchanged as ``Finding.data_window``.

    Returns:
        A ``Finding`` with ``detector='scan'``, a title derived from the scan
        type, a description (``pattern_notes`` when non-empty, otherwise a
        generated summary), an evidence dict, and next-step guidance chosen
        from the pattern tag and severity.

    Raises:
        KeyError: if ``scan_type``, ``src`` or ``_severity`` is missing.
    """
    scan_type = row['scan_type']
    src = row['src']
    dst = row.get('dst')
    port = row.get('port')
    distinct_ports = row.get('distinct_ports', 0)
    distinct_hosts = row.get('distinct_hosts', 0)
    active_buckets = row.get('active_buckets')

    if scan_type == 'vertical':
        title = f"{src} → {dst}"
    elif scan_type == 'horizontal':
        title = f"{src} → *:{port}"
    elif scan_type == 'block':
        title = f"{src} → *"
    else:
        # Any scan type other than the three above is titled as a slow scan.
        title = f"{src} slow scan"

    # Prefer the classifier's notes; fall back to a generated summary when
    # the notes are missing or empty.
    description = row.get('pattern_notes') or (
        f"{scan_type} scan — {row.get('total_conns', 0)} connections, "
        f"scan_state_ratio={row.get('scan_state_ratio', 0):.2f}"
    )

    evidence: dict = {
        'scan_type'        : scan_type,
        'src'              : src,
        'dst'              : dst,
        'port'             : port,
        'distinct_ports'   : distinct_ports,
        'distinct_hosts'   : distinct_hosts,
        'total_conns'      : row.get('total_conns'),
        'scan_state_ratio' : row.get('scan_state_ratio'),
        'top_states'       : row.get('top_states'),
        'direction'        : row.get('direction'),
        'pattern_tag'      : row.get('pattern_tag'),
        'window_start'     : row.get('window_start'),
        'window_secs'      : row.get('window_secs'),
    }
    if scan_type == 'slow':
        # Slow scans carry extra pacing metrics.
        evidence['temporal_spread_score'] = row.get('temporal_spread_score')
        evidence['active_buckets'] = active_buckets
        evidence['max_ports_in_bucket'] = row.get('max_ports_in_bucket')

    pattern_tag = row.get('pattern_tag', 'unknown')
    severity = row['_severity']

    if pattern_tag == 'confirmed_scan' or severity == Severity.HIGH:
        next_steps = [
            "Pivot to conn.log to review full connection history for this source.",
            "Check reverse DNS for the source host.",
            "Look up source IP on Shodan for open services and prior reports.",
        ]
    elif pattern_tag == 'bittorrent':
        next_steps = [
            "Expected behavior if this host runs BitTorrent.",
            "Add source to allowlist to suppress if BitTorrent is authorized.",
        ]
    # 'http_browsing' is included because _to_severity rates it as benign
    # alongside 'https_browsing'; it previously fell through to the generic
    # "assess scan intent" guidance, which contradicted its LOW severity.
    elif pattern_tag in ('iot_discovery', 'dns_resolver',
                         'https_browsing', 'http_browsing'):
        next_steps = [
            "Known benign traffic pattern — add source to allowlist to suppress.",
        ]
    elif scan_type == 'slow' and pattern_tag == 'slow_scan':
        next_steps = [
            "Pivot to conn.log to review full connection history for this source.",
            "Check reverse DNS for the source host.",
            "Look up source IP on Shodan.",
            f"Note temporal spread: activity paced across {active_buckets} time windows.",
        ]
    else:
        next_steps = [f"Review conn.log for {src} to assess scan intent."]

    return Finding(
        detector='scan',
        severity=severity,
        title=title,
        description=description,
        evidence=evidence,
        next_steps=next_steps,
        ts_generated=datetime.now(tz=timezone.utc),
        data_window=data_window,
    )
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
# ── Detector entry point ──────────────────────────────────────────────────────
|
|
689
|
+
|
|
690
|
+
def run(context: DetectorContext) -> list[Finding]:
    """Detect port scan activity: vertical, horizontal, block, and slow scans.

    Pipeline:
      1. Merge ``context.config`` over ``DEFAULT_CONFIG``.
      2. Fetch the ``conn*.log*`` frame and pre-filter it against the home
         networks (``context.home_net``, or ``_DEFAULT_HOME_NET`` when unset).
      3. Run the four detection passes and pool their result dicts.
      4. Deduplicate on ``(scan_type, src, dst, port)``, keeping the widest
         result for each key.
      5. Classify rows that have no ``pattern_tag`` yet, assign a severity,
         and sort by severity, then by descending ``scan_state_ratio``.

    Returns an empty list when there is no connection data or no pass
    produces a result.
    """
    settings: dict = {**DEFAULT_CONFIG, **context.config}
    networks = list(context.home_net) if context.home_net else list(_DEFAULT_HOME_NET)

    conn = context.logs.get('conn*.log*')
    if conn is None or conn.empty:
        return []

    conn = _prefilter(conn, networks)
    if conn.empty:
        return []

    candidates: list[dict] = []
    for detect in (_detect_vertical, _detect_horizontal, _detect_block, _detect_slow):
        candidates.extend(detect(conn, settings))

    if not candidates:
        return []

    def _breadth(entry: dict) -> int:
        # Larger of the two fan-out counts; missing or None counts as 0.
        return max(entry.get('distinct_ports') or 0, entry.get('distinct_hosts') or 0)

    # Deduplicate: keep highest-breadth result per unique (scan_type, src, dst, port)
    widest_by_key: dict[tuple, dict] = {}
    for entry in candidates:
        ident = (entry['scan_type'], entry.get('src'), entry.get('dst'), entry.get('port'))
        width = _breadth(entry)
        if ident not in widest_by_key or width > _breadth(widest_by_key[ident]):
            widest_by_key[ident] = entry

    unique = list(widest_by_key.values())

    for entry in unique:
        # A row that already has a pattern_tag keeps it and is not re-classified.
        if 'pattern_tag' not in entry:
            tag, notes = _classify(entry)
            entry['pattern_tag'] = tag
            entry['pattern_notes'] = notes
        entry['_severity'] = _to_severity(entry)

    rank = {Severity.HIGH: 0, Severity.MEDIUM: 1, Severity.LOW: 2, Severity.INFO: 3}

    def _priority(entry: dict) -> tuple:
        # Most severe first; ties broken by the higher scan-state ratio.
        return rank[entry['_severity']], -entry.get('scan_state_ratio', 0)

    unique.sort(key=_priority)

    return [_make_finding(entry, context.data_window) for entry in unique]
|