loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Dnsblock detector — behavioral anomalies in blocked DNS query patterns. (planned)
|
|
2
|
+
|
|
3
|
+
Surfaces who is querying known-bad domains, how often, with what
|
|
4
|
+
persistence, and across what spread of clients. Complements the dns
|
|
5
|
+
detector: DNS clustering finds *unknown-bad* domains by behavioral
|
|
6
|
+
fingerprint; dnsblock finds *known-bad-domain access patterns* by client
|
|
7
|
+
behavior. Pi-hole/dnsmasq only — needs the `was_blocked` column that
|
|
8
|
+
Zeek does not carry.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from loghunter.common.finding import DetectorContext, Finding
|
|
14
|
+
|
|
15
|
+
DETECTOR_NAME = "dnsblock"
|
|
16
|
+
STATUS = "planned"
|
|
17
|
+
|
|
18
|
+
REQUIRED_LOGS = [
|
|
19
|
+
{"source": "pihole_dir", "pattern": "pihole*.log*"},
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
OPTIONAL_LOGS: list[dict] = []
|
|
23
|
+
|
|
24
|
+
DEFAULT_CONFIG: dict = {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run(context: DetectorContext) -> list[Finding]:
    """Placeholder entry point for the planned dnsblock detector.

    The detector has no implementation yet: ``context`` is never read and
    every call raises ``NotImplementedError``.
    """
    message = "dnsblock detector is planned — not yet implemented"
    raise NotImplementedError(message)
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Duration detector — long-lived connection detection from Zeek conn.log.
|
|
2
|
+
|
|
3
|
+
Flags connections that remain open for an unusually long time, which may indicate
|
|
4
|
+
tunneling, C2 keep-alive sessions, or data exfiltration channels.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from loghunter.common.finding import DetectorContext, Finding, MethodTag, Severity
|
|
14
|
+
|
|
15
|
+
DETECTOR_NAME = "duration"
|
|
16
|
+
STATUS = "available"
|
|
17
|
+
|
|
18
|
+
REQUIRED_LOGS = [
|
|
19
|
+
{"source": "zeek_dir", "pattern": "conn*.log*"},
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
OPTIONAL_LOGS: list[dict] = []
|
|
23
|
+
|
|
24
|
+
DEFAULT_CONFIG = {
|
|
25
|
+
"min_duration_seconds": 1800,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
DETECTOR_METHOD = MethodTag("heuristics", named=False)
|
|
29
|
+
|
|
30
|
+
_DURATION_HIGH = 14400 # 4 hours
|
|
31
|
+
_DURATION_MEDIUM = 7200 # 2 hours
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _duration_str(seconds: float) -> str:
|
|
35
|
+
"""Return a compact human-readable string for a duration in seconds."""
|
|
36
|
+
s = int(seconds)
|
|
37
|
+
if s < 60:
|
|
38
|
+
return f"{s}s"
|
|
39
|
+
if s < 3600:
|
|
40
|
+
m, rem = divmod(s, 60)
|
|
41
|
+
return f"{m}m {rem}s"
|
|
42
|
+
if s < 86400:
|
|
43
|
+
h, rem = divmod(s, 3600)
|
|
44
|
+
return f"{h}h {rem // 60}m"
|
|
45
|
+
d, rem = divmod(s, 86400)
|
|
46
|
+
return f"{d}d {rem // 3600}h"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _to_severity(duration: float) -> Severity:
    """Map a connection duration in seconds onto a severity level.

    Thresholds are checked from highest to lowest: at or above
    ``_DURATION_HIGH`` yields ``Severity.HIGH``, at or above
    ``_DURATION_MEDIUM`` yields ``Severity.MEDIUM``, anything else is
    ``Severity.LOW``.
    """
    thresholds = (
        (_DURATION_HIGH, Severity.HIGH),
        (_DURATION_MEDIUM, Severity.MEDIUM),
    )
    for floor, level in thresholds:
        if duration >= floor:
            return level
    return Severity.LOW
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _utc_iso(epoch_seconds: float) -> str:
    """Render unix epoch seconds as a timezone-aware UTC ISO-8601 string."""
    return datetime.fromtimestamp(float(epoch_seconds), tz=timezone.utc).isoformat()


def run(context: DetectorContext) -> list[Finding]:
    """Flag flows exceeding the minimum duration threshold, grouped by (src, dst, port, proto).

    Reads the ``"conn*.log*"`` frame from ``context.logs`` and the
    ``min_duration_seconds`` threshold from ``context.config`` (falling back
    to ``DEFAULT_CONFIG``). Rows whose ``duration`` is non-numeric, non-positive
    or below the threshold are discarded; the remainder are grouped by
    ``(src, dst, port, proto)`` and one ``Finding`` is emitted per group.

    Returns:
        Findings sorted by ``max_duration_seconds`` descending. An empty list
        when the log is absent or empty, has no ``duration`` column, or no row
        meets the threshold.
    """
    cfg: dict = {**DEFAULT_CONFIG, **context.config}
    min_dur = cfg["min_duration_seconds"]

    df = context.logs.get("conn*.log*")
    if df is None or df.empty or "duration" not in df.columns:
        return []

    # Coerce and filter on a detached Series so only the surviving rows are
    # copied, and the frame held by the context is never mutated.
    duration = pd.to_numeric(df["duration"], errors="coerce")
    keep = (
        duration.notna() & (duration > 0) & (duration >= min_dur)
    ).to_numpy(dtype=bool)
    if not keep.any():
        return []

    df = df.loc[keep].copy()
    df["duration"] = duration.loc[keep]

    # Normalize grouping keys. Port may be NaN; fill with sentinel so groupby
    # doesn't silently drop portless rows. dropna=False is a second safety net.
    for col in ("src", "dst", "proto"):
        if col not in df.columns:
            df[col] = ""
    if "port" in df.columns:
        df["port"] = pd.to_numeric(df["port"], errors="coerce")
    else:
        df["port"] = float("nan")
    df["_port_key"] = df["port"].fillna(-1).astype(int)

    # Column presence and numeric coercion are loop-invariant, so resolve them
    # once. "bytes" is coerced like duration/port/ts so string-typed values
    # are summed numerically instead of being concatenated.
    has_bytes = "bytes" in df.columns
    if has_bytes:
        df["bytes"] = pd.to_numeric(df["bytes"], errors="coerce")
    has_ts = "ts" in df.columns
    if has_ts:
        df["ts"] = pd.to_numeric(df["ts"], errors="coerce")
    has_states = "conn_state" in df.columns

    findings: list[Finding] = []
    for (src, dst, port_key, proto), group in df.groupby(
        ["src", "dst", "_port_key", "proto"], sort=False, dropna=False
    ):
        port: int | None = None if port_key == -1 else int(port_key)

        # Locate the longest flow by position rather than by index label:
        # label lookup returns several rows when the index has repeated
        # labels (e.g. frames concatenated from multiple log files).
        durations = group["duration"].to_numpy(dtype=float)
        top = int(durations.argmax())
        max_dur = round(float(durations[top]), 1)
        max_dur_str = _duration_str(max_dur)

        # total_bytes: None if column absent or all null.
        # avg_bytes_per_second: derived from the max-duration row, not group total.
        total_bytes: int | None = None
        avg_bps: float | None = None
        if has_bytes:
            known_bytes = group["bytes"].dropna()
            if not known_bytes.empty:
                total_bytes = int(known_bytes.sum())
            row_bytes = group["bytes"].iloc[top]
            # max_dur can round down to 0.0 for sub-0.05s flows; guard the division.
            if pd.notna(row_bytes) and max_dur > 0:
                avg_bps = round(float(row_bytes) / max_dur, 1)

        # conn_states: distinct non-null values, sorted; empty list if column absent
        states: list[str] = (
            sorted(group["conn_state"].dropna().unique().tolist())
            if has_states
            else []
        )

        # first_seen / last_seen: UTC ISO strings from unix epoch seconds
        first_seen: str | None = None
        last_seen: str | None = None
        if has_ts:
            stamps = group["ts"].dropna()
            if not stamps.empty:
                first_seen = _utc_iso(stamps.min())
                last_seen = _utc_iso(stamps.max())

        port_str = str(port) if port is not None else "?"
        title = f"{src} → {dst}:{port_str}/{proto}"

        findings.append(Finding(
            detector="duration",
            severity=_to_severity(max_dur),
            title=title,
            description=(
                "A long-lived connection may indicate tunneling, a C2 keep-alive session, "
                "or an active data exfiltration channel."
            ),
            evidence={
                "src": src,
                "dst": dst,
                "port": port,
                "proto": proto,
                "max_duration_seconds": max_dur,
                "max_duration_str": max_dur_str,
                "connection_count": len(group),
                "total_bytes": total_bytes,
                "avg_bytes_per_second": avg_bps,
                "conn_states": states,
                "first_seen": first_seen,
                "last_seen": last_seen,
            },
            next_steps=[
                f"Review {max_dur_str} connection in conn.log: zeek-cut id.orig_h id.resp_h id.resp_p duration conn_state < conn.log | grep {src}",
                "Check if this is expected infrastructure (VPN, backup, monitoring) — if so, add to allowlist",
                f"For external destinations, run: whois {dst}",
            ],
            ts_generated=datetime.now(tz=timezone.utc),
            data_window=context.data_window,
        ))

    findings.sort(key=lambda f: f.evidence["max_duration_seconds"], reverse=True)
    return findings
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Protocol detector — per-protocol autoencoder on connection metadata. (planned)
|
|
2
|
+
|
|
3
|
+
Trains a per-protocol autoencoder on connection feature vectors derived from
|
|
4
|
+
Zeek conn.log. High reconstruction error indicates anomalous session behavior
|
|
5
|
+
for that protocol. Requires session-level feature data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from loghunter.common.finding import DetectorContext, Finding
|
|
11
|
+
|
|
12
|
+
DETECTOR_NAME = "protocol"
|
|
13
|
+
STATUS = "planned"
|
|
14
|
+
|
|
15
|
+
REQUIRED_LOGS = [
|
|
16
|
+
{"source": "zeek_dir", "pattern": "conn*.log*"},
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
OPTIONAL_LOGS: list[dict] = []
|
|
20
|
+
|
|
21
|
+
DEFAULT_CONFIG: dict = {}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def run(context: DetectorContext) -> list[Finding]:
    """Placeholder entry point for the planned protocol detector.

    The detector has no implementation yet: ``context`` is never read and
    every call raises ``NotImplementedError``.
    """
    message = "protocol detector is planned — not yet implemented"
    raise NotImplementedError(message)
|