PyPI - loghunter-cli - Versions diffs - 0.1.0.dev0__py3-none-any.whl - Mend

loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

loghunter/__init__.py +3 -0
loghunter/cli.py +1108 -0
loghunter/cli_init.py +567 -0
loghunter/common/__init__.py +1 -0
loghunter/common/allowlist.py +436 -0
loghunter/common/clustering.py +326 -0
loghunter/common/config.py +221 -0
loghunter/common/display.py +323 -0
loghunter/common/errors.py +45 -0
loghunter/common/finding.py +239 -0
loghunter/common/loader/__init__.py +136 -0
loghunter/common/loader/diagnostics.py +94 -0
loghunter/common/loader/discovery.py +335 -0
loghunter/common/loader/io.py +76 -0
loghunter/common/loader/pipeline.py +1010 -0
loghunter/common/loader/sniff.py +184 -0
loghunter/common/loader/types.py +207 -0
loghunter/common/loader/windowing.py +523 -0
loghunter/common/output.py +93 -0
loghunter/common/paths.py +105 -0
loghunter/common/sources.py +392 -0
loghunter/data/allowlist/connections.txt +50 -0
loghunter/data/allowlist/domains_devices.txt +5 -0
loghunter/data/allowlist/domains_homelab.txt +5 -0
loghunter/data/allowlist/domains_universal.txt +125 -0
loghunter/data/config_example.toml +144 -0
loghunter/detectors/__init__.py +5 -0
loghunter/detectors/auth.py +27 -0
loghunter/detectors/aws.py +671 -0
loghunter/detectors/beacon.py +258 -0
loghunter/detectors/dns.py +778 -0
loghunter/detectors/dnsblock.py +29 -0
loghunter/detectors/duration.py +178 -0
loghunter/detectors/protocol.py +26 -0
loghunter/detectors/scan.py +735 -0
loghunter/detectors/ssl.py +25 -0
loghunter/detectors/syslog.py +266 -0
loghunter/detectors/weird.py +27 -0
loghunter/digest/__init__.py +43 -0
loghunter/digest/_stats.py +182 -0
loghunter/digest/blob.py +698 -0
loghunter/digest/cloudtrail.py +341 -0
loghunter/digest/conn.py +367 -0
loghunter/digest/dns.py +364 -0
loghunter/digest/syslog.py +269 -0
loghunter/exporters/__init__.py +534 -0
loghunter/exporters/cloudtrail.py +499 -0
loghunter/exporters/splunk.py +222 -0
loghunter/outputs/__init__.py +1 -0
loghunter/outputs/allowlist.py +75 -0
loghunter/outputs/csv.py +70 -0
loghunter/outputs/email.py +44 -0
loghunter/outputs/html.py +99 -0
loghunter/outputs/json.py +77 -0
loghunter/outputs/text.py +1422 -0
loghunter/parsers/__init__.py +1 -0
loghunter/parsers/cloudtrail.py +287 -0
loghunter/parsers/dnsmasq.py +331 -0
loghunter/parsers/syslog.py +150 -0
loghunter/parsers/zeek.py +294 -0
loghunter/parsers/zeek_tsv.py +310 -0
loghunter/runner.py +1895 -0
loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
migrations/cloudtrail_parquet.py +59 -0
migrations/conn_fft.py +550 -0
migrations/conn_scan.py +1097 -0
migrations/dns_dbscan.py +520 -0
migrations/get_syslog.py +402 -0
migrations/syslog_drain3.py +479 -0
scratch/junk/parquet.py +59 -0
tests/__init__.py +1 -0
tests/_cloudtrail_fakes.py +116 -0
tests/conftest.py +17 -0
tests/test_allowlist_defaults_accessor.py +90 -0
tests/test_architecture_spine.py +302 -0
tests/test_aws_detector.py +504 -0
tests/test_be_like_water.py +106 -0
tests/test_cli_help.py +342 -0
tests/test_cli_multi_positional.py +458 -0
tests/test_cloudtrail_exporter.py +631 -0
tests/test_cloudtrail_exporter_botocore.py +207 -0
tests/test_cloudtrail_parser.py +393 -0
tests/test_clustering.py +85 -0
tests/test_clustering_interruptible.py +404 -0
tests/test_config_cli.py +1006 -0
tests/test_config_example_drift.py +164 -0
tests/test_digest_blob.py +1237 -0
tests/test_digest_cli.py +1040 -0
tests/test_digest_cloudtrail.py +980 -0
tests/test_digest_conn.py +1189 -0
tests/test_digest_dns.py +770 -0
tests/test_digest_stats.py +282 -0
tests/test_digest_syslog.py +724 -0
tests/test_display.py +370 -0
tests/test_dns_detector.py +1010 -0
tests/test_dnsmasq_parser.py +467 -0
tests/test_duration_detector.py +491 -0
tests/test_export_orchestrator_shape.py +153 -0
tests/test_init_wizard.py +707 -0
tests/test_loader.py +3639 -0
tests/test_loader_package_surface.py +115 -0
tests/test_loader_window_model.py +215 -0
tests/test_output_path_cascade.py +575 -0
tests/test_resolve_path.py +111 -0
tests/test_root_provenance.py +212 -0
tests/test_runner.py +2599 -0
tests/test_scan_detector.py +455 -0
tests/test_search_paths.py +50 -0
tests/test_sniff_orchestrator.py +373 -0
tests/test_sniff_recognizers.py +573 -0
tests/test_source_resolution_seam.py +471 -0
tests/test_sources.py +648 -0
tests/test_splunk_exporter.py +351 -0
tests/test_syslog_detector.py +458 -0
tests/test_syslog_parser.py +582 -0
tests/test_text_output.py +1225 -0
tests/test_zeek_tsv_parser.py +580 -0

loghunter/detectors/beacon.py ADDED Viewed

@@ -0,0 +1,258 @@
+"""Beacon detector — FFT-based periodic connection detection.
+Algorithm:
+- Bin connection timestamps into 30-second intervals (not 10s — 10s bins place a 60s
+  beacon at the Nyquist limit, producing harmonic artifacts)
+- Compute FFT over the binned time grid (resilient to data gaps vs raw inter-arrival)
+- Composite score: 40% spectral ratio + 40% peak prominence + 20% inverted jitter CV
+- Peak prominence: peak power relative to local spectral noise floor, normalized at 100x
+- Jitter CV computed on outlier-cleaned inter-arrival deltas
+- Minimum 20 connections per candidate flow
+Reference calibration: MRTG 60s SSH poll scores ~0.608, dominant period exactly 60.0s.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from typing import Any
+import numpy as np
+from loghunter.common.finding import DetectorContext, Finding, MethodTag, Severity
+# Identifier stamped onto every Finding this module emits (``Finding(detector=...)``).
+DETECTOR_NAME = "beacon"
+# NOTE(review): nothing in this module reads STATUS; presumably the detector
+# registry uses it to decide whether the detector is runnable — confirm there.
+STATUS = "available"
+# Log inputs this detector needs. ``run`` fetches its frame from
+# ``context.logs`` under the same "conn*.log*" string, so the two must match.
+REQUIRED_LOGS = [
+    {"source": "zeek_dir", "pattern": "conn*.log*"},
+]
+# This detector declares no optional inputs.
+OPTIONAL_LOGS: list[dict] = []
+# Fallback values ``run`` uses for any key missing from ``context.config``.
+DEFAULT_CONFIG = {
+    "threshold": 0.5,  # minimum beacon_score for a flow to be reported
+    "min_connections": 20,  # flows with fewer filtered rows are skipped unscored
+    "bin_seconds": 30,  # width of each time bin fed to the FFT
+}
+# NOTE(review): not referenced elsewhere in this module; presumably metadata
+# describing the detection technique for display — confirm against MethodTag.
+DETECTOR_METHOD = MethodTag("FFT", named=True)
+# Period range to consider (seconds). Outside this, FFT peaks are ignored.
+# NOTE(review): the spectrum is built with rfftfreq(n_bins, d=bin_size), whose
+# highest frequency is at most 1 / (2 * bin_size). At the default 30 s bins no
+# period below 60 s exists in the spectrum, so _MIN_PERIOD only constrains the
+# result when bin_seconds is below 22.5.
+_MIN_PERIOD = 45
+_MAX_PERIOD = 7200
+def run(context: DetectorContext) -> list[Finding]:
+    """Score every qualifying conn flow for periodicity and report the beacons.
+    Flows are keyed by (src, dst, port, proto). A flow is scored only if its
+    rows survive ``_filter_conn`` and number at least ``min_connections``; it is
+    reported only if its ``beacon_score`` reaches ``threshold``. Each setting is
+    read from ``context.config`` and falls back to ``DEFAULT_CONFIG``.
+    Returns the findings ordered from highest to lowest beacon score, or an
+    empty list when there is no usable conn data.
+    """
+    settings = context.config
+    threshold: float = settings.get("threshold", DEFAULT_CONFIG["threshold"])
+    min_conns: int = settings.get("min_connections", DEFAULT_CONFIG["min_connections"])
+    bin_size: int = settings.get("bin_seconds", DEFAULT_CONFIG["bin_seconds"])
+    conn = context.logs.get("conn*.log*")
+    if conn is None or conn.empty:
+        return []
+    conn = _filter_conn(conn)
+    if conn.empty:
+        return []
+    flow_columns = ["src", "dst", "port", "proto"]
+    findings: list[Finding] = []
+    for flow_key, rows in conn.groupby(flow_columns):
+        if len(rows) < min_conns:
+            continue
+        src, dst, port, proto = flow_key
+        # The scorer diffs consecutive timestamps, so hand it an ascending array.
+        timestamps = rows["ts"].sort_values().to_numpy(dtype=float)
+        scored = _compute_beacon_score(timestamps, bin_size)
+        if scored is None:
+            continue
+        if scored["beacon_score"] < threshold:
+            continue
+        finding = _make_finding(
+            str(src), str(dst), int(port), str(proto),
+            scored, rows, context.data_window,
+        )
+        findings.append(finding)
+    return sorted(findings, key=lambda f: f.evidence["beacon_score"], reverse=True)
+def _filter_conn(df: Any) -> Any:
+    """Reduce a conn frame to the rows eligible for beacon scoring.
+    A row is kept only when all of the following hold:
+    - ``conn_state`` is "SF" or "S1"
+    - ``dst`` is not multicast/broadcast (per ``_is_multicast_or_broadcast``)
+    - neither ``src`` nor ``dst`` starts with "fe80:" (IPv6 link-local)
+    - ``local_orig`` is True
+    - ``bytes`` is not null
+    Returns a new frame; the frame passed in is not modified.
+    """
+    # Copy after the first cut so later steps never alias the caller's frame.
+    df = df[df["conn_state"].isin(["SF", "S1"])].copy()
+    df = df[~df["dst"].map(_is_multicast_or_broadcast)]
+    # na=False: a missing address is treated as "not link-local" and kept here.
+    df = df[~df["src"].str.startswith("fe80:", na=False)]
+    df = df[~df["dst"].str.startswith("fe80:", na=False)]
+    # Element-wise comparison on purpose: null/NaN compares unequal and is dropped.
+    df = df[df["local_orig"] == True]  # noqa: E712
+    df = df[df["bytes"].notna()]
+    return df
+def _is_multicast_or_broadcast(ip: str) -> bool:
+    """Return True when *ip* looks like a multicast or broadcast destination.
+    Matches:
+    - IPv4 multicast, 224.0.0.0/4 (first octet 224 through 239)
+    - IPv4 addresses whose first octet is 255 (limited broadcast)
+    - any address ending in ".255" — a heuristic for directed broadcast that
+      assumes the final octet is the host part
+    - IPv6 multicast, ff00::/8, compared case-insensitively
+    Non-string values (e.g. NaN from a missing field) return False.
+    """
+    if not isinstance(ip, str):
+        return False
+    addr = ip.lower()
+    if addr.endswith(".255"):
+        return True
+    if ":" in addr:
+        # A group shorter than four hex digits has implicit leading zeros
+        # ("ff0::1" is 0ff0::1), so only a full four-digit "ffxx" first group
+        # puts the address inside ff00::/8.
+        first_group = addr.partition(":")[0]
+        return len(first_group) == 4 and first_group.startswith("ff")
+    first_octet, dot, _ = addr.partition(".")
+    if not dot or not first_octet.isdecimal():
+        return False
+    octet = int(first_octet)
+    return 224 <= octet <= 239 or octet == 255
+def _compute_beacon_score(
+    ts_array: np.ndarray,
+    bin_size: int = 30,
+) -> dict[str, Any] | None:
+    """Score a single flow's connection timestamps for periodic beaconing via FFT.
+    Args:
+        ts_array: connection timestamps as floats in the same unit as
+            ``bin_size`` (seconds), sorted ascending — the jitter term takes
+            ``np.diff`` of the array as given.
+        bin_size: width of each time bin; must be positive.
+    Returns:
+        A dict of rounded metrics (``beacon_score``, ``dominant_period``,
+        ``dominant_period_m``, ``spectral_ratio``, ``prominence``,
+        ``prominence_norm``, ``jitter_cv``, ``conn_count``, ``occupancy``), or
+        None if the flow cannot be scored (fewer than 10 timestamps, no variance
+        in the binned counts, or no spectral energy at a period inside
+        ``_MIN_PERIOD``..``_MAX_PERIOD``).
+    Why binning over raw inter-arrival deltas: gaps produce delta outliers that
+    corrupt FFT results; binning represents gaps as zero-count bins, preserving
+    the periodicity signal.
+    Why prominence alongside spectral ratio: sparse binary signals spread energy
+    across harmonics, keeping the absolute spectral ratio low even for perfectly
+    periodic flows. Prominence measures peak power above the local noise floor,
+    robust to harmonic spreading.
+    """
+    if len(ts_array) < 10:
+        return None
+    # --- Bin the timestamps onto a regular grid of connection counts ---
+    t_start = ts_array.min()
+    t_end = ts_array.max()
+    n_bins = int((t_end - t_start) / bin_size) + 1
+    bin_idx = ((ts_array - t_start) / bin_size).astype(int)
+    counts = np.zeros(n_bins)
+    # add.at accumulates repeated indices; plain fancy-index += would not.
+    np.add.at(counts, bin_idx, 1)
+    std = counts.std()
+    if std == 0:
+        return None
+    # --- Magnitude spectrum of the standardised count series ---
+    counts_norm = (counts - counts.mean()) / std
+    fft_mag = np.abs(np.fft.rfft(counts_norm))
+    freqs = np.fft.rfftfreq(n_bins, d=bin_size)
+    fft_mag[0] = 0  # zero DC component
+    with np.errstate(divide="ignore"):
+        periods = np.where(freqs > 0, 1.0 / freqs, np.inf)
+    # Only peaks whose period lies in the configured range may be dominant.
+    mask = (periods >= _MIN_PERIOD) & (periods <= _MAX_PERIOD)
+    fft_masked = np.where(mask, fft_mag, 0)
+    if fft_masked.max() == 0:
+        return None
+    peak_idx = int(fft_masked.argmax())
+    peak_period = float(periods[peak_idx])
+    peak_power = float(fft_mag[peak_idx])
+    # Spectral ratio is taken against the whole non-DC spectrum, not just the mask.
+    total_power = float(fft_mag[1:].sum())
+    if total_power == 0:
+        return None
+    spectral_ratio = peak_power / total_power
+    # --- Prominence: peak vs. median of its neighbours (peak itself excluded) ---
+    window = max(10, int(peak_idx * 0.05))
+    lo = max(1, peak_idx - window)
+    hi = min(len(fft_mag) - 1, peak_idx + window)
+    local = np.concatenate([fft_mag[lo:peak_idx], fft_mag[peak_idx + 1:hi + 1]])
+    noise_floor = float(np.median(local)) if len(local) > 0 else 1.0
+    prominence = peak_power / (noise_floor + 1e-10)
+    prominence_norm = min(prominence / 100.0, 1.0)
+    # --- Jitter: coefficient of variation of outlier-cleaned inter-arrivals ---
+    deltas = np.diff(ts_array)
+    d_mean = deltas.mean()
+    d_std = deltas.std()
+    # `<=` rather than `<`: when every delta is identical d_std is 0, and a
+    # strict comparison would discard all of them, scoring a perfectly regular
+    # beacon as maximally jittery (jitter_cv = 1.0) instead of 0.
+    clean_deltas = deltas[np.abs(deltas - d_mean) <= 3 * d_std]
+    if len(clean_deltas) > 1 and clean_deltas.mean() > 0:
+        jitter_cv = float(clean_deltas.std() / clean_deltas.mean())
+    else:
+        jitter_cv = 1.0
+    # Composite: 40% spectral ratio, 40% prominence, 20% inverted (capped) jitter.
+    beacon_score = (
+        0.4 * spectral_ratio +
+        0.4 * prominence_norm +
+        0.2 * (1.0 - min(jitter_cv, 1.0))
+    )
+    return {
+        "beacon_score": round(beacon_score, 4),
+        "dominant_period": round(peak_period, 1),
+        "dominant_period_m": round(peak_period / 60, 2),
+        "spectral_ratio": round(spectral_ratio, 4),
+        "prominence": round(prominence, 2),
+        "prominence_norm": round(prominence_norm, 4),
+        "jitter_cv": round(jitter_cv, 4),
+        "conn_count": len(ts_array),
+        "occupancy": round(float((counts > 0).sum()) / n_bins, 4),
+    }
+def _make_finding(
+    src: str,
+    dst: str,
+    port: int,
+    proto: str,
+    score_data: dict[str, Any],
+    group: Any,
+    data_window: tuple[datetime, datetime],
+) -> Finding:
+    """Package one scored flow as a ``Finding``.
+    Args:
+        src, dst, port, proto: the flow key, used in the title, description,
+            next steps and evidence.
+        score_data: metrics dict as returned by ``_compute_beacon_score``.
+        group: the flow's rows; only its ``bytes`` column is read here.
+        data_window: passed through unchanged to the ``Finding``.
+    Returns:
+        A ``Finding`` whose severity is HIGH for a score of 0.7 or more, MEDIUM
+        for 0.5 or more, and LOW otherwise. ``evidence`` holds every key of
+        ``score_data`` plus the flow identity, the formatted period and the mean
+        of the non-null ``bytes`` values (0.0 when there are none).
+    """
+    score = score_data["beacon_score"]
+    period_s = score_data["dominant_period"]
+    period_m = score_data["dominant_period_m"]
+    if score >= 0.7:
+        severity = Severity.HIGH
+    elif score >= 0.5:
+        severity = Severity.MEDIUM
+    else:
+        severity = Severity.LOW
+    # Show minutes once the period reaches two minutes, whole seconds below that.
+    period_str = f"{period_m:.1f}m" if period_m >= 2 else f"{period_s:.0f}s"
+    title = f"{src} → {dst}:{port}/{proto}"
+    bytes_s = group["bytes"].dropna()
+    bytes_mean = round(float(bytes_s.mean()), 1) if len(bytes_s) > 0 else 0.0
+    description = (
+        f"Flow {src} → {dst}:{port}/{proto} shows periodic beaconing with a dominant "
+        f"period of {period_str} (score={score:.4f}). "
+        f"Spectral ratio: {score_data['spectral_ratio']:.4f}, "
+        f"peak prominence: {score_data['prominence']:.2f}, "
+        f"jitter CV: {score_data['jitter_cv']:.4f}. "
+        f"Mean payload: {bytes_mean:.0f} bytes."
+    )
+    next_steps = [
+        f"Identify the process on {src} making connections every {period_str}",
+        f"Pivot to dns.log — search for lookups resolving to {dst}",
+        f"Check {dst} on VirusTotal, Shodan, and ASN lookup",
+        f"Review full history: zeek-cut id.orig_h id.resp_h id.resp_p ts | grep '{dst}'",
+        "Use --export-allowlist to stage this flow for allowlisting if known-good",
+    ]
+    # Raw metrics first, then the flow identity and derived display values.
+    evidence = {
+        **score_data,
+        "period_str": period_str,
+        "src_ip": src,
+        "dst_ip": dst,
+        "dst_port": port,
+        "proto": proto,
+        "bytes_mean": bytes_mean,
+    }
+    return Finding(
+        detector=DETECTOR_NAME,
+        severity=severity,
+        title=title,
+        description=description,
+        evidence=evidence,
+        next_steps=next_steps,
+        ts_generated=datetime.now(timezone.utc),
+        data_window=data_window,
+    )