loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,29 @@
1
+ """Dnsblock detector — behavioral anomalies in blocked DNS query patterns. (planned)
2
+
3
+ Surfaces who is querying known-bad domains, how often, with what
4
+ persistence, and across what spread of clients. Complements the dns
5
+ detector: DNS clustering finds *unknown-bad* domains by behavioral
6
+ fingerprint; dnsblock finds *known-bad-domain access patterns* by client
7
+ behavior. Pi-hole/dnsmasq only — needs the `was_blocked` column that
8
+ Zeek does not carry.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from loghunter.common.finding import DetectorContext, Finding
14
+
15
# Name and implementation status of this detector module. The status is
# "planned" because run() below only raises NotImplementedError.
DETECTOR_NAME = "dnsblock"
STATUS = "planned"

# Log inputs, each declared as a source key plus a filename glob.
REQUIRED_LOGS = [dict(source="pihole_dir", pattern="pihole*.log*")]
OPTIONAL_LOGS: list[dict] = list()

# No tunable settings are defined yet.
DEFAULT_CONFIG: dict = dict()
25
+
26
+
27
def run(context: DetectorContext) -> list[Finding]:
    """Placeholder entry point for the planned dnsblock detector.

    The detector is intended to detect behavioral anomalies in blocked DNS
    query patterns, but no analysis exists yet: every call fails
    immediately and ``context`` is never inspected.

    Raises:
        NotImplementedError: Always.
    """
    message = "dnsblock detector is planned — not yet implemented"
    raise NotImplementedError(message)
@@ -0,0 +1,178 @@
1
+ """Duration detector — long-lived connection detection from Zeek conn.log.
2
+
3
+ Flags connections that remain open for an unusually long time, which may indicate
4
+ tunneling, C2 keep-alive sessions, or data exfiltration channels.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone
10
+
11
+ import pandas as pd
12
+
13
+ from loghunter.common.finding import DetectorContext, Finding, MethodTag, Severity
14
+
15
# Name and implementation status of this detector module.
DETECTOR_NAME = "duration"
STATUS = "available"

# Log inputs, each declared as a source key plus a filename glob.
REQUIRED_LOGS = [dict(source="zeek_dir", pattern="conn*.log*")]
OPTIONAL_LOGS: list[dict] = list()

# Baseline settings; run() overlays context.config on top of these.
# 30 minutes: flows shorter than this are never reported.
DEFAULT_CONFIG = dict(min_duration_seconds=30 * 60)
27
+
28
# Method label for this detector. It is not read anywhere in the visible part
# of this module; presumably consumed by the caller/runner — confirm.
DETECTOR_METHOD = MethodTag("heuristics", named=False)

# Severity cut-offs in seconds, compared against in _to_severity().
_DURATION_HIGH = 4 * 60 * 60  # 4 hours
_DURATION_MEDIUM = 2 * 60 * 60  # 2 hours
32
+
33
+
34
+ def _duration_str(seconds: float) -> str:
35
+ """Return a compact human-readable string for a duration in seconds."""
36
+ s = int(seconds)
37
+ if s < 60:
38
+ return f"{s}s"
39
+ if s < 3600:
40
+ m, rem = divmod(s, 60)
41
+ return f"{m}m {rem}s"
42
+ if s < 86400:
43
+ h, rem = divmod(s, 3600)
44
+ return f"{h}h {rem // 60}m"
45
+ d, rem = divmod(s, 86400)
46
+ return f"{d}d {rem // 3600}h"
47
+
48
+
49
def _to_severity(duration: float) -> Severity:
    """Translate a connection duration in seconds into a severity level.

    Durations at or above ``_DURATION_HIGH`` are HIGH, those at or above
    ``_DURATION_MEDIUM`` are MEDIUM, and everything else is LOW.
    """
    # Tiers are checked from the highest floor down; the first match wins.
    tiers = (
        (_DURATION_HIGH, Severity.HIGH),
        (_DURATION_MEDIUM, Severity.MEDIUM),
    )
    for floor, level in tiers:
        if duration >= floor:
            return level
    return Severity.LOW
55
+
56
+
57
def _epoch_to_iso(epoch_seconds: float) -> str:
    """Render unix epoch seconds as a timezone-aware UTC ISO-8601 string."""
    return datetime.fromtimestamp(float(epoch_seconds), tz=timezone.utc).isoformat()


def run(context: DetectorContext) -> list[Finding]:
    """Flag flows exceeding the minimum duration threshold, grouped by (src, dst, port, proto).

    Reads the ``"conn*.log*"`` frame from ``context.logs``. Only the
    ``duration`` column is required; ``src``, ``dst``, ``proto``, ``port``,
    ``bytes``, ``conn_state`` and ``ts`` are used when present. Rows whose
    duration is missing, unparseable, non-positive, or below
    ``min_duration_seconds`` (``DEFAULT_CONFIG`` overlaid with
    ``context.config``) are ignored.

    One finding is produced per distinct (src, dst, port, proto) tuple.
    Severity and the per-second byte rate come from the longest connection
    in the group; byte totals, connection states and first/last seen cover
    the whole group.

    Args:
        context: Detector context supplying ``config``, ``logs`` and
            ``data_window``.

    Returns:
        Findings sorted by ``max_duration_seconds``, longest first. Empty
        when the log is missing or empty, has no ``duration`` column, or no
        row passes the threshold.
    """
    cfg: dict = {**DEFAULT_CONFIG, **context.config}
    min_dur = cfg["min_duration_seconds"]

    df = context.logs.get("conn*.log*")
    if df is None or df.empty:
        return []
    if "duration" not in df.columns:
        return []

    # Work on a copy so the caller's frame is never mutated.
    df = df.copy()
    df["duration"] = pd.to_numeric(df["duration"], errors="coerce")

    df = df[df["duration"].notna() & (df["duration"] > 0)]
    if df.empty:
        return []

    df = df[df["duration"] >= min_dur]
    if df.empty:
        return []

    # Give the surviving rows a fresh unique index. With duplicate labels
    # (e.g. several log files concatenated as-is) ``.loc[idxmax()]`` would
    # return a DataFrame instead of a single row.
    df = df.reset_index(drop=True)

    # Normalize grouping keys. Port may be NaN; fill with a sentinel so groupby
    # doesn't silently drop portless rows. dropna=False is a second safety net.
    for col in ("src", "dst", "proto"):
        if col not in df.columns:
            df[col] = ""
    if "port" in df.columns:
        df["port"] = pd.to_numeric(df["port"], errors="coerce")
    else:
        df["port"] = float("nan")
    df["_port_key"] = df["port"].fillna(-1).astype(int)

    # Coerce the optional numeric columns once, the same way as duration and
    # port, so placeholder or string values become NaN instead of corrupting
    # the sum or raising inside the loop.
    has_bytes = "bytes" in df.columns
    if has_bytes:
        df["bytes"] = pd.to_numeric(df["bytes"], errors="coerce")
    has_ts = "ts" in df.columns
    if has_ts:
        df["ts"] = pd.to_numeric(df["ts"], errors="coerce")
    has_states = "conn_state" in df.columns

    findings: list[Finding] = []
    grouped = df.groupby(
        ["src", "dst", "_port_key", "proto"], sort=False, dropna=False
    )
    for (src, dst, port_key, proto), group in grouped:
        port: int | None = None if port_key == -1 else int(port_key)

        max_row = group.loc[group["duration"].idxmax()]
        max_dur = round(float(max_row["duration"]), 1)
        max_dur_str = _duration_str(max_dur)

        # total_bytes: None if the column is absent or all null.
        # avg_bytes_per_second: derived from the max-duration row, not the
        # group total.
        total_bytes: int | None = None
        avg_bps: float | None = None
        if has_bytes:
            known_bytes = group["bytes"].dropna()
            if not known_bytes.empty:
                total_bytes = int(known_bytes.sum())
            row_bytes = max_row["bytes"]
            # max_dur can round down to 0.0 for tiny thresholds; avoid
            # dividing by zero.
            if pd.notna(row_bytes) and max_dur > 0:
                avg_bps = round(float(row_bytes) / max_dur, 1)

        # conn_states: distinct non-null values, sorted; empty if column absent.
        states: list[str] = (
            sorted(group["conn_state"].dropna().unique().tolist())
            if has_states
            else []
        )

        # first_seen / last_seen: UTC ISO strings from unix epoch seconds.
        first_seen: str | None = None
        last_seen: str | None = None
        if has_ts:
            ts_series = group["ts"].dropna()
            if not ts_series.empty:
                first_seen = _epoch_to_iso(ts_series.min())
                last_seen = _epoch_to_iso(ts_series.max())

        port_str = str(port) if port is not None else "?"
        title = f"{src} → {dst}:{port_str}/{proto}"

        findings.append(Finding(
            detector=DETECTOR_NAME,
            severity=_to_severity(max_dur),
            title=title,
            description=(
                "A long-lived connection may indicate tunneling, a C2 keep-alive session, "
                "or an active data exfiltration channel."
            ),
            evidence={
                "src": src,
                "dst": dst,
                "port": port,
                "proto": proto,
                "max_duration_seconds": max_dur,
                "max_duration_str": max_dur_str,
                "connection_count": len(group),
                "total_bytes": total_bytes,
                "avg_bytes_per_second": avg_bps,
                "conn_states": states,
                "first_seen": first_seen,
                "last_seen": last_seen,
            },
            next_steps=[
                f"Review {max_dur_str} connection in conn.log: "
                f"zeek-cut id.orig_h id.resp_h id.resp_p duration conn_state < conn.log | grep {src}",
                "Check if this is expected infrastructure (VPN, backup, monitoring) — if so, add to allowlist",
                f"For external destinations, run: whois {dst}",
            ],
            ts_generated=datetime.now(tz=timezone.utc),
            data_window=context.data_window,
        ))

    findings.sort(key=lambda f: f.evidence["max_duration_seconds"], reverse=True)
    return findings
@@ -0,0 +1,26 @@
1
+ """Protocol detector — per-protocol autoencoder on connection metadata. (planned)
2
+
3
+ Trains a per-protocol autoencoder on connection feature vectors derived from
4
+ Zeek conn.log. High reconstruction error indicates anomalous session behavior
5
+ for that protocol. Requires session-level feature data.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from loghunter.common.finding import DetectorContext, Finding
11
+
12
# Name and implementation status of this detector module. The status is
# "planned" because run() below only raises NotImplementedError.
DETECTOR_NAME = "protocol"
STATUS = "planned"

# Log inputs, each declared as a source key plus a filename glob.
REQUIRED_LOGS = [dict(source="zeek_dir", pattern="conn*.log*")]
OPTIONAL_LOGS: list[dict] = list()

# No tunable settings are defined yet.
DEFAULT_CONFIG: dict = dict()
22
+
23
+
24
def run(context: DetectorContext) -> list[Finding]:
    """Placeholder entry point for the planned protocol detector.

    The detector is intended to detect anomalous sessions using
    per-protocol autoencoder reconstruction error, but no analysis exists
    yet: every call fails immediately and ``context`` is never inspected.

    Raises:
        NotImplementedError: Always.
    """
    message = "protocol detector is planned — not yet implemented"
    raise NotImplementedError(message)