loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
loghunter/cli_init.py
ADDED
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
"""First-run setup wizard.
|
|
2
|
+
|
|
3
|
+
CLI-INTERNAL split off ``loghunter/cli.py`` — first-run UX REMAINS CLI-layer
|
|
4
|
+
ownership. This module owns the wizard; ``cli.py`` keeps dispatch and arg
|
|
5
|
+
validation. Nothing here imports detectors, runner, or outputs.
|
|
6
|
+
|
|
7
|
+
The wizard mostly works by hitting Enter: it LOOKS before it asks (detect +
|
|
8
|
+
profile what's on disk) and NEVER clobbers config a user already set. Path
|
|
9
|
+
profiling is glob + stat ONLY — never reads a log line.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import tomllib
|
|
17
|
+
from datetime import datetime, timedelta
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
# ── detection ─────────────────────────────────────────────────────────────────
|
|
21
|
+
#
|
|
22
|
+
# Detection looks at conventional public paths; nothing here ever reads a log
|
|
23
|
+
# LINE — stat + glob only. Constants are module-level so flow tests can
|
|
24
|
+
# monkeypatch them off the developer's real filesystem.
|
|
25
|
+
|
|
26
|
+
# Conventional Zeek log directories, probed in the order listed.
_ZEEK_CANDIDATES: tuple[str, ...] = (
    "/var/log/zeek",
    "/opt/zeek/logs",
    "/usr/local/zeek/logs",
    "/nsm/zeek/logs",
)

# Pi-hole probes as (probe, candidate_dir) pairs. A probe is either a literal
# file path or an absolute glob such as "/dir/*.log"; candidate_dir is what
# gets registered when the probe matches.
_PIHOLE_CANDIDATES: tuple[tuple[str, str], ...] = (
    ("/var/log/pihole/pihole.log", "/var/log/pihole"),
    ("/var/log/pihole.log", "/var/log"),
    ("/var/log/pihole/*.log", "/var/log/pihole"),
)

# The single directory probed for syslog.
_SYSLOG_CANDIDATE: str = "/var/log"

# Per-family Zeek globs; they feed both the {logs} summary and the size sum.
_ZEEK_GLOBS: tuple[str, ...] = (
    "conn*.log*",
    "dns*.log*",
    "ssl*.log*",
    "http*.log*",
    "weird*.log*",
    "notice*.log*",
)

# Deliberately narrow: even when the Pi-hole candidate dir is /var/log, only
# the Pi-hole file is profiled so unrelated syslog files don't inflate the size.
_PIHOLE_GLOB: str = "pihole.log*"

# Kept in step with the syslog detector's OPTIONAL_LOGS glob (defined outside
# this module) so the profile previews what will actually be analyzed.
_SYSLOG_GLOB: str = "*.log*"

# Upper bound on the number of matched files a single profile will collect.
_PROFILE_FILE_CAP: int = 5000

# Documentation link printed at the end of the wizard.
_DOCS_URL: str = "https://github.com/spiralbend/loghunter"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _detect_zeek() -> str | None:
    """Pick a Zeek log directory from ``_ZEEK_CANDIDATES``.

    The first candidate directory containing a ``conn*.log*`` entry is
    returned immediately. Failing that, the earliest candidate containing any
    ``*.log*`` entry is returned. Returns None when no candidate qualifies.
    Candidates that raise OSError while being probed are skipped.
    """
    first_with_any_log: str | None = None
    for candidate in _ZEEK_CANDIDATES:
        directory = Path(candidate)
        try:
            if directory.is_dir():
                # next(..., None) stops at the first match, like any() did.
                if next(directory.glob("conn*.log*"), None) is not None:
                    return candidate
                if (
                    first_with_any_log is None
                    and next(directory.glob("*.log*"), None) is not None
                ):
                    first_with_any_log = candidate
        except OSError:
            pass
    return first_with_any_log
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _detect_pihole() -> str | None:
    """Return the candidate dir of the first matching Pi-hole probe, else None.

    Literal probes match when the path is a regular file. Glob probes (those
    containing ``*``) are split into parent directory and pattern, because
    ``Path.glob`` is relative to its receiver; they match when the parent is
    a directory holding at least one entry for the pattern. A probe that
    raises OSError is skipped.
    """
    for probe, candidate_dir in _PIHOLE_CANDIDATES:
        probe_path = Path(probe)
        try:
            if "*" not in probe:
                hit = probe_path.is_file()
            else:
                folder = probe_path.parent
                hit = (
                    folder.is_dir()
                    and next(folder.glob(probe_path.name), None) is not None
                )
        except OSError:
            continue
        if hit:
            return candidate_dir
    return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _detect_syslog() -> str | None:
    """Return ``_SYSLOG_CANDIDATE`` when it is a directory; otherwise None.

    An OSError raised by the directory check is treated as "not found".
    """
    try:
        present = Path(_SYSLOG_CANDIDATE).is_dir()
    except OSError:
        return None
    if present:
        return _SYSLOG_CANDIDATE
    return None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _human_bytes(n: int) -> str:
    """Render a byte count as ``~<whole number> <unit>`` (B, KB, MB, GB, TB).

    Units are powers of 1024 and the quotient is floored, so 1.9 GB renders
    as ``~1 GB``. Anything of 1024**4 bytes or more is reported in TB. The
    leading ``~`` signals that the figure is glob-scoped, not a whole-dir sum.
    """
    if n < 1024:
        return f"~{n} B"
    for power, unit in enumerate(("KB", "MB", "GB"), start=1):
        # A value belongs to this unit while it is below the next power up.
        if n < 1024 ** (power + 1):
            return f"~{n // 1024 ** power} {unit}"
    return f"~{n // 1024 ** 4} TB"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _fresh_bucket(delta: timedelta) -> str:
    """Translate an age into the wizard's relative-time phrase.

    Buckets, by age in seconds: under an hour, under a day, under a week,
    under 30 days (reported in whole days), under 60 days (whole weeks), and
    everything older (whole 30-day months). Negative ages fall into the first
    bucket.
    """
    day = 86_400
    age = delta.total_seconds()

    if age < 3600:
        return "updated just now"
    if age < day:
        return "fresh today"
    if age < 7 * day:
        return "active this week"

    whole_days = int(age // day)
    if age < 30 * day:
        return f"last activity ~{whole_days} days ago"

    # Past a month the phrasing switches to the "stale" form.
    if age < 60 * day:
        count, unit = whole_days // 7, "weeks"
    else:
        count, unit = whole_days // 30, "months"
    return f"but it looks stale — nothing new in ~{count} {unit}"
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _profile_dir(
    path: str,
    globs: tuple[str, ...],
    *,
    logs_label: str | None,
    now: datetime | None = None,
) -> dict | None:
    """Summarize a candidate directory using glob + stat only.

    Args:
        path: Directory to profile; ``~`` is expanded.
        globs: Patterns evaluated relative to ``path``, in order.
        logs_label: Fixed text for the ``logs`` field. When None, ``logs`` is
            built from the glob "families" (the text before the first ``*``,
            trailing dots removed) that matched, in first-seen order.
        now: Reference time for the freshness phrase; defaults to
            ``datetime.now()``.

    Returns:
        A dict with ``size_bytes``, ``size_str``, ``fresh_str``, ``logs`` and
        ``bounded`` (True when collection stopped at ``_PROFILE_FILE_CAP``),
        or None when the directory is missing, globbing raised OSError,
        nothing matched, or the matched entries total zero bytes.

    Entries whose ``stat`` raises OSError are skipped rather than failing the
    whole profile.
    """
    root = Path(path).expanduser()
    try:
        if not root.is_dir():
            return None
    except OSError:
        return None

    hits: list[Path] = []
    families: list[str] = []  # first-seen order
    capped = False
    try:
        for pattern in globs:
            family = pattern.split("*", 1)[0].rstrip(".")  # "conn*.log*" -> "conn"
            count_before = len(hits)
            for entry in root.glob(pattern):
                hits.append(entry)
                if len(hits) >= _PROFILE_FILE_CAP:
                    capped = True
                    break
            matched_here = len(hits) > count_before
            if matched_here and family and family not in families:
                families.append(family)
            if capped:
                break
    except OSError:
        return None

    size_sum = 0
    mtimes: list[float] = []
    for entry in hits:
        try:
            info = entry.stat()
        except OSError:
            continue
        size_sum += info.st_size
        mtimes.append(info.st_mtime)

    # No matches, nothing stat-able, or only empty files: report "no data".
    if not hits or size_sum == 0 or not mtimes:
        return None

    reference = now or datetime.now()
    age = reference - datetime.fromtimestamp(max(mtimes))

    if logs_label is not None:
        logs = logs_label
    else:
        # One or two families read as "a + b"; longer lists use commas.
        separator = " + " if len(families) <= 2 else ", "
        logs = separator.join(families)

    return {
        "size_bytes": size_sum,
        "size_str": _human_bytes(size_sum),
        "fresh_str": _fresh_bucket(age),
        "logs": logs,
        "bounded": capped,
    }
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# ── TOML serialization ────────────────────────────────────────────────────────
|
|
211
|
+
|
|
212
|
+
# ASCII control characters (U+0000–U+001F and U+007F); _toml_str refuses them.
_TOML_FORBIDDEN_RE = re.compile(r'[\x00-\x1f\x7f]')


def _toml_str(value: str) -> str:
    """Quote ``value`` as a TOML string.

    Values without a single quote are emitted as a literal string
    (``'...'``, no escaping). Values containing a single quote are emitted as
    a basic string (``"..."``) with backslashes and double quotes escaped.

    Raises:
        ValueError: if ``value`` contains a control character; failing loudly
            beats silently writing invalid TOML.
    """
    if _TOML_FORBIDDEN_RE.search(value) is not None:
        raise ValueError(
            "loghunter init: path contains a control character that cannot "
            f"be written to TOML: {value!r}"
        )
    if "'" in value:
        # Backslashes first, so the ones added for quotes are not doubled.
        body = value.replace("\\", "\\\\").replace('"', '\\"')
        return '"' + body + '"'
    return "'" + value + "'"
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# ── Section-bound keyed upsert ────────────────────────────────────────────────
|
|
232
|
+
#
|
|
233
|
+
# The four managed keys (root, zeek_dir, pihole_dir, syslog_dir) are rewritten
|
|
234
|
+
# ONLY inside the [loghunter] table span. A token appearing in any other
|
|
235
|
+
# stanza, a comment outside the span, or even a [loghunter.subtable] is never
|
|
236
|
+
# matched — that IS the non-clobber guarantee.
|
|
237
|
+
|
|
238
|
+
_LOGHUNTER_HEADER_RE = re.compile(r'^\[loghunter\]\s*(?:#.*)?$', re.MULTILINE)
|
|
239
|
+
_MANAGED_KEYS: tuple[str, ...] = ("root", "zeek_dir", "pihole_dir", "syslog_dir")
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _loghunter_span(text: str) -> tuple[int, int, int] | None:
|
|
243
|
+
"""Locate the [loghunter] table span: (header_start, body_start, body_end).
|
|
244
|
+
|
|
245
|
+
body runs from the line AFTER the header to the line BEFORE the next
|
|
246
|
+
`^[` section header, or EOF. Returns None when the header is absent."""
|
|
247
|
+
m = _LOGHUNTER_HEADER_RE.search(text)
|
|
248
|
+
if m is None:
|
|
249
|
+
return None
|
|
250
|
+
header_start = m.start()
|
|
251
|
+
# body starts after the header line's trailing newline
|
|
252
|
+
nl = text.find("\n", m.end())
|
|
253
|
+
body_start = nl + 1 if nl != -1 else len(text)
|
|
254
|
+
# body ends at the next ^[ section header found after body_start
|
|
255
|
+
rest_offset = body_start
|
|
256
|
+
next_header_re = re.compile(r'^\[', re.MULTILINE)
|
|
257
|
+
nh = next_header_re.search(text, rest_offset)
|
|
258
|
+
body_end = nh.start() if nh else len(text)
|
|
259
|
+
return (header_start, body_start, body_end)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _upsert_loghunter_key(
    text: str, key: str, value: str | None, *, fresh: bool,
) -> str:
    """Rewrite one managed key inside the ``[loghunter]`` table span.

    Args:
        text: Full TOML document.
        key: Key name to manage (matched only at the start of a line).
        value: None means SKIPPED; a string (including ``""``, the
            explicit-empty-root case) means PROVIDED.
        fresh: Only consulted when ``value`` is None. True comments out an
            active line for the key; False is a strict no-op so a user-set
            value is never touched.

    Returns:
        The updated document. When skipped and there is nothing to comment
        out, ``text`` is returned unchanged.

    PROVIDED behavior: an active ``key = ...`` line in the span is replaced;
    otherwise a commented ``# key = ...`` sample is replaced; otherwise the
    line is inserted directly after the header. If the document has no
    ``[loghunter]`` header, the table is appended at the end. Prepending it
    would capture any root-level keys already in the file into the
    ``[loghunter]`` table.

    Raises:
        ValueError: propagated from ``_toml_str`` for unserializable values.
    """
    span = _loghunter_span(text)
    active_re = re.compile(rf'^{re.escape(key)}\s*=.*$', re.MULTILINE)

    if value is None:
        # SKIPPED branch. Without a table there is nothing to comment out,
        # and a non-fresh run must leave the document exactly as it was.
        if not fresh or span is None:
            return text
        _, body_start, body_end = span
        body = text[body_start:body_end]
        active = active_re.search(body)
        if active is None:
            return text  # active line not present; nothing to comment
        cut = active.start()
        return text[:body_start] + body[:cut] + "# " + body[cut:] + text[body_end:]

    # PROVIDED branch. Serialize first so a bad value fails before any edit.
    new_line = f"{key} = {_toml_str(value)}"

    if span is None:
        # Defensive: the shipped example always has a header. Append the
        # table so existing root-level keys keep their meaning.
        prefix = text if (not text or text.endswith("\n")) else text + "\n"
        return prefix + "[loghunter]\n" + new_line + "\n"

    _, body_start, body_end = span
    if text[body_start - 1:body_start] != "\n":
        # The header is the unterminated last line; terminate it so the key
        # lands on its own line instead of being glued onto the header.
        text += "\n"
        body_start = body_end = len(text)
    body = text[body_start:body_end]

    commented_re = re.compile(rf'^#\s*{re.escape(key)}\s*=.*$', re.MULTILINE)
    # An active line wins over a commented sample. Otherwise a span holding
    # both would end up with two active lines for the key (invalid TOML).
    hit = active_re.search(body) or commented_re.search(body)
    if hit is None:
        # body_start is already past the header's newline, so prepending to
        # the body inserts directly after the header.
        new_body = new_line + "\n" + body
    else:
        new_body = body[:hit.start()] + new_line + body[hit.end():]
    return text[:body_start] + new_body + text[body_end:]
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ── Wizard dialogue ───────────────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
def _print_intro(existing_basis: bool) -> None:
    """Print the wizard greeting followed by a blank line.

    When ``existing_basis`` is true, an extra line notes that the existing
    ``~/.loghunter/`` is used as the non-destructive starting point.
    """
    lines = ["OK, let's find your logs."]
    if existing_basis:
        lines.append("Found ~/.loghunter/, using that as basis (non-destructive)")
    lines.append("")  # trailing blank line
    print("\n".join(lines))
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _print_zeek_found(path: str, profile: dict | None) -> None:
|
|
322
|
+
if profile is not None:
|
|
323
|
+
print(f"Found Zeek at {path}.")
|
|
324
|
+
print(f"{profile['logs']}, {profile['size_str']}, {profile['fresh_str']}. Use this?")
|
|
325
|
+
else:
|
|
326
|
+
# No-data reduced form: single-line headline per the Rev 2 prompt.
|
|
327
|
+
print(f"Found Zeek at {path}. Use this?")
|
|
328
|
+
print("[Enter = yes · type a path · s = skip]")
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _print_zeek_not_found() -> None:
    """Print the three-line "Zeek not found" prompt and its key legend."""
    print(
        "Didn't find Zeek. You might like it: https://zeek.org",
        "If it's just hiding, tell me where.",
        "[Enter = skip · type a path]",
        sep="\n",
    )
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _print_pihole_found(path: str, profile: dict | None) -> None:
|
|
338
|
+
if profile is not None:
|
|
339
|
+
print(f"Found Pi-hole at {path}.")
|
|
340
|
+
print(f"{profile['size_str']} of query logs, {profile['fresh_str']}. Use this?")
|
|
341
|
+
else:
|
|
342
|
+
# No-data reduced form: single-line headline per the Rev 2 prompt.
|
|
343
|
+
print(f"Found Pi-hole at {path}. Use this?")
|
|
344
|
+
print("[Enter = yes · type a path · s = skip]")
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _print_pihole_not_found() -> None:
    """Print the three-line "Pi-hole not found" prompt and its key legend."""
    print(
        "Pi-hole seems to be absent. Worth a look: https://pi-hole.net",
        "Point me at the logs if they're elsewhere.",
        "[Enter = skip · type a path]",
        sep="\n",
    )
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _print_syslog(path: str, profile: dict | None) -> None:
|
|
354
|
+
if profile is not None:
|
|
355
|
+
print(f"syslog is where you'd expect… {path} — {profile['size_str']}, {profile['fresh_str']}.")
|
|
356
|
+
else:
|
|
357
|
+
print(f"syslog is where you'd expect… {path}.")
|
|
358
|
+
print("Use this? [Enter = yes · type a path · s = skip]")
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _print_gate() -> None:
    """Print the "no source selected" gate message and its key legend."""
    print(
        "You should provide at least one: Zeek, Pi-hole, or syslog.",
        "Or you can point loghunter at individual files. Up to you.",
        "[r = redo · Enter = skip]",
        sep="\n",
    )
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _print_root_prompt(default_root: str) -> None:
    """Ask where output should be kept, showing ``default_root`` as the Enter default."""
    question = (
        "Last thing: where should LogHunter keep what it produces — exports and reports?"
    )
    print(question, f"[Enter = {default_root}]", sep="\n")
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _print_confirm(active_sources: list[tuple[str, str]], root: str) -> None:
    """Print the closing summary.

    Args:
        active_sources: ``(label, path)`` pairs for the sources that were
            kept; rendered as ``label (path)`` joined by commas, or a
            "(none ...)" placeholder when empty.
        root: Data directory shown on the ``data:`` line.
    """
    sources_line = (
        ", ".join(f"{label} ({path})" for label, path in active_sources)
        if active_sources
        else "(none — pass files on the command line)"
    )
    print(
        "Done — settings written to ~/.loghunter/config.toml.",
        f" reading: {sources_line}",
        f" data: {root}",
        "",
        f"LogHunter documentation lives here: {_DOCS_URL}",
        "Or just run `loghunter` for a quick-start TL;DR.",
        "",
        "Good hunting!",
        sep="\n",
    )
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# ── Flow helpers ──────────────────────────────────────────────────────────────
|
|
388
|
+
|
|
389
|
+
def _ask_source(found_path: str | None, found_printer, not_found_printer) -> str | None:
|
|
390
|
+
"""Drive one source prompt. Returns the chosen path or None (skipped)."""
|
|
391
|
+
if found_path is not None:
|
|
392
|
+
found_printer()
|
|
393
|
+
answer = input("> ").strip()
|
|
394
|
+
if answer == "":
|
|
395
|
+
return found_path
|
|
396
|
+
if answer.lower() in ("s", "skip"):
|
|
397
|
+
return None
|
|
398
|
+
return os.path.expanduser(answer)
|
|
399
|
+
# NOT FOUND path
|
|
400
|
+
not_found_printer()
|
|
401
|
+
answer = input("> ").strip()
|
|
402
|
+
if answer == "" or answer.lower() in ("s", "skip"):
|
|
403
|
+
return None
|
|
404
|
+
return os.path.expanduser(answer)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _read_existing_config_for_root(
|
|
408
|
+
target: Path,
|
|
409
|
+
) -> tuple[bytes | None, str | None, dict | None]:
|
|
410
|
+
"""Read an existing config file. Returns (raw_bytes, decoded_text,
|
|
411
|
+
parsed-loghunter-dict). Bytes are preserved verbatim for `.bak`
|
|
412
|
+
(read_text translates CRLF→LF under universal newlines, which would
|
|
413
|
+
break the non-clobber guarantee for Windows-line-ending files)."""
|
|
414
|
+
if not target.exists():
|
|
415
|
+
return (None, None, None)
|
|
416
|
+
try:
|
|
417
|
+
raw = target.read_bytes()
|
|
418
|
+
except OSError as exc:
|
|
419
|
+
raise ValueError(
|
|
420
|
+
f"loghunter init: cannot read existing config at {target}: {exc}"
|
|
421
|
+
) from exc
|
|
422
|
+
try:
|
|
423
|
+
text = raw.decode("utf-8")
|
|
424
|
+
except UnicodeDecodeError as exc:
|
|
425
|
+
raise ValueError(
|
|
426
|
+
f"loghunter init: existing config at {target} is not UTF-8: {exc}"
|
|
427
|
+
) from exc
|
|
428
|
+
try:
|
|
429
|
+
parsed = tomllib.loads(text)
|
|
430
|
+
except tomllib.TOMLDecodeError as exc:
|
|
431
|
+
raise ValueError(
|
|
432
|
+
f"loghunter init: existing config at {target} is not valid TOML: {exc}"
|
|
433
|
+
) from exc
|
|
434
|
+
return (raw, text, parsed.get("loghunter", {}))
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _load_example_text() -> str:
    """Return the text of the packaged ``config_example.toml``.

    The file is looked up through ``importlib.resources`` under the
    ``loghunter`` package's ``data`` directory. If that fails for any reason,
    it is read from the ``data`` directory next to this module instead.
    """
    filename = "config_example.toml"
    try:
        import importlib.resources
        resource = importlib.resources.files("loghunter") / "data" / filename
        return resource.read_text(encoding="utf-8")
    except Exception:
        # Deliberately broad: any resource-lookup failure falls back to a
        # plain filesystem read relative to this file.
        fallback = Path(__file__).parent / "data" / filename
        return fallback.read_text(encoding="utf-8")
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def run_init() -> None:
    """Detection-driven, non-clobbering wizard for the first-run config.

    Public entry point — ``cli.py`` validates argv via ``_parse_args(args,
    "init")`` (allowed set is help-only; standalone ``--help``/``-h`` is
    short-circuited before this function is invoked) and then delegates here.

    Flow: read any existing ``~/.loghunter/config.toml`` (otherwise start
    from the shipped example), prompt for Zeek, Pi-hole and syslog, prompt
    for the data root, back up an existing file to ``config.toml.bak``, then
    upsert the four managed keys and write the result.

    Raises:
        ValueError: for an unreadable or invalid existing config, a value
            that cannot be serialized to TOML, or a failure to create the
            config directory, the backup, or the config file.
    """
    target = Path("~/.loghunter/config.toml").expanduser()
    existing_bytes, existing_text, existing_lh = _read_existing_config_for_root(target)
    existing_basis = existing_bytes is not None
    base_text = existing_text if existing_basis else _load_example_text()
    fresh = not existing_basis

    _print_intro(existing_basis)

    # Run the three source prompts in order; on gate-redo we re-loop.
    while True:
        zeek_path = _detect_zeek()
        zeek_profile = (
            _profile_dir(zeek_path, _ZEEK_GLOBS, logs_label=None) if zeek_path else None
        )
        zeek_answer = _ask_source(
            zeek_path,
            lambda: _print_zeek_found(zeek_path, zeek_profile),
            _print_zeek_not_found,
        )
        print()

        pihole_path = _detect_pihole()
        pihole_profile = (
            _profile_dir(pihole_path, (_PIHOLE_GLOB,), logs_label="query logs")
            if pihole_path else None
        )
        pihole_answer = _ask_source(
            pihole_path,
            lambda: _print_pihole_found(pihole_path, pihole_profile),
            _print_pihole_not_found,
        )
        print()

        # Syslog goes through the same prompt driver as the other sources, so
        # "s"/"skip" means skip in its not-found form too instead of being
        # stored as a literal path.
        syslog_path = _detect_syslog()
        syslog_profile = (
            _profile_dir(syslog_path, (_SYSLOG_GLOB,), logs_label=None)
            if syslog_path is not None else None
        )
        syslog_answer = _ask_source(
            syslog_path,
            lambda: _print_syslog(syslog_path, syslog_profile),
            _print_syslog_not_found,
        )
        print()

        if zeek_answer is None and pihole_answer is None and syslog_answer is None:
            _print_gate()
            if input("> ").strip().lower() == "r":
                continue  # re-loop the three source prompts
        break

    # Root default: existing config's explicit root wins (including ""); else
    # the live default ~/.loghunter. A non-string root cannot be written back
    # as a TOML string, so it is ignored in favor of the default.
    default_root = "~/.loghunter"
    if isinstance(existing_lh, dict) and isinstance(existing_lh.get("root"), str):
        default_root = existing_lh["root"]
    _print_root_prompt(default_root)
    root_input = input("> ").strip()
    root_value = default_root if root_input == "" else os.path.expanduser(root_input)
    print()

    # Filesystem failures are reported as ValueError, matching the read and
    # backup error style used elsewhere in this module.
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        raise ValueError(
            f"loghunter init: cannot create config directory {target.parent}: {exc}"
        ) from exc

    # Write `.bak` BEFORE any transformation when re-initing an existing file.
    # Back up the RAW bytes verbatim — a text-mode round-trip would translate
    # CRLF→LF and leave the .bak non-identical to the original.
    if existing_basis:
        bak_path = target.with_suffix(".toml.bak")
        try:
            bak_path.write_bytes(existing_bytes)
        except OSError as exc:
            raise ValueError(
                f"loghunter init: cannot write backup at {bak_path}: {exc}"
            ) from exc

    text = base_text
    for key, val in (
        ("root", root_value),
        ("zeek_dir", zeek_answer),
        ("pihole_dir", pihole_answer),
        ("syslog_dir", syslog_answer),
    ):
        text = _upsert_loghunter_key(text, key, val, fresh=fresh)

    # Bytes-out symmetric with bytes-in: write_text would platform-translate
    # newlines on Windows. write_bytes preserves the byte stream.
    try:
        target.write_bytes(text.encode("utf-8"))
    except OSError as exc:
        raise ValueError(
            f"loghunter init: cannot write config at {target}: {exc}"
        ) from exc

    active: list[tuple[str, str]] = [
        (label, answer)
        for label, answer in (
            ("Zeek", zeek_answer),
            ("Pi-hole", pihole_answer),
            ("syslog", syslog_answer),
        )
        if answer is not None
    ]
    _print_confirm(active, root_value)


def _print_syslog_not_found() -> None:
    """Print the prompt shown when the syslog candidate directory is absent."""
    print("syslog isn't where I'd expect — point me at the logs if they're elsewhere.")
    print("[Enter = skip · type a path]")
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
# Compat shim — pre-extraction tests called `cli._run_init([])`. The new entry
|
|
562
|
+
# is ``run_init()`` (cli.py validates argv via ``_parse_args(args, "init")``
|
|
563
|
+
# before delegating). Tests that drive the wizard end-to-end keep working.
|
|
564
|
+
def _run_init(args: list[str] | None = None) -> None:
    """Compatibility wrapper keeping the old ``_run_init(args)`` call shape.

    ``args`` is accepted and ignored; the work is done by ``run_init()``.
    """
    _ = args  # intentionally unused
    run_init()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Common utilities shared across detectors and output handlers."""
|