loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
loghunter/cli.py
ADDED
|
@@ -0,0 +1,1108 @@
|
|
|
1
|
+
"""CLI dispatcher — argument parsing, subcommand routing, and first-run experience.
|
|
2
|
+
|
|
3
|
+
Entry point: loghunter.cli:main (registered in pyproject.toml).
|
|
4
|
+
|
|
5
|
+
Dispatch table:
|
|
6
|
+
loghunter [options] PATH run all enabled detectors via runner
|
|
7
|
+
loghunter beacon|dns|syslog|... run a single detector
|
|
8
|
+
loghunter digest [PATH ...] orient-before-the-hunt card (sniff-driven)
|
|
9
|
+
loghunter export pull logs from external systems
|
|
10
|
+
loghunter init first-run setup wizard
|
|
11
|
+
|
|
12
|
+
Parsing is a small declarative spec (``_FLAG_LIST`` + ``_VERBS``) plus a
|
|
13
|
+
per-token loop (``_parse_args``). The spec governs allowed-flag membership,
|
|
14
|
+
validation, and generated per-command help. ``blob_path`` is NOT a flag —
|
|
15
|
+
it is an INTERNAL routing key synthesized post-sniff and MUST NOT enter
|
|
16
|
+
the spec.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from datetime import datetime, timedelta, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from loghunter.common import config as cfg
|
|
29
|
+
from loghunter.common.errors import DigestEmpty, ExportAborted
|
|
30
|
+
from loghunter.common.output import get_handler
|
|
31
|
+
from loghunter.common.paths import be_like_water, effective_root, resolve_path
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ── flag/verb spec ────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class FlagSpec:
    """One advertised CLI flag.

    ``key`` is the underscore canonical key used by ``parsed[...]``, config,
    and runner kwargs — never the hyphenated display spelling.
    """
    # Canonical underscore key (e.g. "dry_run"); what the parser stores under.
    key: str
    # Long spelling including the leading dashes (e.g. "--dry-run").
    long: str
    # Single-letter alias WITHOUT the leading dash (e.g. "v"), or None.
    short: str | None
    # True when the flag must be written as ``flag=VALUE``; False for switches.
    takes_value: bool
    # Value placeholder appended after "=" in generated help ("" for switches).
    metavar: str
    # One-line description shown in generated per-command help.
    help: str
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Ordered list — also the display order for generated per-command help.
# Positional fields per entry: key, long, short, takes_value, metavar, help.
_FLAG_LIST: tuple[FlagSpec, ...] = (
    FlagSpec("help", "--help", "h", False, "",
             "show this help and exit"),
    FlagSpec("verbose", "--verbose", "v", False, "",
             "verbose output (extended evidence and next-steps; -vv for full raw debug detail)"),
    FlagSpec("yes", "--yes", "y", False, "",
             "assume yes to advisory prompts (large-dataset, egress)"),
    FlagSpec("all", "--all", "a", False, "",
             "load all available data; overrides default window"),
    FlagSpec("out", "--out", "o", True, "PATH",
             "single per-run output target (file or dir; trailing / = dir)"),
    FlagSpec("config", "--config", "c", True, "FILE",
             "path to a config file (overrides search-path lookup)"),
    FlagSpec("since", "--since", "s", True, "DURATION|DATE",
             "window start (7d, 24h, or ISO date)"),
    FlagSpec("detect", "--detect", "d", True, "LIST",
             "detector selection (all, comma list, or 'all,!x,!y')"),
    FlagSpec("dry_run", "--dry-run", None, False, "",
             "show the plan without running detectors / writing output"),
    FlagSpec("export_allowlist", "--export-allowlist", None, False, "",
             "emit allowlist-ready lines instead of a report (stubbed)"),
    FlagSpec("output", "--output", None, True, "FORMAT",
             "output format (text, json, csv, html)"),
    FlagSpec("until", "--until", None, True, "DATE",
             "window end (ISO date)"),
    FlagSpec("days", "--days", None, True, "N-M",
             "days-ago range (e.g. 1-7); order-insensitive"),
    FlagSpec("hours", "--hours", None, True, "N-M",
             "hours-ago range (e.g. 0-2); order-insensitive"),
    FlagSpec("zeek_dir", "--zeek-dir", None, True, "PATH",
             "Zeek log directory (overrides config)"),
    FlagSpec("pihole_dir", "--pihole-dir", None, True, "PATH",
             "Pi-hole / dnsmasq log directory (overrides config)"),
    FlagSpec("syslog_dir", "--syslog-dir", None, True, "PATH",
             "rsyslog log directory (overrides config)"),
    FlagSpec("cloudtrail_dir", "--cloudtrail-dir", None, True, "PATH",
             "CloudTrail JSON directory (overrides config)"),
)

# Lookup tables derived from _FLAG_LIST: by canonical key, by long spelling
# (with leading dashes), and by short letter (flags without a short form are
# left out of the last one).
_FLAGS_BY_KEY: dict[str, FlagSpec] = {f.key: f for f in _FLAG_LIST}
_FLAGS_BY_LONG: dict[str, FlagSpec] = {f.long: f for f in _FLAG_LIST}
_FLAGS_BY_SHORT: dict[str, FlagSpec] = {f.short: f for f in _FLAG_LIST if f.short}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass(frozen=True)
class VerbSpec:
    """One verb in the dispatcher.

    ``name == ""`` represents the analyze/no-verb path. ``allowed`` is the set
    of canonical flag keys (not long spellings) — short flags are aliases for
    their canonical key, so a short flag is allowed iff its canonical key is
    in ``allowed``.
    """
    # Verb as typed on the command line; "" for the analyze/no-verb path.
    name: str
    # One-line description printed in the verb's generated help.
    summary: str
    # Positional-argument hint appended to the usage line ("" when none).
    positional_shape: str
    # Canonical flag keys (FlagSpec.key values) accepted by this verb.
    allowed: frozenset[str]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Per-verb allowed-flag sets, expressed as canonical FlagSpec keys.
# The analyze (no-verb) path accepts every flag in the spec.
_ANALYZE_ALLOWED: frozenset[str] = frozenset({
    "help", "verbose", "yes", "all", "out", "config", "since", "detect",
    "dry_run", "export_allowlist", "output", "until", "days", "hours",
    "zeek_dir", "pihole_dir", "syslog_dir", "cloudtrail_dir",
})
# Single-detector verbs take everything analyze does except "detect".
_SINGLE_DET_ALLOWED: frozenset[str] = _ANALYZE_ALLOWED - {"detect"}
_DIGEST_ALLOWED: frozenset[str] = frozenset({
    "help", "verbose", "yes", "all", "out", "config", "since",
    "dry_run", "output", "until", "days", "hours", "zeek_dir",
})
_EXPORT_ALLOWED: frozenset[str] = frozenset({
    "help", "verbose", "yes", "out", "config", "since", "until", "days", "hours",
})
# The init verb accepts only --help / -h.
_INIT_ALLOWED: frozenset[str] = frozenset({"help"})
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Verb table keyed by the verb as typed; the "" key is the analyze/no-verb
# path. Positional fields per entry: name, summary, positional_shape, allowed.
_VERBS: dict[str, VerbSpec] = {
    "": VerbSpec("", "run all enabled detectors",
                 "[PATH]", _ANALYZE_ALLOWED),
    "beacon": VerbSpec("beacon", "beacon detection (conn.log)",
                       "[PATH]", _SINGLE_DET_ALLOWED),
    "dns": VerbSpec("dns", "DNS clustering (Zeek or Pi-hole)",
                    "[PATH]", _SINGLE_DET_ALLOWED),
    "syslog": VerbSpec("syslog", "syslog anomaly detection",
                       "[PATH]", _SINGLE_DET_ALLOWED),
    "scan": VerbSpec("scan", "port scan detection (conn.log)",
                     "[PATH]", _SINGLE_DET_ALLOWED),
    "duration": VerbSpec("duration", "long connection detection (conn.log)",
                         "[PATH]", _SINGLE_DET_ALLOWED),
    "aws": VerbSpec("aws", "CloudTrail behavioral surfacing (per-principal)",
                    "[PATH]", _SINGLE_DET_ALLOWED),
    "digest": VerbSpec("digest", "orient-before-the-hunt card (schema sniffed)",
                       "[PATH ...]", _DIGEST_ALLOWED),
    "export": VerbSpec("export", "pull logs from external systems to local files",
                       "[BACKEND] [QUERY ...]", _EXPORT_ALLOWED),
    "init": VerbSpec("init", "first-run setup wizard",
                     "", _INIT_ALLOWED),
}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Verbs that run exactly one detector; _main routes these to
# _run_single_detector.
_SINGLE_DETECTOR_COMMANDS: frozenset[str] = frozenset({
    "beacon", "dns", "syslog", "scan", "duration", "aws",
})

# User-initiated stop (Ctrl-C during compute). Named so the future error-voice
# pass can find the message and exit code together. 130 is the Unix 128 + SIGINT
# convention. Ctrl-C AT THE CONFIRM PROMPTS (runner.py) is a separate path that
# routes through ExportAborted → exit 0; this is the mid-run sibling.
_STOPPED_MESSAGE = "Stopped."
_SIGINT_EXIT_CODE = 130
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def main(argv: list[str] | None = None) -> None:
    """Console entry point: run ``_main`` and translate failures to exit codes.

    Args:
        argv: Argument tokens to parse; forwarded unchanged to ``_main``
            (which falls back to ``sys.argv[1:]`` when this is ``None``).

    Exit behaviour:
        - ``KeyboardInterrupt`` → ``_STOPPED_MESSAGE`` on stderr, exit 130.
        - ``ExportAborted`` → message on stdout, exit 0.
        - ``cfg.ConfigError`` / ``OSError`` → ``loghunter: <msg>`` on stderr,
          exit 1.
        - ``ValueError`` → same, plus a pointer to ``--help``, exit 1.
        - otherwise exits with ``_main``'s return code only when non-zero.
    """

    def _fail(exc: BaseException, *, usage_hint: bool = False) -> None:
        # Shared "loghunter: <message>" failure path; always exit status 1.
        print(f"loghunter: {exc}", file=sys.stderr)
        if usage_hint:
            print("Run 'loghunter --help' for usage.", file=sys.stderr)
        sys.exit(1)

    try:
        exit_code = _main(argv) or 0
    except KeyboardInterrupt:
        # Terminals usually echo "^C" without a newline; on a TTY emit a blank
        # line first so the message does not land as "^CStopped.". Non-TTY
        # stderr gets exactly "Stopped.\n" so captured logs stay byte-stable.
        if sys.stderr.isatty():
            print(file=sys.stderr)
        print(_STOPPED_MESSAGE, file=sys.stderr)
        sys.exit(_SIGINT_EXIT_CODE)
    except ExportAborted as aborted:
        print(str(aborted))
        sys.exit(0)
    except cfg.ConfigError as exc:
        # Handled ahead of ValueError so config problems never get the
        # usage hint, whatever ConfigError's base class is.
        _fail(exc)
    except ValueError as exc:
        _fail(exc, usage_hint=True)
    except OSError as exc:
        _fail(exc)
    if exit_code:
        sys.exit(exit_code)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _main(argv: list[str] | None = None) -> int:
    """Route the command line to a verb handler. ``main`` formats exceptions.

    Args:
        argv: Argument tokens; ``sys.argv[1:]`` is used when ``None``.

    Returns:
        The exit code. Only the digest branch can return non-zero (whatever
        ``_run_digest`` returns); every other branch returns 0.
    """
    tokens = sys.argv[1:] if argv is None else argv

    # Bare invocation or a lone help flag → global usage screen.
    if not tokens or tokens == ["--help"] or tokens == ["-h"]:
        _print_global_usage()
        return 0

    first = tokens[0]
    if first in _SINGLE_DETECTOR_COMMANDS or first in ("digest", "init", "export"):
        # Explicit verb: consume it, the remainder belongs to the verb.
        verb, rest = first, tokens[1:]
    elif first.startswith("-") or _looks_like_path(first):
        # No verb: flags and/or a path go to the analyze path untouched.
        verb, rest = "", tokens
    else:
        print(f"loghunter: unknown command '{first}'", file=sys.stderr)
        print("Run 'loghunter --help' for usage.", file=sys.stderr)
        sys.exit(1)

    # Help short-circuit — STANDALONE --help / -h tokens only. Spellings such
    # as `--help=x` are not help and fall through to the strict parser. This
    # runs before any verb handler is entered.
    if "--help" in rest or "-h" in rest:
        print(_render_verb_help(verb), end="")
        return 0

    if verb == "digest":
        return _run_digest(rest) or 0

    if verb in _SINGLE_DETECTOR_COMMANDS:
        _run_single_detector(verb, rest)
    elif verb == "init":
        _run_init(rest)
    elif verb == "export":
        _run_export(rest)
    else:
        _run_all_detectors(rest)
    return 0
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _looks_like_path(s: str) -> bool:
|
|
250
|
+
"""Return True if the string looks like a filesystem path rather than a subcommand.
|
|
251
|
+
|
|
252
|
+
Verbs are matched first in ``_main`` so verb names always win; this only
|
|
253
|
+
decides whether a non-verb token routes to the analyze path or fails as
|
|
254
|
+
an unknown command. The ``os.path.exists`` clause catches bare filenames
|
|
255
|
+
in CWD (``loghunter conn.log``) that the prefix tests would miss.
|
|
256
|
+
"""
|
|
257
|
+
if s.startswith("/") or s.startswith("~") or s.startswith("."):
|
|
258
|
+
return True
|
|
259
|
+
if "/" in s:
|
|
260
|
+
return True
|
|
261
|
+
try:
|
|
262
|
+
if os.path.exists(s):
|
|
263
|
+
return True
|
|
264
|
+
except (OSError, ValueError):
|
|
265
|
+
pass
|
|
266
|
+
return False
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# ── usage / help generation ───────────────────────────────────────────────────
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _global_usage_text() -> str:
    """Compose the bare-loghunter / --help screen from the spec.

    Returns:
        The usage screen as one newline-joined string. The final empty list
        entry makes the text end with a newline.
    """
    # NOTE(review): this text is hand-written; it does not read _FLAG_LIST or
    # _VERBS, so it has to be kept in step with them manually.
    lines = [
        "loghunter — network threat hunting for self-hosters",
        "",
        "Usage:",
        " loghunter [options] PATH run all enabled detectors",
        " loghunter beacon [options] PATH beacon detection (conn.log)",
        " loghunter dns [options] PATH DNS clustering (Zeek or Pi-hole)",
        " loghunter syslog [options] PATH syslog anomaly detection",
        " loghunter scan [options] PATH port scan detection (conn.log)",
        " loghunter duration [options] PATH long connection detection (conn.log)",
        " loghunter aws [options] PATH CloudTrail behavioral surfacing (per-principal)",
        "",
        " loghunter digest [options] PATH orient-before-the-hunt card; schema is",
        " inferred from the file (conn, dns, syslog,",
        " cloudtrail, or blob for unrecognized text)",
        "",
        " loghunter export pull logs from external systems",
        " loghunter init first-run setup wizard",
        "",
        "Common options (short forms shown for the frequently-typed flags):",
        " --help, -h --verbose, -v --yes, -y --all, -a",
        " --out, -o=PATH --config, -c=FILE --since, -s=… --detect, -d=LIST",
        "",
        "Less common: --dry-run --export-allowlist --output=FORMAT --until=DATE",
        " --days=N-M --hours=N-M",
        " --zeek-dir=PATH --syslog-dir=PATH --pihole-dir=PATH --cloudtrail-dir=PATH",
        "",
        "Run 'loghunter <command> --help' for command-specific options.",
        "",
    ]
    return "\n".join(lines)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _print_global_usage() -> None:
    """Print the first-run usage message, appending a hint when no config is found."""
    # end="" because the usage text already ends with its own newline.
    print(_global_usage_text(), end="")
    # A None result from the config lookup means no config file was located;
    # point the user at the setup wizard.
    if cfg._find_config_file() is None:
        print(" No config found. Run 'loghunter init' to get started.")
        print(" Config will be written to ~/.loghunter/config.toml")


# Compatibility alias — internal helper kept under its historical name for
# tests/observers that import it directly.
_print_usage = _print_global_usage
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _render_verb_help(verb: str) -> str:
    """Build the per-command help text shown for ``<verb> --help`` / ``-h``.

    Args:
        verb: A key of ``_VERBS`` (``""`` for the analyze/no-verb path).

    Returns:
        Usage line, summary, and one row per flag the verb allows, ending
        with a newline.

    Raises:
        KeyError: If ``verb`` is not a key of ``_VERBS``.
    """
    verb_spec = _VERBS[verb]

    command = f"loghunter {verb}" if verb else "loghunter"
    usage = f"Usage: {command} [options]"
    if verb_spec.positional_shape:
        usage += f" {verb_spec.positional_shape}"

    out = [usage.rstrip(), "", verb_spec.summary, "", "Options:"]

    # Walk _FLAG_LIST (not the unordered allowed-set) so rows keep a stable,
    # spec-defined order.
    for flag in _FLAG_LIST:
        if flag.key not in verb_spec.allowed:
            continue
        label = f" {flag.long}, -{flag.short}" if flag.short else f" {flag.long}"
        if flag.takes_value:
            label += f"={flag.metavar}"
        out.append(f"{label:<32} {flag.help}".rstrip())

    return "\n".join(out) + "\n"
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# ── parser ────────────────────────────────────────────────────────────────────
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _parse_args(args: list[str], verb: str) -> dict[str, Any]:
    """Parse CLI tokens for ``verb`` into a kwargs dict.

    Validation order is BINDING: identity → verb-membership → value-shape. A
    globally-known but verb-disallowed flag yields the wrong-verb error
    regardless of value shape (``digest -d`` and ``digest --detect`` both
    report "not valid for digest", NOT "needs a value").

    Duplicate flags are last-wins. Single-valued — never promoted to a list.

    Args:
        args: Tokens following the verb (or all tokens on the analyze path).
        verb: A key of ``_VERBS``; ``""`` is the analyze/no-verb path.

    Returns:
        Mapping of canonical flag key → value (``True`` for switches, the raw
        string after ``=`` for value flags). ``-vv`` sets
        ``"verbose_level": 2``. When positionals are present, ``"path"`` is
        the first one and ``"paths"`` is the full list.

    Raises:
        ValueError: Unknown verb, unknown flag, flag not valid for the verb,
            missing/unexpected value, or a short-flag bundling attempt.
    """
    if verb not in _VERBS:
        raise ValueError(f"unknown verb {verb!r}")
    allowed = _VERBS[verb].allowed
    verb_label = verb if verb else "analyze"

    def _alias(spec: FlagSpec) -> str:
        # " (-x)" suffix naming the short form, or "" when there is none.
        return f" (-{spec.short})" if spec.short else ""

    def _wrong_verb_long(spec: FlagSpec) -> str:
        return f"{spec.long}{_alias(spec)} is not valid for {verb_label}"

    def _wrong_verb_short(spec: FlagSpec) -> str:
        # Short was typed; lead with the short, alias is the long.
        return f"-{spec.short} ({spec.long}) is not valid for {verb_label}"

    def _needs_value(spec: FlagSpec) -> str:
        short_hint = f"-{spec.short}=… or " if spec.short else ""
        return f"{spec.long}{_alias(spec)} needs a value: {short_hint}{spec.long}=…"

    def _no_value(spec: FlagSpec) -> str:
        return f"{spec.long}{_alias(spec)} takes no value"

    result: dict[str, Any] = {}
    positionals: list[str] = []

    def _store(spec: FlagSpec, has_eq: str, val: str) -> None:
        # Value-shape check shared by the long and short branches: "=" is
        # required for value flags and rejected for switches.
        if has_eq:
            if not spec.takes_value:
                raise ValueError(_no_value(spec))
            result[spec.key] = val
        else:
            if spec.takes_value:
                raise ValueError(_needs_value(spec))
            result[spec.key] = True

    for arg in args:
        # `-vv` is a SINGLE literal token — the one-off spelling for
        # verbose-level 2. It is recognized BEFORE the short-flag branch so
        # the bundling refusal never catches it; anything longer (-vvv, -vy)
        # still falls through to the bundling path. Wrong-verb parity:
        # `init -vv` raises the same wrong-verb error as `init -v`.
        if arg == "-vv":
            v_spec = _FLAGS_BY_SHORT.get("v")
            if v_spec is None:
                # No "v" short flag registered: report it as unknown rather
                # than dereferencing None while building a wrong-verb message.
                raise ValueError("unknown flag -vv")
            if v_spec.key not in allowed:
                raise ValueError(_wrong_verb_short(v_spec))
            result["verbose_level"] = 2
            continue

        if arg.startswith("--"):
            body, eq, val = arg[2:].partition("=")
            spec = _FLAGS_BY_LONG.get(f"--{body}")
            if spec is None:
                raise ValueError(f"unknown flag --{body}")
            if spec.key not in allowed:
                raise ValueError(_wrong_verb_long(spec))
            _store(spec, eq, val)
        elif arg.startswith("-") and arg != "-":
            body, eq, val = arg[1:].partition("=")
            if len(body) == 1:
                spec = _FLAGS_BY_SHORT.get(body)
                if spec is None:
                    raise ValueError(f"unknown flag -{body}")
                if spec.key not in allowed:
                    raise ValueError(_wrong_verb_short(spec))
                _store(spec, eq, val)
            elif len(body) > 1:
                # Bundling attempt — deliberately declined. Surface kindly when
                # every char is a known short; otherwise plain unknown-flag.
                if all(ch in _FLAGS_BY_SHORT for ch in body):
                    separated = " ".join(f"-{ch}" for ch in body)
                    raise ValueError(
                        f"short flags can't be combined (-{body}); "
                        f"pass separately: {separated}"
                    )
                raise ValueError(f"unknown flag -{body}")
            else:
                # Empty body, e.g. "-=x".
                raise ValueError(f"unknown flag {arg}")
        else:
            # A lone "-" and anything not starting with "-" is a positional.
            positionals.append(arg)

    if positionals:
        result["path"] = positionals[0]
        result["paths"] = positionals

    return result
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# ── shared resolution helpers ─────────────────────────────────────────────────
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _assert_all_vs_timeframe(parsed: dict[str, Any]) -> None:
|
|
455
|
+
"""``--all`` is mutually exclusive with explicit timeframe flags."""
|
|
456
|
+
if parsed.get("all") and any(k in parsed for k in ("since", "until", "days", "hours")):
|
|
457
|
+
raise ValueError(
|
|
458
|
+
"--all cannot be combined with --since, --until, --days, or --hours"
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _resolve_output_target(
    parsed: dict[str, Any], config: dict[str, Any],
) -> tuple[Path | None, Path | None]:
    """Pick the output destination from ``--out`` or ``[loghunter].report_dir``.

    A truthy ``--out`` value wins and is resolved with an empty root;
    otherwise the configured ``report_dir`` is resolved against the
    effective root.

    Args:
        parsed: Parser output; only the ``"out"`` key is read.
        config: Loaded configuration mapping.

    Returns:
        ``(output_file, output_dir)``. ``(None, None)`` when no target
        resolves (stdout); otherwise exactly one element is set, chosen by
        the ``is_file`` attribute of the ``be_like_water`` result.
    """
    loghunter_cfg = config.get("loghunter", {})
    root = effective_root(config)

    out_flag = parsed.get("out")
    if out_flag:
        target = resolve_path(out_flag, "")
    else:
        target = resolve_path(loghunter_cfg.get("report_dir"), root)

    if target is None:
        return None, None

    placed = be_like_water(target)
    return (placed.path, None) if placed.is_file else (None, placed.path)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _resolve_timeframe(
|
|
487
|
+
parsed: dict[str, Any],
|
|
488
|
+
now: datetime | None = None,
|
|
489
|
+
) -> tuple[datetime | None, datetime | None]:
|
|
490
|
+
"""Convert --since/--until/--days/--hours into a (since, until) datetime pair."""
|
|
491
|
+
if now is None:
|
|
492
|
+
now = datetime.now(timezone.utc)
|
|
493
|
+
since: datetime | None = None
|
|
494
|
+
until: datetime | None = None
|
|
495
|
+
|
|
496
|
+
if "days" in parsed:
|
|
497
|
+
a, b = _parse_range(str(parsed["days"]), "--days")
|
|
498
|
+
since = (now - timedelta(days=b)).replace(hour=0, minute=0, second=0, microsecond=0)
|
|
499
|
+
until = (now - timedelta(days=a)).replace(hour=23, minute=59, second=59, microsecond=0)
|
|
500
|
+
return since, until
|
|
501
|
+
|
|
502
|
+
if "hours" in parsed:
|
|
503
|
+
a, b = _parse_range(str(parsed["hours"]), "--hours")
|
|
504
|
+
since = now - timedelta(hours=b)
|
|
505
|
+
until = now - timedelta(hours=a)
|
|
506
|
+
return since, until
|
|
507
|
+
|
|
508
|
+
if "since" in parsed:
|
|
509
|
+
s = str(parsed["since"])
|
|
510
|
+
if s.endswith("d"):
|
|
511
|
+
since = now - timedelta(days=_parse_positive_int(s[:-1], "--since"))
|
|
512
|
+
elif s.endswith("h"):
|
|
513
|
+
since = now - timedelta(hours=_parse_positive_int(s[:-1], "--since"))
|
|
514
|
+
else:
|
|
515
|
+
since = _parse_iso_date(s, "--since")
|
|
516
|
+
|
|
517
|
+
if "until" in parsed:
|
|
518
|
+
until = _parse_iso_date(str(parsed["until"]), "--until")
|
|
519
|
+
|
|
520
|
+
return since, until
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def _parse_range(value: str, flag: str) -> tuple[int, int]:
|
|
524
|
+
"""Parse N-M range arguments for --days and --hours."""
|
|
525
|
+
parts = value.split("-")
|
|
526
|
+
if len(parts) != 2:
|
|
527
|
+
raise ValueError(f"{flag} expects a range like 3-5")
|
|
528
|
+
try:
|
|
529
|
+
start, end = sorted(int(part) for part in parts)
|
|
530
|
+
except ValueError as exc:
|
|
531
|
+
raise ValueError(f"{flag} expects numeric values like 3-5") from exc
|
|
532
|
+
return start, end
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _parse_positive_int(value: str, flag: str) -> int:
|
|
536
|
+
"""Parse a positive integer embedded in a duration flag."""
|
|
537
|
+
try:
|
|
538
|
+
parsed = int(value)
|
|
539
|
+
except ValueError as exc:
|
|
540
|
+
raise ValueError(f"{flag} expects a duration like 7d or 24h") from exc
|
|
541
|
+
if parsed < 0:
|
|
542
|
+
raise ValueError(f"{flag} duration must be positive")
|
|
543
|
+
return parsed
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _parse_iso_date(value: str, flag: str) -> datetime:
|
|
547
|
+
"""Parse an ISO date/time as UTC for CLI timeframe flags."""
|
|
548
|
+
try:
|
|
549
|
+
parsed = datetime.fromisoformat(value)
|
|
550
|
+
except ValueError as exc:
|
|
551
|
+
raise ValueError(f"{flag} expects a date like 2026-05-01") from exc
|
|
552
|
+
return parsed.replace(tzinfo=timezone.utc)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
# ── runner-kwargs builders ────────────────────────────────────────────────────
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def _resolve_verbose_level(parsed: dict[str, Any]) -> int:
|
|
559
|
+
"""Collapse the parser's two-key verbose state into a single 0/1/2 dial.
|
|
560
|
+
|
|
561
|
+
``-vv`` is registered as the literal token ``verbose_level=2`` (see
|
|
562
|
+
``_parse_args``); ``-v`` / ``--verbose`` set ``verbose=True``. Their
|
|
563
|
+
last-wins resolution lands here:
|
|
564
|
+
none → 0; -v → 1; -vv → 2; combined → 2.
|
|
565
|
+
Only the text handler distinguishes all three levels; every other
|
|
566
|
+
consumer collapses to ``>= 1`` (export internals, csv/html description
|
|
567
|
+
gate, digest summariser-failure breadcrumb).
|
|
568
|
+
"""
|
|
569
|
+
if parsed.get("verbose_level") == 2:
|
|
570
|
+
return 2
|
|
571
|
+
return 1 if parsed.get("verbose") else 0
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
# Canonical keys of the per-source directory override flags
# (--zeek-dir, --syslog-dir, --pihole-dir, --cloudtrail-dir).
_ANALYZE_SOURCE_KEYS: tuple[str, ...] = (
    "zeek_dir", "syslog_dir", "pihole_dir", "cloudtrail_dir",
)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def _merge_family_value(
|
|
580
|
+
bucket: list[str], flag_value: str | None,
|
|
581
|
+
) -> str | list[str] | None:
|
|
582
|
+
"""Combine a positional-derived bucket with the explicit ``--<family>-dir``
|
|
583
|
+
flag value, returning the runner-kwarg shape for that family.
|
|
584
|
+
|
|
585
|
+
MERGE rule (sanctioned rail supersession in the rev-3 prompt — James
|
|
586
|
+
reconciles CODE.md after landing): positionals routed to the family
|
|
587
|
+
+ the flag value BOTH contribute, both load. Order is positionals
|
|
588
|
+
first, flag appended; dedup is the loader's job (via ``.resolve()``).
|
|
589
|
+
|
|
590
|
+
Wire-shape compression for the runner kwarg:
|
|
591
|
+
|
|
592
|
+
- empty + no flag → ``None`` (no override; config fallback within scope)
|
|
593
|
+
- exactly one truthy value → scalar (string) — keeps programmatic
|
|
594
|
+
scalar-caller shape byte-identical with the prior single-Path contract
|
|
595
|
+
- 2+ values → ``list[str]`` — the multi-input shape
|
|
596
|
+
|
|
597
|
+
All three shapes flow through ``runner.run`` → ``resolve_sources`` →
|
|
598
|
+
``_normalize_overrides``, which collapses to the same downstream
|
|
599
|
+
``list[Path]`` regardless. Raw strings only — the CLI does NOT
|
|
600
|
+
``Path(...)`` or ``resolve_path`` source values; ``_resolve_one`` is
|
|
601
|
+
the SOLE string→Path site.
|
|
602
|
+
"""
|
|
603
|
+
merged: list[str] = [b for b in bucket if b]
|
|
604
|
+
if flag_value:
|
|
605
|
+
merged.append(flag_value)
|
|
606
|
+
if not merged:
|
|
607
|
+
return None
|
|
608
|
+
if len(merged) == 1:
|
|
609
|
+
return merged[0]
|
|
610
|
+
return merged
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def _build_positional_buckets(
    paths: list[str], *, detector_module: Any | None,
) -> dict[str, list[str]]:
    """Group positional paths by the source family each one is routed to.

    Every entry of ``paths`` is handed to ``route_positional_source`` along
    with ``detector_module``; the key it returns selects the bucket the
    path is appended to. Buckets keep the paths in input order, and only
    families that received at least one path appear in the result, so an
    empty ``paths`` yields an empty dict.
    """
    from loghunter.common.sources import route_positional_source

    grouped: dict[str, list[str]] = {}
    for positional in paths:
        family = route_positional_source(
            positional, detector_module=detector_module,
        )
        if family not in grouped:
            grouped[family] = []
        grouped[family].append(positional)
    return grouped
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _runner_kwargs(
    parsed: dict[str, Any],
    config: dict[str, Any],
    detect: str | None = None,
    scope: frozenset[str] | None = None,
    source_buckets: dict[str, list[str]] | None = None,
) -> dict[str, Any]:
    """Assemble the keyword arguments for ``runner.run()``.

    Args:
        parsed: Parsed CLI arguments.
        config: Loaded configuration mapping.
        detect: Detector selector; when falsy, ``parsed["detect"]`` is used.
        scope: Passed through untouched as the ``scope`` kwarg.
        source_buckets: ``{family_key: [positional, ...]}`` classification
            of the positionals; ``None`` is treated as empty.

    Returns:
        A dict of kwargs. For each key in ``_ANALYZE_SOURCE_KEYS`` the
        positional bucket and the explicit flag value from ``parsed`` are
        combined by ``_merge_family_value`` (positionals first, flag last),
        giving ``None``, a single string, or a list of strings. Source
        values stay raw strings; no ``Path`` is built here.

    The all-vs-timeframe check runs first, then the time window and the
    output target are resolved; whatever those helpers raise propagates.
    """
    _assert_all_vs_timeframe(parsed)

    window_start, window_end = _resolve_timeframe(parsed)
    loghunter_cfg = config.get("loghunter", {})

    target_file, target_dir = _resolve_output_target(parsed, config)

    positional_buckets = source_buckets if source_buckets else {}
    per_family: dict[str, str | list[str] | None] = {}
    for family in _ANALYZE_SOURCE_KEYS:
        per_family[family] = _merge_family_value(
            positional_buckets.get(family, []), parsed.get(family),
        )

    return {
        "config": config,
        "detect": detect or parsed.get("detect"),
        # Raw strings / lists / None: the resolver owns Path conversion.
        "zeek_dir": per_family["zeek_dir"],
        "syslog_dir": per_family["syslog_dir"],
        "pihole_dir": per_family["pihole_dir"],
        "cloudtrail_dir": per_family["cloudtrail_dir"],
        "scope": scope,
        "since": window_start,
        "until": window_end,
        "output_format": parsed.get(
            "output", loghunter_cfg.get("output_format", "text"),
        ),
        "output_dir": target_dir,
        "output_file": target_file,
        "verbose_level": _resolve_verbose_level(parsed),
        "dry_run": bool(parsed.get("dry_run", False)),
        "export_allowlist": bool(parsed.get("export_allowlist", False)),
        "load_all": bool(parsed.get("all", False)),
        "skip_confirm": bool(parsed.get("yes", False)),
    }
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
# ── verb runners ──────────────────────────────────────────────────────────────
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _named_detector_module(detect: Any) -> Any:
|
|
698
|
+
"""Return the imported detector module for an exactly-one-detector selector.
|
|
699
|
+
|
|
700
|
+
Returns ``None`` for ``all``, comma lists, exclusion syntax, missing
|
|
701
|
+
selectors, and unimportable names. Imports ONLY the explicitly named
|
|
702
|
+
module via ``importlib`` — never iterates ``detectors/``. Used by the
|
|
703
|
+
two analyze entry points to feed ``route_positional_source`` with the
|
|
704
|
+
detector's REQUIRED_LOGS / OPTIONAL_LOGS metadata.
|
|
705
|
+
"""
|
|
706
|
+
if not isinstance(detect, str):
|
|
707
|
+
return None
|
|
708
|
+
name = detect.strip()
|
|
709
|
+
if not name or name.lower() == "all" or "," in name or "!" in name:
|
|
710
|
+
return None
|
|
711
|
+
try:
|
|
712
|
+
import importlib
|
|
713
|
+
return importlib.import_module(f"loghunter.detectors.{name}")
|
|
714
|
+
except ImportError:
|
|
715
|
+
return None
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def _run_all_detectors(args: list[str]) -> None:
    """Parse ``args`` and invoke the runner without a forced detector.

    When ``--output`` was given its value is validated through
    ``get_handler`` before the config is loaded. Positional paths, if any,
    are bucketed per source family by ``_build_positional_buckets``, using
    the module named by ``parsed["detect"]`` (or ``None`` when that does
    not name a single importable detector). The touched families become
    the run's ``scope``; with no positionals the scope is ``None`` and
    the buckets are empty. Merging with explicit ``--<family>-dir`` flags
    happens inside ``_runner_kwargs``.
    """
    import loghunter.runner as runner

    parsed = _parse_args(args, "")

    if "output" in parsed:
        get_handler(parsed["output"])

    config = cfg.load(parsed.get("config"))

    buckets: dict[str, list[str]] = {}
    scope: frozenset[str] | None = None
    positionals = parsed.get("paths") or []
    if positionals:
        buckets = _build_positional_buckets(
            positionals,
            detector_module=_named_detector_module(parsed.get("detect")),
        )
        if buckets:
            scope = frozenset(buckets)

    run_kwargs = _runner_kwargs(
        parsed, config, scope=scope, source_buckets=buckets,
    )
    runner.run(**run_kwargs)
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _run_single_detector(detector: str, args: list[str]) -> None:
    """Parse ``args`` and invoke the runner constrained to ``detector``.

    When ``--output`` was given its value is validated through
    ``get_handler`` before the config is loaded. Positional paths, if any,
    are bucketed per source family by ``_build_positional_buckets``, using
    the module resolved from ``detector`` (or ``None`` when it cannot be
    imported). The touched families become the run's ``scope``; with no
    positionals the scope is ``None`` and the buckets are empty. Merging
    with explicit ``--<family>-dir`` flags happens inside
    ``_runner_kwargs``, which also receives ``detect=detector``.
    """
    import loghunter.runner as runner

    parsed = _parse_args(args, detector)

    if "output" in parsed:
        get_handler(parsed["output"])

    config = cfg.load(parsed.get("config"))

    buckets: dict[str, list[str]] = {}
    scope: frozenset[str] | None = None
    positionals = parsed.get("paths") or []
    if positionals:
        buckets = _build_positional_buckets(
            positionals,
            detector_module=_named_detector_module(detector),
        )
        if buckets:
            scope = frozenset(buckets)

    run_kwargs = _runner_kwargs(
        parsed, config, detect=detector, scope=scope, source_buckets=buckets,
    )
    runner.run(**run_kwargs)
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
_SOURCE_DIR_KEYS = (
|
|
786
|
+
"zeek_dir", "pihole_dir", "syslog_dir", "cloudtrail_dir", "blob_path",
|
|
787
|
+
)
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def _route_sniffed_path(
    parsed: dict[str, Any],
    path: Path,
    result: Any,
) -> tuple[dict[str, Any], str]:
    """Return a copy of ``parsed`` that routes ``path`` to one source key.

    Every key listed in ``_SOURCE_DIR_KEYS`` is removed from the copy first,
    so a value left over from an earlier path in a fan-out loop cannot leak
    through. ``str(path)`` is then stored under the key chosen from
    ``result.schema`` (and ``result.origin`` for the two split schemas):

    - ``conn``                          -> ``zeek_dir``
    - ``dns``: origin ``pihole``        -> ``pihole_dir``, else ``zeek_dir``
    - ``syslog``: origin ``zeek``       -> ``zeek_dir``, else ``syslog_dir``
    - ``cloudtrail``                    -> ``cloudtrail_dir``
    - anything else (the blob case)     -> ``blob_path``

    ``parsed`` itself is left unmodified. Returns ``(copy, schema)``.
    """
    routed = dict(parsed)
    for stale_key in _SOURCE_DIR_KEYS:
        routed.pop(stale_key, None)

    schema = result.schema
    if schema == "conn":
        target_key = "zeek_dir"
    elif schema == "dns":
        target_key = "pihole_dir" if result.origin == "pihole" else "zeek_dir"
    elif schema == "syslog":
        # Fidelity-aware, like dns: Zeek's own syslog.log is routed to
        # zeek_dir, any other origin to syslog_dir.
        target_key = "zeek_dir" if result.origin == "zeek" else "syslog_dir"
    elif schema == "cloudtrail":
        target_key = "cloudtrail_dir"
    else:
        # blob_path is INTERNAL: synthesized after the sniff. It is not a
        # flag and must never appear in _FLAGS / _VERBS / help.
        target_key = "blob_path"

    routed[target_key] = str(path)
    return routed, schema
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _run_digest(args: list[str]) -> int:
    """Parse ``args`` and dispatch to ``runner.run_digest`` for 0..N paths.

    Flow:

    1. ``--output`` is validated via ``get_handler`` and must be ``text``.
    2. A source-dir flag given together with a positional PATH is rejected.
    3. With no positional, a single config-driven ``conn`` digest is run.
    4. Otherwise each positional is sniffed, routed to its source key and
       rendered into one shared output stream that is opened lazily and
       always closed at the end.

    Returns:
        ``0`` when at least one card rendered or nothing errored, else ``1``.
        The bare (no positional) form always returns ``0``.

    Raises:
        ValueError: For a non-text ``--output`` or a source-dir flag
            combined with a positional PATH.
    """
    import loghunter.runner as runner
    from loghunter.common.loader import sniff_format_detailed

    def _report_recognized_empty(exc: Any) -> None:
        # The file was understood but held no records: narrate, no card.
        print(
            f"digest: {exc.basename}: recognized as {exc.schema} "
            "but no parseable records — skipping.",
            file=sys.stderr,
        )

    parsed = _parse_args(args, "digest")

    # Registry validation first (uniform error voice), then the text-only
    # rail: --output is accepted, but digest renders text cards only.
    out_fmt = parsed.get("output", "text")
    get_handler(out_fmt)
    if out_fmt != "text":
        raise ValueError(
            f"digest currently supports only --output=text (got {out_fmt!r})"
        )

    paths_raw = parsed.get("paths") or []

    # A positional self-routes via sniff, so a source-dir flag next to it
    # would be silently overridden; reject the combination up front.
    if paths_raw:
        source_flags = ("zeek_dir", "pihole_dir", "syslog_dir", "cloudtrail_dir")
        flag = next((name for name in source_flags if name in parsed), None)
        if flag is not None:
            raise ValueError(
                f"digest: --{flag.replace('_', '-')} is not valid alongside "
                "a positional PATH (positionals self-route via sniff)"
            )

    config = cfg.load(parsed.get("config"))

    if not paths_raw:
        # Bare digest: single conn card; the output target is resolved by
        # _digest_runner_kwargs because no fan-out is involved.
        try:
            runner.run_digest(
                **_digest_runner_kwargs(parsed, config, schema="conn")
            )
        except DigestEmpty as exc:
            _report_recognized_empty(exc)
        return 0

    # Fan-out: the shared output target is resolved ONCE, never per path.
    get_stream, close_stream = _build_digest_fanout_stream(
        parsed, config, dry_run=bool(parsed.get("dry_run", False)),
    )

    is_multirun = len(paths_raw) > 1

    cards_rendered = 0
    blank_files = 0
    header_only = 0
    failures = 0
    try:
        for raw in paths_raw:
            path = Path(os.path.expanduser(raw))
            if not path.exists():
                print(f"digest: path not found: {path}", file=sys.stderr)
                failures += 1
                continue
            if path.is_dir():
                # A lone directory positional is an error with an
                # actionable message; in a multi-path run a directory is
                # skipped silently.
                if not is_multirun:
                    print(
                        f"digest: PATH must be a file, not a directory: {path}",
                        file=sys.stderr,
                    )
                    failures += 1
                continue
            try:
                result = sniff_format_detailed(path)
                if result.state == "empty":
                    print(f"{path.name} is empty. Nothing to do!")
                    blank_files += 1
                    continue
                parsed_for_path, schema = _route_sniffed_path(
                    parsed, path, result,
                )
                kwargs = _digest_runner_kwargs(
                    parsed_for_path, config, schema=schema,
                    resolve_output=False,
                )
                if schema != "blob":
                    kwargs["fallback_blob_path"] = path
                runner.run_digest(
                    **kwargs,
                    stream=get_stream(),
                    leading_separator=(cards_rendered > 0),
                    show_progress=not is_multirun,
                )
            except DigestEmpty as exc:
                _report_recognized_empty(exc)
                header_only += 1
            except (ValueError, OSError) as exc:
                print(f"digest: {path.name}: {exc}", file=sys.stderr)
                failures += 1
            else:
                cards_rendered += 1
    finally:
        close_stream()

    return 0 if (cards_rendered > 0 or failures == 0) else 1
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _build_digest_fanout_stream(
|
|
954
|
+
parsed: dict[str, Any],
|
|
955
|
+
config: dict[str, Any],
|
|
956
|
+
dry_run: bool = False,
|
|
957
|
+
) -> tuple[Any, Any]:
|
|
958
|
+
"""Resolve the shared digest --out target into a lazy (get, close) pair.
|
|
959
|
+
|
|
960
|
+
Returns:
|
|
961
|
+
get_stream() — sys.stdout for stdout runs; for file targets, opens on
|
|
962
|
+
first call and returns the same handle on subsequent calls.
|
|
963
|
+
close_stream() — closes the file only if get_stream() was ever called.
|
|
964
|
+
|
|
965
|
+
--dry-run skips output resolution entirely.
|
|
966
|
+
"""
|
|
967
|
+
if dry_run:
|
|
968
|
+
return (lambda: sys.stdout, lambda: None)
|
|
969
|
+
|
|
970
|
+
output_file, output_dir = _resolve_output_target(parsed, config)
|
|
971
|
+
|
|
972
|
+
if output_file is None and output_dir is None:
|
|
973
|
+
return (lambda: sys.stdout, lambda: None)
|
|
974
|
+
|
|
975
|
+
if output_file is not None:
|
|
976
|
+
dest = output_file
|
|
977
|
+
else:
|
|
978
|
+
# DIR verdict: digest_<timestamp>.txt — fixed for whole run, never
|
|
979
|
+
# derived from any input path. One code path for N=1 and N>1.
|
|
980
|
+
stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
|
981
|
+
dest = output_dir / f"digest_{stamp}.txt"
|
|
982
|
+
|
|
983
|
+
state: dict[str, Any] = {"fh": None}
|
|
984
|
+
|
|
985
|
+
def _get_stream() -> Any:
|
|
986
|
+
if state["fh"] is None:
|
|
987
|
+
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
988
|
+
state["fh"] = dest.open("w", encoding="utf-8", newline="")
|
|
989
|
+
return state["fh"]
|
|
990
|
+
|
|
991
|
+
def _close_stream() -> None:
|
|
992
|
+
fh = state["fh"]
|
|
993
|
+
if fh is not None:
|
|
994
|
+
fh.close()
|
|
995
|
+
|
|
996
|
+
return (_get_stream, _close_stream)
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
def _digest_runner_kwargs(
    parsed: dict[str, Any],
    config: dict[str, Any],
    schema: str = "conn",
    resolve_output: bool = True,
) -> dict[str, Any]:
    """Assemble the keyword arguments for ``runner.run_digest``.

    Args:
        parsed: Parsed CLI arguments (possibly a per-path variant).
        config: Loaded configuration mapping.
        schema: Schema name forwarded as the ``schema`` kwarg.
        resolve_output: When false, ``output_file`` and ``output_dir`` are
            both left as ``None`` and the output target is not resolved
            here. This is the fan-out case, where the caller passes a
            shared stream alongside these kwargs.

    Returns:
        A dict of kwargs. The four source-dir values are forwarded exactly
        as found in ``parsed`` (raw string or ``None``). ``blob_path``, an
        internal routing key rather than a flag, is user-expanded and
        wrapped in a ``Path`` when present, otherwise ``None``.

    The all-vs-timeframe check and the time-window resolution run first;
    whatever those helpers raise propagates.
    """
    _assert_all_vs_timeframe(parsed)

    window_start, window_end = _resolve_timeframe(parsed)

    if resolve_output:
        target_file, target_dir = _resolve_output_target(parsed, config)
    else:
        target_file, target_dir = None, None

    raw_blob = parsed.get("blob_path")
    # expanduser only: the blob path takes no other resolution step here.
    blob_target = Path(os.path.expanduser(raw_blob)) if raw_blob else None

    return {
        "config": config,
        # Raw strings (or None): the resolver owns Path conversion.
        "zeek_dir": parsed.get("zeek_dir"),
        "pihole_dir": parsed.get("pihole_dir"),
        "syslog_dir": parsed.get("syslog_dir"),
        "cloudtrail_dir": parsed.get("cloudtrail_dir"),
        "blob_path": blob_target,
        "since": window_start,
        "until": window_end,
        "output_format": parsed.get("output", "text"),
        "output_dir": target_dir,
        "output_file": target_file,
        "verbose_level": _resolve_verbose_level(parsed),
        "dry_run": bool(parsed.get("dry_run", False)),
        "load_all": bool(parsed.get("all", False)),
        "skip_confirm": bool(parsed.get("yes", False)),
        "schema": schema,
    }
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
def _run_init(args: list[str]) -> None:
    """Validate ``args`` for the ``init`` verb, then start the wizard.

    ``_parse_args`` is called purely for validation; its result is
    discarded, and any error it raises propagates before the wizard module
    is imported. The wizard itself takes no arguments.
    """
    _parse_args(args, "init")

    from loghunter.cli_init import run_init as launch_wizard

    launch_wizard()
|
|
1068
|
+
|
|
1069
|
+
|
|
1070
|
+
def _run_export(args: list[str]) -> None:
    """Parse ``args`` for the ``export`` verb and call ``run_export``.

    The first positional is taken as the backend name when it is one of
    ``splunk`` / ``cloudtrail``; the remaining positionals are query names.
    Otherwise no backend is passed (``None``) and every positional is a
    query name. ``--out`` is forwarded as the raw CLI string (``None`` when
    absent), and verbosity is collapsed to a single bool (level >= 1).
    """
    from loghunter.exporters import run_export

    parsed = _parse_args(args, "export")

    # The timeframe is resolved against the current local-zone time.
    since, until = _resolve_timeframe(parsed, now=datetime.now().astimezone())

    config = cfg.load(parsed.get("config"))

    positionals: list[str] = parsed.get("paths") or []

    # Disambiguate: a leading positional naming a known backend is peeled
    # off; everything else is treated as query names.
    known_backends = {"splunk", "cloudtrail"}
    backend: str | None = None
    query_names = positionals
    if positionals and positionals[0] in known_backends:
        backend = positionals[0]
        query_names = positionals[1:]

    # Export has no level-2 surface, so -vv behaves exactly like -v.
    be_verbose = _resolve_verbose_level(parsed) >= 1

    run_export(
        config=config,
        backend=backend,
        query_names=query_names,
        since=since,
        until=until,
        # Raw string, trailing slash preserved; absent flag -> None.
        out=parsed.get("out"),
        verbose=be_verbose,
        skip_confirm=bool(parsed.get("yes", False)),
    )
|