loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
tests/test_runner.py
ADDED
|
@@ -0,0 +1,2599 @@
|
|
|
1
|
+
"""Unit tests for runner helper functions: _derive_data_sources, _dns_nudge, and build_run_plan."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import gzip
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime, timedelta, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from types import SimpleNamespace
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
import loghunter.runner as runner
|
|
15
|
+
from loghunter.common.display import TEXT_RULE_WIDTH
|
|
16
|
+
from loghunter.runner import (
|
|
17
|
+
_DIGEST_TS_CONFIDENCE_FLOOR,
|
|
18
|
+
_aws_no_interactive_note,
|
|
19
|
+
_aws_window_note,
|
|
20
|
+
_check_required_logs,
|
|
21
|
+
_derive_data_sources,
|
|
22
|
+
_dns_nudge,
|
|
23
|
+
_is_optional_satisfiable,
|
|
24
|
+
_print_dry_run,
|
|
25
|
+
_source_overlap_notes,
|
|
26
|
+
_ts_confidence,
|
|
27
|
+
RunPlan,
|
|
28
|
+
build_run_plan,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ── _derive_data_sources ──────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
def test_derive_data_sources_zeek_conn_and_dns() -> None:
    """Populated conn and dns patterns under zeek_dir yield both Zeek labels."""
    record_counts = {"conn*.log*": 1000, "dns*.log*": 500}
    needed_logs = dict.fromkeys(("conn*.log*", "dns*.log*"), "zeek_dir")
    labels = _derive_data_sources(needed_logs, record_counts)
    assert labels == ["zeek_conn", "zeek_dns"]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_derive_data_sources_syslog() -> None:
    """A populated syslog_dir pattern maps to the ``syslog_raw`` label."""
    pattern = "*.log*"
    labels = _derive_data_sources({pattern: "syslog_dir"}, {pattern: 200})
    assert labels == ["syslog_raw"]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_derive_data_sources_excludes_zero_count() -> None:
    """A needed pattern whose record count is zero is left out of the result."""
    needed_logs = {"conn*.log*": "zeek_dir", "dns*.log*": "zeek_dir"}
    # Only conn has records; the zero dns count must keep zeek_dns out.
    record_counts = {"conn*.log*": 50, "dns*.log*": 0}
    labels = _derive_data_sources(needed_logs, record_counts)
    assert labels == ["zeek_conn"]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_derive_data_sources_pihole() -> None:
    """A populated pihole_dir pattern maps to the ``dnsmasq_dns`` label."""
    pattern = "*.log*"
    labels = _derive_data_sources({pattern: "pihole_dir"}, {pattern: 100})
    assert labels == ["dnsmasq_dns"]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_derive_data_sources_cloudtrail() -> None:
    """A populated cloudtrail_dir pattern maps to the ``cloudtrail_raw`` label."""
    pattern = "*.json"
    labels = _derive_data_sources({pattern: "cloudtrail_dir"}, {pattern: 75})
    assert labels == ["cloudtrail_raw"]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_derive_data_sources_unknown_pattern_skipped() -> None:
    """A counted pattern that is absent from needed_logs contributes no label."""
    record_counts = {"conn*.log*": 10, "mystery*.log*": 5}
    labels = _derive_data_sources({"conn*.log*": "zeek_dir"}, record_counts)
    assert labels == ["zeek_conn"]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_derive_data_sources_empty_record_counts() -> None:
    """With an empty record-count mapping, no data sources are derived."""
    labels = _derive_data_sources({"conn*.log*": "zeek_dir"}, {})
    assert labels == []
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ── _dns_nudge ────────────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
def test_dns_nudge_fires_for_dnsmasq_alone() -> None:
    """dnsmasq as the only source produces a nudge naming Pi-hole or dnsmasq."""
    nudge = _dns_nudge(["dnsmasq_dns"])
    assert nudge is not None
    assert any(word in nudge for word in ("Pi-hole", "dnsmasq"))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_dns_nudge_fires_for_dnsmasq_with_non_rich_zeek_source() -> None:
    """zeek_conn is not a rich DNS source, so the nudge must still fire."""
    sources = ["dnsmasq_dns", "zeek_conn"]
    nudge = _dns_nudge(sources)
    assert nudge is not None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_dns_nudge_suppressed_when_zeek_dns_present() -> None:
    """No nudge is produced when zeek_dns is the data source."""
    nudge = _dns_nudge(["zeek_dns"])
    assert nudge is None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_dns_nudge_suppressed_when_zeek_dns_and_dnsmasq_both_present() -> None:
    """No nudge is produced when zeek_dns accompanies dnsmasq_dns."""
    nudge = _dns_nudge(["dnsmasq_dns", "zeek_dns"])
    assert nudge is None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_dns_nudge_suppressed_when_no_dns_at_all() -> None:
    """No nudge is produced when no DNS source is present at all."""
    nudge = _dns_nudge(["zeek_conn", "syslog_raw"])
    assert nudge is None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_dry_run_uses_shared_text_rule_width(capsys) -> None:
    """Every '═' rule line printed by the dry run is TEXT_RULE_WIDTH wide."""
    dry_run_kwargs = {
        "zeek_dir": None,
        "syslog_dir": None,
        "pihole_dir": None,
        "cloudtrail_dir": None,
        "since": None,
        "until": None,
        "load_all": False,
        "will_run": [],
        "skipped": {},
    }
    _print_dry_run(**dry_run_kwargs)
    printed_rows = capsys.readouterr().out.splitlines()
    # The dry-run banner is bracketed by DOUBLE rules (run-summary/dry-run polish).
    double_rules = [row for row in printed_rows if set(row) == {"═"}]

    assert double_rules
    assert {len(rule) for rule in double_rules} == {TEXT_RULE_WIDTH}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_dry_run_lists_cloudtrail_dir(tmp_path: Path, capsys) -> None:
    """The dry run prints the cloudtrail_dir label, its path, and 'found'."""
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    _print_dry_run(
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=None,
        cloudtrail_dir=ct_root,
        since=None,
        until=None,
        load_all=False,
        will_run=[],
        skipped={},
    )
    printed = capsys.readouterr().out
    for expected in ("cloudtrail_dir:", str(ct_root), "found"):
        assert expected in printed
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ── DNS run-plan resolution — four source cases ───────────────────────────────
|
|
150
|
+
|
|
151
|
+
# Skip reason carried by the fake DNS detector; tests compare plan.skipped to it.
_SKIP_REASON = "dns — no DNS source found (need zeek_dir dns logs or pihole_dir logs)"


def _dns_mod() -> SimpleNamespace:
    """Build a stand-in DNS detector module.

    It has no required logs and two optional ones (zeek_dir ``dns*.log*`` and
    pihole_dir ``pihole*.log*``), and it sets REQUIRES_ONE_OF_OPTIONAL with
    ``_SKIP_REASON`` as the accompanying reason.
    """
    optional_logs = [
        {"source": source, "pattern": pattern}
        for source, pattern in (
            ("zeek_dir", "dns*.log*"),
            ("pihole_dir", "pihole*.log*"),
        )
    ]
    fake_detector = SimpleNamespace(
        REQUIRED_LOGS=[],
        OPTIONAL_LOGS=optional_logs,
        REQUIRES_ONE_OF_OPTIONAL=True,
        REQUIRES_ONE_OF_OPTIONAL_REASON=_SKIP_REASON,
    )
    return fake_detector
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _beacon_mod() -> SimpleNamespace:
    """Build a stand-in beacon detector that requires zeek_dir ``conn*.log*``.

    NOTE(review): this module defines ``_beacon_mod`` a second time further
    down (returning the real ``loghunter.detectors.beacon`` module). That later
    definition rebinds the name, so calls made at test time resolve to it and
    not to this fake. Confirm which one each caller intends.
    """
    required_logs = [{"source": "zeek_dir", "pattern": "conn*.log*"}]
    fake_detector = SimpleNamespace(REQUIRED_LOGS=required_logs, OPTIONAL_LOGS=[])
    return fake_detector
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_dns_plan_neither_source_skipped() -> None:
    """With no source directories, dns is skipped with the detector's reason."""
    plan = build_run_plan(
        "all",
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=None,
        detectors={"dns": _dns_mod()},
    )
    assert plan.will_run == []
    skip_reason = plan.skipped["dns"]
    assert skip_reason == _SKIP_REASON
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_dns_plan_zeek_only_runs(tmp_path: Path) -> None:
    """A zeek_dir holding dns.log is enough for dns to run; only that pattern is needed."""
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    zeek_root.joinpath("dns.log").write_text("", encoding="utf-8")

    plan = build_run_plan(
        "all",
        zeek_dir=zeek_root,
        syslog_dir=None,
        pihole_dir=None,
        detectors={"dns": _dns_mod()},
    )

    assert "dns" in plan.will_run
    assert "dns" not in plan.skipped
    assert plan.needed_logs == {"dns*.log*": "zeek_dir"}
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def test_dns_plan_pihole_only_runs(tmp_path: Path) -> None:
    """A pihole_dir holding pihole.log is enough for dns to run; only that pattern is needed."""
    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    pihole_root.joinpath("pihole.log").write_text("", encoding="utf-8")

    plan = build_run_plan(
        "all",
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=pihole_root,
        detectors={"dns": _dns_mod()},
    )

    assert "dns" in plan.will_run
    assert "dns" not in plan.skipped
    assert plan.needed_logs == {"pihole*.log*": "pihole_dir"}
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_dns_plan_both_sources_runs(tmp_path: Path) -> None:
    """With DNS logs in both directories, both optional patterns are planned."""
    log_dirs = {}
    for dirname, filename in (("zeek", "dns.log"), ("pihole", "pihole.log")):
        directory = tmp_path / dirname
        directory.mkdir()
        (directory / filename).write_text("", encoding="utf-8")
        log_dirs[dirname] = directory

    plan = build_run_plan(
        "all",
        zeek_dir=log_dirs["zeek"],
        syslog_dir=None,
        pihole_dir=log_dirs["pihole"],
        detectors={"dns": _dns_mod()},
    )

    assert "dns" in plan.will_run
    for pattern, source in (("dns*.log*", "zeek_dir"), ("pihole*.log*", "pihole_dir")):
        assert plan.needed_logs.get(pattern) == source
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_dns_plan_zeek_no_dns_files_pihole_satisfies(tmp_path: Path) -> None:
    """zeek_dir without any dns*.log* plus a populated pihole_dir loads only the pihole pattern."""
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    # Only a conn log here: nothing in zeek_dir matches dns*.log*.
    zeek_root.joinpath("conn.log").write_text("", encoding="utf-8")
    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    pihole_root.joinpath("pihole.log").write_text("", encoding="utf-8")

    plan = build_run_plan(
        "all",
        zeek_dir=zeek_root,
        syslog_dir=None,
        pihole_dir=pihole_root,
        detectors={"dns": _dns_mod()},
    )

    assert "dns" in plan.will_run
    assert plan.needed_logs == {"pihole*.log*": "pihole_dir"}
    assert "dns*.log*" not in plan.needed_logs
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_dns_plan_beacon_regression(tmp_path: Path) -> None:
    """Passing pihole_dir=None leaves a detector with ordinary REQUIRED_LOGS runnable."""
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    zeek_root.joinpath("conn.log").write_text("", encoding="utf-8")

    plan = build_run_plan(
        "all",
        zeek_dir=zeek_root,
        syslog_dir=None,
        pihole_dir=None,
        detectors={"beacon": _beacon_mod()},
    )

    assert "beacon" in plan.will_run
    assert "beacon" not in plan.skipped
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# ── data_sources via _derive_data_sources on plan.needed_logs ─────────────────
|
|
265
|
+
|
|
266
|
+
def test_data_sources_dns_zeek_only(tmp_path: Path) -> None:
    """A Zeek-only DNS plan with dns records derives just ``zeek_dns``."""
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    zeek_root.joinpath("dns.log").write_text("", encoding="utf-8")
    plan = build_run_plan(
        "all",
        zeek_dir=zeek_root,
        syslog_dir=None,
        pihole_dir=None,
        detectors={"dns": _dns_mod()},
    )
    labels = _derive_data_sources(plan.needed_logs, {"dns*.log*": 500})
    assert labels == ["zeek_dns"]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def test_data_sources_dns_pihole_only(tmp_path: Path) -> None:
    """A Pi-hole-only DNS plan with pihole records derives just ``dnsmasq_dns``."""
    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    pihole_root.joinpath("pihole.log").write_text("", encoding="utf-8")
    plan = build_run_plan(
        "all",
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=pihole_root,
        detectors={"dns": _dns_mod()},
    )
    labels = _derive_data_sources(plan.needed_logs, {"pihole*.log*": 100})
    assert labels == ["dnsmasq_dns"]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def test_data_sources_dns_both(tmp_path: Path) -> None:
    """With both DNS sources populated, the result is ['dnsmasq_dns', 'zeek_dns']."""
    log_dirs = {}
    for dirname, filename in (("zeek", "dns.log"), ("pihole", "pihole.log")):
        directory = tmp_path / dirname
        directory.mkdir()
        (directory / filename).write_text("", encoding="utf-8")
        log_dirs[dirname] = directory
    plan = build_run_plan(
        "all",
        zeek_dir=log_dirs["zeek"],
        syslog_dir=None,
        pihole_dir=log_dirs["pihole"],
        detectors={"dns": _dns_mod()},
    )
    record_counts = {"dns*.log*": 500, "pihole*.log*": 100}
    labels = _derive_data_sources(plan.needed_logs, record_counts)
    assert labels == ["dnsmasq_dns", "zeek_dns"]
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def test_data_sources_dns_neither() -> None:
    """A plan built without any source directory derives no data sources."""
    plan = build_run_plan(
        "all",
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=None,
        detectors={"dns": _dns_mod()},
    )
    labels = _derive_data_sources(plan.needed_logs, {})
    assert labels == []
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# ── Stage 4: pattern-aware single-file satisfiability ─────────────────────────
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _beacon_mod():
    """Return the real ``loghunter.detectors.beacon`` module, imported on first call.

    NOTE(review): an earlier ``_beacon_mod`` in this module returns a
    SimpleNamespace fake. This definition rebinds the name, so the earlier one
    is shadowed at call time. Confirm that is intended.
    """
    import loghunter.detectors.beacon as beacon_module

    return beacon_module
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _dns_real_mod():
    """Return the real ``loghunter.detectors.dns`` module, imported on first call."""
    import loghunter.detectors.dns as dns_module

    return dns_module
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def test_check_required_logs_zeek_file_matching_pattern_passes(tmp_path: Path) -> None:
    """Pointing zeek_dir at a single conn.log file satisfies the beacon requirement."""
    conn_log = tmp_path / "conn.log"
    conn_log.write_text("", encoding="utf-8")
    skip_reason = _check_required_logs(_beacon_mod(), {"zeek_dir": conn_log})
    assert skip_reason is None
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def test_check_required_logs_zeek_file_wrong_pattern_skips(tmp_path: Path) -> None:
    """beacon /path/to/dns.log is skipped: dns.log does not match conn*.log*."""
    dns_log = tmp_path / "dns.log"
    dns_log.write_text("", encoding="utf-8")
    skip_reason = _check_required_logs(_beacon_mod(), {"zeek_dir": dns_log})
    assert skip_reason is not None
    for fragment in ("conn*.log*", "not found"):
        assert fragment in skip_reason
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def test_is_optional_satisfiable_zeek_file_matches_dns_pattern(tmp_path: Path) -> None:
    """loghunter dns /path/to/dns.log: the DNS optional requirement is satisfiable."""
    dns_log = tmp_path / "dns.log"
    dns_log.write_text("", encoding="utf-8")
    requirement = {"source": "zeek_dir", "pattern": "dns*.log*"}
    satisfiable = _is_optional_satisfiable(requirement, {"zeek_dir": dns_log})
    assert satisfiable is True
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# ── CloudTrail source threading ───────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
def _cloudtrail_mod() -> SimpleNamespace:
    """Build a fake detector named "fakeaws" that requires cloudtrail_dir ``*.json*``.

    Used by the satisfiability tests in place of a real aws-family detector.
    """
    required_logs = [{"source": "cloudtrail_dir", "pattern": "*.json*"}]
    fake_detector = SimpleNamespace(
        DETECTOR_NAME="fakeaws",
        STATUS="available",
        REQUIRED_LOGS=required_logs,
        OPTIONAL_LOGS=[],
    )
    return fake_detector
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def test_check_required_logs_cloudtrail_native_nested_tree_passes(
    tmp_path: Path,
) -> None:
    """A native AWSLogs/<acct>/CloudTrail/<region>/YYYY/MM/DD/ tree satisfies the requirement.

    Per the original test notes, the file is expected to resolve via
    discover_cloudtrail_files, not via a raw (non-recursive) directory.glob.
    """
    day_dir = tmp_path.joinpath(
        "AWSLogs", "123456789012", "CloudTrail", "us-east-1", "2026", "06", "01"
    )
    day_dir.mkdir(parents=True)
    day_dir.joinpath("events.json.gz").write_bytes(b"placeholder")

    skip_reason = _check_required_logs(_cloudtrail_mod(), {"cloudtrail_dir": tmp_path})
    assert skip_reason is None
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def test_check_required_logs_cloudtrail_empty_dir_returns_reason(
    tmp_path: Path,
) -> None:
    """An empty cloudtrail_dir yields a skip reason that names the directory."""
    empty_root = tmp_path / "empty-ct"
    empty_root.mkdir()

    skip_reason = _check_required_logs(_cloudtrail_mod(), {"cloudtrail_dir": empty_root})
    assert skip_reason is not None
    for fragment in ("no CloudTrail JSON logs found", str(empty_root)):
        assert fragment in skip_reason
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def test_build_run_plan_threads_cloudtrail_dir_into_source_map(
    tmp_path: Path,
) -> None:
    """build_run_plan accepts cloudtrail_dir and plans the fake aws detector against it."""
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    ct_root.joinpath("events.json.log").write_text("{}\n", encoding="utf-8")

    plan = build_run_plan(
        "all",
        zeek_dir=None,
        syslog_dir=None,
        pihole_dir=None,
        cloudtrail_dir=ct_root,
        detectors={"fakeaws": _cloudtrail_mod()},
    )

    assert "fakeaws" in plan.will_run
    assert plan.needed_logs == {"*.json*": "cloudtrail_dir"}
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def test_runner_cloudtrail_integration_lights_data_sources(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """End-to-end load contract for a detector that requires cloudtrail_dir.

    runner.run is driven with one CloudTrail event on disk and a fake
    detector. The captured RunSummary must list "cloudtrail_raw" in
    data_sources, and the context handed to the detector must carry the loaded
    frame. Per the original notes, this proves Thread A wires
    plan → load → context → data_sources before the real aws detector lands
    in Thread B.
    """
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    identity = {
        "type": "IAMUser",
        "userName": "placeholder-user",
        "arn": "arn:aws:iam::123456789012:user/placeholder-user",
    }
    record = {
        "eventTime": "2026-06-01T12:00:00Z",
        "eventSource": "s3.amazonaws.com",
        "eventName": "GetObject",
        "eventID": "integration-test-event",
        "awsRegion": "us-east-1",
        "sourceIPAddress": "192.0.2.10",
        "userIdentity": identity,
        "readOnly": True,
    }
    event_file = ct_root / "events.json.log"
    event_file.write_text(json.dumps(record) + "\n", encoding="utf-8")

    seen: dict = {}

    def _recording_run(ctx):
        # Keep the context so the loaded frame can be inspected after the run.
        seen["ctx"] = ctx
        return []

    detector = SimpleNamespace(
        DETECTOR_NAME="fakeaws",
        STATUS="available",
        REQUIRED_LOGS=[{"source": "cloudtrail_dir", "pattern": "*.json*"}],
        OPTIONAL_LOGS=[],
        DEFAULT_CONFIG={},
        run=_recording_run,
    )
    monkeypatch.setattr(runner, "discover_detectors", lambda: {"fakeaws": detector})

    runner.run(
        config={"loghunter": {"detect": "fakeaws"}},
        cloudtrail_dir=ct_root,
    )

    summary = capture_summary["summary"]
    assert summary.data_sources == ["cloudtrail_raw"]
    assert summary.record_counts.get("*.json*", 0) == 1

    frame = seen["ctx"].logs["*.json*"]
    from loghunter.common.loader import _CLOUDTRAIL_COLUMNS

    assert list(frame.columns) == _CLOUDTRAIL_COLUMNS
    assert frame.iloc[0]["event_id"] == "integration-test-event"
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
# ── Stage 4: integration tests — drive runner.run() end-to-end ────────────────
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _write_ndjson(path: Path, records: list[dict]) -> None:
    """Write *records* to *path* as newline-delimited JSON with a trailing newline."""
    encoded_rows = [json.dumps(record) for record in records]
    payload = "\n".join(encoded_rows) + "\n"
    path.write_text(payload, encoding="utf-8")
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
# POSIX timestamps for midnight UTC on 1 January and 5 January 2026.
_TS_JAN1, _TS_JAN5 = (
    datetime(2026, 1, day, tzinfo=timezone.utc).timestamp() for day in (1, 5)
)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _conn(ts: float) -> dict:
    """Return a fixed conn-log record (TCP 192.0.2.10 → 198.51.100.20:443) stamped with *ts*."""
    record = {"ts": ts}
    record.update(
        {
            "id.orig_h": "192.0.2.10",
            "id.resp_h": "198.51.100.20",
            "id.resp_p": 443,
            "proto": "tcp",
        }
    )
    return record
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _make_dated_zeek(tmp_path: Path, dates_records: dict[str, list[dict]]) -> Path:
    """Create ``tmp_path/zeek/<date>/conn.log`` for each date key and return the zeek dir.

    Each conn.log holds that date's records as newline-delimited JSON.
    """
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    for date_name in dates_records:
        day_dir = zeek_root / date_name
        day_dir.mkdir()
        _write_ndjson(day_dir / "conn.log", dates_records[date_name])
    return zeek_root
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _make_flat_zeek(tmp_path: Path, records: list[dict]) -> Path:
    """Create ``tmp_path/zeek/conn.log`` holding *records* and return the zeek dir."""
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    conn_log = zeek_root / "conn.log"
    _write_ndjson(conn_log, records)
    return zeek_root
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
@pytest.fixture
def capture_summary(monkeypatch):
    """Patch _build_output_handler so the RunSummary is captured instead of rendered.

    Returns a dict that the stub handler fills in: ``"summary"`` from
    ``begin`` and ``"findings"`` from ``write``.
    """
    sink: dict = {}

    class _RecordingHandler:
        def begin(self, rs):
            sink["summary"] = rs

        def write(self, fs):
            sink["findings"] = fs

        def end(self):
            pass

    def _fake_build(output_format, output_dir, output_file, verbose_level, *, max_findings_per_detector=100):
        # Same two-item shape as the call site expects: (handler, zero-arg callable).
        handler = _RecordingHandler()
        return handler, (lambda: None)

    monkeypatch.setattr("loghunter.runner._build_output_handler", _fake_build)
    return sink
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
# Runner config shared by the tests below: beacon detector only, one-day default window.
_BEACON_ONLY = {
    "loghunter": {
        "detect": "beacon",
        "default_window": "1d",
    },
}
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def test_runner_dated_default_filters_to_newest_date(tmp_path, capture_summary, capsys):
    """With a 1d default window over dated dirs, one conn record is counted."""
    records_by_date = {
        "2026-01-01": [_conn(_TS_JAN1)],
        "2026-01-05": [_conn(_TS_JAN5)],
    }
    zeek_root = _make_dated_zeek(tmp_path, records_by_date)
    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 1
    # The prose default-window note moved to a pre-load stderr announcement.
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text
    assert not any("Default window" in note for note in summary.notes)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def test_runner_dated_default_7d_with_sparse_dirs(tmp_path, capture_summary):
    """A 7d default window counts both records from dated dirs four days apart."""
    records_by_date = {
        "2026-01-01": [_conn(_TS_JAN1)],
        "2026-01-05": [_conn(_TS_JAN5)],
    }
    zeek_root = _make_dated_zeek(tmp_path, records_by_date)
    week_config = {"loghunter": {"detect": "beacon", "default_window": "7d"}}
    runner.run(config=week_config, zeek_dir=zeek_root)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def test_runner_flat_default_filters_to_last_span(tmp_path, capture_summary, capsys):
    """With a 1d default window over a flat zeek dir, one of two records is counted."""
    records = [_conn(_TS_JAN1), _conn(_TS_JAN5)]
    zeek_root = _make_flat_zeek(tmp_path, records)
    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 1
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text
    assert not any("Default window" in note for note in summary.notes)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def test_runner_bounded_single_file_loads_everything_no_note(
    tmp_path, capture_summary, capsys
):
    """A single conn.log passed as zeek_dir loads both records with no default-window text."""
    conn_log = tmp_path / "conn.log"
    _write_ndjson(conn_log, [_conn(_TS_JAN1), _conn(_TS_JAN5)])
    runner.run(config=_BEACON_ONLY, zeek_dir=conn_log)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
    assert not any("Default window" in note for note in summary.notes)
    stderr_text = capsys.readouterr().err
    assert "Default window" not in stderr_text
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def test_runner_populates_detector_methods_for_will_run(tmp_path, capture_summary):
    """W1: RunSummary.detector_methods holds the MethodTag of each detector in plan.will_run.

    For beacon the expected tag is FFT with named=True.
    """
    from loghunter.common.finding import MethodTag

    conn_log = tmp_path / "conn.log"
    _write_ndjson(conn_log, [_conn(_TS_JAN1)])
    runner.run(config=_BEACON_ONLY, zeek_dir=conn_log)
    summary = capture_summary["summary"]
    assert summary.detectors_run == ["beacon"]
    beacon_tag = summary.detector_methods.get("beacon")
    assert beacon_tag == MethodTag("FFT", named=True)
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def test_runner_load_all_on_single_file_silent_noop(tmp_path, capture_summary):
    """--all on a BOUNDED single file loads everything, with no default-window note and no error."""
    conn_log = tmp_path / "conn.log"
    _write_ndjson(conn_log, [_conn(_TS_JAN1), _conn(_TS_JAN5)])
    runner.run(config=_BEACON_ONLY, zeek_dir=conn_log, load_all=True)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
    assert not any("Default window" in note for note in summary.notes)
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def test_runner_explicit_since_suppresses_default_window_note(
    tmp_path, capture_summary, capsys
):
    """An explicit since/until range loads both dated records and prints no default-window text."""
    records_by_date = {
        "2026-01-01": [_conn(_TS_JAN1)],
        "2026-01-05": [_conn(_TS_JAN5)],
    }
    zeek_root = _make_dated_zeek(tmp_path, records_by_date)
    window_start = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
    window_end = datetime(2026, 1, 5, 23, 59, 59, tzinfo=timezone.utc)
    runner.run(
        config=_BEACON_ONLY,
        zeek_dir=zeek_root,
        since=window_start,
        until=window_end,
    )
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
    assert not any("Default window" in note for note in summary.notes)
    stderr_text = capsys.readouterr().err
    assert "Default window" not in stderr_text
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def test_runner_load_all_overrides_default_window(tmp_path, capture_summary, capsys):
    """load_all=True loads both dated records despite the 1d default window."""
    records_by_date = {
        "2026-01-01": [_conn(_TS_JAN1)],
        "2026-01-05": [_conn(_TS_JAN5)],
    }
    zeek_root = _make_dated_zeek(tmp_path, records_by_date)
    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root, load_all=True)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
    assert not any("Default window" in note for note in summary.notes)
    stderr_text = capsys.readouterr().err
    assert "Default window" not in stderr_text
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def test_runner_default_window_empty_string_disables(tmp_path, capture_summary):
    """An empty default_window string loads both dated records and adds no window note."""
    records_by_date = {
        "2026-01-01": [_conn(_TS_JAN1)],
        "2026-01-05": [_conn(_TS_JAN5)],
    }
    zeek_root = _make_dated_zeek(tmp_path, records_by_date)
    no_window_config = {"loghunter": {"detect": "beacon", "default_window": ""}}
    runner.run(config=no_window_config, zeek_dir=zeek_root)
    summary = capture_summary["summary"]
    assert summary.record_counts.get("conn*.log*", 0) == 2
    assert not any("Default window" in note for note in summary.notes)
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
# ── Source-coverage disclosure notes ──────────────────────────────────────────
|
|
638
|
+
#
|
|
639
|
+
# Each test drives runner.run end-to-end and inspects RunSummary.notes for the
|
|
640
|
+
# user-facing disclosure note. The HUMAN label (`Pi-hole` / `syslog` /
|
|
641
|
+
# `CloudTrail` / `Zeek <log_type>`) is asserted explicitly — and the parallel
|
|
642
|
+
# `data_sources` token string (`dnsmasq_dns` / `zeek_dns` / `syslog_raw` /
|
|
643
|
+
# `cloudtrail_raw`) is asserted ABSENT from the note text, to pin against
|
|
644
|
+
# internal-token leaks (Glenn B+ required).
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _has_coverage_note(notes, *, starts_with, forbidden_token=None):
|
|
648
|
+
"""Return the disclosure note that starts with the given human label; fail
|
|
649
|
+
fast if any note starts with a forbidden internal-token prefix."""
|
|
650
|
+
for n in notes:
|
|
651
|
+
if forbidden_token is not None and n.startswith(forbidden_token + ":"):
|
|
652
|
+
raise AssertionError(
|
|
653
|
+
f"note leaked internal token {forbidden_token!r}: {n!r}"
|
|
654
|
+
)
|
|
655
|
+
matches = [n for n in notes if n.startswith(starts_with + ":")]
|
|
656
|
+
assert matches, (
|
|
657
|
+
f"no note starting with {starts_with!r} found in: {notes!r}"
|
|
658
|
+
)
|
|
659
|
+
return matches[0]
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def test_runner_dated_zeek_outside_window_emits_bare_note(
    tmp_path, capture_summary,
):
    """Dated Zeek data that lies wholly outside the requested window.

    The only dated subdir is from 2025 while the run asks for 2030, so nothing
    is loaded. The summary is expected to carry the BARE "Zeek conn:" note
    (mentioning "files found" and the widen hint), and the beacon detector is
    still listed as run rather than skipped.
    """
    stale_ts = datetime(2025, 1, 1, tzinfo=timezone.utc).timestamp()
    zeek_root = _make_dated_zeek(tmp_path, {"2025-01-01": [_conn(stale_ts)]})
    window_start = datetime(2030, 1, 1, tzinfo=timezone.utc)
    window_end = datetime(2030, 12, 31, tzinfo=timezone.utc)

    runner.run(
        config=_BEACON_ONLY,
        zeek_dir=zeek_root,
        since=window_start,
        until=window_end,
    )

    summary = capture_summary["summary"]
    bare_note = _has_coverage_note(
        summary.notes, starts_with="Zeek conn", forbidden_token="zeek_conn",
    )
    assert "files found" in bare_note
    assert "Widen with --since/--days" in bare_note
    # The detector ran on an empty frame; it must not be reported as skipped.
    assert summary.detectors_run == ["beacon"]
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def test_runner_flat_zeek_per_pattern_trim_emits_span_note(
    tmp_path, capture_summary,
):
    """Flat Zeek dir, default window, stale dns log next to a fresh conn log.

    The window is anchored on conn's newest timestamp, which trims the
    30-day-old dns row to nothing. The dns pattern is then expected to get a
    SPAN note labelled "Zeek dns:" (never the internal "zeek_dns:" token).

    The detector selection is "beacon, dns" so that both Zeek patterns are
    part of the plan; the per-pattern trim only matters when more than one
    Zeek pattern is loaded.
    """
    # conn is stamped "now" so the 1d default window keeps it; dns is stamped
    # 30 days earlier so the same window removes it.
    now_ts = datetime.now(timezone.utc).timestamp()
    month_old_ts = now_ts - 30 * 24 * 3600

    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    _write_ndjson(zeek_root / "conn.log", [_conn(now_ts)])
    stale_dns_row = {
        "ts": month_old_ts,
        "id.orig_h": "192.0.2.10",
        "query": "example.test",
        "qclass": 1,
    }
    _write_ndjson(zeek_root / "dns.log", [stale_dns_row])

    run_config = {"loghunter": {"detect": "beacon, dns", "default_window": "1d"}}
    runner.run(config=run_config, zeek_dir=zeek_root)

    summary = capture_summary["summary"]
    # conn supplied the anchor and stays in-window; dns is trimmed to empty.
    assert summary.record_counts.get("dns*.log*", 0) == 0
    assert summary.record_counts.get("conn*.log*", 0) == 1
    span_note = _has_coverage_note(
        summary.notes, starts_with="Zeek dns", forbidden_token="zeek_dns",
    )
    for expected_fragment in ("rows loaded", "data spans", "Widen with --since/--days"):
        assert expected_fragment in span_note
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def test_runner_populated_run_emits_no_coverage_note(tmp_path, capture_summary):
    """Happy path: one populated Zeek conn file yields no disclosure note.

    None of the human coverage labels may introduce a note in the summary.
    """
    conn_file = tmp_path / "conn.log"
    _write_ndjson(conn_file, [_conn(_TS_JAN1)])

    runner.run(config=_BEACON_ONLY, zeek_dir=conn_file)

    summary = capture_summary["summary"]
    coverage_labels = ("Zeek conn", "Zeek dns", "Pi-hole", "syslog", "CloudTrail")
    for label in coverage_labels:
        labelled = [note for note in summary.notes if note.startswith(label + ":")]
        assert not labelled, (
            f"unexpected coverage note for {label}: {summary.notes!r}"
        )
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def test_runner_empty_zeek_file_no_coverage_note(tmp_path, capture_summary):
    """An empty Zeek conn file (e.g. a rotation artifact) produces no note.

    The file is readable but has no rows, so advising the operator to widen
    the window would be misleading; no "Zeek conn:" note is expected.
    """
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    empty_conn = zeek_root / "conn.log"
    empty_conn.write_text("", encoding="utf-8")

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root)

    summary = capture_summary["summary"]
    # Empty file and empty frame must still not trigger a coverage note.
    conn_notes = [note for note in summary.notes if note.startswith("Zeek conn:")]
    assert not conn_notes, summary.notes
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
# ── Mock-based runner tests (stale Pi-hole / parse-gap CT / multi-source) ────
|
|
761
|
+
#
|
|
762
|
+
# These cases need precise control over LoadResult.coverage shape that the
|
|
763
|
+
# parser layer doesn't make easy to fixture (year-guessing in dnsmasq /
|
|
764
|
+
# multi-pattern coverage assembly). Mocking load_required_logs lets each test
|
|
765
|
+
# pin one coverage scenario through runner.run + the capture_summary fixture
|
|
766
|
+
# while still exercising the full runner-side note assembly.
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
@pytest.fixture
def mock_load_required_logs(monkeypatch):
    """Provide an installer that stubs ``loader.load_required_logs``.

    The fixture value is a callable; calling it with a hand-built LoadResult
    patches ``loghunter.common.loader.load_required_logs`` so that any call,
    whatever its arguments, returns that object.
    """
    from loghunter.common import loader as _loader

    def _install(load_result):
        monkeypatch.setattr(
            _loader,
            "load_required_logs",
            lambda *args, **kwargs: load_result,
        )

    return _install
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def _ts_window_span():
|
|
782
|
+
"""A representative full-data span used in mocked SPAN-coverage tests."""
|
|
783
|
+
return (
|
|
784
|
+
datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
|
|
785
|
+
datetime(2025, 6, 5, 0, 0, 0, tzinfo=timezone.utc),
|
|
786
|
+
)
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def test_runner_stale_pihole_emits_pihole_span_note(
    tmp_path, capture_summary, mock_load_required_logs,
):
    """Stale Pi-hole archive in a dns run gets a "Pi-hole:" SPAN note.

    The mocked load reports one Zeek dns row and an empty Pi-hole frame whose
    coverage says 15,400,000 rows exist over an older span. The note must use
    the human label (not the internal "dnsmasq_dns:" token). Because Pi-hole
    contributed no records, it must stay out of ``data_sources`` and the
    "Pi-hole/dnsmasq logs" nudge must not appear.
    """
    from loghunter.common.loader import LoadResult, SourceCoverage

    # Minimal on-disk Zeek dns + Pi-hole inputs so the plan selects both patterns.
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    dns_row = {
        "ts": _TS_JAN5, "id.orig_h": "192.0.2.10",
        "query": "example.test", "qclass": 1,
    }
    _write_ndjson(zeek_root / "dns.log", [dns_row])

    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    (pihole_root / "pihole.log").write_text(
        "Jun 1 12:00:00 dnsmasq[1]: query[A] x.test from 192.0.2.10\n",
        encoding="utf-8",
    )

    loaded_dns = pd.DataFrame({
        "ts": [_TS_JAN5], "src": ["192.0.2.10"],
        "query": ["example.test"], "qclass": [1],
    })
    empty_pihole = pd.DataFrame(columns=_PIHOLE_COLUMNS_FOR_MOCK)
    full_span = _ts_window_span()
    jan5_start = datetime.fromtimestamp(_TS_JAN5, tz=timezone.utc)
    jan5_end = datetime.fromtimestamp(_TS_JAN5, tz=timezone.utc)
    canned_result = LoadResult(
        logs={"dns*.log*": loaded_dns, "pihole*.log*": empty_pihole},
        record_counts={"dns*.log*": 1},  # no pihole entry: zero records
        data_window=(jan5_start, jan5_end),
        warnings=[],
        data_size_bytes=0,
        coverage={"pihole*.log*": SourceCoverage(15_400_000, full_span)},
    )
    mock_load_required_logs(canned_result)

    runner.run(
        config={"loghunter": {"detect": "dns", "default_window": ""}},
        zeek_dir=zeek_root,
        pihole_dir=pihole_root,
        since=datetime(2026, 6, 1, tzinfo=timezone.utc),
        until=datetime(2026, 6, 3, tzinfo=timezone.utc),
    )

    summary = capture_summary["summary"]
    pihole_note = _has_coverage_note(
        summary.notes, starts_with="Pi-hole", forbidden_token="dnsmasq_dns",
    )
    assert "15,400,000 rows loaded" in pihole_note
    assert "data spans" in pihole_note
    assert "Widen with --since/--days" in pihole_note
    # Only the Zeek dns source is listed, and the nudge stays silent.
    assert "zeek_dns" in summary.data_sources
    assert "dnsmasq_dns" not in summary.data_sources
    assert all("Pi-hole/dnsmasq logs" not in note for note in summary.notes)
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def test_runner_parse_gap_cloudtrail_emits_no_note(
    tmp_path, capture_summary, mock_load_required_logs,
):
    """CloudTrail coverage of ``(0, None)`` (parse gap) yields no note.

    The mocked load returns an empty CloudTrail frame with
    ``SourceCoverage(0, None)``; no coverage note for any label is expected.
    """
    from loghunter.common.loader import LoadResult, SourceCoverage

    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    (ct_root / "events.json.log").write_text("{}", encoding="utf-8")

    canned_result = LoadResult(
        logs={"*.json*": pd.DataFrame(columns=_CT_COLUMNS_FOR_MOCK)},
        record_counts={},
        data_window=None,
        warnings=[],
        data_size_bytes=0,
        coverage={"*.json*": SourceCoverage(0, None)},
    )
    mock_load_required_logs(canned_result)

    runner.run(config={"loghunter": {"detect": "aws"}}, cloudtrail_dir=ct_root)

    summary = capture_summary["summary"]
    # Neither CloudTrail nor any other label may introduce a coverage note.
    for label in ("CloudTrail", "Zeek conn", "Pi-hole", "syslog"):
        labelled = [note for note in summary.notes if note.startswith(label + ":")]
        assert not labelled, (
            f"unexpected coverage note for {label}: {summary.notes!r}"
        )
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
def test_runner_wrong_family_syslog_skip_no_coverage_note(
    tmp_path, capture_summary, mock_load_required_logs,
):
    """A wrong-family skip in ``syslog_dir`` must not produce a window note.

    The load is mocked to look like the loader skipped the file: an empty
    frame plus ``SourceCoverage(None, None)``. The runner's BARE-note arm is
    described as zeek_dir-only, so no "syslog:" note is expected.
    """
    from loghunter.common.loader import LoadResult, SourceCoverage

    syslog_root = tmp_path / "syslog"
    syslog_root.mkdir()
    # The on-disk file has to look like syslog so plan-time content sniffing
    # accepts it; the wrong-family skip itself is simulated by the mocked load.
    (syslog_root / "host.log").write_text(
        "<134>May 31 12:00:00 host-a kernel: x\n", encoding="utf-8"
    )

    syslog_columns = ["ts", "host", "program", "raw", "message"]
    canned_result = LoadResult(
        logs={"*.log*": pd.DataFrame(columns=syslog_columns)},
        record_counts={},
        data_window=None,
        warnings=[],
        data_size_bytes=0,
        # No file was read for this pattern, hence the (None, None) coverage.
        coverage={"*.log*": SourceCoverage(None, None)},
    )
    mock_load_required_logs(canned_result)

    runner.run(config={"loghunter": {"detect": "syslog"}}, syslog_dir=syslog_root)

    summary = capture_summary["summary"]
    syslog_notes = [note for note in summary.notes if note.startswith("syslog:")]
    assert not syslog_notes, summary.notes
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
def test_runner_unconfigured_source_no_coverage_note(
    tmp_path, capture_summary,
):
    """An unconfigured source gets the loader warning, not a disclosure note.

    The run uses detect=dns with only ``zeek_dir`` supplied. The Pi-hole
    pattern is therefore not loaded, and a "Pi-hole:" disclosure note would
    merely duplicate the loader's "not configured" warning, so none may
    appear.
    """
    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    dns_row = {
        "ts": _TS_JAN5, "id.orig_h": "192.0.2.10",
        "query": "example.test", "qclass": 1,
    }
    _write_ndjson(zeek_root / "dns.log", [dns_row])

    # pihole_dir is deliberately not passed.
    runner.run(
        config={"loghunter": {"detect": "dns", "default_window": ""}},
        zeek_dir=zeek_root,
    )

    summary = capture_summary["summary"]
    pihole_notes = [note for note in summary.notes if note.startswith("Pi-hole:")]
    assert not pihole_notes, summary.notes
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def test_runner_appends_disclosure_after_home_net_note(
    tmp_path, capture_summary, mock_load_required_logs,
):
    """The disclosure note is appended after every other note.

    With a mocked CloudTrail load whose coverage is a 42-row span, the
    "CloudTrail:" note must exist and must be the final entry of the notes
    list, so the relative order of the earlier notes is left untouched.
    """
    from loghunter.common.loader import LoadResult, SourceCoverage

    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    (ct_root / "events.json.log").write_text("{}", encoding="utf-8")

    empty_frame = pd.DataFrame(columns=_CT_COLUMNS_FOR_MOCK)
    full_span = _ts_window_span()
    canned_result = LoadResult(
        logs={"*.json*": empty_frame},
        record_counts={},
        data_window=None,
        warnings=[],
        data_size_bytes=0,
        coverage={"*.json*": SourceCoverage(42, full_span)},
    )
    mock_load_required_logs(canned_result)

    runner.run(config={"loghunter": {"detect": "aws"}}, cloudtrail_dir=ct_root)

    summary = capture_summary["summary"]
    position = None
    for index, note in enumerate(summary.notes):
        if note.startswith("CloudTrail:"):
            position = index
            break
    assert position is not None, f"no CloudTrail note in {summary.notes!r}"
    # Invariant under test: nothing follows the disclosure note.
    last_index = len(summary.notes) - 1
    assert position == last_index, (
        f"CloudTrail note not last (idx={position}, len={len(summary.notes)}): "
        f"{summary.notes!r}"
    )
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
# Column lists needed by the mocked LoadResult fixtures above.
|
|
997
|
+
_PIHOLE_COLUMNS_FOR_MOCK = [
|
|
998
|
+
"ts", "host", "program", "client", "qtype", "query", "answer",
|
|
999
|
+
"rcode", "raw", "message", "event_type",
|
|
1000
|
+
]
|
|
1001
|
+
_CT_COLUMNS_FOR_MOCK = [
|
|
1002
|
+
"ts", "eventTime", "eventSource", "eventName", "eventID", "awsRegion",
|
|
1003
|
+
"sourceIPAddress", "principal", "lane", "read_write", "errorCode",
|
|
1004
|
+
"raw",
|
|
1005
|
+
]
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
# The two staging-dir tests below are the Stage 4 vs Stage 3 differential:
|
|
1009
|
+
# Stage 3 includes non-date child dirs in the no-window branch; Stage 4 bare runs
|
|
1010
|
+
# are windowed (skipping them) but --all reverts to the no-window branch.
|
|
1011
|
+
|
|
1012
|
+
def test_runner_default_window_skips_real_nondate_subdir(tmp_path, capture_summary):
    """Bare (windowed) run: a non-date ``staging/`` child dir is not loaded.

    One dated dir and one ``staging`` dir each hold a conn row; only the dated
    dir's row is expected in the count.
    """
    zeek_root = _make_dated_zeek(tmp_path, {"2026-01-05": [_conn(_TS_JAN5)]})
    staging_dir = zeek_root / "staging"
    staging_dir.mkdir()
    _write_ndjson(staging_dir / "conn.log", [_conn(_TS_JAN1)])

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root)

    summary = capture_summary["summary"]
    conn_count = summary.record_counts.get("conn*.log*", 0)
    assert conn_count == 1
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def test_runner_load_all_includes_real_nondate_subdir(tmp_path, capture_summary):
    """``--all`` (no-window) run: the non-date ``staging/`` dir is loaded too.

    One dated dir and one ``staging`` dir each hold a conn row; both rows are
    expected in the count.
    """
    zeek_root = _make_dated_zeek(tmp_path, {"2026-01-05": [_conn(_TS_JAN5)]})
    staging_dir = zeek_root / "staging"
    staging_dir.mkdir()
    _write_ndjson(staging_dir / "conn.log", [_conn(_TS_JAN1)])

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_root, load_all=True)

    summary = capture_summary["summary"]
    conn_count = summary.record_counts.get("conn*.log*", 0)
    assert conn_count == 2
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
# ── Stage 4 P1 regression: Zeek default window must not leak into other sources ──
|
|
1035
|
+
|
|
1036
|
+
def test_runner_default_window_no_leak_from_unplanned_family(
    tmp_path, capture_summary, capsys
):
    """detect=syslog with a configured but unplanned ``zeek_dir``.

    The Zeek family is not part of the plan, so it must be neither loaded nor
    windowed (no conn count at all). The default window applies to syslog on
    its own anchor: with a 1d window the Jan 1 line falls outside and only the
    Jan 5 line is counted. The window is announced on stderr, not in notes.
    """
    # Dated dir named for the current year, matching the comment in this test
    # that RFC 3164 syslog parsing assumes the current year.
    this_year = datetime.now(timezone.utc).year

    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    newest_day = zeek_root / f"{this_year}-01-05"
    newest_day.mkdir()
    _write_ndjson(newest_day / "conn.log", [_conn(_TS_JAN5)])

    syslog_root = tmp_path / "syslog"
    syslog_root.mkdir()
    syslog_lines = (
        "Jan 1 00:00:01 host kernel: old line\n"
        "Jan 5 00:00:01 host kernel: new line\n"
    )
    (syslog_root / "host.log").write_text(syslog_lines, encoding="utf-8")

    runner.run(
        config={"loghunter": {"detect": "syslog", "default_window": "1d"}},
        zeek_dir=zeek_root,
        syslog_dir=syslog_root,
    )

    summary = capture_summary["summary"]
    assert "conn*.log*" not in summary.record_counts, (
        "zeek_dir is not in the plan for detect=syslog — must not load"
    )
    assert summary.record_counts.get("*.log*", 0) == 1, (
        "syslog's OWN universal default window keeps only the in-window row"
    )
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text
    assert all("Default window" not in note for note in summary.notes)
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
def test_runner_default_window_applies_to_all_families_in_mixed_run(
    tmp_path, capture_summary, capsys
):
    """Mixed beacon + syslog run: every planned family is windowed on its own.

    Zeek conn is expected to keep only the newest dated dir's row, and syslog
    only its own last-1d row; the window is announced on stderr, not in notes.
    """
    this_year = datetime.now(timezone.utc).year

    zeek_root = tmp_path / "zeek"
    zeek_root.mkdir()
    # Row timestamps use the same year as the dir names. The Zeek default
    # window comes from the dir NAME, so fixed-year rows would fall outside it
    # once the calendar year moves on.
    for day in (1, 5):
        day_dir = zeek_root / f"{this_year}-01-{day:02d}"
        day_dir.mkdir()
        day_ts = datetime(this_year, 1, day, tzinfo=timezone.utc).timestamp()
        _write_ndjson(day_dir / "conn.log", [_conn(day_ts)])

    syslog_root = tmp_path / "syslog"
    syslog_root.mkdir()
    syslog_lines = (
        "Jan 1 00:00:01 host kernel: old line\n"
        "Jan 5 00:00:01 host kernel: new line\n"
    )
    (syslog_root / "host.log").write_text(syslog_lines, encoding="utf-8")

    runner.run(
        config={"loghunter": {"detect": "beacon,syslog", "default_window": "1d"}},
        zeek_dir=zeek_root,
        syslog_dir=syslog_root,
    )

    summary = capture_summary["summary"]
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text
    assert all("Default window" not in note for note in summary.notes)
    assert summary.record_counts.get("conn*.log*", 0) == 1, (
        "Zeek conn rows filtered to newest dated dir only"
    )
    assert summary.record_counts.get("*.log*", 0) == 1, (
        "syslog rows windowed to its OWN last-1d (Jan 1 trimmed)"
    )
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
# ── universal default window: flat (syslog/pihole) + cloudtrail families ──────
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def test_runner_syslog_default_window_trims_and_keeps_nan_ts(
    tmp_path, capture_summary, capsys
):
    """Default window on a flat syslog DIRECTORY trims old rows, keeps NaN-ts rows.

    Three lines are written: one older than (max-ts - 1d), one in-window, and
    one whose month token ("Xxx") cannot be parsed. The expected count is 2:
    the in-window row plus the unparseable-timestamp row (keep-null policy),
    with the old row trimmed. The window is announced on stderr.
    """
    # The lines below carry no year field; the syslog parser is presumed to
    # infer one (a sibling test notes RFC 3164 parsing assumes the current
    # year) — TODO confirm against the loader.
    syslog_dir = tmp_path / "syslog"
    syslog_dir.mkdir()
    (syslog_dir / "host.log").write_text(
        "Jun 1 12:00:00 host kernel: old line\n"  # outside last-1d → trimmed
        "Jun 5 12:00:00 host kernel: new line\n"  # in window → kept
        "Xxx 1 12:00:00 host kernel: nan line\n",  # NaN ts → kept (keep policy)
        encoding="utf-8",
    )
    runner.run(
        config={"loghunter": {"detect": "syslog", "default_window": "1d"}},
        syslog_dir=syslog_dir,
    )
    s = capture_summary["summary"]
    assert "Default window: last 1d" in capsys.readouterr().err
    assert s.record_counts.get("*.log*", 0) == 2, \
        "in-window row + NaN-ts row survive; the old row is trimmed"
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
def test_runner_flat_family_explicit_file_is_bounded_no_window(
    tmp_path, capture_summary, capsys
):
    """An explicit FILE for a flat family is bounded: it loads in full.

    Both syslog lines are expected in the count even though they are days
    apart, and stderr must not announce a default window.
    """
    log_file = tmp_path / "host.log"
    file_lines = (
        "Jun 1 12:00:00 host kernel: old line\n"
        "Jun 5 12:00:00 host kernel: new line\n"
    )
    log_file.write_text(file_lines, encoding="utf-8")

    runner.run(
        config={"loghunter": {"detect": "syslog", "default_window": "1d"}},
        syslog_dir=log_file,
    )

    summary = capture_summary["summary"]
    assert summary.record_counts.get("*.log*", 0) == 2, "bounded file loads full"
    stderr_text = capsys.readouterr().err
    assert "Default window" not in stderr_text
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
def test_runner_flat_family_mixed_file_and_dir_trims_with_bucket(
    tmp_path, capture_summary, capsys
):
    """Explicit file plus directory in one flat family: the whole load is windowed.

    Mixing a named file with a directory makes the family unbounded, so the
    default window covers everything. The window floor is anchored on the
    DIRECTORY's data only; were the explicit file to drive it, its old row
    would survive. Expected count is 1: the directory's in-window row.
    """
    explicit_file = tmp_path / "old.log"
    explicit_file.write_text(
        "Jun 1 12:00:00 host kernel: explicit old line\n",  # trimmed with bucket
        encoding="utf-8",
    )

    syslog_root = tmp_path / "syslog"
    syslog_root.mkdir()
    dir_file = syslog_root / "host.log"
    dir_file.write_text(
        "Jun 5 12:00:00 host kernel: dir new line\n",  # anchor + in window
        encoding="utf-8",
    )

    runner.run(
        config={"loghunter": {"detect": "syslog", "default_window": "1d"}},
        syslog_dir=[explicit_file, syslog_root],
    )

    summary = capture_summary["summary"]
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text
    assert summary.record_counts.get("*.log*", 0) == 1, (
        "only the dir's in-window row survives; the explicit file's old row is trimmed"
    )
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def _ct_event(ts_iso: str, event_id: str) -> dict:
|
|
1203
|
+
return {
|
|
1204
|
+
"eventTime": ts_iso,
|
|
1205
|
+
"eventSource": "s3.amazonaws.com",
|
|
1206
|
+
"eventName": "GetObject",
|
|
1207
|
+
"eventID": event_id,
|
|
1208
|
+
"awsRegion": "us-east-1",
|
|
1209
|
+
"sourceIPAddress": "192.0.2.10",
|
|
1210
|
+
"userIdentity": {
|
|
1211
|
+
"type": "IAMUser",
|
|
1212
|
+
"userName": "placeholder-user",
|
|
1213
|
+
"principalId": "AIDAEXAMPLE",
|
|
1214
|
+
"arn": "arn:aws:iam::123456789012:user/placeholder-user",
|
|
1215
|
+
},
|
|
1216
|
+
"readOnly": True,
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
def test_runner_cloudtrail_excluded_from_default_window_loads_full(
    tmp_path, capture_summary, capsys
):
    """CloudTrail opts OUT of the auto-default window (aws is baseline-relative).

    An UNQUALIFIED run is expected to load the FULL archive (no trim) and,
    CloudTrail being the only family, to emit NO "Default window" stderr line.
    """
    ct_dir = tmp_path / "ct"
    ct_dir.mkdir()
    (ct_dir / "events.json").write_text(
        "\n".join(json.dumps(e) for e in [
            # Four days apart: a 1d default window would drop the first event,
            # so a count of 2 shows that no default window was applied.
            _ct_event("2026-06-01T12:00:00Z", "aaaa"),
            _ct_event("2026-06-05T12:00:00Z", "bbbb"),
        ]) + "\n",
        encoding="utf-8",
    )
    runner.run(
        config={"loghunter": {"detect": "aws", "default_window": "1d"}},
        cloudtrail_dir=ct_dir,
    )
    s = capture_summary["summary"]
    assert "Default window" not in capsys.readouterr().err, \
        "cloudtrail-only unqualified run engages no default window"
    assert s.record_counts.get("*.json*", 0) == 2, \
        "cloudtrail loads FULL — excluded from the auto-default window"
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def test_runner_cloudtrail_explicit_window_narrows_and_riders(
    tmp_path, capture_summary
):
    """An explicit since/until window DOES narrow CloudTrail.

    Only the event inside the window is counted, and a summary note is
    expected to carry the "--all for a full-baseline" rider.
    """
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    events = [
        _ct_event("2026-06-01T12:00:00Z", "aaaa"),  # before since → excluded
        _ct_event("2026-06-05T12:00:00Z", "bbbb"),  # in window
    ]
    ndjson_text = "".join(json.dumps(event) + "\n" for event in events)
    (ct_root / "events.json").write_text(ndjson_text, encoding="utf-8")

    runner.run(
        config={"loghunter": {"detect": "aws", "default_window": "1d"}},
        cloudtrail_dir=ct_root,
        since=datetime(2026, 6, 4, tzinfo=timezone.utc),
        until=datetime(2026, 6, 6, tzinfo=timezone.utc),
    )

    summary = capture_summary["summary"]
    assert summary.record_counts.get("*.json*", 0) == 1, "explicit window narrows cloudtrail"
    rider_notes = [note for note in summary.notes if "--all for a full-baseline" in note]
    assert rider_notes, (
        "explicit narrowing → aws window note carries the --all rider"
    )
|
|
1270
|
+
|
|
1271
|
+
|
|
1272
|
+
def test_runner_mixed_unqualified_cloudtrail_full_no_aws_all_rider(
    tmp_path, capture_summary, capsys
):
    """Mixed unqualified aws + syslog run.

    Syslog is eligible for the default window, so the stderr announcement
    still appears. CloudTrail loads in full, and because it was not narrowed
    no note may suggest ``--all``.
    """
    ct_root = tmp_path / "ct"
    ct_root.mkdir()
    events = [
        _ct_event("2026-06-01T12:00:00Z", "aaaa"),
        _ct_event("2026-06-05T12:00:00Z", "bbbb"),
    ]
    ndjson_text = "".join(json.dumps(event) + "\n" for event in events)
    (ct_root / "events.json").write_text(ndjson_text, encoding="utf-8")

    syslog_root = tmp_path / "syslog"
    syslog_root.mkdir()
    syslog_lines = (
        "Jun 1 12:00:00 host kernel: old line\n"
        "Jun 5 12:00:00 host kernel: new line\n"
    )
    (syslog_root / "host.log").write_text(syslog_lines, encoding="utf-8")

    runner.run(
        config={"loghunter": {"detect": "aws,syslog", "default_window": "1d"}},
        cloudtrail_dir=ct_root,
        syslog_dir=syslog_root,
    )

    summary = capture_summary["summary"]
    stderr_text = capsys.readouterr().err
    assert "Default window: last 1d" in stderr_text, (
        "syslog (eligible) still engages the default window"
    )
    assert summary.record_counts.get("*.json*", 0) == 2, "cloudtrail loaded FULL"
    # Positive guard: an "aws:" note must exist, otherwise the negative check
    # below would pass vacuously if the whole aws note had vanished.
    aws_notes = [note for note in summary.notes if note.startswith("aws:")]
    assert aws_notes, (
        "the aws first-seen note still fires"
    )
    assert all("--all" not in note for note in summary.notes), (
        "cloudtrail not narrowed → no --all rider on any aws note"
    )
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
def test_apply_default_window_keep_null_and_metadata(tmp_path):
    """B/D unit test for ``loader.apply_default_window`` (the post-load trim).

    Checks that NaN-ts rows survive only under ``keep_null=True``, that the
    input ``LoadResult`` frame is not mutated, and that ``warnings``,
    ``data_size_bytes`` and ``rotation_skips`` come through unchanged while
    ``record_counts`` reflects the trimmed frame.
    """
    import math

    import pandas as pd
    from loghunter.common.loader import (
        LoadResult,
        RotationSkipInfo,
        apply_default_window,
    )

    pattern = "*.log*"
    anchor_ts = datetime(2026, 6, 5, 12, 0, tzinfo=timezone.utc).timestamp()
    skips = {pattern: RotationSkipInfo(loaded=2, skipped=3, fallback=False)}
    one_day = timedelta(days=1)

    def _fresh_result() -> LoadResult:
        rows = [
            {"ts": anchor_ts, "message": "in-window"},
            # Five days before the anchor: outside 1d, so trimmed.
            {"ts": anchor_ts - 5 * 86400, "message": "old"},
            # NaN timestamp: kept if and only if keep_null is set.
            {"ts": float("nan"), "message": "no-ts"},
        ]
        return LoadResult(
            logs={pattern: pd.DataFrame(rows)},
            record_counts={pattern: 3},
            data_window=None,
            warnings=["a soft warning"],
            data_size_bytes=4242,
            rotation_skips=skips,
        )

    src = _fresh_result()
    kept = apply_default_window(src, [pattern], one_day, keep_null=True)
    kept_messages = set(kept.logs[pattern]["message"])
    assert kept_messages == {"in-window", "no-ts"}, "keep_null retains the NaN-ts row"
    # #4: the passed-in LoadResult.logs must NOT be mutated in place (shallow copy).
    assert len(src.logs[pattern]) == 3, "input frame untouched by the trim"
    assert kept.logs[pattern] is not src.logs[pattern]
    assert kept.record_counts[pattern] == 2
    # Metadata preserved unchanged through replace().
    assert kept.warnings == ["a soft warning"]
    assert kept.data_size_bytes == 4242
    assert kept.rotation_skips is skips

    dropped = apply_default_window(
        _fresh_result(), [pattern], one_day, keep_null=False
    )
    dropped_messages = set(dropped.logs[pattern]["message"])
    assert dropped_messages == {"in-window"}, "keep_null=False drops the NaN-ts row (drop policy)"
    assert all(not math.isnan(value) for value in dropped.logs[pattern]["ts"])
|
|
1359
|
+
|
|
1360
|
+
|
|
1361
|
+
def test_runner_no_data_window_forces_requested_span_none(
    tmp_path, capture_summary, capsys
):
    """#2: an active default window with no real data window yields requested_span None.

    Every fixture row has an unparseable timestamp, so the rows are kept by the
    keep policy while `_data_window` stays None. The runner must then force
    requested_span to None so the underfill parenthetical cannot render a
    confident comparison over data that does not exist.

    The fixture goes through pihole_dir because pihole keeps NaN-ts rows AND is
    discovered by filename (no content gate), so an all-unparseable-ts file
    still loads. The syslog content-sniff gate (Item E) requires a parseable ts
    and rejects such a file at discovery, so the scenario is unreachable via
    syslog directory discovery, and a parseable line would itself give a
    non-None data window.
    """
    pihole_root = tmp_path / "pihole"
    pihole_root.mkdir()
    # "Xxx" matches the outer \w{3} but strptime fails → NaN ts, kept by keep policy.
    unparseable_payload = (
        "Xxx 1 12:00:00 dnsmasq[1]: query[A] a.test from 192.0.2.1\n"
        "Xxx 2 12:00:00 dnsmasq[1]: query[A] b.test from 192.0.2.1\n"
    )
    (pihole_root / "pihole.log").write_text(unparseable_payload, encoding="utf-8")

    run_config = {"loghunter": {"detect": "dns", "default_window": "1d"}}
    runner.run(config=run_config, pihole_dir=pihole_root)

    summary = capture_summary["summary"]
    stderr_text = capsys.readouterr().err
    # The default window engaged (unbounded dir, no explicit window)...
    assert "Default window: last 1d" in stderr_text
    # ...yet without a real data window requested_span is forced to None
    # (the gate the renderer can't see).
    assert summary.requested_span is None
|
|
1392
|
+
|
|
1393
|
+
|
|
1394
|
+
def test_aws_window_note_cloudtrail_narrowed_rider() -> None:
    """--all guidance is appended to the aws window note only when CloudTrail was narrowed.

    The guidance rides the existing note (the narrowed text starts with the
    un-narrowed text), and no note is produced at all when aws is not in the
    plan's will_run list.
    """
    aws_plan = SimpleNamespace(will_run=["aws"])

    plain_note = _aws_window_note(aws_plan, cloudtrail_narrowed=False)
    assert plain_note is not None
    assert "first-seen" in plain_note
    assert "--all" not in plain_note

    narrowed_note = _aws_window_note(aws_plan, cloudtrail_narrowed=True)
    assert narrowed_note is not None
    assert narrowed_note.startswith(plain_note)  # same note, guidance appended
    assert "--all" in narrowed_note

    # No aws → no note regardless of the flag.
    beacon_plan = SimpleNamespace(will_run=["beacon"])
    assert _aws_window_note(beacon_plan, cloudtrail_narrowed=True) is None
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
def _ct_service_event(ts_iso: str, event_id: str) -> dict:
|
|
1415
|
+
"""A service-lane CloudTrail event (userIdentity.type=AWSService → service)."""
|
|
1416
|
+
return {
|
|
1417
|
+
"eventTime": ts_iso,
|
|
1418
|
+
"eventSource": "s3.amazonaws.com",
|
|
1419
|
+
"eventName": "GetObject",
|
|
1420
|
+
"eventID": event_id,
|
|
1421
|
+
"awsRegion": "us-east-1",
|
|
1422
|
+
"sourceIPAddress": "ec2.amazonaws.com",
|
|
1423
|
+
"userIdentity": {"type": "AWSService", "invokedBy": "ec2.amazonaws.com"},
|
|
1424
|
+
"readOnly": True,
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1427
|
+
|
|
1428
|
+
def test_runner_aws_no_interactive_note_unqualified_end_to_end(
    tmp_path, capture_summary
):
    """#2 end-to-end: all-service-lane CloudTrail on an UNQUALIFIED run.

    Exercises the real path (parser lane assignment → runner note assembly →
    detector empty return): the neutral no-interactive note must be present
    without any --all mention, and aws must emit no finding.
    """
    cloudtrail_root = tmp_path / "ct"
    cloudtrail_root.mkdir()
    service_events = [
        _ct_service_event("2026-06-01T12:00:00Z", "aaaa"),
        _ct_service_event("2026-06-05T12:00:00Z", "bbbb"),
    ]
    payload = "\n".join(json.dumps(evt) for evt in service_events) + "\n"
    (cloudtrail_root / "events.json").write_text(payload, encoding="utf-8")

    run_config = {"loghunter": {"detect": "aws", "default_window": "1d"}}
    runner.run(config=run_config, cloudtrail_dir=cloudtrail_root)

    summary = capture_summary["summary"]
    matching = [n for n in summary.notes if "none are interactive-lane" in n]
    note = matching[0] if matching else None
    assert note is not None, "the no-interactive disclosure must be appended"
    assert "--all" not in note, "unqualified → CloudTrail loaded full, no --all"

    findings = capture_summary.get("findings", [])
    assert all(f.detector != "aws" for f in findings), "aws scored nothing"
|
|
1454
|
+
|
|
1455
|
+
|
|
1456
|
+
def test_runner_aws_no_interactive_note_narrowed_end_to_end(
    tmp_path, capture_summary
):
    """#2 end-to-end: with an explicit since/until window the note carries --all.

    Same all-service-lane scenario as the unqualified variant, but CloudTrail
    is narrowed by an explicit window, so the no-interactive note must end with
    the full-history guidance.
    """
    cloudtrail_root = tmp_path / "ct"
    cloudtrail_root.mkdir()
    service_events = [_ct_service_event("2026-06-05T12:00:00Z", "bbbb")]
    payload = "\n".join(json.dumps(evt) for evt in service_events) + "\n"
    (cloudtrail_root / "events.json").write_text(payload, encoding="utf-8")

    window_start = datetime(2026, 6, 4, tzinfo=timezone.utc)
    window_end = datetime(2026, 6, 6, tzinfo=timezone.utc)
    runner.run(
        config={"loghunter": {"detect": "aws", "default_window": "1d"}},
        cloudtrail_dir=cloudtrail_root,
        since=window_start,
        until=window_end,
    )

    summary = capture_summary["summary"]
    matching = [n for n in summary.notes if "none are interactive-lane" in n]
    note = matching[0] if matching else None
    assert note is not None
    assert "Run with --all for full history." in note
|
|
1479
|
+
|
|
1480
|
+
|
|
1481
|
+
def test_interactive_count_helper() -> None:
    """Supplementary unit test for ``interactive_count``.

    It counts interactive-lane rows and returns 0 for None, an empty frame, a
    frame without a ``lane`` column, and an all-service frame (the
    silent-nothing condition).
    """
    import pandas as pd
    from loghunter.detectors.aws import interactive_count

    zero_inputs = [
        None,
        pd.DataFrame(),
        pd.DataFrame({"x": [1, 2]}),  # missing lane
        pd.DataFrame({"lane": ["service", "service"]}),
    ]
    for frame in zero_inputs:
        assert interactive_count(frame) == 0

    mixed_lanes = pd.DataFrame({"lane": ["interactive", "service", "interactive"]})
    assert interactive_count(mixed_lanes) == 2
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
# ── large-dataset prompt: skip_confirm wiring ────────────────────────────────
|
|
1497
|
+
|
|
1498
|
+
|
|
1499
|
+
_TINY_WARN_CFG = {"loghunter": {"detect": "beacon", "warn_above": 1, "default_window": "all"}}
|
|
1500
|
+
|
|
1501
|
+
|
|
1502
|
+
def test_runner_skip_confirm_skips_prompt_entirely(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """With skip_confirm=True the prompt is bypassed: input() is never invoked."""
    from loghunter.common.errors import ExportAborted  # noqa: F401 (import resolves post-move)

    def _forbidden_input(*_args, **_kwargs):
        raise AssertionError("input() must not be called when skip_confirm=True")

    zeek_dir = _make_flat_zeek(tmp_path, [_conn(_TS_JAN1), _conn(_TS_JAN5)])
    monkeypatch.setattr("builtins.input", _forbidden_input)

    runner.run(config=_TINY_WARN_CFG, zeek_dir=zeek_dir, skip_confirm=True)

    # Reaching this line means input() was never called and the run finished.
    assert capture_summary["summary"] is not None
|
|
1517
|
+
|
|
1518
|
+
|
|
1519
|
+
def test_runner_decline_raises_export_aborted(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """Answering 'n' at the large-dataset prompt raises ExportAborted (no bare return)."""
    from loghunter.common.errors import ExportAborted

    def _decline(*_prompt):
        return "n"

    zeek_dir = _make_flat_zeek(tmp_path, [_conn(_TS_JAN1), _conn(_TS_JAN5)])
    monkeypatch.setattr("builtins.input", _decline)

    with pytest.raises(ExportAborted, match="aborted by user"):
        runner.run(config=_TINY_WARN_CFG, zeek_dir=zeek_dir)
|
|
1529
|
+
|
|
1530
|
+
|
|
1531
|
+
def test_runner_accept_continues_normally(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """Leaving skip_confirm at its default and answering 'y' lets the run complete."""

    def _accept(*_prompt):
        return "y"

    zeek_dir = _make_flat_zeek(tmp_path, [_conn(_TS_JAN1), _conn(_TS_JAN5)])
    monkeypatch.setattr("builtins.input", _accept)

    runner.run(config=_TINY_WARN_CFG, zeek_dir=zeek_dir)

    assert capture_summary["summary"] is not None
|
|
1539
|
+
|
|
1540
|
+
|
|
1541
|
+
# ── _build_output_handler: output_file precedence and behavior ───────────────
|
|
1542
|
+
|
|
1543
|
+
|
|
1544
|
+
from loghunter.runner import _build_output_handler # noqa: E402
|
|
1545
|
+
from loghunter.common.finding import RunSummary # noqa: E402
|
|
1546
|
+
|
|
1547
|
+
|
|
1548
|
+
def _drive_handler(handler, close_handler) -> None:
    """Run *handler* through a single no-finding lifecycle so its file is created.

    Calls ``begin`` with a minimal RunSummary, ``write`` with an empty finding
    list, ``end``, and finally the supplied *close_handler* callback.
    """
    window_start = datetime(2026, 1, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 1, 2, tzinfo=timezone.utc)
    empty_run = RunSummary(
        data_window=(window_start, window_end),
        record_counts={},
        data_size_bytes=0,
        detectors_run=["beacon"],
        detectors_skipped={},
    )
    handler.begin(empty_run)
    handler.write([])
    handler.end()
    close_handler()
|
|
1562
|
+
|
|
1563
|
+
|
|
1564
|
+
def test_build_output_handler_writes_to_exact_output_file(tmp_path: Path) -> None:
    """output_file is written at the EXACT path given; nothing auto-named appears."""
    target = tmp_path / "hunt.txt"
    built = _build_output_handler(
        output_format="text",
        output_dir=None,
        output_file=target,
        verbose_level=0,
    )
    handler, close_handler = built
    _drive_handler(handler, close_handler)

    assert target.exists()
    # The directory must hold only the requested file (no auto-named *.txt sibling).
    entry_names = [entry.name for entry in tmp_path.iterdir()]
    assert entry_names == ["hunt.txt"]
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def test_build_output_handler_creates_parent_directories(tmp_path: Path) -> None:
    """Missing parent directories of output_file exist once the handler has run."""
    target = tmp_path / "deep" / "nested" / "hunt.txt"
    parent_dir = target.parent
    assert not parent_dir.exists()

    built = _build_output_handler(
        output_format="text",
        output_dir=None,
        output_file=target,
        verbose_level=0,
    )
    handler, close_handler = built
    _drive_handler(handler, close_handler)

    assert target.exists()
    assert parent_dir.is_dir()
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
def test_build_output_handler_output_file_takes_precedence_over_output_dir(
    tmp_path: Path,
) -> None:
    """With both options set, output_file wins and output_dir receives no findings file."""
    explicit = tmp_path / "explicit.txt"
    some_dir = tmp_path / "some_dir"
    built = _build_output_handler(
        output_format="text",
        output_dir=some_dir,
        output_file=explicit,
        verbose_level=0,
    )
    handler, close_handler = built
    _drive_handler(handler, close_handler)

    assert explicit.exists()
    # output_dir may or may not have been created; the invariant is only that
    # no auto-named findings file lives in it.
    if some_dir.exists():
        stray_files = [entry for entry in some_dir.iterdir() if entry.is_file()]
        assert not stray_files
|
|
1604
|
+
|
|
1605
|
+
|
|
1606
|
+
# ── Deliverable 0: dry-run alignment of source-dir lines ─────────────────────
|
|
1607
|
+
|
|
1608
|
+
def test_dry_run_source_dir_lines_align_colons_and_values(tmp_path: Path, capsys) -> None:
    """All four ``*_dir:`` lines share one colon column AND one value-start column.

    Guards against the alignment bug where ``cloudtrail_dir:`` pushed its value
    out of column with the other three source-dir lines.
    """
    source_dirs = {}
    for kwarg, folder in (
        ("zeek_dir", "zeek"),
        ("syslog_dir", "syslog"),
        ("pihole_dir", "pihole"),
        ("cloudtrail_dir", "ct"),
    ):
        directory = tmp_path / folder
        directory.mkdir()
        source_dirs[kwarg] = directory

    _print_dry_run(
        **source_dirs,
        since=None, until=None, load_all=False, will_run=[], skipped={},
    )
    printed = capsys.readouterr().out.splitlines()

    labels = ("zeek_dir:", "syslog_dir:", "pihole_dir:", "cloudtrail_dir:")
    dir_lines = [line for line in printed if any(label in line for label in labels)]
    assert len(dir_lines) == 4, f"expected 4 source-dir lines, got {len(dir_lines)}"

    colon_cols = [line.index(":") for line in dir_lines]
    assert len(set(colon_cols)) == 1, f"colons misaligned: {colon_cols} in lines {dir_lines}"

    # Value start = index of the first non-space character after the gutter
    # that follows the label's colon (or the line length if only spaces remain).
    value_starts = []
    for line in dir_lines:
        after_label = line.index(":") + 1
        remainder = line[after_label:]
        gutter_width = len(remainder) - len(remainder.lstrip(" "))
        value_starts.append(after_label + gutter_width)
    assert len(set(value_starts)) == 1, (
        f"value starts misaligned: {value_starts} in lines {dir_lines}"
    )
|
|
1645
|
+
|
|
1646
|
+
|
|
1647
|
+
# ── Deliverable 3: aws RunSummary notes — pure helper tests ──────────────────
|
|
1648
|
+
|
|
1649
|
+
def _fake_aws_mod(below_floor: int = 0):
|
|
1650
|
+
"""Tiny fake of the aws detector exposing only what the runner reads."""
|
|
1651
|
+
return SimpleNamespace(
|
|
1652
|
+
DETECTOR_NAME="aws",
|
|
1653
|
+
STATUS="available",
|
|
1654
|
+
DEFAULT_CONFIG={"min_events": 50},
|
|
1655
|
+
below_floor_count=lambda df, n: below_floor,
|
|
1656
|
+
)
|
|
1657
|
+
|
|
1658
|
+
|
|
1659
|
+
def _fake_plan(will_run: list[str], aws_mod=None) -> SimpleNamespace:
|
|
1660
|
+
detectors = {"aws": aws_mod} if aws_mod is not None else {}
|
|
1661
|
+
return SimpleNamespace(
|
|
1662
|
+
detectors=detectors,
|
|
1663
|
+
selected=will_run,
|
|
1664
|
+
will_run=will_run,
|
|
1665
|
+
skipped={},
|
|
1666
|
+
needed_logs={"*.json*": "cloudtrail_dir"},
|
|
1667
|
+
)
|
|
1668
|
+
|
|
1669
|
+
|
|
1670
|
+
def test_aws_below_floor_note_returns_string_with_count() -> None:
    """The below-floor note mentions min_events and carries the below-floor count.

    The fake aws module reports 5 below-floor principals, so the note must
    contain the number 5 as a standalone token.
    """
    import re

    from loghunter.runner import _aws_below_floor_note

    plan = _fake_plan(["aws"], _fake_aws_mod(below_floor=5))
    df = pd.DataFrame([{"lane": "interactive", "principal": "x"}])
    note = _aws_below_floor_note(plan, {"*.json*": df}, config={})
    assert note is not None
    # The fake's DEFAULT_CONFIG sets min_events to 50. A plain `"5" in note`
    # would also match the "5" inside "50" if the note renders that floor, so
    # require a 5 that is not part of a longer number.
    assert re.search(r"(?<!\d)5(?!\d)", note), f"count 5 missing from note: {note!r}"
    assert "min_events" in note
|
|
1678
|
+
|
|
1679
|
+
|
|
1680
|
+
def test_aws_below_floor_note_returns_none_when_aws_not_in_plan() -> None:
    """No below-floor note is produced when the plan runs beacon and has no aws module."""
    from loghunter.runner import _aws_below_floor_note

    beacon_only_plan = _fake_plan(["beacon"], aws_mod=None)
    frame = pd.DataFrame([{"lane": "interactive", "principal": "x"}])
    result = _aws_below_floor_note(beacon_only_plan, {"*.json*": frame}, config={})
    assert result is None
|
|
1685
|
+
|
|
1686
|
+
|
|
1687
|
+
def test_aws_below_floor_note_returns_none_when_count_is_zero() -> None:
    """No below-floor note is produced when the detector reports a count of zero."""
    from loghunter.runner import _aws_below_floor_note

    zero_count_plan = _fake_plan(["aws"], _fake_aws_mod(below_floor=0))
    frame = pd.DataFrame([{"lane": "interactive", "principal": "x"}])
    result = _aws_below_floor_note(zero_count_plan, {"*.json*": frame}, config={})
    assert result is None
|
|
1692
|
+
|
|
1693
|
+
|
|
1694
|
+
def test_aws_below_floor_note_returns_none_when_no_frame() -> None:
    """No below-floor note is produced when the logs mapping is empty."""
    from loghunter.runner import _aws_below_floor_note

    aws_plan = _fake_plan(["aws"], _fake_aws_mod(below_floor=5))
    no_logs: dict = {}
    assert _aws_below_floor_note(aws_plan, no_logs, config={}) is None
|
|
1698
|
+
|
|
1699
|
+
|
|
1700
|
+
def test_aws_window_note_fires_when_aws_runs() -> None:
    """A plan that runs aws yields a window note mentioning "first-seen"."""
    from loghunter.runner import _aws_window_note

    aws_plan = _fake_plan(["aws"], _fake_aws_mod())
    window_note = _aws_window_note(aws_plan)
    assert window_note is not None
    assert "first-seen" in window_note
|
|
1706
|
+
|
|
1707
|
+
|
|
1708
|
+
def test_aws_window_note_silent_when_aws_did_not_run() -> None:
    """A plan that runs only beacon yields no aws window note."""
    from loghunter.runner import _aws_window_note

    beacon_only_plan = _fake_plan(["beacon"], aws_mod=None)
    window_note = _aws_window_note(beacon_only_plan)
    assert window_note is None
|
|
1712
|
+
|
|
1713
|
+
|
|
1714
|
+
# ── Integration: real runner.run() emits the note via the loaded frame ───────
|
|
1715
|
+
|
|
1716
|
+
def test_aws_below_floor_note_in_runner_run_reflects_current_frame(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """The below-floor note must reach the RunSummary.notes the user actually sees.

    Glenn's catch: a helper-only test could pass while the runner's call
    ordering or wiring was broken, so this drives the real ``runner.run()``
    path with the real aws detector.
    """
    import loghunter.detectors.aws as aws_mod

    # CloudTrail fixture: three IAM users with five events each, meant to load
    # as 3 below-floor interactive principals.
    cloudtrail_dir = tmp_path / "ct"
    cloudtrail_dir.mkdir()
    events: list[dict] = [
        {
            "eventTime": f"2026-06-01T12:0{i}:00Z",
            "eventSource": "s3.amazonaws.com",
            "eventName": "GetObject",
            "eventID": f"e-{name}-{i}",
            "awsRegion": "us-east-1",
            "sourceIPAddress": "192.0.2.10",
            "userIdentity": {
                "type": "IAMUser",
                "userName": name,
                "arn": f"arn:aws:iam::123456789012:user/{name}",
            },
            "readOnly": True,
        }
        for name in ("alice", "bob", "carol")
        for i in range(5)
    ]
    payload = "\n".join(json.dumps(evt) for evt in events) + "\n"
    (cloudtrail_dir / "events.json.log").write_text(payload, encoding="utf-8")

    # Use the real aws detector so the wiring is exercised end to end.
    monkeypatch.setattr(runner, "discover_detectors", lambda: {"aws": aws_mod})

    runner.run(
        config={"loghunter": {"detect": "aws"}},
        cloudtrail_dir=cloudtrail_dir,
    )

    s = capture_summary["summary"]
    floor_notes = [note for note in s.notes if "below the min_events floor" in note]
    assert floor_notes, f"expected below-floor note in {s.notes}"
    assert "3" in floor_notes[0]
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
def test_aws_window_note_in_runner_run(
    tmp_path: Path, capture_summary, monkeypatch
) -> None:
    """A real ``runner.run()`` with aws selected produces a "first-seen" window note."""
    import loghunter.detectors.aws as aws_mod

    cloudtrail_dir = tmp_path / "ct"
    cloudtrail_dir.mkdir()
    identity = {
        "type": "IAMUser",
        "userName": "placeholder",
        "arn": "arn:aws:iam::123456789012:user/placeholder",
    }
    single_event = {
        "eventTime": "2026-06-01T12:00:00Z",
        "eventSource": "s3.amazonaws.com",
        "eventName": "GetObject",
        "eventID": "e-1",
        "awsRegion": "us-east-1",
        "sourceIPAddress": "192.0.2.10",
        "userIdentity": identity,
        "readOnly": True,
    }
    log_file = cloudtrail_dir / "events.json.log"
    log_file.write_text(json.dumps(single_event) + "\n", encoding="utf-8")

    monkeypatch.setattr(runner, "discover_detectors", lambda: {"aws": aws_mod})

    runner.run(
        config={"loghunter": {"detect": "aws"}},
        cloudtrail_dir=cloudtrail_dir,
    )

    summary = capture_summary["summary"]
    first_seen_notes = [note for note in summary.notes if "first-seen" in note]
    assert first_seen_notes
|
|
1791
|
+
|
|
1792
|
+
|
|
1793
|
+
# ── _home_net_note — scan topology disclosure ────────────────────────────────
|
|
1794
|
+
#
|
|
1795
|
+
# Pure helper tests of the runner's home_net disclosure note. Provenance is
|
|
1796
|
+
# carried by the ``__user_set__`` sidecar attached by the config loader; tests
|
|
1797
|
+
# construct it explicitly to drive both default and declared paths.
|
|
1798
|
+
|
|
1799
|
+
_RFC1918_HOME_NET = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
|
|
1800
|
+
|
|
1801
|
+
|
|
1802
|
+
def _scan_plan(scan_in_plan: bool) -> SimpleNamespace:
|
|
1803
|
+
will_run = ["scan"] if scan_in_plan else ["beacon"]
|
|
1804
|
+
return SimpleNamespace(
|
|
1805
|
+
detectors={},
|
|
1806
|
+
selected=will_run,
|
|
1807
|
+
will_run=will_run,
|
|
1808
|
+
skipped={},
|
|
1809
|
+
needed_logs={},
|
|
1810
|
+
)
|
|
1811
|
+
|
|
1812
|
+
|
|
1813
|
+
def test_home_net_note_default_includes_parenthetical() -> None:
    """Without a ``__user_set__`` sidecar the note lists the ranges plus the default hint."""
    from loghunter.runner import _home_net_note

    config = {"loghunter": {"home_net": _RFC1918_HOME_NET}}
    note = _home_net_note(_scan_plan(scan_in_plan=True), config)
    assert note is not None
    expected_fragments = (
        "10.0.0.0/8",
        "172.16.0.0/12",
        "192.168.0.0/16",
        "RFC1918 default",
        "set home_net in config to override",
    )
    for fragment in expected_fragments:
        assert fragment in note
|
|
1823
|
+
|
|
1824
|
+
|
|
1825
|
+
def test_home_net_note_declared_omits_parenthetical_with_custom_range() -> None:
    """A user-declared custom home_net appears in the note without the default hint."""
    from loghunter.runner import _home_net_note

    declared_ranges = ["192.0.2.0/24"]
    user_set_sidecar = {"loghunter": {"home_net"}}
    config = {
        "loghunter": {"home_net": declared_ranges},
        "__user_set__": user_set_sidecar,
    }
    note = _home_net_note(_scan_plan(scan_in_plan=True), config)
    assert note is not None
    assert "192.0.2.0/24" in note
    assert "RFC1918 default" not in note
|
|
1835
|
+
|
|
1836
|
+
|
|
1837
|
+
def test_home_net_note_declared_omits_parenthetical_when_value_equals_default() -> None:
    """An explicitly typed RFC1918 list must read as declared, not as the default.

    Comparing values alone would misclassify this case; provenance comes from
    the ``__user_set__`` sidecar, which is the source of truth.
    """
    from loghunter.runner import _home_net_note

    typed_by_user = list(_RFC1918_HOME_NET)
    config = {
        "loghunter": {"home_net": typed_by_user},
        "__user_set__": {"loghunter": {"home_net"}},
    }
    note = _home_net_note(_scan_plan(scan_in_plan=True), config)
    assert note is not None
    assert "10.0.0.0/8" in note
    for forbidden in ("RFC1918 default", "override"):
        assert forbidden not in note
|
|
1853
|
+
|
|
1854
|
+
|
|
1855
|
+
def test_home_net_note_returns_none_when_scan_not_in_plan() -> None:
    """No home_net note is produced when the plan does not run scan."""
    from loghunter.runner import _home_net_note

    config = {"loghunter": {"home_net": _RFC1918_HOME_NET}}
    plan_without_scan = _scan_plan(scan_in_plan=False)
    note = _home_net_note(plan_without_scan, config)
    assert note is None
|
|
1859
|
+
|
|
1860
|
+
|
|
1861
|
+
# ── Stage 3: caller-owned TextIO seam for digest fan-out ─────────────────────
|
|
1862
|
+
|
|
1863
|
+
|
|
1864
|
+
def test_build_output_handler_caller_stream_no_open_no_close(
    tmp_path: Path,
) -> None:
    """``_build_output_handler(..., stream=<TextIO>)`` leaves the caller's stream open.

    After the returned close callback has run the stream is still open, and
    writes through the handler's stream land in the caller's buffer.
    """
    import io as _io
    from loghunter.runner import _build_output_handler

    caller_buffer = _io.StringIO()
    handler, close = _build_output_handler(
        "text",
        output_dir=None,
        output_file=None,
        verbose_level=0,
        stream=caller_buffer,
    )
    close()
    assert not caller_buffer.closed
    # Handler must write to the caller's buffer, not stdout.
    handler._stream.write("probe\n")
    assert caller_buffer.getvalue() == "probe\n"
|
|
1882
|
+
|
|
1883
|
+
|
|
1884
|
+
def test_run_digest_conn_writes_to_caller_stream(
    tmp_path: Path,
) -> None:
    """``run_digest(..., stream=<StringIO>)`` renders the conn card into the caller's stream.

    No file other than the input log may appear in tmp_path.
    """
    import io as _io
    from loghunter import runner as _runner

    conn_record = (
        '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10", '
        '"id.resp_h": "198.51.100.20", "id.resp_p": 443, '
        '"proto": "tcp", "duration": 1.23}\n'
    )
    log_path = tmp_path / "conn.log"
    log_path.write_text(conn_record, encoding="utf-8")

    caller_buffer = _io.StringIO()
    _runner.run_digest(
        config={"loghunter": {}},
        zeek_dir=log_path,
        stream=caller_buffer,
        skip_confirm=True,
        schema="conn",
    )
    rendered = caller_buffer.getvalue()

    # Flat-card identity block: source basename on line 1; "conn · …" on
    # line 3. No banner, no header rule under the new grammar.
    for fragment in ("conn.log", "conn ·"):
        assert fragment in rendered
    # No file was created next to the input log.
    remaining = sorted(entry.name for entry in tmp_path.iterdir())
    assert remaining == ["conn.log"]
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def test_run_digest_blob_writes_to_caller_stream(
    tmp_path: Path,
) -> None:
    """Stage 3 regression: ``run_digest(schema='blob', stream=<StringIO>)`` uses the caller's stream.

    If the stream were not threaded into ``_run_digest_blob``'s
    ``_build_output_handler``, blob cards would silently bypass the shared
    --out file and the fan-out contract would break the moment a positional
    sniffed to blob.
    """
    import io as _io
    from loghunter import runner as _runner

    blob_text = (
        "unrecognized-app-banner xyzzy 42 frobnicate\n"
        "second line with no clear schema\n"
    )
    blob = tmp_path / "weird.txt"
    blob.write_text(blob_text, encoding="utf-8")

    caller_buffer = _io.StringIO()
    _runner.run_digest(
        config={"loghunter": {}},
        blob_path=blob,
        stream=caller_buffer,
        skip_confirm=True,
        schema="blob",
    )
    rendered = caller_buffer.getvalue()

    # Flat blob card: source basename on identity line 1; the labeled
    # best-guess headline names "Unrecognized source". No header rule.
    for fragment in ("weird.txt", "Unrecognized source"):
        assert fragment in rendered
    # No incidental files materialised in tmp_path beyond the input.
    remaining = sorted(entry.name for entry in tmp_path.iterdir())
    assert remaining == ["weird.txt"]
|
|
1948
|
+
|
|
1949
|
+
|
|
1950
|
+
# ── Liveness narration in the detector loop ───────────────────────────────────
|
|
1951
|
+
|
|
1952
|
+
|
|
1953
|
+
def _fake_detector(name: str, run_impl):
|
|
1954
|
+
"""Build a minimal fake detector module suitable for the runner loop."""
|
|
1955
|
+
return SimpleNamespace(
|
|
1956
|
+
DETECTOR_NAME=name,
|
|
1957
|
+
STATUS="available",
|
|
1958
|
+
REQUIRED_LOGS=[],
|
|
1959
|
+
OPTIONAL_LOGS=[],
|
|
1960
|
+
DEFAULT_CONFIG={},
|
|
1961
|
+
run=run_impl,
|
|
1962
|
+
)
|
|
1963
|
+
|
|
1964
|
+
|
|
1965
|
+
def test_liveness_seals_one_record_per_non_syslog_detector(
    tmp_path: Path, capture_summary, monkeypatch, capsys
) -> None:
    """Two non-syslog detectors produce two sealed lines, one each, on stderr only.

    The detector that returned findings is sealed 'done'; the empty one is
    sealed 'nothing'. The seal MUST NOT carry the finding count, because the W2
    report header is the single authoritative count surface (the double-count
    fix from James's revamp CR).
    """
    import re

    # Opaque placeholder Findings; the handler is patched by capture_summary.
    f1 = SimpleNamespace()
    f2 = SimpleNamespace()

    def _alpha_run(ctx):
        return [f1, f2]

    def _beta_run(ctx):
        return []

    fakes = {
        "alpha": _fake_detector("alpha", _alpha_run),
        "beta": _fake_detector("beta", _beta_run),
    }
    monkeypatch.setattr(runner, "discover_detectors", lambda: fakes)

    runner.run(config={"loghunter": {"detect": "alpha,beta"}})

    captured = capsys.readouterr()
    seals = ("alpha: done", "beta: nothing")
    for seal in seals:
        assert seal in captured.err
    # Seal MUST NOT contain the finding count — the header carries it.
    assert re.search(r"alpha: \d+ findings", captured.err) is None
    # Records are stderr-only; stdout carries findings rendering (suppressed
    # here by the capture_summary fake handler).
    for seal in seals:
        assert seal not in captured.out
    # runner.run returns None, so the findings are checked through the list
    # captured by the patched handler: it holds both detectors' output.
    assert capture_summary["findings"] == [f1, f2]
|
|
1997
|
+
|
|
1998
|
+
|
|
1999
|
+
def test_liveness_suppresses_seal_on_detector_error(
    tmp_path: Path, capture_summary, monkeypatch, capsys
) -> None:
    """A raising detector keeps its 'detector error' line and gets no seal.

    The liveness block must not emit a sealed record for a detector whose
    ``run`` raised (no false success). Every seal shape is checked: the
    current ``done`` / ``nothing`` records as well as the retired
    ``N findings`` form.
    """
    import re

    def _boom(ctx):
        raise RuntimeError("boom")

    fakes = {"gamma": _fake_detector("gamma", _boom)}
    monkeypatch.setattr(runner, "discover_detectors", lambda: fakes)

    runner.run(config={"loghunter": {"detect": "gamma"}})

    captured = capsys.readouterr()
    assert "gamma: detector error — boom" in captured.err
    # No seal of any shape for the errored detector. "done" is the success
    # seal in the current grammar, so it is the most misleading record a
    # regression could emit here.
    for seal in ("gamma: done", "gamma: nothing"):
        assert seal not in captured.err
    # Retired count-carrying shape; the pattern also covers "0 findings".
    assert not re.search(r"gamma: \d+ findings", captured.err)
    # The patched handler still got called with an empty findings list
    # (run completes; the error did not abort the loop).
    assert capture_summary["findings"] == []
|
|
2022
|
+
|
|
2023
|
+
|
|
2024
|
+
def test_liveness_skips_outer_spinner_for_syslog(
    tmp_path: Path, capture_summary, monkeypatch, capsys
) -> None:
    """syslog gets no outer liveness wrapper.

    Its inner drain3 tqdm carries the narration for that phase. Verified as
    the absence of the outer 'running syslog' label and the absence of a
    'syslog: ...' seal in any shape (current ``done`` / ``nothing`` records
    and the retired count-carrying form).
    """
    fakes = {"syslog": _fake_detector("syslog", lambda ctx: [])}
    monkeypatch.setattr(runner, "discover_detectors", lambda: fakes)

    runner.run(config={"loghunter": {"detect": "syslog"}})

    captured = capsys.readouterr()
    assert "running syslog" not in captured.err
    for seal in ("syslog: done", "syslog: nothing", "syslog: 0 findings"):
        assert seal not in captured.err
|
|
2039
|
+
|
|
2040
|
+
|
|
2041
|
+
# ── _ts_confidence (item 4: timestamp-confidence floor) ──────────────────────
|
|
2042
|
+
|
|
2043
|
+
|
|
2044
|
+
def _ts_frame(ts_values: list[float]) -> pd.DataFrame:
|
|
2045
|
+
"""Build a minimal frame carrying only the ts column from a list of
|
|
2046
|
+
float values (use float("nan") for unparseable rows)."""
|
|
2047
|
+
return pd.DataFrame({"ts": ts_values})
|
|
2048
|
+
|
|
2049
|
+
|
|
2050
|
+
def test_ts_confidence_full_parseable_with_span_is_confident() -> None:
    """Every row parseable and a non-zero span → ``_ts_confidence`` is True."""
    frame = _ts_frame([1000.0, 1100.0, 1200.0, 1300.0])
    assert _ts_confidence(frame) is True
|
|
2053
|
+
|
|
2054
|
+
|
|
2055
|
+
def test_ts_confidence_at_floor_passes() -> None:
    """A parseable fraction exactly at the floor passes — the floor is inclusive.

    Eight parseable rows out of ten (0.80) with a non-zero span → True. The
    constant is pinned afterwards so the 8/10 fixture stays "at the floor".
    """
    parseable = [float(i) for i in range(1, 9)]  # 1.0 .. 8.0
    unparseable = [float("nan"), float("nan")]
    assert _ts_confidence(_ts_frame(parseable + unparseable)) is True
    assert _DIGEST_TS_CONFIDENCE_FLOOR == 0.80
|
|
2062
|
+
|
|
2063
|
+
|
|
2064
|
+
def test_ts_confidence_just_below_floor_fails() -> None:
    """Seven parseable rows out of ten (0.70 < 0.80) → False."""
    parseable = [float(i) for i in range(1, 8)]  # 1.0 .. 7.0
    unparseable = [float("nan"), float("nan"), float("nan")]
    assert _ts_confidence(_ts_frame(parseable + unparseable)) is False
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
def test_ts_confidence_all_nan_fails() -> None:
    """No parseable ts at all (50 NaN rows) → False (coverage gate)."""
    all_unparseable = _ts_frame([float("nan")] * 50)
    assert _ts_confidence(all_unparseable) is False
|
|
2074
|
+
|
|
2075
|
+
|
|
2076
|
+
def test_ts_confidence_zero_span_fails() -> None:
    """Every event at one instant (80 identical ts values) → False (span gate).

    The predicate is a plain boolean: the flat card renders the SAME bare
    "(timeline unavailable)" line for the coverage gate and the span gate,
    so the differentiated footer text and the sentinel reasons are gone.
    """
    single_instant = _ts_frame([42.0] * 80)
    assert _ts_confidence(single_instant) is False
|
|
2084
|
+
|
|
2085
|
+
|
|
2086
|
+
def test_ts_confidence_no_ts_column_fails() -> None:
    """A frame lacking a ``ts`` column → False (structural coverage shape)."""
    without_ts = pd.DataFrame({"src": ["192.0.2.1", "192.0.2.2"]})
    verdict = _ts_confidence(without_ts)
    assert verdict is False
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
def test_ts_confidence_empty_frame_fails() -> None:
    """Defensive case: a frame with a ``ts`` column but zero rows → False."""
    no_rows = pd.DataFrame({"ts": []})
    assert _ts_confidence(no_rows) is False
|
|
2095
|
+
|
|
2096
|
+
|
|
2097
|
+
# ── Both timeline-failure modes render the same bare line (no footer) ───────
|
|
2098
|
+
|
|
2099
|
+
|
|
2100
|
+
def _zeek_conn_line(ts: float) -> str:
|
|
2101
|
+
return (
|
|
2102
|
+
'{"_path": "conn", "ts": ' + repr(ts) + ', "id.orig_h": "192.0.2.10",'
|
|
2103
|
+
' "id.resp_h": "198.51.100.20", "id.resp_p": 443, "proto": "tcp"}\n'
|
|
2104
|
+
)
|
|
2105
|
+
|
|
2106
|
+
|
|
2107
|
+
def test_run_digest_zero_span_renders_bare_timeline_unavailable(
    tmp_path: Path, capsys,
) -> None:
    """Zero-span timestamps render only the bare "(timeline unavailable)" line.

    Five conn records share one timestamp. The digest output must carry the
    bare line and NO footer block — the old differentiated footer text is
    gone with the flat card grammar.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    same_instant_records = _zeek_conn_line(1779750000.0) * 5
    (zeek_dir / "conn.log").write_text(same_instant_records, encoding="utf-8")

    runner.run_digest(
        config={"loghunter": {}},
        zeek_dir=zeek_dir,
        load_all=True,
        skip_confirm=True,
    )

    out = capsys.readouterr().out
    assert "(timeline unavailable)" in out
    # No footer / N.B. block anywhere in the flat grammar.
    for retired_text in ("N.B.", "timeline collapsed", "timestamp unparseable"):
        assert retired_text not in out
|
|
2130
|
+
|
|
2131
|
+
|
|
2132
|
+
def test_run_digest_low_coverage_renders_bare_timeline_unavailable(
    tmp_path: Path, capsys,
) -> None:
    """Low-coverage timestamps render the SAME bare line as the zero-span case.

    The fixture holds one syslog-shaped line followed by four garbage lines.
    Seeing the identical bare output proves ``_ts_confidence`` collapsed to
    a boolean predicate, not merely that the sentinels were deleted.
    """
    syslog_dir = tmp_path / "syslog"
    syslog_dir.mkdir()
    fixture_lines = [
        "<134>May 31 12:00:00 192.0.2.1 sshd[100]: real line\n",
        "garbage line 1\n",
        "garbage line 2\n",
        "garbage line 3\n",
        "garbage line 4\n",
    ]
    (syslog_dir / "router.log").write_text(
        "".join(fixture_lines), encoding="utf-8",
    )

    runner.run_digest(
        config={"loghunter": {}},
        syslog_dir=syslog_dir,
        load_all=True,
        skip_confirm=True,
        schema="syslog",
    )

    out = capsys.readouterr().out
    assert "(timeline unavailable)" in out
    for retired_text in ("N.B.", "timestamp unparseable", "floor 80%"):
        assert retired_text not in out
|
|
2159
|
+
|
|
2160
|
+
|
|
2161
|
+
def test_run_digest_summariser_raise_without_fallback_path_reraises(
    tmp_path: Path, monkeypatch,
) -> None:
    """A summariser failure propagates when no fallback path is supplied.

    With ``fallback_blob_path`` left at its default ``None`` (the
    bare-config caller has no single-file fallback available), the raise
    must escape ``run_digest`` so the CLI's existing ValueError arm can
    format the message. The narrow wrap MUST NOT swallow exceptions
    silently when no fallback is available.
    """
    def _exploding_summarizer(_schema_name: str):
        def _raise(*_a, **_kw):
            raise RuntimeError("induced summariser failure")
        return _raise

    # A minimal Zeek conn file that loads fine, so the failure under test
    # can only come from the patched summariser.
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    conn_record = (
        '{"ts": 1779750000.0, "id.orig_h": "192.0.2.10",'
        ' "id.resp_h": "198.51.100.20", "id.resp_p": 443, "proto": "tcp"}\n'
    )
    (zeek_dir / "conn.log").write_text(conn_record, encoding="utf-8")

    monkeypatch.setattr(
        "loghunter.digest.get_summarizer", _exploding_summarizer,
    )

    # fallback_blob_path is deliberately not passed (default None), so the
    # call must re-raise.
    with pytest.raises(RuntimeError, match="induced summariser failure"):
        runner.run_digest(
            config={"loghunter": {}},
            zeek_dir=zeek_dir,
            load_all=True,
            skip_confirm=True,
        )
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
# ── _prepare_detector_context + prep-error vs detector-error labels ─────────
|
|
2197
|
+
#
|
|
2198
|
+
# Addendum (docs/BUGS.md "Detector liveness starts too late"): the
|
|
2199
|
+
# per-detector prep (filter_df + DetectorContext construction) now lives
|
|
2200
|
+
# INSIDE the per-detector liveness block, so the spinner appears as soon
|
|
2201
|
+
# as the operator-visible work begins. A failure during prep must be
|
|
2202
|
+
# labelled "prep error" — distinct from "detector error" — because the
|
|
2203
|
+
# runner owns prep, not the detector (separation-of-powers).
|
|
2204
|
+
|
|
2205
|
+
|
|
2206
|
+
def _zeek_conn_dir(tmp_path: Path) -> Path:
    """Create ``tmp_path/zeek`` with a small ``conn.log`` and return the dir.

    The log holds two conn records, stamped ``_TS_JAN5`` and 60 seconds
    later.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    records = [_conn(_TS_JAN5), _conn(_TS_JAN5 + 60.0)]
    _write_ndjson(zeek_dir / "conn.log", records)
    return zeek_dir
|
|
2215
|
+
|
|
2216
|
+
|
|
2217
|
+
def test_prep_error_renders_prep_error_label_not_detector_error(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A raise inside ``_prepare_detector_context`` is labelled 'prep error'.

    It must NOT be labelled 'detector error': the detector module is not at
    fault — the runner's own prep raised.
    """
    def _exploding_prep(*_a, **_kw):
        raise RuntimeError("induced prep failure")

    zeek_dir = _zeek_conn_dir(tmp_path)
    monkeypatch.setattr(runner, "_prepare_detector_context", _exploding_prep)

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_dir)

    stderr_text = capsys.readouterr().err
    assert "beacon: prep error — induced prep failure" in stderr_text
    # The detector-error label would mislead the operator about WHERE the
    # failure happened (separation-of-powers detail).
    assert "beacon: detector error" not in stderr_text
|
|
2239
|
+
|
|
2240
|
+
|
|
2241
|
+
def test_detector_error_label_preserved_byte_identical(
    tmp_path: Path, monkeypatch, capsys,
) -> None:
    """A raise inside ``mod.run(ctx)`` keeps the 'detector error — ...' shape.

    Today's contract is preserved exactly, and the prep-error label must not
    leak onto a detector-side failure.
    """
    import loghunter.detectors.beacon as beacon_mod

    def _exploding_run(_ctx):
        raise RuntimeError("induced detector failure")

    zeek_dir = _zeek_conn_dir(tmp_path)
    monkeypatch.setattr(beacon_mod, "run", _exploding_run)

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_dir)

    stderr_text = capsys.readouterr().err
    assert "beacon: detector error — induced detector failure" in stderr_text
    # The prep-error label is reserved for runner-side prep failures.
    assert "beacon: prep error" not in stderr_text
|
|
2261
|
+
|
|
2262
|
+
|
|
2263
|
+
def test_liveness_seal_lands_once_for_successful_run(
    tmp_path: Path, monkeypatch, capsys, capture_summary,
) -> None:
    """A successful detector run yields exactly one sealed liveness record.

    The record is 'beacon: done' or 'beacon: nothing'; it carries no count
    after James's double-count fix, because the report header is the single
    authoritative count surface. This guards against a double-seal
    regression: the prep block is now INSIDE the liveness scope, so a stray
    extra seal would land if the body were wrapped twice.
    """
    import loghunter.detectors.beacon as beacon_mod

    zeek_dir = _zeek_conn_dir(tmp_path)
    # Make beacon's run() return nothing: this sidesteps fixture field-shape
    # mismatches, and the test is about seal accounting, not detector logic.
    monkeypatch.setattr(beacon_mod, "run", lambda _ctx: [])

    runner.run(config=_BEACON_ONLY, zeek_dir=zeek_dir)

    stderr_text = capsys.readouterr().err
    # Count every non-error "beacon:" line; whichever seal wording is used,
    # there must be exactly one such line.
    seal_lines = []
    for line in stderr_text.splitlines():
        if "error" in line:
            continue
        if line.strip().startswith("beacon:"):
            seal_lines.append(line)
    assert len(seal_lines) == 1, (
        f"expected exactly one beacon seal line, got {seal_lines!r}"
    )
|
|
2291
|
+
|
|
2292
|
+
|
|
2293
|
+
def test_prepare_detector_context_filters_per_pattern(tmp_path: Path) -> None:
    """Unit test for the extracted ``_prepare_detector_context`` helper.

    The helper must call ``allowlist.filter_df`` once per pattern the
    detector declares (REQUIRED + OPTIONAL) and build a ``DetectorContext``
    around the filtered view. A pattern the detector does not declare
    passes through untouched.
    """
    from loghunter.common.finding import DetectorContext as _DC

    filter_calls: list[tuple[str, str]] = []

    class _RecordingAllowlist:
        def filter_df(self, df, name):
            # Record only that a call happened for this detector name, then
            # behave as an identity filter.
            filter_calls.append((name, "<df>"))
            return df

    conn_df = pd.DataFrame({"a": [1, 2]})
    dns_df = pd.DataFrame({"b": [3]})
    other_df = pd.DataFrame({"c": [4]})
    logs = {
        "conn*.log*": conn_df,
        "dns*.log*": dns_df,
        "other*.log*": other_df,
    }
    mod = SimpleNamespace(
        REQUIRED_LOGS=[{"source": "zeek_dir", "pattern": "conn*.log*"}],
        OPTIONAL_LOGS=[{"source": "zeek_dir", "pattern": "dns*.log*"}],
    )
    # One instant used for both ends of the data window.
    now = datetime(2026, 1, 5, tzinfo=timezone.utc)

    ctx = runner._prepare_detector_context(
        mod=mod,
        name="beacon",
        logs=logs,
        allowlist=_RecordingAllowlist(),
        det_cfg={"k": "v"},
        data_window=(now, now),
        data_sources=["zeek_conn"],
        home_net=["10.0.0.0/8"],
    )

    # filter_df ran for each declared pattern, always under name=beacon.
    beacon_call = ("beacon", "<df>")
    assert beacon_call in filter_calls
    assert filter_calls.count(beacon_call) == 2  # conn + dns
    # other*.log* is not declared by the detector, so it is handed through
    # as the very same object.
    assert "other*.log*" in ctx.logs
    assert ctx.logs["other*.log*"] is other_df

    # The result has the shape of the previously inline DetectorContext.
    assert isinstance(ctx, _DC)
    assert ctx.config == {"k": "v"}
    assert ctx.data_sources == ["zeek_conn"]
    assert ctx.home_net == ["10.0.0.0/8"]
|
|
2345
|
+
|
|
2346
|
+
|
|
2347
|
+
# ── Rotation-peek disclosure notes (real runner.run, syslog_dir) ───────────────
|
|
2348
|
+
#
|
|
2349
|
+
# Drive runner.run end-to-end (NOT mocked) so the loader→RunSummary note seam is
|
|
2350
|
+
# exercised. since/until are derived by parsing the fixture lines so the tests do
|
|
2351
|
+
# not depend on the machine clock year. _rotation_skip_notes is the formatter.
|
|
2352
|
+
|
|
2353
|
+
from loghunter.parsers.syslog import parse_timestamp as _parse_ts
|
|
2354
|
+
|
|
2355
|
+
_SYSLOG_ONLY = {"loghunter": {"detect": "syslog"}}
|
|
2356
|
+
|
|
2357
|
+
|
|
2358
|
+
def _sysrot_line(mon: str, day: int) -> str:
|
|
2359
|
+
return f"{mon} {day:>2} 12:00:00 host1 sshd[1]: session opened for user"
|
|
2360
|
+
|
|
2361
|
+
|
|
2362
|
+
def _write_sysrot(d: Path, base: str, ts_by_ordinal: dict[int, tuple[str, int]]) -> None:
    """Write a one-line syslog rotation family into directory *d*.

    Ordinal 0 is written as *base* itself; any other ordinal ``n`` becomes
    ``<base>.<n>``. Each file holds a single ``_sysrot_line`` for the
    ``(month, day)`` pair mapped to its ordinal. *d* is created if missing.
    """
    d.mkdir(parents=True, exist_ok=True)
    for ordinal, stamp in ts_by_ordinal.items():
        month, day_of_month = stamp
        filename = f"{base}.{ordinal}" if ordinal != 0 else base
        content = _sysrot_line(month, day_of_month) + "\n"
        (d / filename).write_text(content, encoding="utf-8")
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
def test_runner_rotation_skip_note_neutral_wording(tmp_path, capture_summary):
    """A bounded window that skips on both sides uses the NEUTRAL wording.

    The --since/--until run skips a too-new leading file AND a too-old tail
    file under one count, so the note says 'outside' (truthful for both
    directions) and counts off the post-window candidates.
    """
    d = tmp_path / "syslog"
    rotation = {
        0: ("Jun", 10),  # too-new (oldest row > until) → skipped
        1: ("Jun", 8),   # in window
        2: ("Jun", 6),   # in window
        3: ("Jun", 4),   # straddle since → kept
        4: ("Jun", 2),   # too-old → skipped
    }
    _write_sysrot(d, "syslog.log", rotation)

    window_start = _parse_ts(_sysrot_line("Jun", 5))
    window_end = _parse_ts(_sysrot_line("Jun", 9))
    runner.run(
        config=_SYSLOG_ONLY,
        syslog_dir=d,
        since=window_start,
        until=window_end,
    )

    expected_note = (
        "syslog: loaded 3 of 5 rotation files; 2 skipped outside the selected "
        "window (by rotation order)."
    )
    assert expected_note in capture_summary["summary"].notes
|
|
2392
|
+
|
|
2393
|
+
|
|
2394
|
+
def test_runner_rotation_fallback_note_wins(tmp_path, capture_summary):
    """One out-of-order rotation family yields ONE fallback note, no skip summary.

    The fallback is data-true (the whole archive is read), so the
    skip-summary wording must not appear alongside it.
    """
    d = tmp_path / "syslog"
    in_order = {0: ("Jun", 6), 1: ("Jun", 5), 2: ("Jun", 4), 3: ("Jun", 3)}
    disordered = {0: ("Jun", 8), 1: ("Jun", 10)}  # first-ts RISE → disorder
    _write_sysrot(d, "auth.log", in_order)
    _write_sysrot(d, "kern.log", disordered)

    runner.run(
        config=_SYSLOG_ONLY,
        syslog_dir=d,
        since=_parse_ts(_sysrot_line("Jun", 5)),
    )

    notes = capture_summary["summary"].notes
    fallback_note = (
        "syslog: rotation order not monotonic — read the full archive "
        "(windowing skipped)."
    )
    assert fallback_note in notes
    assert all("skipped outside the selected window" not in note for note in notes)
|
|
2411
|
+
|
|
2412
|
+
|
|
2413
|
+
def test_runner_rotation_no_note_when_unwindowed(tmp_path, capture_summary):
    """With no explicit window (``load_all=True``) no rotation note is emitted."""
    d = tmp_path / "syslog"
    family = {0: ("Jun", 6), 1: ("Jun", 5), 2: ("Jun", 4)}
    _write_sysrot(d, "syslog.log", family)

    runner.run(config=_SYSLOG_ONLY, syslog_dir=d, load_all=True)

    notes = capture_summary["summary"].notes
    assert all("rotation" not in note.lower() for note in notes)
|
|
2420
|
+
|
|
2421
|
+
|
|
2422
|
+
def test_runner_rotation_overlap_export_window_note(tmp_path, capture_summary):
    """Overlapping exporter-output windows trigger the overlap fallback wording.

    Two export files in one flat dir cover overlapping windows, so the run
    reports the overlap note (whole-pattern full read), which is distinct
    from the monotonic note.
    """
    d = tmp_path / "syslog"
    d.mkdir(parents=True, exist_ok=True)
    exports = {
        "splunk_20260601_7d.log": ("Jun", 1),
        "splunk_20260605_1d.log": ("Jun", 5),
    }
    for filename, (mon, day) in exports.items():
        (d / filename).write_text(_sysrot_line(mon, day) + "\n", encoding="utf-8")

    runner.run(
        config=_SYSLOG_ONLY,
        syslog_dir=d,
        since=_parse_ts(_sysrot_line("Jun", 5)),
    )

    notes = capture_summary["summary"].notes
    overlap_note = (
        "syslog: overlapping export windows — read the full archive "
        "(windowing skipped)."
    )
    assert overlap_note in notes
    assert all("not monotonic" not in note for note in notes)
|
|
2440
|
+
|
|
2441
|
+
|
|
2442
|
+
def test_runner_rotation_duplicate_note(tmp_path, capture_summary):
    """A duplicate rotation slot triggers the 'duplicate rotation files' wording.

    A file and its ``.gz`` sibling collapse to one age_rank; the resulting
    fallback note is distinct from the monotonic and overlap notes.
    """
    d = tmp_path / "syslog"
    d.mkdir(parents=True, exist_ok=True)
    payload = _sysrot_line("Jun", 6) + "\n"
    (d / "auth.log").write_text(payload, encoding="utf-8")
    with gzip.open(d / "auth.log.gz", "wt", encoding="utf-8") as gz_handle:
        gz_handle.write(payload)

    runner.run(
        config=_SYSLOG_ONLY,
        syslog_dir=d,
        since=_parse_ts(_sysrot_line("Jun", 5)),
    )

    notes = capture_summary["summary"].notes
    duplicate_note = (
        "syslog: duplicate rotation files — read the full archive "
        "(windowing skipped)."
    )
    assert duplicate_note in notes
    for note in notes:
        assert "not monotonic" not in note and "overlapping" not in note
|
|
2462
|
+
|
|
2463
|
+
|
|
2464
|
+
# ── _source_overlap_notes — plan-time source-dir overlap disclosure ───────────
|
|
2465
|
+
|
|
2466
|
+
|
|
2467
|
+
def _plan_with_needed(needed_logs: dict[str, str]) -> RunPlan:
    """Build a ``RunPlan`` that is empty apart from *needed_logs*.

    ``needed_logs`` is the only field the overlap helper reads; every other
    field is given an empty container.
    """
    empty_fields = {
        "detectors": {},
        "selected": [],
        "will_run": [],
        "skipped": {},
    }
    return RunPlan(**empty_fields, needed_logs=needed_logs)
|
|
2473
|
+
|
|
2474
|
+
|
|
2475
|
+
def test_source_overlap_two_families_same_dir(tmp_path) -> None:
    """Two IN-PLAN families on one directory produce exactly one note.

    The note names both families in canonical key order and includes the
    resolved directory.
    """
    shared = tmp_path / "shared"
    shared.mkdir()
    plan = _plan_with_needed(
        {"conn*.log*": "zeek_dir", "*.log*": "syslog_dir"}
    )
    source_dirs = {"zeek_dir": [shared], "syslog_dir": [shared]}

    notes = _source_overlap_notes(source_dirs, plan)

    assert len(notes) == 1, notes
    note = notes[0]
    assert note.startswith("zeek_dir, syslog_dir resolve to the same directory")
    assert str(shared.resolve()) in note
    # Customized-path-truthful tail (no hard-coded exports/<x>/).
    assert "global exports now auto-segment per source" in note
|
|
2490
|
+
|
|
2491
|
+
|
|
2492
|
+
def test_source_overlap_three_families_same_dir(tmp_path) -> None:
    """Three families on one directory → one note listing all three, canonical order."""
    shared = tmp_path / "shared"
    shared.mkdir()
    families = ("zeek_dir", "syslog_dir", "pihole_dir")
    source_dirs = {family: [shared] for family in families}
    plan = _plan_with_needed({
        "conn*.log*": "zeek_dir",
        "*.log*": "syslog_dir",
        "pihole*.log*": "pihole_dir",
    })

    notes = _source_overlap_notes(source_dirs, plan)

    assert len(notes) == 1, notes
    expected_prefix = (
        "zeek_dir, syslog_dir, pihole_dir resolve to the same directory"
    )
    assert notes[0].startswith(expected_prefix)
|
|
2509
|
+
|
|
2510
|
+
|
|
2511
|
+
def test_source_overlap_in_plan_negative(tmp_path) -> None:
    """GLENN sharp case: an out-of-plan sibling on the same dir stays silent.

    Two configured dirs resolve to one directory, but only ONE family is in
    the plan, so no note may be produced about the unplanned sibling.
    """
    shared = tmp_path / "shared"
    shared.mkdir()
    # Only zeek_dir is planned (e.g. detect=beacon); syslog_dir is configured
    # but unselected, so it cannot contaminate the run.
    plan = _plan_with_needed({"conn*.log*": "zeek_dir"})
    source_dirs = {"zeek_dir": [shared], "syslog_dir": [shared]}

    notes = _source_overlap_notes(source_dirs, plan)

    assert notes == []
|
|
2521
|
+
|
|
2522
|
+
|
|
2523
|
+
def test_source_overlap_nested_dirs_stay_silent(tmp_path) -> None:
    """Equal-dir ONLY: a NESTED pair (parent containing child) is not an overlap.

    Flat discovery is non-recursive. Both directories really exist, so the
    rail is proven by path inequality rather than by a missing dir on the
    test box.
    """
    varlog = tmp_path / "varlog"
    zeek = varlog / "zeek"
    zeek.mkdir(parents=True)  # creates the parent as well
    plan = _plan_with_needed(
        {"*.log*": "syslog_dir", "conn*.log*": "zeek_dir"}
    )
    source_dirs = {"syslog_dir": [varlog], "zeek_dir": [zeek]}

    notes = _source_overlap_notes(source_dirs, plan)

    assert notes == []
|
|
2535
|
+
|
|
2536
|
+
|
|
2537
|
+
def test_source_overlap_files_out_of_scope(tmp_path) -> None:
    """Explicit FILE inputs never produce an overlap note.

    The vector under disclosure is dir-glob overlap, not a shared named
    file, so two families pointing at the same file stay silent.
    """
    shared_file = tmp_path / "shared.log"
    shared_file.write_text("x", encoding="utf-8")
    plan = _plan_with_needed(
        {"conn*.log*": "zeek_dir", "*.log*": "syslog_dir"}
    )
    source_dirs = {"zeek_dir": [shared_file], "syslog_dir": [shared_file]}

    notes = _source_overlap_notes(source_dirs, plan)

    assert notes == []
|
|
2547
|
+
|
|
2548
|
+
|
|
2549
|
+
def test_source_overlap_collapses_per_family_duplicates(tmp_path) -> None:
    """The same dir listed twice within ONE family is not an overlap.

    An overlap requires two DISTINCT families.
    """
    shared = tmp_path / "shared"
    shared.mkdir()
    plan = _plan_with_needed({"conn*.log*": "zeek_dir"})
    source_dirs = {"zeek_dir": [shared, shared]}

    notes = _source_overlap_notes(source_dirs, plan)

    assert notes == []
|
|
2557
|
+
|
|
2558
|
+
|
|
2559
|
+
# ── runner seam pin: the overlap note reaches RunSummary.notes ────────────────
|
|
2560
|
+
|
|
2561
|
+
|
|
2562
|
+
def test_runner_emits_source_overlap_note(
|
|
2563
|
+
tmp_path, capture_summary, mock_load_required_logs,
|
|
2564
|
+
) -> None:
|
|
2565
|
+
"""Seam pin (GLENN): the one-line notes.extend wiring lands the overlap note
|
|
2566
|
+
on the user-facing RunSummary.notes surface, not just in the pure helper.
|
|
2567
|
+
|
|
2568
|
+
zeek_dir (beacon, REQUIRED conn*.log*) and cloudtrail_dir (aws, REQUIRED
|
|
2569
|
+
*.json*) both point at one shared directory holding both files → both
|
|
2570
|
+
families are in-plan at the same resolved dir → overlap note fires."""
|
|
2571
|
+
from loghunter.common.loader import LoadResult, SourceCoverage
|
|
2572
|
+
|
|
2573
|
+
shared = tmp_path / "shared"
|
|
2574
|
+
shared.mkdir()
|
|
2575
|
+
_write_ndjson(shared / "conn.log", [_conn(_TS_JAN5)])
|
|
2576
|
+
(shared / "events.json.log").write_text("{}", encoding="utf-8")
|
|
2577
|
+
|
|
2578
|
+
fake_lr = LoadResult(
|
|
2579
|
+
logs={
|
|
2580
|
+
"conn*.log*": pd.DataFrame(columns=["ts", "src", "dst"]),
|
|
2581
|
+
"*.json*": pd.DataFrame(columns=_CT_COLUMNS_FOR_MOCK),
|
|
2582
|
+
},
|
|
2583
|
+
record_counts={},
|
|
2584
|
+
data_window=None,
|
|
2585
|
+
warnings=[],
|
|
2586
|
+
data_size_bytes=0,
|
|
2587
|
+
coverage={},
|
|
2588
|
+
)
|
|
2589
|
+
mock_load_required_logs(fake_lr)
|
|
2590
|
+
|
|
2591
|
+
runner.run(
|
|
2592
|
+
config={"loghunter": {"detect": "beacon,aws", "default_window": ""}},
|
|
2593
|
+
zeek_dir=shared,
|
|
2594
|
+
cloudtrail_dir=shared,
|
|
2595
|
+
)
|
|
2596
|
+
s = capture_summary["summary"]
|
|
2597
|
+
overlap = [n for n in s.notes if "resolve to the same directory" in n]
|
|
2598
|
+
assert len(overlap) == 1, s.notes
|
|
2599
|
+
assert "zeek_dir" in overlap[0] and "cloudtrail_dir" in overlap[0]
|