loghunter-cli 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loghunter/__init__.py +3 -0
- loghunter/cli.py +1108 -0
- loghunter/cli_init.py +567 -0
- loghunter/common/__init__.py +1 -0
- loghunter/common/allowlist.py +436 -0
- loghunter/common/clustering.py +326 -0
- loghunter/common/config.py +221 -0
- loghunter/common/display.py +323 -0
- loghunter/common/errors.py +45 -0
- loghunter/common/finding.py +239 -0
- loghunter/common/loader/__init__.py +136 -0
- loghunter/common/loader/diagnostics.py +94 -0
- loghunter/common/loader/discovery.py +335 -0
- loghunter/common/loader/io.py +76 -0
- loghunter/common/loader/pipeline.py +1010 -0
- loghunter/common/loader/sniff.py +184 -0
- loghunter/common/loader/types.py +207 -0
- loghunter/common/loader/windowing.py +523 -0
- loghunter/common/output.py +93 -0
- loghunter/common/paths.py +105 -0
- loghunter/common/sources.py +392 -0
- loghunter/data/allowlist/connections.txt +50 -0
- loghunter/data/allowlist/domains_devices.txt +5 -0
- loghunter/data/allowlist/domains_homelab.txt +5 -0
- loghunter/data/allowlist/domains_universal.txt +125 -0
- loghunter/data/config_example.toml +144 -0
- loghunter/detectors/__init__.py +5 -0
- loghunter/detectors/auth.py +27 -0
- loghunter/detectors/aws.py +671 -0
- loghunter/detectors/beacon.py +258 -0
- loghunter/detectors/dns.py +778 -0
- loghunter/detectors/dnsblock.py +29 -0
- loghunter/detectors/duration.py +178 -0
- loghunter/detectors/protocol.py +26 -0
- loghunter/detectors/scan.py +735 -0
- loghunter/detectors/ssl.py +25 -0
- loghunter/detectors/syslog.py +266 -0
- loghunter/detectors/weird.py +27 -0
- loghunter/digest/__init__.py +43 -0
- loghunter/digest/_stats.py +182 -0
- loghunter/digest/blob.py +698 -0
- loghunter/digest/cloudtrail.py +341 -0
- loghunter/digest/conn.py +367 -0
- loghunter/digest/dns.py +364 -0
- loghunter/digest/syslog.py +269 -0
- loghunter/exporters/__init__.py +534 -0
- loghunter/exporters/cloudtrail.py +499 -0
- loghunter/exporters/splunk.py +222 -0
- loghunter/outputs/__init__.py +1 -0
- loghunter/outputs/allowlist.py +75 -0
- loghunter/outputs/csv.py +70 -0
- loghunter/outputs/email.py +44 -0
- loghunter/outputs/html.py +99 -0
- loghunter/outputs/json.py +77 -0
- loghunter/outputs/text.py +1422 -0
- loghunter/parsers/__init__.py +1 -0
- loghunter/parsers/cloudtrail.py +287 -0
- loghunter/parsers/dnsmasq.py +331 -0
- loghunter/parsers/syslog.py +150 -0
- loghunter/parsers/zeek.py +294 -0
- loghunter/parsers/zeek_tsv.py +310 -0
- loghunter/runner.py +1895 -0
- loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
- loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
- loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
- loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
- loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
- migrations/cloudtrail_parquet.py +59 -0
- migrations/conn_fft.py +550 -0
- migrations/conn_scan.py +1097 -0
- migrations/dns_dbscan.py +520 -0
- migrations/get_syslog.py +402 -0
- migrations/syslog_drain3.py +479 -0
- scratch/junk/parquet.py +59 -0
- tests/__init__.py +1 -0
- tests/_cloudtrail_fakes.py +116 -0
- tests/conftest.py +17 -0
- tests/test_allowlist_defaults_accessor.py +90 -0
- tests/test_architecture_spine.py +302 -0
- tests/test_aws_detector.py +504 -0
- tests/test_be_like_water.py +106 -0
- tests/test_cli_help.py +342 -0
- tests/test_cli_multi_positional.py +458 -0
- tests/test_cloudtrail_exporter.py +631 -0
- tests/test_cloudtrail_exporter_botocore.py +207 -0
- tests/test_cloudtrail_parser.py +393 -0
- tests/test_clustering.py +85 -0
- tests/test_clustering_interruptible.py +404 -0
- tests/test_config_cli.py +1006 -0
- tests/test_config_example_drift.py +164 -0
- tests/test_digest_blob.py +1237 -0
- tests/test_digest_cli.py +1040 -0
- tests/test_digest_cloudtrail.py +980 -0
- tests/test_digest_conn.py +1189 -0
- tests/test_digest_dns.py +770 -0
- tests/test_digest_stats.py +282 -0
- tests/test_digest_syslog.py +724 -0
- tests/test_display.py +370 -0
- tests/test_dns_detector.py +1010 -0
- tests/test_dnsmasq_parser.py +467 -0
- tests/test_duration_detector.py +491 -0
- tests/test_export_orchestrator_shape.py +153 -0
- tests/test_init_wizard.py +707 -0
- tests/test_loader.py +3639 -0
- tests/test_loader_package_surface.py +115 -0
- tests/test_loader_window_model.py +215 -0
- tests/test_output_path_cascade.py +575 -0
- tests/test_resolve_path.py +111 -0
- tests/test_root_provenance.py +212 -0
- tests/test_runner.py +2599 -0
- tests/test_scan_detector.py +455 -0
- tests/test_search_paths.py +50 -0
- tests/test_sniff_orchestrator.py +373 -0
- tests/test_sniff_recognizers.py +573 -0
- tests/test_source_resolution_seam.py +471 -0
- tests/test_sources.py +648 -0
- tests/test_splunk_exporter.py +351 -0
- tests/test_syslog_detector.py +458 -0
- tests/test_syslog_parser.py +582 -0
- tests/test_text_output.py +1225 -0
- tests/test_zeek_tsv_parser.py +580 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""A-phase guard: the loader/ package presents the same import surface as the
|
|
2
|
+
former single common/loader.py module, and the test-patchable I/O seams
|
|
3
|
+
(``progress`` / ``_open_log``) remain SETTABLE at the package boundary AND
|
|
4
|
+
patch-through to the load pipeline.
|
|
5
|
+
|
|
6
|
+
This is the extraction's safety net (Glenn execution caution #1): a dropped
|
|
7
|
+
re-export or a facade that snapshots a pre-patch object fails here loudly.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import loghunter.common.loader as loader
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# The full re-export inventory. Every name that resolved at
|
|
18
|
+
# loghunter.common.loader.<name> before the package split MUST still resolve.
|
|
19
|
+
# Pinned literally so a dropped re-export is a hard failure, not silent drift.
|
|
20
|
+
_SURFACE = [
    # --- display re-export (module-global; monkeypatched in 12 tests) ---
    "progress",
    # --- io ---
    "_open_log",
    "_safe_resolve",
    "_union_dedupe",
    # --- types ---
    "LoadResult",
    "CoverageTracker",
    "SourceCoverage",
    "RotationSkipInfo",
    "_data_window",
    "_PIHOLE_COLUMNS",
    "_CLOUDTRAIL_COLUMNS",
    "_SYSLOG_COLUMNS",
    "_LOG_SUFFIXES",
    # --- diagnostics ---
    "_log_type",
    "_schema_warning",
    "_zeek_file_read_warning",
    "_cloudtrail_parse_warning",
    # --- sniff ---
    "sniff_format",
    "sniff_format_detailed",
    "SniffResult",
    "_is_ndjson",
    "_looks_like_syslog",
    "_SNIFF_MAX_PEEK",
    "_SNIFF_ORIGIN",
    "_SNIFF_RECOGNIZERS",
    "_SYSLOG_SNIFF_BYTES",
    # --- windowing ---
    "_apply_ts_filter",
    "_missing_ts",
    "is_bounded",
    "is_zeek_bounded",
    "_classify_rotation_name",
    "_rotation_base_and_index",
    "_peek_first_ts",
    "_select_group",
    "_group_order_conflict",
    "_rotation_windowed_files",
    "_COMPRESSION_EXTS",
    "_ROTATION_NUM_RE",
    "_DATE_RANK_BASE",
    "_EXPORT_WINDOW_RE",
    # --- windowing: B+D named window model ---
    "LoadWindow",
    "apply_default_window",
    # --- discovery ---
    "discover_files",
    "_DATE_DIR_RE",
    "_zeek_date_subdirs",
    "_file_matches_pattern",
    "discover_zeek_files",
    "_syslog_files",
    "_discover_syslog_files",
    "_dir_has_regular_files",
    "discover_cloudtrail_files",
    "_stem_hostname",
    # --- discovery: B+D strategy resolvers (folded accessors) ---
    "_zeek_dated_window",
    "_flat_default_floor",
    "_default_resolve_window",
    "_zeek_resolve_window",
    "_flat_resolve_window",
    # --- pipeline ---
    "SourceLoader",
    "_SOURCE_LOADERS",
    "run_load",
    "load_required_logs",
    "load_logs",
    "load_zeek_log",
    "load_syslog",
    "load_pihole",
    "load_cloudtrail",
    "_zeek_records_from_lines",
    "_zeek_parse_from_lines",
    "_parse_ndjson_file",
    "_parse_lines",
    "_zeek_strategy_parse",
    "_zeek_normalize",
    "_syslog_strategy_parse",
    "_pihole_strategy_parse",
    "_cloudtrail_strategy_parse",
    "_events_from_whole_document",
    "_syslog_should_skip",
    "_pihole_should_skip",
    "_NORMALIZER_MAP",
    "resolve_load_windows",
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_full_surface_resolves():
    """Every name pinned in ``_SURFACE`` must resolve as a package attribute."""
    missing = []
    for name in _SURFACE:
        if hasattr(loader, name):
            continue
        missing.append(name)
    assert not missing, f"loader package dropped re-exports: {missing}"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_source_loaders_registry_identity():
    """The from-imported registry is the package attribute and holds all four
    detector source keys.
    """
    # Imported by string path on purpose: this is the spelling callers use.
    from loghunter.common.loader import _SOURCE_LOADERS as imported

    registry = loader._SOURCE_LOADERS
    # NOTE(review): both names are read from the same package attribute, so this
    # mostly guards that the re-export exists — confirm whether a comparison
    # against the pipeline module's own registry object was intended.
    assert imported is registry

    # Completeness rail: each detector source key needs a registered strategy.
    required_keys = ("zeek_dir", "syslog_dir", "pihole_dir", "cloudtrail_dir")
    assert all(key in registry for key in required_keys)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _write_conn(tmp_path: Path) -> Path:
    """Create ``tmp_path / "zeek"`` containing a one-record NDJSON ``conn.log``.

    Args:
        tmp_path: An existing directory under which the ``zeek`` directory is
            created. ``mkdir`` raises ``FileExistsError`` if it already exists.

    Returns:
        The newly created ``zeek`` directory.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    # Explicit encoding keeps the fixture independent of the platform locale
    # and matches the other fixture writes in this test suite.
    (zeek_dir / "conn.log").write_text(
        '{"ts": 1.0, "id.orig_h": "192.0.2.1", "id.resp_h": "192.0.2.2", '
        '"id.resp_p": 53, "proto": "udp"}\n',
        encoding="utf-8",
    )
    return zeek_dir
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_progress_patch_through(tmp_path, monkeypatch):
    """A replacement installed on ``loader.progress`` must be invoked by
    ``loader.load_logs`` — i.e. the package attribute is looked up at call time
    rather than captured once at import.
    """
    original_progress = loader.progress
    call_count = 0

    def spy(it, **kwargs):
        # Count the call, then delegate so loading behaves normally.
        nonlocal call_count
        call_count += 1
        return original_progress(it, **kwargs)

    monkeypatch.setattr(loader, "progress", spy)

    zeek_dir = _write_conn(tmp_path)
    frame = loader.load_logs(zeek_dir, "conn*.log*")

    assert len(frame) == 1
    assert call_count >= 1, "progress patch did not reach the load pipeline"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_open_log_patch_through(tmp_path, monkeypatch):
    """A replacement installed on ``loader._open_log`` must be invoked by
    ``loader.load_logs``.
    """
    original_open_log = loader._open_log
    call_count = 0

    def spy(path):
        # Count the call, then delegate to the real opener.
        nonlocal call_count
        call_count += 1
        return original_open_log(path)

    monkeypatch.setattr(loader, "_open_log", spy)

    zeek_dir = _write_conn(tmp_path)
    frame = loader.load_logs(zeek_dir, "conn*.log*")

    assert len(frame) == 1
    assert call_count >= 1, "_open_log patch did not reach the load pipeline"
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""B+D: the named window model — one LoadWindow type, one resolve_load_windows
|
|
2
|
+
resolver shared by run() and run_digest(), and the contributor contract (a new
|
|
3
|
+
source declares its temporal policy on ONE registry entry — zero runner edits,
|
|
4
|
+
zero new accessor, zero digest twin).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import replace
|
|
10
|
+
from datetime import datetime, timedelta, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
import loghunter.common.loader as loader
|
|
17
|
+
from loghunter import runner
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ── resolve_load_windows — short-circuits ────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_resolve_load_windows_short_circuits_on_explicit_window(tmp_path):
    """An explicit ``since``, ``load_all=True``, or an ``"all"`` default spec
    each make the resolver return no windows.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    sources = {"conn*.log*": "zeek_dir"}
    dirs = {"zeek_dir": [zeek_dir]}
    explicit_since = datetime(2026, 6, 1, tzinfo=timezone.utc)

    # (default spec, keyword bounds) — evaluated in this order.
    cases = (
        # caller supplied an explicit lower bound
        ("1d", {"since": explicit_since, "until": None, "load_all": False}),
        # caller asked for everything
        ("1d", {"since": None, "until": None, "load_all": True}),
        # the default spec itself is "all"
        ("all", {"since": None, "until": None, "load_all": False}),
    )
    for default_spec, bounds in cases:
        assert loader.resolve_load_windows(
            sources, dirs, default_spec, **bounds
        ) == []
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_resolve_load_windows_skips_bounded_file_input(tmp_path):
    """When the source path is a single file rather than a directory, no
    default window is resolved.
    """
    log_file = tmp_path / "conn.log"
    log_file.write_text("{}\n", encoding="utf-8")

    resolved = loader.resolve_load_windows(
        {"conn*.log*": "zeek_dir"},
        {"zeek_dir": [log_file]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert resolved == []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ── per-family resolution shapes (the strategy resolver bodies) ───────────────
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_resolve_load_windows_zeek_dated_precise_no_trim(tmp_path):
    """A Zeek directory with a dated subdirectory resolves to one window whose
    ``select_window`` spans that whole day and whose ``trim_span`` is None.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    (zeek_dir / "2026-01-05").mkdir()

    windows = loader.resolve_load_windows(
        {"conn*.log*": "zeek_dir"},
        {"zeek_dir": [zeek_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert len(windows) == 1

    window = windows[0]
    day_start = datetime(2026, 1, 5, 0, 0, 0, tzinfo=timezone.utc)
    day_end = datetime(2026, 1, 5, 23, 59, 59, tzinfo=timezone.utc)
    assert window.source == "zeek_dir"
    assert window.select_window == (day_start, day_end)
    assert window.trim_span is None
    # zeek drops rows whose timestamp cannot be parsed
    assert window.keep_null is False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_resolve_load_windows_zeek_flat_load_full_trim(tmp_path):
    """A flat Zeek directory (no dated subdirectories) resolves to one window
    with ``select_window`` None and a one-day ``trim_span``.
    """
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    # A log directly in the directory: the flat layout.
    (zeek_dir / "conn.log").write_text("{}\n", encoding="utf-8")

    windows = loader.resolve_load_windows(
        {"conn*.log*": "zeek_dir"},
        {"zeek_dir": [zeek_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert len(windows) == 1

    window = windows[0]
    assert window.select_window is None
    assert window.trim_span == timedelta(days=1)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_resolve_load_windows_flat_family_conservative_floor(tmp_path):
    """A syslog directory resolves to an open-ended ``select_window`` whose
    floor is the first peeked timestamp minus the span, plus a ``trim_span``
    equal to the span and ``keep_null`` True.
    """
    syslog_dir = tmp_path / "syslog"
    syslog_dir.mkdir()
    messages = syslog_dir / "messages"
    messages.write_text(
        "Jun 5 12:00:00 host kernel: line\n", encoding="utf-8"
    )
    span = timedelta(days=1)

    windows = loader.resolve_load_windows(
        {"*.log*": "syslog_dir"},
        {"syslog_dir": [syslog_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert len(windows) == 1

    window = windows[0]
    # Lower bound only: the upper end of the selection stays open.
    assert window.select_window is not None and window.select_window[1] is None
    assert window.select_window[0] == loader._peek_first_ts(messages) - span
    assert window.trim_span == span
    assert window.keep_null is True
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_resolve_load_windows_cloudtrail_opts_out(tmp_path):
    """A CloudTrail directory yields no default window from the resolver."""
    cloudtrail_dir = tmp_path / "ct"
    cloudtrail_dir.mkdir()
    (cloudtrail_dir / "events.json").write_text("[]\n", encoding="utf-8")

    resolved = loader.resolve_load_windows(
        {"*.json*": "cloudtrail_dir"},
        {"cloudtrail_dir": [cloudtrail_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert resolved == []
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# ── the contributor contract (Doneness #2) ───────────────────────────────────
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _fake_flat_source(**overrides) -> loader.SourceLoader:
    """Build a minimal ``SourceLoader`` entry standing in for a new flat source.

    Only discover/mode/parse/ts_policy/columns/should_skip/normalize are passed;
    every other field keeps whatever default ``SourceLoader`` declares. Keyword
    ``overrides`` are applied with ``dataclasses.replace``.
    """

    def _discover(p, pattern, since, until):
        # Directory → its *.log files in sorted order; anything else → itself.
        if p.is_dir():
            return sorted(p.glob("*.log"))
        return [p]

    def _parse(line_iter, *, path, warnings):
        # Produces no records regardless of input.
        return iter(())

    base = loader.SourceLoader(
        discover=_discover,
        mode="stream",
        parse=_parse,
        ts_policy="keep",
        columns=["ts", "message"],
        should_skip=None,
        normalize=None,
    )
    if not overrides:
        return base
    return replace(base, **overrides)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def test_contributor_contract_new_source_inherits_universal_default(
    tmp_path, monkeypatch
):
    """Registering the fake source under ``fake_dir`` — with nothing else
    changed — is enough for ``resolve_load_windows`` to mint one window for it:
    no ``select_window``, a one-day ``trim_span``, and ``keep_null`` True.
    """
    monkeypatch.setitem(loader._SOURCE_LOADERS, "fake_dir", _fake_flat_source())
    source_dir = tmp_path / "fakesrc"
    source_dir.mkdir()

    windows = loader.resolve_load_windows(
        {"*.log": "fake_dir"},
        {"fake_dir": [source_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert len(windows) == 1

    window = windows[0]
    assert window.source == "fake_dir"
    # universal default = load everything, trim afterwards
    assert window.select_window is None
    assert window.trim_span == timedelta(days=1)
    # the fake entry declares ts_policy="keep"
    assert window.keep_null is True
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def test_contributor_contract_new_source_can_opt_out(tmp_path, monkeypatch):
    """With ``default_window_eligible=False`` on its registry entry, the fake
    source gets no window from ``resolve_load_windows``.
    """
    opted_out = _fake_flat_source(default_window_eligible=False)
    monkeypatch.setitem(loader._SOURCE_LOADERS, "fake_dir", opted_out)
    source_dir = tmp_path / "fakesrc"
    source_dir.mkdir()

    resolved = loader.resolve_load_windows(
        {"*.log": "fake_dir"},
        {"fake_dir": [source_dir]},
        "1d",
        since=None,
        until=None,
        load_all=False,
    )
    assert resolved == []
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ── digest preservation: window resolution is Zeek-ONLY (caller-side gate) ─────
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_digest_window_resolution_is_zeek_only(tmp_path, monkeypatch, capsys):
    """``run_digest`` calls the shared resolver exactly once for a Zeek digest
    and never for syslog or cloudtrail digests.

    The resolver is wrapped with a recording spy and every digest is run with
    ``dry_run=True``.
    """
    recorded: list[Any] = []
    original_resolver = loader.resolve_load_windows

    def spy(needed_sources, *a, **k):
        # Record the first argument, then delegate unchanged.
        recorded.append(needed_sources)
        return original_resolver(needed_sources, *a, **k)

    monkeypatch.setattr(loader, "resolve_load_windows", spy)

    # Zeek: the resolver must be consulted once.
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    runner.run_digest(
        config={"loghunter": {"zeek_dir": str(zeek_dir)}},
        schema="conn",
        dry_run=True,
    )
    assert len(recorded) == 1, "zeek digest resolves the default window"

    # Non-Zeek digests: start from a clean record and expect it to stay empty.
    del recorded[:]
    non_zeek_cases = (
        ("syslog_dir", "syslog", "syslog"),
        ("cloudtrail_dir", "ct", "cloudtrail"),
    )
    for config_key, dir_name, schema in non_zeek_cases:
        source_dir = tmp_path / dir_name
        source_dir.mkdir()
        runner.run_digest(
            config={"loghunter": {config_key: str(source_dir)}},
            schema=schema,
            dry_run=True,
        )
    assert recorded == [], "non-Zeek digests never resolve a default window (load full)"
|