PyPI - loghunter-cli - Versions diffs - 0.1.0.dev0__py3-none-any.whl - Mend

loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

loghunter/__init__.py +3 -0
loghunter/cli.py +1108 -0
loghunter/cli_init.py +567 -0
loghunter/common/__init__.py +1 -0
loghunter/common/allowlist.py +436 -0
loghunter/common/clustering.py +326 -0
loghunter/common/config.py +221 -0
loghunter/common/display.py +323 -0
loghunter/common/errors.py +45 -0
loghunter/common/finding.py +239 -0
loghunter/common/loader/__init__.py +136 -0
loghunter/common/loader/diagnostics.py +94 -0
loghunter/common/loader/discovery.py +335 -0
loghunter/common/loader/io.py +76 -0
loghunter/common/loader/pipeline.py +1010 -0
loghunter/common/loader/sniff.py +184 -0
loghunter/common/loader/types.py +207 -0
loghunter/common/loader/windowing.py +523 -0
loghunter/common/output.py +93 -0
loghunter/common/paths.py +105 -0
loghunter/common/sources.py +392 -0
loghunter/data/allowlist/connections.txt +50 -0
loghunter/data/allowlist/domains_devices.txt +5 -0
loghunter/data/allowlist/domains_homelab.txt +5 -0
loghunter/data/allowlist/domains_universal.txt +125 -0
loghunter/data/config_example.toml +144 -0
loghunter/detectors/__init__.py +5 -0
loghunter/detectors/auth.py +27 -0
loghunter/detectors/aws.py +671 -0
loghunter/detectors/beacon.py +258 -0
loghunter/detectors/dns.py +778 -0
loghunter/detectors/dnsblock.py +29 -0
loghunter/detectors/duration.py +178 -0
loghunter/detectors/protocol.py +26 -0
loghunter/detectors/scan.py +735 -0
loghunter/detectors/ssl.py +25 -0
loghunter/detectors/syslog.py +266 -0
loghunter/detectors/weird.py +27 -0
loghunter/digest/__init__.py +43 -0
loghunter/digest/_stats.py +182 -0
loghunter/digest/blob.py +698 -0
loghunter/digest/cloudtrail.py +341 -0
loghunter/digest/conn.py +367 -0
loghunter/digest/dns.py +364 -0
loghunter/digest/syslog.py +269 -0
loghunter/exporters/__init__.py +534 -0
loghunter/exporters/cloudtrail.py +499 -0
loghunter/exporters/splunk.py +222 -0
loghunter/outputs/__init__.py +1 -0
loghunter/outputs/allowlist.py +75 -0
loghunter/outputs/csv.py +70 -0
loghunter/outputs/email.py +44 -0
loghunter/outputs/html.py +99 -0
loghunter/outputs/json.py +77 -0
loghunter/outputs/text.py +1422 -0
loghunter/parsers/__init__.py +1 -0
loghunter/parsers/cloudtrail.py +287 -0
loghunter/parsers/dnsmasq.py +331 -0
loghunter/parsers/syslog.py +150 -0
loghunter/parsers/zeek.py +294 -0
loghunter/parsers/zeek_tsv.py +310 -0
loghunter/runner.py +1895 -0
loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
migrations/cloudtrail_parquet.py +59 -0
migrations/conn_fft.py +550 -0
migrations/conn_scan.py +1097 -0
migrations/dns_dbscan.py +520 -0
migrations/get_syslog.py +402 -0
migrations/syslog_drain3.py +479 -0
scratch/junk/parquet.py +59 -0
tests/__init__.py +1 -0
tests/_cloudtrail_fakes.py +116 -0
tests/conftest.py +17 -0
tests/test_allowlist_defaults_accessor.py +90 -0
tests/test_architecture_spine.py +302 -0
tests/test_aws_detector.py +504 -0
tests/test_be_like_water.py +106 -0
tests/test_cli_help.py +342 -0
tests/test_cli_multi_positional.py +458 -0
tests/test_cloudtrail_exporter.py +631 -0
tests/test_cloudtrail_exporter_botocore.py +207 -0
tests/test_cloudtrail_parser.py +393 -0
tests/test_clustering.py +85 -0
tests/test_clustering_interruptible.py +404 -0
tests/test_config_cli.py +1006 -0
tests/test_config_example_drift.py +164 -0
tests/test_digest_blob.py +1237 -0
tests/test_digest_cli.py +1040 -0
tests/test_digest_cloudtrail.py +980 -0
tests/test_digest_conn.py +1189 -0
tests/test_digest_dns.py +770 -0
tests/test_digest_stats.py +282 -0
tests/test_digest_syslog.py +724 -0
tests/test_display.py +370 -0
tests/test_dns_detector.py +1010 -0
tests/test_dnsmasq_parser.py +467 -0
tests/test_duration_detector.py +491 -0
tests/test_export_orchestrator_shape.py +153 -0
tests/test_init_wizard.py +707 -0
tests/test_loader.py +3639 -0
tests/test_loader_package_surface.py +115 -0
tests/test_loader_window_model.py +215 -0
tests/test_output_path_cascade.py +575 -0
tests/test_resolve_path.py +111 -0
tests/test_root_provenance.py +212 -0
tests/test_runner.py +2599 -0
tests/test_scan_detector.py +455 -0
tests/test_search_paths.py +50 -0
tests/test_sniff_orchestrator.py +373 -0
tests/test_sniff_recognizers.py +573 -0
tests/test_source_resolution_seam.py +471 -0
tests/test_sources.py +648 -0
tests/test_splunk_exporter.py +351 -0
tests/test_syslog_detector.py +458 -0
tests/test_syslog_parser.py +582 -0
tests/test_text_output.py +1225 -0
tests/test_zeek_tsv_parser.py +580 -0

tests/test_output_path_cascade.py ADDED Viewed

@@ -0,0 +1,575 @@
+"""End-to-end tests for the output-path cascade across analyze and export.
+Five-tier export cascade (most-specific wins):
+  1. --out (CLI)
+  2. query["export_dir"]            (per-query — finest grain)
+  3. backend["export_dir"]          ([export.cloudtrail].export_dir, [export.splunk].export_dir)
+  4. loghunter["export_dir"]        (global default — ships ~/.loghunter/exports;
+                                     auto-segments per source into <base>/<source>/)
+  5. "."                            (CWD floor)
+Analyze medium: stdout default; --out OR [loghunter].report_dir opts into file.
+"""
+from __future__ import annotations
+import sys
+from datetime import datetime
+from pathlib import Path
+import pytest
+from loghunter import cli
+from loghunter.common import config as cfg
+from loghunter.common.paths import effective_root
+from loghunter.exporters import _resolve_output_path
+# ── Export cascade — splunk-shaped (with queries) ─────────────────────────────
+def test_export_tier1_cli_wins_over_all(tmp_path: Path) -> None:
+    """--out beats per-query, backend, and global."""
+    cli_dir = tmp_path / "cli_dir"
+    query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
+    backend = {"export_dir": str(tmp_path / "backend_dir")}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    result = _resolve_output_path(
+        query, f"{cli_dir}/", datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == cli_dir
+    assert result.name == "syslog_20260601_7d.log"
+def test_export_tier2_per_query_wins_over_backend_and_global(tmp_path: Path) -> None:
+    """No CLI; per-query export_dir beats backend export_dir and global export_dir."""
+    query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
+    backend = {"export_dir": str(tmp_path / "backend_dir")}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "query_dir").mkdir()  # ensure existing dir verdict
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "query_dir"
+def test_export_tier3_backend_wins_over_global(tmp_path: Path) -> None:
+    """No CLI/per-query; backend export_dir beats global export_dir."""
+    query = {"output_basename": "syslog"}   # no output_dir
+    backend = {"export_dir": str(tmp_path / "backend_dir")}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "backend_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "backend_dir"
+def test_export_tier4_global_wins_when_only_loghunter_set(tmp_path: Path) -> None:
+    """No CLI/per-query/backend; global export_dir wins AND auto-segments by
+    source: the global base is <base>/<basename>/, basename "syslog"."""
+    query = {"output_basename": "syslog"}
+    backend = {}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "global_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "global_dir" / "syslog"
+def test_export_tier5_cwd_floor_when_nothing_set(monkeypatch, tmp_path: Path) -> None:
+    """All empty -> CWD floor ('.')."""
+    monkeypatch.chdir(tmp_path)
+    query = {"output_basename": "syslog"}
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config={}, loghunter_config={},
+    )
+    # CWD floor: "." -> resolves to current directory, which is tmp_path
+    assert result.parent == Path(".")
+# ── Export cascade — cloudtrail-shaped (no per-query stanza) ─────────────────
+def test_cloudtrail_cascade_backend_wins_over_global(tmp_path: Path) -> None:
+    """CloudTrail's implicit-default query has no output_dir; backend wins."""
+    query = {"output_basename": "cloudtrail"}   # synthetic implicit default
+    backend = {"export_dir": str(tmp_path / "ct_dir")}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "ct_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", extension=".json.log",
+        backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "ct_dir"
+    assert result.name == "cloudtrail_20260601_7d.json.log"
+def test_cloudtrail_cascade_falls_to_global_when_no_backend_dir(tmp_path: Path) -> None:
+    """Global tier wins for cloudtrail's implicit default → auto-segments to
+    <base>/cloudtrail/."""
+    query = {"output_basename": "cloudtrail"}
+    backend = {}   # no export_dir on backend stanza
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "global_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", extension=".json.log",
+        backend_config=backend, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "global_dir" / "cloudtrail"
+def test_cloudtrail_cascade_falls_to_cwd_when_nothing_set(
+    monkeypatch, tmp_path: Path,
+) -> None:
+    monkeypatch.chdir(tmp_path)
+    query = {"output_basename": "cloudtrail"}
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", extension=".json.log",
+        backend_config={}, loghunter_config={},
+    )
+    assert result.parent == Path(".")
+def test_stale_per_query_output_dir_does_not_participate(tmp_path: Path) -> None:
+    """A/D negative (scoped to EXPORT config): the per-query tier now reads only
+    ``export_dir``. A stale ``output_dir`` key in a query stanza is inert — the
+    cascade falls through to the backend tier, NOT the stale value. This proves
+    the squash deleted ``output_dir`` as an export-config key.
+    Scoped strictly to the exporter cascade — the unrelated analyze-report
+    ``output_dir`` kwarg (runner/cli) is a different function parameter and is
+    untouched by this change."""
+    query = {"output_dir": str(tmp_path / "stale_dir"), "output_basename": "syslog"}
+    backend = {"export_dir": str(tmp_path / "backend_dir")}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "backend_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config=backend, loghunter_config=loghunter,
+    )
+    # Backend tier wins (literal, no segment); stale output_dir is ignored.
+    assert result.parent == tmp_path / "backend_dir"
+    assert "stale_dir" not in str(result)
+def test_explicit_per_query_export_dir_does_not_segment(tmp_path: Path) -> None:
+    """Per-query ``export_dir`` is a LITERAL final dir — it wins over the global
+    base and does NOT auto-segment by source (only tier 4 segments)."""
+    query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
+    loghunter = {"export_dir": str(tmp_path / "global_dir")}
+    (tmp_path / "query_dir").mkdir()
+    result = _resolve_output_path(
+        query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
+        "default", backend_config={}, loghunter_config=loghunter,
+    )
+    assert result.parent == tmp_path / "query_dir"   # NOT .../query_dir/syslog
+def test_export_default_config_lands_at_shipped_export_dir(monkeypatch, tmp_path: Path) -> None:
+    """Zero-config sanity: cfg.load() with no user file yields the shipped
+    [loghunter].export_dir = ~/.loghunter/exports, which is reached at tier 4.
+    No shipped Splunk query — the user must define one. The cascade still works
+    against an empty query stanza (which is what CloudTrail's implicit default
+    looks like at the orchestrator's call site)."""
+    monkeypatch.setattr(cfg, "SEARCH_PATHS", [tmp_path / "missing.toml"])
+    config = cfg.load(config_file=None)
+    loghunter_cfg = config["loghunter"]
+    backend_cfg = config["export"]["splunk"]   # has no export_dir at backend level
+    query_cfg = {"output_basename": "cloudtrail"}   # synthetic — no query.* shipped
+    # Trailing slash on the shipped default communicates directory intent to be_like_water.
+    # Post live-root flip: export_dir is now the relative "exports/" that joins to
+    # root=~/.loghunter via resolve_path. Caller threads root in explicitly.
+    assert loghunter_cfg["export_dir"] == "exports/"
+    result = _resolve_output_path(
+        query_cfg, None, datetime(2026, 5, 30), datetime(2026, 5, 31),
+        "default", backend_config=backend_cfg, loghunter_config=loghunter_cfg,
+        root=effective_root(config),
+    )
+    # Global tier (4) auto-segments per source: basename "cloudtrail".
+    assert result.parent == Path("~/.loghunter/exports/cloudtrail").expanduser()
+# ── Analyze medium decision ───────────────────────────────────────────────────
+def test_analyze_bare_default_config_yields_stdout_mode(
+    monkeypatch, tmp_path: Path,
+) -> None:
+    """REGRESSION GUARD: bare `loghunter <path>` on default config (no report_dir,
+    no --out) yields output_dir=None and output_file=None — runner floors to stdout.
+    Today's behavior must be preserved exactly."""
+    monkeypatch.setattr(cfg, "SEARCH_PATHS", [tmp_path / "missing.toml"])
+    config = cfg.load(config_file=None)
+    # No report_dir set in defaults.
+    assert "report_dir" not in config["loghunter"] or not config["loghunter"].get("report_dir")
+    kwargs = cli._runner_kwargs({}, config)
+    assert kwargs["output_dir"] is None
+    assert kwargs["output_file"] is None
+def test_analyze_out_dir_with_trailing_slash_resolves_to_dir(tmp_path: Path) -> None:
+    target = tmp_path / "myreports"
+    kwargs = cli._runner_kwargs({"out": f"{target}/"}, config={"loghunter": {}})
+    assert kwargs["output_dir"] == target
+    assert kwargs["output_file"] is None
+def test_analyze_out_file_with_no_trailing_slash_and_not_exists_resolves_to_file(
+    tmp_path: Path,
+) -> None:
+    target = tmp_path / "report.html"
+    kwargs = cli._runner_kwargs({"out": str(target)}, config={"loghunter": {}})
+    assert kwargs["output_file"] == target
+    assert kwargs["output_dir"] is None
+def test_analyze_report_dir_set_no_cli_yields_path(tmp_path: Path) -> None:
+    """[loghunter].report_dir set, no --out: file mode at report_dir target."""
+    target = tmp_path / "reports"
+    target.mkdir()    # existing dir -> Step 2 DIRECTORY verdict
+    kwargs = cli._runner_kwargs(
+        {}, config={"loghunter": {"report_dir": str(target)}},
+    )
+    assert kwargs["output_dir"] == target
+    assert kwargs["output_file"] is None
+def test_analyze_cli_out_overrides_report_dir(tmp_path: Path) -> None:
+    """--out wins over [loghunter].report_dir."""
+    cli_target = tmp_path / "cli_dir"
+    config_target = tmp_path / "config_dir"
+    config_target.mkdir()
+    kwargs = cli._runner_kwargs(
+        {"out": f"{cli_target}/"},
+        config={"loghunter": {"report_dir": str(config_target)}},
+    )
+    assert kwargs["output_dir"] == cli_target
+    assert kwargs["output_file"] is None
+# ── Multi-query guard via resolver verdict ───────────────────────────────────
+def _splunk_config_with_queries(tmp_path: Path, queries: dict) -> dict:
+    return {
+        "loghunter": {"export_dir": str(tmp_path / "global_dir")},
+        "export": {"splunk": {"host": "192.0.2.20", "port": 8089, "query": queries}},
+    }
+def test_multi_query_guard_fires_on_file_verdict(monkeypatch, tmp_path: Path) -> None:
+    """--out=hunt.log (not exists) + 2 queries -> error keying on FILE verdict."""
+    from loghunter.exporters import run_export
+    config = _splunk_config_with_queries(tmp_path, {
+        "a": {"spl": "search a"},
+        "b": {"spl": "search b"},
+    })
+    target = tmp_path / "hunt.log"      # not exists -> step 3 -> FILE
+    with pytest.raises(ValueError, match="explicit file path"):
+        run_export(
+            config=config, backend="splunk", query_names=["a", "b"],
+            since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+            out=str(target), verbose=False,
+        )
+def test_multi_query_guard_silent_for_directory_verdict(monkeypatch, tmp_path: Path) -> None:
+    """--out=hunt/ (trailing slash) + 2 queries -> no error (DIRECTORY verdict).
+    We monkeypatch backend.fetch to skip the actual Splunk call.
+    """
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(tmp_path, {
+        "a": {"spl": "search a"},
+        "b": {"spl": "search b"},
+    })
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    monkeypatch.setattr(splunk_module, "write", lambda rows, outpath, verbose: (0, {"bytes": 0, "paths": [outpath]}))
+    out_dir = tmp_path / "hunt"
+    # Should not raise. Multi-query in a DIRECTORY target is fine — each
+    # auto-names.
+    run_export(
+        config=config, backend="splunk", query_names=["a", "b"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=f"{out_dir}/", verbose=False,
+    )
+def test_multi_query_guard_silent_for_single_query_with_file_target(
+    monkeypatch, tmp_path: Path,
+) -> None:
+    """--out=hunt.log (FILE verdict) + 1 query -> no error (gate doesn't fire)."""
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    captured: dict = {}
+    def _capture_write(rows, outpath, verbose):
+        captured["outpath"] = outpath
+        return 0, {"bytes": 0, "paths": [outpath]}
+    monkeypatch.setattr(splunk_module, "write", _capture_write)
+    target = tmp_path / "single.log"
+    run_export(
+        config=config, backend="splunk", query_names=["a"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=str(target), verbose=False,
+    )
+    assert captured["outpath"] == target
+# ── File-target + CloudTrail split ───────────────────────────────────────────
+def test_cloudtrail_explicit_filename_no_split(tmp_path: Path) -> None:
+    """Bare name when output fits under the split threshold."""
+    from loghunter.exporters import cloudtrail as ct
+    events = [{"eventTime": "2026-06-01T01:00:00Z", "eventName": "x"}]
+    outpath = tmp_path / "hunt.json.log"
+    n, _ = ct.write(events, outpath, verbose=False)
+    assert n == 1
+    assert outpath.exists()
+    # No _part* files
+    siblings = sorted(p.name for p in tmp_path.iterdir())
+    assert siblings == ["hunt.json.log"]
+def test_cloudtrail_explicit_filename_splits_into_part_files(
+    tmp_path: Path, monkeypatch,
+) -> None:
+    """File target + forced split appends _partNN to the stem before all suffixes."""
+    from loghunter.exporters import cloudtrail as ct
+    monkeypatch.setattr(ct, "_PART_SPLIT_BYTES", 100)
+    events = [
+        {"eventTime": f"2026-06-01T01:00:{i:02d}Z", "eventName": "x", "i": i}
+        for i in range(20)
+    ]
+    outpath = tmp_path / "hunt.json.log"
+    ct.write(events, outpath, verbose=False)
+    # Bare name should NOT remain — first split renames it to _part01.
+    assert not outpath.exists()
+    parts = sorted(p.name for p in tmp_path.glob("hunt_part*.json.log"))
+    assert len(parts) >= 2
+    assert parts[0] == "hunt_part01.json.log"
+# ── orchestrator write-side liveness ─────────────────────────────────────────
+from tests.test_display import _FakeStream  # noqa: E402  reuse non-tty mock
+def test_orchestrator_seals_write_record_to_stderr(
+    monkeypatch, tmp_path: Path, capsys,
+) -> None:
+    """run_export wraps backend_module.write in a liveness block; the sealed
+    record lands on stderr and the existing export stdout surface is unchanged.
+    """
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    # Backend write returns a known count — no real I/O.
+    monkeypatch.setattr(splunk_module, "write", lambda rows, outpath, verbose: (1234, {"bytes": 0, "paths": [outpath]}))
+    fake = _FakeStream(tty=False)
+    monkeypatch.setattr(sys, "stderr", fake)
+    target = tmp_path / "single.log"
+    run_export(
+        config=config, backend="splunk", query_names=["a"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=str(target), verbose=False,
+    )
+    # W4: sealed write record on stderr is terse and uniquely identifies the
+    # query (does not duplicate the stdout result line).
+    assert "a: wrote 1,234 lines" in fake.output
+    # W4 stdout grammar: plain header, lowercase window, per-query running…
+    # line + result line, final summary. No boxed Backend/Query/Written rows.
+    captured = capsys.readouterr()
+    assert "loghunter export · splunk" in captured.out
+    assert "window:" in captured.out
+    assert "running a …" in captured.out
+    assert "wrote 1,234 lines" in captured.out
+    assert "done · 1 query" in captured.out
+    # Old boxed-summary surface is gone.
+    assert "Backend :" not in captured.out
+    assert "Query   :" not in captured.out
+    assert "Written :" not in captured.out
+    assert "loghunter export: running query: a" not in fake.output
+    assert "Written : 1,234 lines" not in fake.output
+def test_export_no_ansi_in_output(monkeypatch, tmp_path: Path, capsys) -> None:
+    """W4: exporter narration carries NO ANSI escape codes — plain text only."""
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    monkeypatch.setattr(
+        splunk_module, "write",
+        lambda rows, outpath, verbose: (100, {"bytes": 0, "paths": [outpath]}),
+    )
+    target = tmp_path / "single.log"
+    run_export(
+        config=config, backend="splunk", query_names=["a"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=str(target), verbose=False,
+    )
+    out = capsys.readouterr().out
+    assert "\x1b[" not in out
+def test_export_multi_query_totals_line(monkeypatch, tmp_path: Path, capsys) -> None:
+    """W4: with multiple queries, the final ``done · N queries · …`` line
+    aggregates lines + bytes across them."""
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(
+        tmp_path, {"a": {"spl": "search a"}, "b": {"spl": "search b"}}
+    )
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    def _write(rows, outpath, verbose):
+        return 100, {"bytes": 4096, "paths": [outpath]}
+    monkeypatch.setattr(splunk_module, "write", _write)
+    run_export(
+        config=config, backend="splunk", query_names=["a", "b"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=None, verbose=False,
+    )
+    out = capsys.readouterr().out
+    # Aggregated totals: 2 queries · 200 lines · 8 KB-ish.
+    assert "done · 2 queries · 200 lines" in out
+def test_export_cloudtrail_split_renders_plus_K_more(
+    monkeypatch, tmp_path: Path, capsys,
+) -> None:
+    """W4 (CloudTrail split): when write_meta carries multiple paths the
+    result line reads ``→ <first_part> (+K more)`` with K = len(paths) - 1."""
+    from loghunter.exporters import run_export
+    from loghunter.exporters import cloudtrail as ct_module
+    from loghunter.exporters import splunk as splunk_module
+    config = _splunk_config_with_queries(tmp_path, {"only": {"spl": "search x"}})
+    monkeypatch.setattr(
+        splunk_module, "fetch",
+        lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
+    )
+    def _split_write(rows, outpath, verbose):
+        # Simulate a 3-part split: bytes summed across parts; paths is the
+        # ordered list the orchestrator reads.
+        parts = [
+            outpath.with_name(outpath.stem + "_part01.log"),
+            outpath.with_name(outpath.stem + "_part02.log"),
+            outpath.with_name(outpath.stem + "_part03.log"),
+        ]
+        return 7_000_000, {"bytes": 6_000_000_000, "paths": parts}
+    monkeypatch.setattr(splunk_module, "write", _split_write)
+    run_export(
+        config=config, backend="splunk", query_names=["only"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=None, verbose=False,
+    )
+    out = capsys.readouterr().out
+    assert "(+2 more)" in out
+    # Bytes are summed (~5.6 GB).
+    assert "GB" in out
+def test_export_streams_per_query_fetch_then_write(
+    monkeypatch, tmp_path: Path,
+) -> None:
+    """W4 CR fix: each query streams ``fetch → write`` in turn; the first
+    query's ``write`` MUST complete before the second query's ``fetch``
+    begins. This preserves partial-success durability and bounds peak
+    memory to one query's result set."""
+    from loghunter.exporters import run_export, splunk as splunk_module
+    config = _splunk_config_with_queries(
+        tmp_path, {"a": {"spl": "search a"}, "b": {"spl": "search b"}}
+    )
+    call_log: list[str] = []
+    def _fetch(query_config, *a, **kw):
+        # Tag every fetch with the SPL string so we can assert ordering.
+        call_log.append(f"fetch:{query_config['spl']}")
+        return ([], {"units": 0, "unit_label": "chunks"})
+    # `current_query` tracks which query's fetch most recently fired so
+    # `_write` can label itself with the right name even when both queries
+    # land in the same output directory (the shared `global_dir` shape from
+    # this test's fixture).
+    current_query: dict[str, str] = {}
+    def _fetch_tracking(query_config, *a, **kw):
+        for tag in ("a", "b"):
+            if query_config.get("spl", "").endswith(tag):
+                current_query["name"] = tag
+        return _fetch(query_config, *a, **kw)
+    def _write(rows, outpath, verbose):
+        call_log.append(f"write:{current_query.get('name', '?')}")
+        return 0, {"bytes": 0, "paths": [outpath]}
+    monkeypatch.setattr(splunk_module, "fetch", _fetch_tracking)
+    monkeypatch.setattr(splunk_module, "write", _write)
+    run_export(
+        config=config, backend="splunk", query_names=["a", "b"],
+        since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
+        out=None, verbose=False,
+    )
+    # Streaming order: fetch a, write a, fetch b, write b. The first
+    # `write` MUST happen before the second `fetch` so an export remains
+    # streaming and partial-success-durable.
+    assert call_log == [
+        "fetch:search a",
+        "write:a",
+        "fetch:search b",
+        "write:b",
+    ], call_log

tests/test_resolve_path.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""Unit coverage for ``common.paths.resolve_path`` and ``effective_root``.
+The LH_ROOT rail collapses scattered ``os.path.expanduser`` calls at the
+CLI/config seam. ``resolve_path`` is pure: no validation, no URL handling,
+no suffix sniffing — string in, string-or-None out, trailing slash preserved.
+"""
+from __future__ import annotations
+import os
+from pathlib import Path
+import pytest
+from loghunter.common.paths import effective_root, resolve_path
+# ── resolve_path: four-branch coverage ────────────────────────────────────────
+def test_resolve_path_none_returns_none() -> None:
+    assert resolve_path(None, "/some/root") is None
+def test_resolve_path_empty_string_returns_none() -> None:
+    """Glenn's note: empty config value → None. Exporter cascade still floors
+    to '.' afterward, but this helper does not."""
+    assert resolve_path("", "/some/root") is None
+def test_resolve_path_absolute_value_returned_as_is_root_ignored() -> None:
+    assert resolve_path("/var/log/zeek", "/elsewhere") == "/var/log/zeek"
+def test_resolve_path_tilde_anchored_expands_user_root_ignored(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
+) -> None:
+    fake_home = tmp_path / "home"
+    monkeypatch.setenv("HOME", str(fake_home))
+    assert resolve_path("~/x/exports", "/elsewhere") == str(fake_home / "x/exports")
+def test_resolve_path_relative_with_root_joins(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
+) -> None:
+    monkeypatch.setenv("HOME", str(tmp_path / "home"))
+    # Absolute root: literal join.
+    assert resolve_path("exports", "/lh") == os.path.join("/lh", "exports")
+def test_resolve_path_relative_with_tilde_root_expanduser_then_join(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
+) -> None:
+    fake_home = tmp_path / "home"
+    monkeypatch.setenv("HOME", str(fake_home))
+    assert resolve_path("exports", "~/lh") == os.path.join(str(fake_home / "lh"), "exports")
+def test_resolve_path_relative_with_empty_root_returns_as_is() -> None:
+    """root="" is the CLI provenance — no root prepended. Shell semantics."""
+    assert resolve_path("exports", "") == "exports"
+# ── trailing-slash preservation across branches ───────────────────────────────
+def test_resolve_path_preserves_trailing_slash_absolute() -> None:
+    assert resolve_path("/var/log/zeek/", "") == "/var/log/zeek/"
+def test_resolve_path_preserves_trailing_slash_tilde(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
+) -> None:
+    monkeypatch.setenv("HOME", str(tmp_path))
+    # Must end in a "/" so be_like_water downstream sees directory intent.
+    result = resolve_path("~/exports/", "")
+    assert result.endswith("/")
+def test_resolve_path_preserves_trailing_slash_relative_root_join() -> None:
+    result = resolve_path("exports/", "/lh")
+    assert result == os.path.join("/lh", "exports/")
+    assert result.endswith("/")
+# ── effective_root precedence: env > config > "" ──────────────────────────────
+def test_effective_root_env_wins_over_config(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("LOGHUNTER_ROOT", "/from-env")
+    config = {"loghunter": {"root": "/from-config"}}
+    assert effective_root(config) == "/from-env"
+def test_effective_root_falls_back_to_config(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
+    config = {"loghunter": {"root": "/from-config"}}
+    assert effective_root(config) == "/from-config"
+def test_effective_root_empty_when_neither_set(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
+    config = {"loghunter": {}}
+    assert effective_root(config) == ""
+def test_effective_root_empty_when_config_root_empty(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Empty config root reads as 'no root' — env fallback applies."""
+    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
+    config = {"loghunter": {"root": ""}}
+    assert effective_root(config) == ""