loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,575 @@
1
+ """End-to-end tests for the output-path cascade across analyze and export.
2
+
3
+ Five-tier export cascade (most-specific wins):
4
+ 1. --out (CLI)
5
+ 2. query["export_dir"] (per-query — finest grain)
6
+ 3. backend["export_dir"] ([export.cloudtrail].export_dir, [export.splunk].export_dir)
7
+ 4. loghunter["export_dir"] (global default — ships ~/.loghunter/exports;
8
+ auto-segments per source into <base>/<source>/)
9
+ 5. "." (CWD floor)
10
+
11
+ Analyze medium: stdout default; --out OR [loghunter].report_dir opts into file.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import sys
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+
20
+ import pytest
21
+
22
+ from loghunter import cli
23
+ from loghunter.common import config as cfg
24
+ from loghunter.common.paths import effective_root
25
+ from loghunter.exporters import _resolve_output_path
26
+
27
+
28
+ # ── Export cascade — splunk-shaped (with queries) ─────────────────────────────
29
+
30
+
31
+ def test_export_tier1_cli_wins_over_all(tmp_path: Path) -> None:
32
+ """--out beats per-query, backend, and global."""
33
+ cli_dir = tmp_path / "cli_dir"
34
+ query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
35
+ backend = {"export_dir": str(tmp_path / "backend_dir")}
36
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
37
+ result = _resolve_output_path(
38
+ query, f"{cli_dir}/", datetime(2026, 6, 1), datetime(2026, 6, 8),
39
+ "default", backend_config=backend, loghunter_config=loghunter,
40
+ )
41
+ assert result.parent == cli_dir
42
+ assert result.name == "syslog_20260601_7d.log"
43
+
44
+
45
+ def test_export_tier2_per_query_wins_over_backend_and_global(tmp_path: Path) -> None:
46
+ """No CLI; per-query export_dir beats backend export_dir and global export_dir."""
47
+ query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
48
+ backend = {"export_dir": str(tmp_path / "backend_dir")}
49
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
50
+ (tmp_path / "query_dir").mkdir() # ensure existing dir verdict
51
+ result = _resolve_output_path(
52
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
53
+ "default", backend_config=backend, loghunter_config=loghunter,
54
+ )
55
+ assert result.parent == tmp_path / "query_dir"
56
+
57
+
58
+ def test_export_tier3_backend_wins_over_global(tmp_path: Path) -> None:
59
+ """No CLI/per-query; backend export_dir beats global export_dir."""
60
+ query = {"output_basename": "syslog"} # no per-query export_dir
61
+ backend = {"export_dir": str(tmp_path / "backend_dir")}
62
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
63
+ (tmp_path / "backend_dir").mkdir()
64
+ result = _resolve_output_path(
65
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
66
+ "default", backend_config=backend, loghunter_config=loghunter,
67
+ )
68
+ assert result.parent == tmp_path / "backend_dir"
69
+
70
+
71
+ def test_export_tier4_global_wins_when_only_loghunter_set(tmp_path: Path) -> None:
72
+ """No CLI/per-query/backend; global export_dir wins AND auto-segments by
73
+ source: the global base is <base>/<basename>/, basename "syslog"."""
74
+ query = {"output_basename": "syslog"}
75
+ backend = {}
76
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
77
+ (tmp_path / "global_dir").mkdir()
78
+ result = _resolve_output_path(
79
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
80
+ "default", backend_config=backend, loghunter_config=loghunter,
81
+ )
82
+ assert result.parent == tmp_path / "global_dir" / "syslog"
83
+
84
+
85
+ def test_export_tier5_cwd_floor_when_nothing_set(monkeypatch, tmp_path: Path) -> None:
86
+ """All empty -> CWD floor ('.')."""
87
+ monkeypatch.chdir(tmp_path)
88
+ query = {"output_basename": "syslog"}
89
+ result = _resolve_output_path(
90
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
91
+ "default", backend_config={}, loghunter_config={},
92
+ )
93
+ # CWD floor: the parent stays the relative "." (not resolved); after chdir that is tmp_path
94
+ assert result.parent == Path(".")
95
+
96
+
97
+ # ── Export cascade — cloudtrail-shaped (no per-query stanza) ─────────────────
98
+
99
+
100
+ def test_cloudtrail_cascade_backend_wins_over_global(tmp_path: Path) -> None:
101
+ """CloudTrail's implicit-default query has no export_dir; backend wins."""
102
+ query = {"output_basename": "cloudtrail"} # synthetic implicit default
103
+ backend = {"export_dir": str(tmp_path / "ct_dir")}
104
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
105
+ (tmp_path / "ct_dir").mkdir()
106
+ result = _resolve_output_path(
107
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
108
+ "default", extension=".json.log",
109
+ backend_config=backend, loghunter_config=loghunter,
110
+ )
111
+ assert result.parent == tmp_path / "ct_dir"
112
+ assert result.name == "cloudtrail_20260601_7d.json.log"
113
+
114
+
115
+ def test_cloudtrail_cascade_falls_to_global_when_no_backend_dir(tmp_path: Path) -> None:
116
+ """Global tier wins for cloudtrail's implicit default → auto-segments to
117
+ <base>/cloudtrail/."""
118
+ query = {"output_basename": "cloudtrail"}
119
+ backend = {} # no export_dir on backend stanza
120
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
121
+ (tmp_path / "global_dir").mkdir()
122
+ result = _resolve_output_path(
123
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
124
+ "default", extension=".json.log",
125
+ backend_config=backend, loghunter_config=loghunter,
126
+ )
127
+ assert result.parent == tmp_path / "global_dir" / "cloudtrail"
128
+
129
+
130
+ def test_cloudtrail_cascade_falls_to_cwd_when_nothing_set(
131
+ monkeypatch, tmp_path: Path,
132
+ ) -> None:
133
+ monkeypatch.chdir(tmp_path)
134
+ query = {"output_basename": "cloudtrail"}
135
+ result = _resolve_output_path(
136
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
137
+ "default", extension=".json.log",
138
+ backend_config={}, loghunter_config={},
139
+ )
140
+ assert result.parent == Path(".")
141
+
142
+
143
+ def test_stale_per_query_output_dir_does_not_participate(tmp_path: Path) -> None:
144
+ """A/D negative (scoped to EXPORT config): the per-query tier now reads only
145
+ ``export_dir``. A stale ``output_dir`` key in a query stanza is inert — the
146
+ cascade falls through to the backend tier, NOT the stale value. This proves
147
+ the squash deleted ``output_dir`` as an export-config key.
148
+
149
+ Scoped strictly to the exporter cascade — the unrelated analyze-report
150
+ ``output_dir`` kwarg (runner/cli) is a different function parameter and is
151
+ untouched by this change."""
152
+ query = {"output_dir": str(tmp_path / "stale_dir"), "output_basename": "syslog"}
153
+ backend = {"export_dir": str(tmp_path / "backend_dir")}
154
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
155
+ (tmp_path / "backend_dir").mkdir()
156
+ result = _resolve_output_path(
157
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
158
+ "default", backend_config=backend, loghunter_config=loghunter,
159
+ )
160
+ # Backend tier wins (literal, no segment); stale output_dir is ignored.
161
+ assert result.parent == tmp_path / "backend_dir"
162
+ assert "stale_dir" not in str(result)
163
+
164
+
165
+ def test_explicit_per_query_export_dir_does_not_segment(tmp_path: Path) -> None:
166
+ """Per-query ``export_dir`` is a LITERAL final dir — it wins over the global
167
+ base and does NOT auto-segment by source (only tier 4 segments)."""
168
+ query = {"export_dir": str(tmp_path / "query_dir"), "output_basename": "syslog"}
169
+ loghunter = {"export_dir": str(tmp_path / "global_dir")}
170
+ (tmp_path / "query_dir").mkdir()
171
+ result = _resolve_output_path(
172
+ query, None, datetime(2026, 6, 1), datetime(2026, 6, 8),
173
+ "default", backend_config={}, loghunter_config=loghunter,
174
+ )
175
+ assert result.parent == tmp_path / "query_dir" # NOT .../query_dir/syslog
176
+
177
+
178
+ def test_export_default_config_lands_at_shipped_export_dir(monkeypatch, tmp_path: Path) -> None:
179
+ """Zero-config sanity: cfg.load() with no user file yields the shipped
180
+ [loghunter].export_dir = "exports/" (joined to root → ~/.loghunter/exports), which is reached at tier 4.
181
+
182
+ No shipped Splunk query — the user must define one. The cascade still works
183
+ against an empty query stanza (which is what CloudTrail's implicit default
184
+ looks like at the orchestrator's call site)."""
185
+ monkeypatch.setattr(cfg, "SEARCH_PATHS", [tmp_path / "missing.toml"])
186
+ config = cfg.load(config_file=None)
187
+ loghunter_cfg = config["loghunter"]
188
+ backend_cfg = config["export"]["splunk"] # has no export_dir at backend level
189
+ query_cfg = {"output_basename": "cloudtrail"} # synthetic — no query.* shipped
190
+ # Trailing slash on the shipped default communicates directory intent to be_like_water.
191
+ # Post live-root flip: export_dir is now the relative "exports/" that joins to
192
+ # root=~/.loghunter via resolve_path. Caller threads root in explicitly.
193
+ assert loghunter_cfg["export_dir"] == "exports/"
194
+ result = _resolve_output_path(
195
+ query_cfg, None, datetime(2026, 5, 30), datetime(2026, 5, 31),
196
+ "default", backend_config=backend_cfg, loghunter_config=loghunter_cfg,
197
+ root=effective_root(config),
198
+ )
199
+ # Global tier (4) auto-segments per source: basename "cloudtrail".
200
+ assert result.parent == Path("~/.loghunter/exports/cloudtrail").expanduser()
201
+
202
+
203
+ # ── Analyze medium decision ───────────────────────────────────────────────────
204
+
205
+
206
+ def test_analyze_bare_default_config_yields_stdout_mode(
207
+ monkeypatch, tmp_path: Path,
208
+ ) -> None:
209
+ """REGRESSION GUARD: bare `loghunter <path>` on default config (no report_dir,
210
+ no --out) yields output_dir=None and output_file=None — runner floors to stdout.
211
+ Today's behavior must be preserved exactly."""
212
+ monkeypatch.setattr(cfg, "SEARCH_PATHS", [tmp_path / "missing.toml"])
213
+ config = cfg.load(config_file=None)
214
+ # No report_dir set in defaults.
215
+ assert "report_dir" not in config["loghunter"] or not config["loghunter"].get("report_dir")
216
+ kwargs = cli._runner_kwargs({}, config)
217
+ assert kwargs["output_dir"] is None
218
+ assert kwargs["output_file"] is None
219
+
220
+
221
+ def test_analyze_out_dir_with_trailing_slash_resolves_to_dir(tmp_path: Path) -> None:
222
+ target = tmp_path / "myreports"
223
+ kwargs = cli._runner_kwargs({"out": f"{target}/"}, config={"loghunter": {}})
224
+ assert kwargs["output_dir"] == target
225
+ assert kwargs["output_file"] is None
226
+
227
+
228
+ def test_analyze_out_file_with_no_trailing_slash_and_not_exists_resolves_to_file(
229
+ tmp_path: Path,
230
+ ) -> None:
231
+ target = tmp_path / "report.html"
232
+ kwargs = cli._runner_kwargs({"out": str(target)}, config={"loghunter": {}})
233
+ assert kwargs["output_file"] == target
234
+ assert kwargs["output_dir"] is None
235
+
236
+
237
+ def test_analyze_report_dir_set_no_cli_yields_path(tmp_path: Path) -> None:
238
+ """[loghunter].report_dir set, no --out: file mode at report_dir target."""
239
+ target = tmp_path / "reports"
240
+ target.mkdir() # existing dir -> Step 2 DIRECTORY verdict
241
+ kwargs = cli._runner_kwargs(
242
+ {}, config={"loghunter": {"report_dir": str(target)}},
243
+ )
244
+ assert kwargs["output_dir"] == target
245
+ assert kwargs["output_file"] is None
246
+
247
+
248
+ def test_analyze_cli_out_overrides_report_dir(tmp_path: Path) -> None:
249
+ """--out wins over [loghunter].report_dir."""
250
+ cli_target = tmp_path / "cli_dir"
251
+ config_target = tmp_path / "config_dir"
252
+ config_target.mkdir()
253
+ kwargs = cli._runner_kwargs(
254
+ {"out": f"{cli_target}/"},
255
+ config={"loghunter": {"report_dir": str(config_target)}},
256
+ )
257
+ assert kwargs["output_dir"] == cli_target
258
+ assert kwargs["output_file"] is None
259
+
260
+
261
+ # ── Multi-query guard via resolver verdict ───────────────────────────────────
262
+
263
+
264
+ def _splunk_config_with_queries(tmp_path: Path, queries: dict) -> dict:
265
+ return {
266
+ "loghunter": {"export_dir": str(tmp_path / "global_dir")},
267
+ "export": {"splunk": {"host": "192.0.2.20", "port": 8089, "query": queries}},
268
+ }
269
+
270
+
271
+ def test_multi_query_guard_fires_on_file_verdict(monkeypatch, tmp_path: Path) -> None:
272
+ """--out=hunt.log (not exists) + 2 queries -> error keying on FILE verdict."""
273
+ from loghunter.exporters import run_export
274
+
275
+ config = _splunk_config_with_queries(tmp_path, {
276
+ "a": {"spl": "search a"},
277
+ "b": {"spl": "search b"},
278
+ })
279
+ target = tmp_path / "hunt.log" # not exists -> step 3 -> FILE
280
+ with pytest.raises(ValueError, match="explicit file path"):
281
+ run_export(
282
+ config=config, backend="splunk", query_names=["a", "b"],
283
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
284
+ out=str(target), verbose=False,
285
+ )
286
+
287
+
288
+ def test_multi_query_guard_silent_for_directory_verdict(monkeypatch, tmp_path: Path) -> None:
289
+ """--out=hunt/ (trailing slash) + 2 queries -> no error (DIRECTORY verdict).
290
+
291
+ We monkeypatch backend.fetch to skip the actual Splunk call.
292
+ """
293
+ from loghunter.exporters import run_export, splunk as splunk_module
294
+
295
+ config = _splunk_config_with_queries(tmp_path, {
296
+ "a": {"spl": "search a"},
297
+ "b": {"spl": "search b"},
298
+ })
299
+ monkeypatch.setattr(
300
+ splunk_module, "fetch",
301
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
302
+ )
303
+ monkeypatch.setattr(splunk_module, "write", lambda rows, outpath, verbose: (0, {"bytes": 0, "paths": [outpath]}))
304
+
305
+ out_dir = tmp_path / "hunt"
306
+ # Should not raise. Multi-query in a DIRECTORY target is fine — each
307
+ # auto-names.
308
+ run_export(
309
+ config=config, backend="splunk", query_names=["a", "b"],
310
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
311
+ out=f"{out_dir}/", verbose=False,
312
+ )
313
+
314
+
315
+ def test_multi_query_guard_silent_for_single_query_with_file_target(
316
+ monkeypatch, tmp_path: Path,
317
+ ) -> None:
318
+ """--out=hunt.log (FILE verdict) + 1 query -> no error (gate doesn't fire)."""
319
+ from loghunter.exporters import run_export, splunk as splunk_module
320
+
321
+ config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
322
+ monkeypatch.setattr(
323
+ splunk_module, "fetch",
324
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
325
+ )
326
+ captured: dict = {}
327
+
328
+ def _capture_write(rows, outpath, verbose):
329
+ captured["outpath"] = outpath
330
+ return 0, {"bytes": 0, "paths": [outpath]}
331
+
332
+ monkeypatch.setattr(splunk_module, "write", _capture_write)
333
+
334
+ target = tmp_path / "single.log"
335
+ run_export(
336
+ config=config, backend="splunk", query_names=["a"],
337
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
338
+ out=str(target), verbose=False,
339
+ )
340
+ assert captured["outpath"] == target
341
+
342
+
343
+ # ── File-target + CloudTrail split ───────────────────────────────────────────
344
+
345
+
346
+ def test_cloudtrail_explicit_filename_no_split(tmp_path: Path) -> None:
347
+ """Bare name when output fits under the split threshold."""
348
+ from loghunter.exporters import cloudtrail as ct
349
+
350
+ events = [{"eventTime": "2026-06-01T01:00:00Z", "eventName": "x"}]
351
+ outpath = tmp_path / "hunt.json.log"
352
+ n, _ = ct.write(events, outpath, verbose=False)
353
+ assert n == 1
354
+ assert outpath.exists()
355
+ # No _part* files
356
+ siblings = sorted(p.name for p in tmp_path.iterdir())
357
+ assert siblings == ["hunt.json.log"]
358
+
359
+
360
+ def test_cloudtrail_explicit_filename_splits_into_part_files(
361
+ tmp_path: Path, monkeypatch,
362
+ ) -> None:
363
+ """File target + forced split appends _partNN to the stem before all suffixes."""
364
+ from loghunter.exporters import cloudtrail as ct
365
+
366
+ monkeypatch.setattr(ct, "_PART_SPLIT_BYTES", 100)
367
+ events = [
368
+ {"eventTime": f"2026-06-01T01:00:{i:02d}Z", "eventName": "x", "i": i}
369
+ for i in range(20)
370
+ ]
371
+ outpath = tmp_path / "hunt.json.log"
372
+ ct.write(events, outpath, verbose=False)
373
+ # Bare name should NOT remain — first split renames it to _part01.
374
+ assert not outpath.exists()
375
+ parts = sorted(p.name for p in tmp_path.glob("hunt_part*.json.log"))
376
+ assert len(parts) >= 2
377
+ assert parts[0] == "hunt_part01.json.log"
378
+
379
+
380
+ # ── orchestrator write-side liveness ─────────────────────────────────────────
381
+
382
+
383
+ from tests.test_display import _FakeStream # noqa: E402 reuse non-tty mock
384
+
385
+
386
+ def test_orchestrator_seals_write_record_to_stderr(
387
+ monkeypatch, tmp_path: Path, capsys,
388
+ ) -> None:
389
+ """run_export wraps backend_module.write in a liveness block; the sealed
390
+ record lands on stderr and the existing export stdout surface is unchanged.
391
+ """
392
+ from loghunter.exporters import run_export, splunk as splunk_module
393
+
394
+ config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
395
+ monkeypatch.setattr(
396
+ splunk_module, "fetch",
397
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
398
+ )
399
+ # Backend write returns a known count — no real I/O.
400
+ monkeypatch.setattr(splunk_module, "write", lambda rows, outpath, verbose: (1234, {"bytes": 0, "paths": [outpath]}))
401
+
402
+ fake = _FakeStream(tty=False)
403
+ monkeypatch.setattr(sys, "stderr", fake)
404
+
405
+ target = tmp_path / "single.log"
406
+ run_export(
407
+ config=config, backend="splunk", query_names=["a"],
408
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
409
+ out=str(target), verbose=False,
410
+ )
411
+
412
+ # W4: sealed write record on stderr is terse and uniquely identifies the
413
+ # query (does not duplicate the stdout result line).
414
+ assert "a: wrote 1,234 lines" in fake.output
415
+
416
+ # W4 stdout grammar: plain header, lowercase window, per-query running…
417
+ # line + result line, final summary. No boxed Backend/Query/Written rows.
418
+ captured = capsys.readouterr()
419
+ assert "loghunter export · splunk" in captured.out
420
+ assert "window:" in captured.out
421
+ assert "running a …" in captured.out
422
+ assert "wrote 1,234 lines" in captured.out
423
+ assert "done · 1 query" in captured.out
424
+ # Old boxed-summary surface is gone.
425
+ assert "Backend :" not in captured.out
426
+ assert "Query :" not in captured.out
427
+ assert "Written :" not in captured.out
428
+ assert "loghunter export: running query: a" not in fake.output
429
+ assert "Written : 1,234 lines" not in fake.output
430
+
431
+
432
+ def test_export_no_ansi_in_output(monkeypatch, tmp_path: Path, capsys) -> None:
433
+ """W4: exporter narration carries NO ANSI escape codes — plain text only."""
434
+ from loghunter.exporters import run_export, splunk as splunk_module
435
+
436
+ config = _splunk_config_with_queries(tmp_path, {"a": {"spl": "search a"}})
437
+ monkeypatch.setattr(
438
+ splunk_module, "fetch",
439
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
440
+ )
441
+ monkeypatch.setattr(
442
+ splunk_module, "write",
443
+ lambda rows, outpath, verbose: (100, {"bytes": 0, "paths": [outpath]}),
444
+ )
445
+
446
+ target = tmp_path / "single.log"
447
+ run_export(
448
+ config=config, backend="splunk", query_names=["a"],
449
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
450
+ out=str(target), verbose=False,
451
+ )
452
+ out = capsys.readouterr().out
453
+ assert "\x1b[" not in out
454
+
455
+
456
+ def test_export_multi_query_totals_line(monkeypatch, tmp_path: Path, capsys) -> None:
457
+ """W4: with multiple queries, the final ``done · N queries · …`` line
458
+ aggregates lines + bytes across them."""
459
+ from loghunter.exporters import run_export, splunk as splunk_module
460
+
461
+ config = _splunk_config_with_queries(
462
+ tmp_path, {"a": {"spl": "search a"}, "b": {"spl": "search b"}}
463
+ )
464
+ monkeypatch.setattr(
465
+ splunk_module, "fetch",
466
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
467
+ )
468
+
469
+ def _write(rows, outpath, verbose):
470
+ return 100, {"bytes": 4096, "paths": [outpath]}
471
+
472
+ monkeypatch.setattr(splunk_module, "write", _write)
473
+
474
+ run_export(
475
+ config=config, backend="splunk", query_names=["a", "b"],
476
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
477
+ out=None, verbose=False,
478
+ )
479
+ out = capsys.readouterr().out
480
+ # Aggregated totals: 2 queries · 200 lines · 8 KB-ish.
481
+ assert "done · 2 queries · 200 lines" in out
482
+
483
+
484
+ def test_export_cloudtrail_split_renders_plus_K_more(
485
+ monkeypatch, tmp_path: Path, capsys,
486
+ ) -> None:
487
+ """W4 (CloudTrail split): when write_meta carries multiple paths the
488
+ result line reads ``→ <first_part> (+K more)`` with K = len(paths) - 1."""
489
+ from loghunter.exporters import run_export
490
+ from loghunter.exporters import cloudtrail as ct_module
491
+ from loghunter.exporters import splunk as splunk_module
492
+
493
+ config = _splunk_config_with_queries(tmp_path, {"only": {"spl": "search x"}})
494
+ monkeypatch.setattr(
495
+ splunk_module, "fetch",
496
+ lambda *a, **kw: ([], {"units": 0, "unit_label": "chunks"}),
497
+ )
498
+
499
+ def _split_write(rows, outpath, verbose):
500
+ # Simulate a 3-part split: bytes summed across parts; paths is the
501
+ # ordered list the orchestrator reads.
502
+ parts = [
503
+ outpath.with_name(outpath.stem + "_part01.log"),
504
+ outpath.with_name(outpath.stem + "_part02.log"),
505
+ outpath.with_name(outpath.stem + "_part03.log"),
506
+ ]
507
+ return 7_000_000, {"bytes": 6_000_000_000, "paths": parts}
508
+
509
+ monkeypatch.setattr(splunk_module, "write", _split_write)
510
+
511
+ run_export(
512
+ config=config, backend="splunk", query_names=["only"],
513
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
514
+ out=None, verbose=False,
515
+ )
516
+ out = capsys.readouterr().out
517
+ assert "(+2 more)" in out
518
+ # Bytes are summed (~5.6 GB).
519
+ assert "GB" in out
520
+
521
+
522
+ def test_export_streams_per_query_fetch_then_write(
523
+ monkeypatch, tmp_path: Path,
524
+ ) -> None:
525
+ """W4 CR fix: each query streams ``fetch → write`` in turn; the first
526
+ query's ``write`` MUST complete before the second query's ``fetch``
527
+ begins. This preserves partial-success durability and bounds peak
528
+ memory to one query's result set."""
529
+ from loghunter.exporters import run_export, splunk as splunk_module
530
+
531
+ config = _splunk_config_with_queries(
532
+ tmp_path, {"a": {"spl": "search a"}, "b": {"spl": "search b"}}
533
+ )
534
+
535
+ call_log: list[str] = []
536
+
537
+ def _fetch(query_config, *a, **kw):
538
+ # Tag every fetch with the SPL string so we can assert ordering.
539
+ call_log.append(f"fetch:{query_config['spl']}")
540
+ return ([], {"units": 0, "unit_label": "chunks"})
541
+
542
+ # `current_query` tracks which query's fetch most recently fired so
543
+ # `_write` can label itself with the right name even when both queries
544
+ # land in the same output directory (the shared `global_dir` shape from
545
+ # this test's fixture).
546
+ current_query: dict[str, str] = {}
547
+
548
+ def _fetch_tracking(query_config, *a, **kw):
549
+ for tag in ("a", "b"):
550
+ if query_config.get("spl", "").endswith(tag):
551
+ current_query["name"] = tag
552
+ return _fetch(query_config, *a, **kw)
553
+
554
+ def _write(rows, outpath, verbose):
555
+ call_log.append(f"write:{current_query.get('name', '?')}")
556
+ return 0, {"bytes": 0, "paths": [outpath]}
557
+
558
+ monkeypatch.setattr(splunk_module, "fetch", _fetch_tracking)
559
+ monkeypatch.setattr(splunk_module, "write", _write)
560
+
561
+ run_export(
562
+ config=config, backend="splunk", query_names=["a", "b"],
563
+ since=datetime(2026, 6, 1), until=datetime(2026, 6, 8),
564
+ out=None, verbose=False,
565
+ )
566
+
567
+ # Streaming order: fetch a, write a, fetch b, write b. The first
568
+ # `write` MUST happen before the second `fetch` so an export remains
569
+ # streaming and partial-success-durable.
570
+ assert call_log == [
571
+ "fetch:search a",
572
+ "write:a",
573
+ "fetch:search b",
574
+ "write:b",
575
+ ], call_log
@@ -0,0 +1,111 @@
1
+ """Unit coverage for ``common.paths.resolve_path`` and ``effective_root``.
2
+
3
+ The LH_ROOT rail collapses scattered ``os.path.expanduser`` calls at the
4
+ CLI/config seam. ``resolve_path`` is pure: no validation, no URL handling,
5
+ no suffix sniffing — string in, string-or-None out, trailing slash preserved.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from pathlib import Path
12
+
13
+ import pytest
14
+
15
+ from loghunter.common.paths import effective_root, resolve_path
16
+
17
+
18
+ # ── resolve_path: four-branch coverage ────────────────────────────────────────
19
+
20
+
21
+ def test_resolve_path_none_returns_none() -> None:
22
+ assert resolve_path(None, "/some/root") is None
23
+
24
+
25
+ def test_resolve_path_empty_string_returns_none() -> None:
26
+ """Glenn's note: empty config value → None. Exporter cascade still floors
27
+ to '.' afterward, but this helper does not."""
28
+ assert resolve_path("", "/some/root") is None
29
+
30
+
31
+ def test_resolve_path_absolute_value_returned_as_is_root_ignored() -> None:
32
+ assert resolve_path("/var/log/zeek", "/elsewhere") == "/var/log/zeek"
33
+
34
+
35
+ def test_resolve_path_tilde_anchored_expands_user_root_ignored(
36
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
37
+ ) -> None:
38
+ fake_home = tmp_path / "home"
39
+ monkeypatch.setenv("HOME", str(fake_home))
40
+ assert resolve_path("~/x/exports", "/elsewhere") == str(fake_home / "x/exports")
41
+
42
+
43
+ def test_resolve_path_relative_with_root_joins(
44
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
45
+ ) -> None:
46
+ monkeypatch.setenv("HOME", str(tmp_path / "home"))
47
+ # Absolute root: literal join.
48
+ assert resolve_path("exports", "/lh") == os.path.join("/lh", "exports")
49
+
50
+
51
+ def test_resolve_path_relative_with_tilde_root_expanduser_then_join(
52
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
53
+ ) -> None:
54
+ fake_home = tmp_path / "home"
55
+ monkeypatch.setenv("HOME", str(fake_home))
56
+ assert resolve_path("exports", "~/lh") == os.path.join(str(fake_home / "lh"), "exports")
57
+
58
+
59
+ def test_resolve_path_relative_with_empty_root_returns_as_is() -> None:
60
+ """root="" is the CLI provenance — no root prepended. Shell semantics."""
61
+ assert resolve_path("exports", "") == "exports"
62
+
63
+
64
+ # ── trailing-slash preservation across branches ───────────────────────────────
65
+
66
+
67
+ def test_resolve_path_preserves_trailing_slash_absolute() -> None:
68
+ assert resolve_path("/var/log/zeek/", "") == "/var/log/zeek/"
69
+
70
+
71
+ def test_resolve_path_preserves_trailing_slash_tilde(
72
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
73
+ ) -> None:
74
+ monkeypatch.setenv("HOME", str(tmp_path))
75
+ # Must end in a "/" so be_like_water downstream sees directory intent.
76
+ result = resolve_path("~/exports/", "")
77
+ assert result.endswith("/")
78
+
79
+
80
+ def test_resolve_path_preserves_trailing_slash_relative_root_join() -> None:
81
+ result = resolve_path("exports/", "/lh")
82
+ assert result == os.path.join("/lh", "exports/")
83
+ assert result.endswith("/")
84
+
85
+
86
+ # ── effective_root precedence: env > config > "" ──────────────────────────────
87
+
88
+
89
+ def test_effective_root_env_wins_over_config(monkeypatch: pytest.MonkeyPatch) -> None:
90
+ monkeypatch.setenv("LOGHUNTER_ROOT", "/from-env")
91
+ config = {"loghunter": {"root": "/from-config"}}
92
+ assert effective_root(config) == "/from-env"
93
+
94
+
95
+ def test_effective_root_falls_back_to_config(monkeypatch: pytest.MonkeyPatch) -> None:
96
+ monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
97
+ config = {"loghunter": {"root": "/from-config"}}
98
+ assert effective_root(config) == "/from-config"
99
+
100
+
101
+ def test_effective_root_empty_when_neither_set(monkeypatch: pytest.MonkeyPatch) -> None:
102
+ monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
103
+ config = {"loghunter": {}}
104
+ assert effective_root(config) == ""
105
+
106
+
107
+ def test_effective_root_empty_when_config_root_empty(monkeypatch: pytest.MonkeyPatch) -> None:
108
+ """Empty config root reads as 'no root' — with no env override, the empty-string floor applies."""
109
+ monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
110
+ config = {"loghunter": {"root": ""}}
111
+ assert effective_root(config) == ""