loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,212 @@
1
+ """CLI-vs-config provenance for the LH_ROOT path rail.
2
+
3
+ CLI-supplied paths get root="" (just ~-expansion, shell semantics). Config
4
+ paths flow through ``resolve_path(value, root)`` so LH_ROOT applies. The
5
+ provenance split must hold at every resolve site: ``_runner_kwargs``,
6
+ ``_digest_runner_kwargs``, ``_build_digest_fanout_stream``, and the exporter
7
+ output cascade.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import pytest
17
+
18
+ from loghunter import cli
19
+ from loghunter.common import config as cfg
20
+ from loghunter.exporters import _resolve_output_path
21
+
22
+
23
+ # ── analyze: source-dir LH_ROOT rail ────────────────────────────────────────
24
+ #
25
+ # The four CLI-layer `_runner_kwargs` source-dir tests were DELETED. After
26
+ # the single-ownership refactor, `_runner_kwargs` does NOT resolve source
27
+ # dirs — it passes raw strings (or None) to `runner.run`, which calls
28
+ # `common.sources.resolve_sources`. The provenance rail moved with it:
29
+ #
30
+ # relative-config + LH_ROOT → tests/test_sources.py:
31
+ # test_resolve_sources_config_relative_uses_lh_root
32
+ # AND test_runner_run_applies_root_to_config_source_dirs (this file, below)
33
+ # absolute-config ignores root → covered by resolve_path's own absolute branch
34
+ # (exercised indirectly by every resolver test)
35
+ # CLI override never gets root → tests/test_sources.py:
36
+ # test_resolve_sources_relative_override_ignores_lh_root
37
+ # env LH_ROOT wins over config → tests/test_sources.py:
38
+ # test_resolve_sources_env_lh_root_wins_over_config
39
+
40
+
41
+ # ── analyze: --out / report_dir ──────────────────────────────────────────────
42
+
43
+
44
def test_runner_kwargs_relative_report_dir_resolves_against_root(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A relative config ``report_dir`` comes back joined onto the config ``root``."""
    # Make sure no ambient LOGHUNTER_ROOT is set while this test runs.
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    lh_root = tmp_path / "lh"
    expected_dir = lh_root / "reports"
    expected_dir.mkdir(parents=True)
    loghunter_section = {"root": str(lh_root), "report_dir": "reports"}

    # Empty parsed-args dict: nothing supplied on the CLI side.
    resolved = cli._runner_kwargs({}, {"loghunter": loghunter_section})

    assert resolved["output_dir"] == expected_dir
53
+
54
+
55
def test_runner_kwargs_cli_out_relative_ignores_root(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A relative ``--out`` directory is returned as-is, not joined onto the config root."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    # The root is configured on purpose; the assertion proves it was not applied.
    unused_root = tmp_path / "should-not-apply"
    cli_args = {"out": "rel-cli/"}

    resolved = cli._runner_kwargs(cli_args, {"loghunter": {"root": str(unused_root)}})

    assert resolved["output_dir"] == Path("rel-cli")
63
+
64
+
65
+ # ── digest: _build_digest_fanout_stream report_dir vs --out ──────────────────
66
+
67
+
68
def test_digest_fanout_relative_report_dir_resolves_against_root(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Glenn's amendment: the fan-out stream MUST honor LH_ROOT for
    config-supplied report_dir, and ~-only-expansion for CLI --out.

    This test covers the config half: with a config ``root`` and a relative
    ``report_dir``, the file opened by the stream must live somewhere under
    ``root/reports``.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)
    root = tmp_path / "lh"
    (root / "reports").mkdir(parents=True)
    config = {"loghunter": {"root": str(root), "report_dir": "reports"}}
    get_stream, close_stream = cli._build_digest_fanout_stream({}, config)
    try:
        # Open inside the try (matching the CLI --out fan-out test in this
        # file) so close_stream() still runs if opening the stream fails.
        fh = get_stream()
        # Stream is open; the file it opened lives under root/reports.
        assert root / "reports" in Path(fh.name).parents
    finally:
        close_stream()
84
+
85
+
86
def test_digest_fanout_cli_out_relative_ignores_root(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A CLI ``--out`` file path is opened exactly where given, whatever the config root.

    The ``--out`` value used here is a path under ``tmp_path``; the config root
    points elsewhere and must not affect the opened file's name.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    target = tmp_path / "rel.txt"
    cli_args: dict[str, Any] = {"out": str(target)}
    settings = {"loghunter": {"root": str(tmp_path / "should-not-apply")}}

    open_stream, shut_stream = cli._build_digest_fanout_stream(cli_args, settings)
    try:
        handle = open_stream()
        assert Path(handle.name) == target
    finally:
        shut_stream()
98
+
99
+
100
+ # ── exporter cascade: root applies to config tiers, "" to CLI ────────────────
101
+
102
+
103
def test_export_cascade_root_applies_to_loghunter_export_dir(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Relative ``[loghunter].export_dir`` with a root set is joined onto the root,
    then auto-segmented per source (global tier).

    Only ``root/exports`` is created up front; the ``syslog`` subdir is NOT
    pre-created — the trailing-slash directory verdict resolves it regardless.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    lh_root = tmp_path / "lh"
    exports_dir = lh_root / "exports"
    exports_dir.mkdir(parents=True)
    basename_spec = {"output_basename": "syslog"}

    # Second positional argument is None: no CLI --out was supplied.
    resolved = _resolve_output_path(
        basename_spec,
        None,
        datetime(2026, 6, 1),
        datetime(2026, 6, 2),
        "default",
        backend_config={},
        loghunter_config={"export_dir": "exports"},
        root=str(lh_root),
    )

    assert resolved.parent == exports_dir / "syslog"
119
+
120
+
121
def test_export_cascade_cli_out_ignores_root(tmp_path: Path) -> None:
    """A CLI ``--out`` directory wins over both config tiers and is not joined with root.

    The CLI value is a path under ``tmp_path`` with a trailing slash; both
    ``export_dir`` settings and ``root`` are populated and must be ignored.
    """
    cli_dir = tmp_path / "cli_out"
    cli_out_arg = f"{cli_dir}/"
    basename_spec = {"output_basename": "syslog"}

    resolved = _resolve_output_path(
        basename_spec,
        cli_out_arg,
        datetime(2026, 6, 1),
        datetime(2026, 6, 2),
        "default",
        backend_config={"export_dir": "should-not-apply"},
        loghunter_config={"export_dir": "ignored-too"},
        root="/lh",
    )

    assert resolved.parent == cli_dir
132
+
133
+
134
+ # ── empty config slots cascade to CWD floor (test lives at the end of this file) ──
135
+
136
+
137
+ # ── runner.run / run_digest — programmatic-caller config-fallback rail ──────
138
+
139
+
140
def test_runner_run_applies_root_to_config_source_dirs(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """Glenn P2: programmatic ``runner.run(config=...)`` callers get the config
    root applied to relative config-supplied source dirs, just like the CLI seam.

    The check reads the ``dry_run`` output, which prints the resolved paths.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    from loghunter import runner

    loghunter_section = {
        "root": "/tmp/lh-root",
        "zeek_dir": "zeek",
        "syslog_dir": "syslog",
        "pihole_dir": "pihole",
        "cloudtrail_dir": "cloudtrail",
    }
    runner.run(config={"loghunter": loghunter_section}, dry_run=True)

    printed = capsys.readouterr().out
    # Every configured source dir must appear rooted under /tmp/lh-root.
    for expected_path in (
        "/tmp/lh-root/zeek",
        "/tmp/lh-root/syslog",
        "/tmp/lh-root/pihole",
        "/tmp/lh-root/cloudtrail",
    ):
        assert expected_path in printed
164
+
165
+
166
def test_runner_run_digest_applies_root_to_config_source_dirs(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """``run_digest``'s per-schema config fallback honors the config root.

    The syslog branch is the one with a TWO-key fallback (syslog_dir first,
    then zeek); cloudtrail and conn each have a single-key fallback. All flow
    through resolve_path. Only the ``syslog_dir`` key is exercised here.
    """
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    from loghunter import runner

    settings = {"loghunter": {"root": "/tmp/lh-root", "syslog_dir": "syslog"}}
    runner.run_digest(config=settings, schema="syslog", dry_run=True)

    printed = capsys.readouterr().out
    assert "/tmp/lh-root/syslog" in printed
183
+
184
+
185
def test_runner_run_digest_cloudtrail_fallback_applies_root(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """A relative config ``cloudtrail_dir`` shows up rooted in ``run_digest`` dry-run output."""
    monkeypatch.delenv("LOGHUNTER_ROOT", raising=False)

    from loghunter import runner

    settings = {"loghunter": {"root": "/tmp/lh-root", "cloudtrail_dir": "ct"}}
    runner.run_digest(config=settings, schema="cloudtrail", dry_run=True)

    printed = capsys.readouterr().out
    assert "/tmp/lh-root/ct" in printed
198
+
199
+
200
def test_export_cascade_empty_config_values_still_floor_to_cwd(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Empty-string ``export_dir`` in both config tiers falls through to ``.``.

    Glenn's note: resolve_path('', root) → None, but the floor is a literal '.'.
    """
    # Run from a scratch directory.
    monkeypatch.chdir(tmp_path)

    blank_tier = {"export_dir": ""}
    resolved = _resolve_output_path(
        {"output_basename": "syslog"},
        None,
        datetime(2026, 6, 1),
        datetime(2026, 6, 2),
        "default",
        backend_config=dict(blank_tier),
        loghunter_config=dict(blank_tier),
        root="/lh",
    )

    assert resolved.parent == Path(".")