loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,164 @@
1
+ """Drift tripwire — keep ``config_example.toml`` honest to runtime defaults.
2
+
3
+ Two assertions, structurally independent:
4
+
5
+ (a) ACTIVE-KEY agreement. Every UNCOMMENTED key under [loghunter], [allowlist],
6
+ and [export.*] in the shipped example MUST equal the corresponding
7
+ _DEFAULTS value. One-way: _DEFAULTS may carry extra keys the example
8
+ doesn't show (e.g. splunk username/password).
9
+
10
+ (b) ENGINE-ROOM honesty. The commented [detectors.*] block at the end of the
11
+ example IS user-facing documentation of detector defaults. Every shown
12
+ `# key = value` line MUST match the corresponding DEFAULT_CONFIG entry.
13
+ The shown set MAY be a SUBSET (deliberately omitted internals) — but it
14
+ must NEVER show an absent key or a wrong value (the bug that landed
15
+ `duration.min_duration_seconds = 300` in the prior shape).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import tomllib
22
+ from pathlib import Path
23
+
24
+ import pytest
25
+
26
+ from loghunter.common import config as cfg
27
+ from loghunter.detectors import aws, beacon, dns, duration, scan, syslog
28
+
29
+
30
# Module-level pytest marker: applies `real_defaults` to every test in this file.
# NOTE(review): the marker's effect is defined outside this file (presumably in
# conftest.py, to keep the real runtime defaults in place) — confirm.
pytestmark = pytest.mark.real_defaults
31
+
32
+
33
# Resolve the shipped example relative to this test file (tests/ sits next to
# the loghunter/ package) so the tripwire works from any working directory,
# not only when pytest is launched from the repository root.
EXAMPLE_PATH = (
    Path(__file__).resolve().parent.parent
    / "loghunter"
    / "data"
    / "config_example.toml"
)
# Literal banner line that separates the active config body from the
# commented-out detector ("engine room") section of the example file.
ENGINE_ROOM_BANNER = "# Detector tuning — the engine room."
35
+
36
+
37
+ # ── (a) ACTIVE-KEY agreement ──────────────────────────────────────────────────
38
+
39
+
40
def _active_part(text: str) -> str:
    """Return the active config body: everything in *text* that precedes the
    engine-room banner. Fails the calling test when the banner is absent."""
    head, banner, _rest = text.partition(ENGINE_ROOM_BANNER)
    # `partition` yields an empty separator when the banner was not found.
    assert banner, "engine-room banner missing — has the example been retitled?"
    return head
45
+
46
+
47
def test_example_active_keys_match_defaults() -> None:
    """Assertion (a): every uncommented key in the example equals _DEFAULTS.

    Parses the part of the example above the engine-room banner as TOML and
    checks each top-level section against ``cfg._DEFAULTS``. A section the
    defaults do not know is a failure, not something to skip.
    """
    example_text = EXAMPLE_PATH.read_text(encoding="utf-8")
    active = tomllib.loads(_active_part(example_text))
    known = cfg._DEFAULTS

    for section in active:
        assert section in known, (
            f"example carries unknown top-level section [{section}] — defaults: "
            f"{sorted(known)}"
        )
        # One-way comparison: the example may show fewer keys than _DEFAULTS.
        _assert_subset(active[section], known[section], section)
59
+
60
+
61
+ def _assert_subset(shown: dict, defaults: dict, path: str) -> None:
62
+ """Every key in `shown` must match `defaults[key]` at the same path."""
63
+ for key, val in shown.items():
64
+ assert key in defaults, f"[{path}].{key} is not in _DEFAULTS — drift"
65
+ if isinstance(val, dict):
66
+ assert isinstance(defaults[key], dict), (
67
+ f"[{path}].{key}: example shows a table but _DEFAULTS has scalar"
68
+ )
69
+ _assert_subset(val, defaults[key], f"{path}.{key}")
70
+ else:
71
+ assert val == defaults[key], (
72
+ f"[{path}].{key}: example={val!r} vs _DEFAULTS={defaults[key]!r}"
73
+ )
74
+
75
+
76
+ # ── (b) ENGINE-ROOM honesty ───────────────────────────────────────────────────
77
+
78
+
79
# Maps each [detectors.<name>] table the example may show to the DEFAULT_CONFIG
# of the detector module that owns it.
_DETECTOR_DEFAULTS = {
    name: module.DEFAULT_CONFIG
    for name, module in (
        ("beacon", beacon),
        ("scan", scan),
        ("duration", duration),
        ("dns", dns),
        ("syslog", syslog),
        ("aws", aws),
    )
}
87
+
88
+
89
def _engine_room_part(text: str) -> str:
    """Slice everything FROM the engine-room banner onward — the commented
    detector-defaults block. The banner line itself is included.

    Fails the calling test with an explanatory message when the banner is
    absent, matching `_active_part`.
    """
    idx = text.find(ENGINE_ROOM_BANNER)
    assert idx >= 0, "engine-room banner missing — has the example been retitled?"
    return text[idx:]
93
+
94
+
95
def _uncomment_engine_room(block: str) -> str:
    """Turn the commented-out engine-room block into parseable TOML text.

    Lines that do not start with ``#`` (blank lines included) pass through
    unchanged. A ``#``-prefixed line is kept — minus its leading ``#`` and any
    inline trailing comment — only when the remainder looks like config: a
    table header (starts with ``[``) or a ``key = value`` form. Every other
    commented line is narrative and is dropped, so prose is never injected
    into the TOML namespace.
    """
    kept: list[str] = []
    for line in block.splitlines():
        candidate = line.lstrip()
        if candidate[:1] != "#":
            # Already-active or blank line: keep verbatim.
            kept.append(line)
        else:
            payload = candidate[1:].lstrip()  # text after the leading "#"
            if payload.startswith("[") or _looks_like_kv(payload):
                kept.append(_strip_inline_trailing_comment(payload))
    return "\n".join(kept)
115
+
116
+
117
+ _KV_RE = re.compile(r'^\s*[A-Za-z_][A-Za-z0-9_]*\s*=\s*')
118
+
119
+
120
+ def _looks_like_kv(body: str) -> bool:
121
+ return bool(_KV_RE.match(body))
122
+
123
+
124
+ def _strip_inline_trailing_comment(body: str) -> str:
125
+ """Strip a trailing `# narrative` from a KV line (no quoted strings with #
126
+ in them in the engine-room block, so this is safe)."""
127
+ if "[" in body and "]" in body and "=" not in body:
128
+ return body # table header
129
+ if "#" in body:
130
+ return body.split("#", 1)[0].rstrip()
131
+ return body
132
+
133
+
134
def test_engine_room_keys_match_detector_defaults() -> None:
    """Assertion (b): every commented `# key = value` shown in the engine room
    matches the owning detector's DEFAULT_CONFIG.

    The engine-room block is un-commented, parsed as TOML, and each
    ``[detectors.<name>]`` table is compared against ``_DETECTOR_DEFAULTS``.
    """
    commented = _engine_room_part(EXAMPLE_PATH.read_text(encoding="utf-8"))
    shown_detectors = tomllib.loads(_uncomment_engine_room(commented)).get("detectors", {})
    # An empty result means the documentation block vanished or stopped parsing.
    assert shown_detectors, "engine room shows no [detectors.*] blocks — has the example been gutted?"

    for name in shown_detectors:
        assert name in _DETECTOR_DEFAULTS, (
            f"engine room shows [detectors.{name}] but no DEFAULT_CONFIG known"
        )
        _assert_engine_subset(
            shown_detectors[name], _DETECTOR_DEFAULTS[name], f"detectors.{name}"
        )
147
+
148
+
149
+ def _assert_engine_subset(shown: dict, defaults: dict, path: str) -> None:
150
+ for key, val in shown.items():
151
+ if isinstance(val, dict):
152
+ assert key in defaults and isinstance(defaults[key], dict), (
153
+ f"[{path}].{key}: engine room shows a nested table but "
154
+ f"DEFAULT_CONFIG has no matching dict"
155
+ )
156
+ _assert_engine_subset(val, defaults[key], f"{path}.{key}")
157
+ else:
158
+ assert key in defaults, (
159
+ f"[{path}].{key}: engine room shows a key absent from "
160
+ f"DEFAULT_CONFIG (phantom key — exactly the {path} bug class)"
161
+ )
162
+ assert val == defaults[key], (
163
+ f"[{path}].{key}: example={val!r} vs DEFAULT_CONFIG={defaults[key]!r}"
164
+ )