loghunter-cli 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. loghunter/__init__.py +3 -0
  2. loghunter/cli.py +1108 -0
  3. loghunter/cli_init.py +567 -0
  4. loghunter/common/__init__.py +1 -0
  5. loghunter/common/allowlist.py +436 -0
  6. loghunter/common/clustering.py +326 -0
  7. loghunter/common/config.py +221 -0
  8. loghunter/common/display.py +323 -0
  9. loghunter/common/errors.py +45 -0
  10. loghunter/common/finding.py +239 -0
  11. loghunter/common/loader/__init__.py +136 -0
  12. loghunter/common/loader/diagnostics.py +94 -0
  13. loghunter/common/loader/discovery.py +335 -0
  14. loghunter/common/loader/io.py +76 -0
  15. loghunter/common/loader/pipeline.py +1010 -0
  16. loghunter/common/loader/sniff.py +184 -0
  17. loghunter/common/loader/types.py +207 -0
  18. loghunter/common/loader/windowing.py +523 -0
  19. loghunter/common/output.py +93 -0
  20. loghunter/common/paths.py +105 -0
  21. loghunter/common/sources.py +392 -0
  22. loghunter/data/allowlist/connections.txt +50 -0
  23. loghunter/data/allowlist/domains_devices.txt +5 -0
  24. loghunter/data/allowlist/domains_homelab.txt +5 -0
  25. loghunter/data/allowlist/domains_universal.txt +125 -0
  26. loghunter/data/config_example.toml +144 -0
  27. loghunter/detectors/__init__.py +5 -0
  28. loghunter/detectors/auth.py +27 -0
  29. loghunter/detectors/aws.py +671 -0
  30. loghunter/detectors/beacon.py +258 -0
  31. loghunter/detectors/dns.py +778 -0
  32. loghunter/detectors/dnsblock.py +29 -0
  33. loghunter/detectors/duration.py +178 -0
  34. loghunter/detectors/protocol.py +26 -0
  35. loghunter/detectors/scan.py +735 -0
  36. loghunter/detectors/ssl.py +25 -0
  37. loghunter/detectors/syslog.py +266 -0
  38. loghunter/detectors/weird.py +27 -0
  39. loghunter/digest/__init__.py +43 -0
  40. loghunter/digest/_stats.py +182 -0
  41. loghunter/digest/blob.py +698 -0
  42. loghunter/digest/cloudtrail.py +341 -0
  43. loghunter/digest/conn.py +367 -0
  44. loghunter/digest/dns.py +364 -0
  45. loghunter/digest/syslog.py +269 -0
  46. loghunter/exporters/__init__.py +534 -0
  47. loghunter/exporters/cloudtrail.py +499 -0
  48. loghunter/exporters/splunk.py +222 -0
  49. loghunter/outputs/__init__.py +1 -0
  50. loghunter/outputs/allowlist.py +75 -0
  51. loghunter/outputs/csv.py +70 -0
  52. loghunter/outputs/email.py +44 -0
  53. loghunter/outputs/html.py +99 -0
  54. loghunter/outputs/json.py +77 -0
  55. loghunter/outputs/text.py +1422 -0
  56. loghunter/parsers/__init__.py +1 -0
  57. loghunter/parsers/cloudtrail.py +287 -0
  58. loghunter/parsers/dnsmasq.py +331 -0
  59. loghunter/parsers/syslog.py +150 -0
  60. loghunter/parsers/zeek.py +294 -0
  61. loghunter/parsers/zeek_tsv.py +310 -0
  62. loghunter/runner.py +1895 -0
  63. loghunter_cli-0.1.0.dev0.dist-info/METADATA +336 -0
  64. loghunter_cli-0.1.0.dev0.dist-info/RECORD +122 -0
  65. loghunter_cli-0.1.0.dev0.dist-info/WHEEL +5 -0
  66. loghunter_cli-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  67. loghunter_cli-0.1.0.dev0.dist-info/licenses/LICENSE +21 -0
  68. loghunter_cli-0.1.0.dev0.dist-info/top_level.txt +4 -0
  69. migrations/cloudtrail_parquet.py +59 -0
  70. migrations/conn_fft.py +550 -0
  71. migrations/conn_scan.py +1097 -0
  72. migrations/dns_dbscan.py +520 -0
  73. migrations/get_syslog.py +402 -0
  74. migrations/syslog_drain3.py +479 -0
  75. scratch/junk/parquet.py +59 -0
  76. tests/__init__.py +1 -0
  77. tests/_cloudtrail_fakes.py +116 -0
  78. tests/conftest.py +17 -0
  79. tests/test_allowlist_defaults_accessor.py +90 -0
  80. tests/test_architecture_spine.py +302 -0
  81. tests/test_aws_detector.py +504 -0
  82. tests/test_be_like_water.py +106 -0
  83. tests/test_cli_help.py +342 -0
  84. tests/test_cli_multi_positional.py +458 -0
  85. tests/test_cloudtrail_exporter.py +631 -0
  86. tests/test_cloudtrail_exporter_botocore.py +207 -0
  87. tests/test_cloudtrail_parser.py +393 -0
  88. tests/test_clustering.py +85 -0
  89. tests/test_clustering_interruptible.py +404 -0
  90. tests/test_config_cli.py +1006 -0
  91. tests/test_config_example_drift.py +164 -0
  92. tests/test_digest_blob.py +1237 -0
  93. tests/test_digest_cli.py +1040 -0
  94. tests/test_digest_cloudtrail.py +980 -0
  95. tests/test_digest_conn.py +1189 -0
  96. tests/test_digest_dns.py +770 -0
  97. tests/test_digest_stats.py +282 -0
  98. tests/test_digest_syslog.py +724 -0
  99. tests/test_display.py +370 -0
  100. tests/test_dns_detector.py +1010 -0
  101. tests/test_dnsmasq_parser.py +467 -0
  102. tests/test_duration_detector.py +491 -0
  103. tests/test_export_orchestrator_shape.py +153 -0
  104. tests/test_init_wizard.py +707 -0
  105. tests/test_loader.py +3639 -0
  106. tests/test_loader_package_surface.py +115 -0
  107. tests/test_loader_window_model.py +215 -0
  108. tests/test_output_path_cascade.py +575 -0
  109. tests/test_resolve_path.py +111 -0
  110. tests/test_root_provenance.py +212 -0
  111. tests/test_runner.py +2599 -0
  112. tests/test_scan_detector.py +455 -0
  113. tests/test_search_paths.py +50 -0
  114. tests/test_sniff_orchestrator.py +373 -0
  115. tests/test_sniff_recognizers.py +573 -0
  116. tests/test_source_resolution_seam.py +471 -0
  117. tests/test_sources.py +648 -0
  118. tests/test_splunk_exporter.py +351 -0
  119. tests/test_syslog_detector.py +458 -0
  120. tests/test_syslog_parser.py +582 -0
  121. tests/test_text_output.py +1225 -0
  122. tests/test_zeek_tsv_parser.py +580 -0
@@ -0,0 +1,90 @@
1
+ """Allowlist fallback paths read from the single config defaults accessor.
2
+
3
+ After the rework, ``common/allowlist.py`` no longer carries its own copy of
4
+ default paths. When config keys are absent (raw / notebook config), the
5
+ fallback comes from ``cfg.default_allowlist_paths()`` — a deep copy of
6
+ ``_DEFAULTS["allowlist"]``. All three keys are covered: ``domain_patterns``,
7
+ ``connection_rules``, and ``allowlist_dir`` (Glenn's amendment).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+ from loghunter.common import allowlist, config as cfg
17
+
18
+
19
def test_default_allowlist_paths_returns_deep_copy_of_defaults() -> None:
    """``default_allowlist_paths`` returns a copy that is independent of ``_DEFAULTS``.

    Rebinding a top-level key only proves the outer mapping was copied; a
    shallow copy would pass that check while still sharing nested containers
    with ``_DEFAULTS["allowlist"]``. The identity checks below pin the "deep"
    half of the contract without assuming the concrete type of any value.
    """
    defaults = cfg._DEFAULTS["allowlist"]
    paths = cfg.default_allowlist_paths()

    assert paths == defaults
    assert paths is not defaults, "default_allowlist_paths must return a copy"

    # Any nested mutable value must be a distinct object, not an alias of the
    # module-level default, otherwise in-place mutation would leak back.
    for key, value in paths.items():
        if isinstance(value, (list, dict, set)):
            assert value is not defaults[key], (
                f"default_allowlist_paths must return a deep copy ({key!r} is shared)"
            )

    # Original top-level check: rebinding a key must not touch the defaults.
    paths["domain_patterns"] = ["mutated"]
    assert cfg._DEFAULTS["allowlist"]["domain_patterns"] != ["mutated"], (
        "default_allowlist_paths must return a deep copy"
    )
26
+
27
+
28
def test_build_matcher_domain_patterns_fallback_uses_accessor(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Domain patterns fall back to the accessor when the config omits them.

    The accessor is replaced with a stub pointing at a temporary pattern
    file; the built matcher is then expected to contain that file's entry.
    """
    domains_file = tmp_path / "fake_domains.txt"
    domains_file.write_text("example.com\n", encoding="utf-8")

    def _stub_defaults() -> dict:
        # Fresh dict on every call, same as the accessor being replaced.
        return {
            "domain_patterns": [str(domains_file)],
            "connection_rules": [],
            "allowlist_dir": "",
        }

    monkeypatch.setattr(cfg, "default_allowlist_paths", _stub_defaults)

    # An allowlist section with no subkeys is what triggers the fallback.
    bare_config = {"allowlist": {}}
    built = allowlist.build_matcher(bare_config)

    assert "example.com" in built._domain_patterns
42
+
43
+
44
def test_build_matcher_connection_rules_fallback_uses_accessor(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Connection rules fall back to the accessor when the config omits them.

    The stubbed accessor points at a temporary rules file with a single line;
    the built matcher is expected to hold exactly one numeric rule.
    """
    rules_file = tmp_path / "fake_conn.txt"
    rules_file.write_text("192.0.2.1\n", encoding="utf-8")

    def _stub_defaults() -> dict:
        return {
            "domain_patterns": [],
            "connection_rules": [str(rules_file)],
            "allowlist_dir": "",
        }

    monkeypatch.setattr(cfg, "default_allowlist_paths", _stub_defaults)

    built = allowlist.build_matcher({"allowlist": {}})
    rule_count = len(built._numeric_rules)

    assert rule_count == 1
55
+
56
+
57
def test_build_matcher_allowlist_dir_fallback_uses_accessor(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """``allowlist_dir`` also falls back to the accessor (Glenn's amendment).

    A temporary drop-in directory with one TOML stanza is supplied through
    the stubbed accessor; the matcher is expected to load that single entry.
    """
    dropin_dir = tmp_path / "fake_allowlist.d"
    dropin_dir.mkdir()
    stanza_file = dropin_dir / "users.toml"
    stanza_file.write_text(
        '[[allowlist.entry]]\nmatch = "example.com"\ncomment = "x"\n',
        encoding="utf-8",
    )

    def _stub_defaults() -> dict:
        return {
            "domain_patterns": [],
            "connection_rules": [],
            "allowlist_dir": str(dropin_dir),
        }

    monkeypatch.setattr(cfg, "default_allowlist_paths", _stub_defaults)

    built = allowlist.build_matcher({"allowlist": {}})
    loaded = built._entries

    assert len(loaded) == 1
    assert loaded[0].match == "example.com"
78
+
79
+
80
def test_shipped_domain_files_stay_package_local_not_routed_through_root(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
) -> None:
    """Shipped domain files are package data and ignore ``LOGHUNTER_ROOT``.

    The environment variable is pointed at a directory that does not exist.
    If the shipped files were resolved relative to that root, no domain
    patterns would load at all.
    """
    bogus_root = tmp_path / "nonexistent"
    monkeypatch.setenv("LOGHUNTER_ROOT", str(bogus_root))

    # No allowlist keys are configured, so the matcher is built from defaults.
    built = allowlist.build_matcher({"allowlist": {}})
    pattern_count = len(built._domain_patterns)

    # At least one pattern must have loaded despite the bogus root.
    assert pattern_count > 0
@@ -0,0 +1,302 @@
1
+ """Focused tests for detector discovery and output plumbing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import json
7
+ import tempfile
8
+ import unittest
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from types import SimpleNamespace
12
+
13
+ import pandas as pd
14
+
15
+ from loghunter.common.allowlist import build_matcher
16
+ from loghunter.common.finding import Finding, RunSummary, Severity
17
+ from loghunter.common.output import Reporter, get_handler
18
+ from loghunter.runner import build_run_plan, discover_detectors, resolve_detect
19
+
20
+
21
def _summary() -> RunSummary:
    """Build a fixed ``RunSummary`` fixture describing one beacon-only run."""
    stamp = datetime(2026, 5, 30, tzinfo=timezone.utc)
    fields = {
        "data_window": (stamp, stamp),
        "record_counts": {"conn*.log*": 1},
        "data_size_bytes": 0,
        "detectors_run": ["beacon"],
        "detectors_skipped": {},
    }
    return RunSummary(**fields)
30
+
31
+
32
def _finding() -> Finding:
    """Build a fixed medium-severity beacon ``Finding`` fixture."""
    stamp = datetime(2026, 5, 30, tzinfo=timezone.utc)
    evidence = {"beacon_score": 0.61, "conn_count": 20}
    fields = {
        "detector": "beacon",
        "severity": Severity.MEDIUM,
        "title": "periodic flow",
        "description": "A periodic flow was observed.",
        "evidence": evidence,
        "next_steps": ["Review the source host."],
        "ts_generated": stamp,
        "data_window": (stamp, stamp),
    }
    return Finding(**fields)
44
+
45
+
46
class ArchitectureSpineTests(unittest.TestCase):
    """Small checks for the app's detector and output boundaries.

    All assertions go through ``unittest`` assertion methods so failures
    report both operands and the checks survive ``python -O``.
    """

    def test_discover_detectors_excludes_planned_stubs(self) -> None:
        """Discovery lists implemented detectors and omits the planned ones."""
        detectors = discover_detectors()

        self.assertIn("beacon", detectors)
        self.assertIn("dns", detectors)
        self.assertIn("duration", detectors)
        for planned in ("auth", "ssl", "protocol", "weird", "dnsblock"):
            # subTest reports every offending stub instead of only the first.
            with self.subTest(detector=planned):
                self.assertNotIn(planned, detectors)

    def test_resolve_detect_all_uses_available_detectors_only(self) -> None:
        """``"all"`` resolves to exactly the sorted list of available detectors."""
        available = sorted(discover_detectors())

        self.assertEqual(resolve_detect("all", available), available)
        self.assertIn("duration", resolve_detect("all", available))

    def test_reporter_delivers_to_registered_json_handler(self) -> None:
        """The registered JSON handler writes summary and findings to its stream."""
        stream = io.StringIO()
        handler_cls = get_handler("json")
        handler = handler_cls(stream=stream)

        Reporter([handler]).run([_finding()], _summary())

        payload = json.loads(stream.getvalue())
        self.assertEqual(payload["run_summary"]["detectors_run"], ["beacon"])
        self.assertEqual(payload["findings"][0]["detector"], "beacon")
        self.assertEqual(payload["findings"][0]["evidence"]["beacon_score"], 0.61)

    def test_csv_handler_collects_evidence_columns_before_writing(self) -> None:
        """The CSV header row carries flattened ``evidence.*`` column names."""
        stream = io.StringIO()
        handler_cls = get_handler("csv")
        handler = handler_cls(stream=stream)

        Reporter([handler]).run([_finding()], _summary())

        output = stream.getvalue()
        self.assertIn("evidence.beacon_score", output.splitlines()[0])
        self.assertIn("0.61", output)

    def test_connection_rules_are_local_only_by_default(self) -> None:
        """With no ``connection_rules`` configured, no numeric rules are loaded."""
        matcher = build_matcher({"allowlist": {"domain_patterns": []}})

        self.assertEqual(matcher._numeric_rules, [])

    def test_configured_connection_rule_file_still_filters_rows(self) -> None:
        """A configured flat-file rule drops the matching row for beacon."""
        with tempfile.TemporaryDirectory() as tmp:
            rules_path = Path(tmp) / "connections.txt"
            rules_path.write_text("192.0.2.10 198.51.100.20 :443/tcp\n", encoding="utf-8")
            matcher = build_matcher({
                "allowlist": {
                    "domain_patterns": [],
                    "connection_rules": [str(rules_path)],
                }
            })

            df = pd.DataFrame([
                {"src": "192.0.2.10", "dst": "198.51.100.20", "port": 443, "proto": "tcp"},
                {"src": "192.0.2.11", "dst": "203.0.113.20", "port": 443, "proto": "tcp"},
            ])

            filtered = matcher.filter_df(df, "beacon")

            self.assertEqual(len(filtered), 1)
            self.assertEqual(filtered.iloc[0]["src"], "192.0.2.11")

    def test_configured_connection_rule_file_filters_duration_rows(self) -> None:
        """Unscoped flat-file rule suppresses duration the same way it suppresses beacon.

        Locks the filter-before-analyze pass-through for duration: omission of
        scope is permission for every connection detector that groups on the
        canonical (src, dst, port, proto) tuple, not just beacon.
        """
        with tempfile.TemporaryDirectory() as tmp:
            rules_path = Path(tmp) / "connections.txt"
            rules_path.write_text("192.0.2.10 198.51.100.20 :443/tcp\n", encoding="utf-8")
            matcher = build_matcher({
                "allowlist": {
                    "domain_patterns": [],
                    "connection_rules": [str(rules_path)],
                }
            })

            df = pd.DataFrame([
                {"src": "192.0.2.10", "dst": "198.51.100.20", "port": 443, "proto": "tcp"},
                {"src": "192.0.2.11", "dst": "203.0.113.20", "port": 443, "proto": "tcp"},
            ])

            filtered = matcher.filter_df(df, "duration")

            self.assertEqual(len(filtered), 1)
            self.assertEqual(filtered.iloc[0]["src"], "192.0.2.11")

    def test_scoped_stanza_filters_duration_and_gates_unlisted_detector(self) -> None:
        """A stanza scoped to duration suppresses duration and only duration.

        Locks the scoping half of the contract: a stanza with
        ``detectors = ["duration", "beacon"]`` drops the matching row when
        called for "duration", and the same matcher leaves the row in place
        when called for a detector outside the scope.
        """
        matcher = build_matcher({
            "allowlist": {
                "domain_patterns": "",
                "entry": [
                    {
                        "match": "ip_pair",
                        "src": "192.0.2.10",
                        "dst_port": 443,
                        "comment": "Example scoped flow",
                        "detectors": ["duration", "beacon"],
                    }
                ],
            }
        })

        df = pd.DataFrame([
            {"src": "192.0.2.10", "dst": "198.51.100.20", "port": 443, "proto": "tcp"},
            {"src": "192.0.2.11", "dst": "203.0.113.20", "port": 443, "proto": "tcp"},
        ])

        filtered_duration = matcher.filter_df(df, "duration")
        self.assertEqual(len(filtered_duration), 1)
        self.assertEqual(filtered_duration.iloc[0]["src"], "192.0.2.11")

        # Same matcher, same frame, a detector outside the scope — the rule
        # must NOT fire. (filter_df routes by column shape, so a connection
        # frame still flows through _filter_numeric_df for "dns"; what we are
        # asserting is that the scope check gates suppression, not the shape.)
        filtered_dns = matcher.filter_df(df, "dns")
        self.assertEqual(len(filtered_dns), 2)

    def test_connection_rule_path_can_be_operator_friendly_string(self) -> None:
        """``connection_rules`` may be a single path string instead of a list."""
        with tempfile.TemporaryDirectory() as tmp:
            rules_path = Path(tmp) / "connections.txt"
            rules_path.write_text("192.0.2.10 :443/tcp\n", encoding="utf-8")
            matcher = build_matcher({
                "allowlist": {
                    "domain_patterns": "",
                    "connection_rules": str(rules_path),
                }
            })

            self.assertEqual(len(matcher._numeric_rules), 1)
            self.assertEqual(matcher._numeric_rules[0].ip1, "192.0.2.10")

    def test_stanza_detector_scope_can_be_comma_string(self) -> None:
        """A comma-separated ``detectors`` string is split into a list of names."""
        matcher = build_matcher({
            "allowlist": {
                "domain_patterns": "",
                "entry": [
                    {
                        "match": "dst_port",
                        "value": 443,
                        "comment": "Example scoped service",
                        "detectors": "beacon, scan",
                    }
                ],
            }
        })

        self.assertEqual(matcher._numeric_rules[-1].detectors, ["beacon", "scan"])

    def test_domain_patterns_filter_dns_rows_before_detection(self) -> None:
        """Glob and ``re:`` domain patterns both remove matching DNS rows."""
        matcher = build_matcher({"allowlist": {"domain_patterns": []}})
        # Patterns are injected directly so the test does not depend on files.
        matcher._domain_patterns = ["*.example.test", "re:\\.allowed\\.test$"]
        df = pd.DataFrame([
            {"src": "192.0.2.10", "query": "updates.example.test"},
            {"src": "192.0.2.11", "query": "allowed.test"},
            {"src": "192.0.2.12", "query": "suspicious.invalid"},
        ])

        filtered = matcher.filter_df(df, "dns")

        self.assertEqual(filtered["query"].tolist(), ["suspicious.invalid"])

    def test_shipped_universal_domains_filter_dns_infrastructure(self) -> None:
        """A reverse-lookup query is filtered with no patterns configured."""
        matcher = build_matcher({"allowlist": {"domain_patterns": []}})
        df = pd.DataFrame([
            {"src": "192.0.2.10", "query": "2.0.192.in-addr.arpa"},
            {"src": "192.0.2.11", "query": "suspicious.invalid"},
        ])

        filtered = matcher.filter_df(df, "dns")

        self.assertEqual(filtered["query"].tolist(), ["suspicious.invalid"])
233
+
234
+
235
def test_build_run_plan_records_skips_and_needed_logs(tmp_path: Path) -> None:
    """The plan runs beacon, skips dns with a reason, and lists the needed logs."""
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    # conn.log satisfies beacon's required pattern. The optional summary log
    # must exist for the satisfiable-only filter to include it in needed_logs.
    for log_name in ("conn.log", "conn_summary.log"):
        (zeek_dir / log_name).write_text("", encoding="utf-8")

    def _zeek_log(pattern: str) -> dict:
        return {"source": "zeek_dir", "pattern": pattern}

    beacon_stub = SimpleNamespace(
        REQUIRED_LOGS=[_zeek_log("conn*.log*")],
        OPTIONAL_LOGS=[_zeek_log("conn_summary*.log*")],
    )
    dns_stub = SimpleNamespace(
        REQUIRED_LOGS=[_zeek_log("dns*.log*")],
        OPTIONAL_LOGS=[],
    )
    detectors = {"beacon": beacon_stub, "dns": dns_stub}

    plan = build_run_plan("all", zeek_dir=zeek_dir, syslog_dir=None, detectors=detectors)

    expected_needed = {
        "conn*.log*": "zeek_dir",
        "conn_summary*.log*": "zeek_dir",
    }
    assert plan.selected == ["beacon", "dns"]
    assert plan.will_run == ["beacon"]
    assert plan.skipped == {"dns": f"dns*.log* not found in {zeek_dir}"}
    assert plan.needed_logs == expected_needed
261
+
262
+
263
def test_build_run_plan_honors_exclusion_syntax(tmp_path: Path) -> None:
    """``"all,!dns"`` drops dns from the selection, so it is not a skip."""
    zeek_dir = tmp_path / "zeek"
    zeek_dir.mkdir()
    conn_log = zeek_dir / "conn.log"
    conn_log.write_text("", encoding="utf-8")

    def _stub(pattern: str) -> SimpleNamespace:
        # Detector stand-in with one required zeek log and no optional logs.
        return SimpleNamespace(
            REQUIRED_LOGS=[{"source": "zeek_dir", "pattern": pattern}],
            OPTIONAL_LOGS=[],
        )

    detectors = {
        "beacon": _stub("conn*.log*"),
        "dns": _stub("dns*.log*"),
    }

    plan = build_run_plan("all,!dns", zeek_dir=zeek_dir, syslog_dir=None, detectors=detectors)

    assert plan.selected == ["beacon"]
    assert plan.will_run == ["beacon"]
    assert plan.skipped == {}
283
+
284
+
285
def test_allowlist_filter_tolerates_pihole_query_none_rows() -> None:
    """Domain filter must not crash on query=None rows (unknown/validation events from pihole)."""
    matcher = build_matcher({"allowlist": {"domain_patterns": []}})
    matcher._domain_patterns = ["bad.example.test"]

    rows = [
        # Matches the pattern, so it is filtered out.
        {"src": "192.0.2.1", "query": "bad.example.test"},
        # No match, so it survives.
        {"src": "192.0.2.2", "query": "harmless.example.test"},
        # Unknown/validation event with no query; it must survive.
        {"src": "192.0.2.3", "query": None},
    ]
    frame = pd.DataFrame(rows)

    filtered = matcher.filter_df(frame, "dns")
    surviving_queries = filtered["query"].tolist()

    assert len(filtered) == 2
    assert any(map(pd.isna, surviving_queries))
    assert "bad.example.test" not in surviving_queries
299
+
300
+
301
+ if __name__ == "__main__":  # Direct-execution entry point for this test module.
302
+ unittest.main()  # NOTE(review): runs only the TestCase class; the module-level pytest-style tests above (some take a tmp_path fixture) are not collected this way - confirm that is intended.