bbot 2.6.0.6879rc0__py3-none-any.whl → 2.7.2.7254rc0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bbot might be problematic. Click here for more details.
- bbot/__init__.py +1 -1
- bbot/core/engine.py +1 -1
- bbot/core/flags.py +1 -0
- bbot/core/helpers/bloom.py +6 -7
- bbot/core/helpers/dns/dns.py +0 -1
- bbot/core/helpers/dns/engine.py +0 -2
- bbot/core/helpers/files.py +2 -2
- bbot/core/helpers/git.py +17 -0
- bbot/core/helpers/misc.py +1 -0
- bbot/core/helpers/ntlm.py +0 -2
- bbot/core/helpers/regex.py +1 -1
- bbot/core/modules.py +0 -54
- bbot/defaults.yml +4 -2
- bbot/modules/apkpure.py +1 -1
- bbot/modules/base.py +11 -5
- bbot/modules/dnsbimi.py +1 -4
- bbot/modules/dnsdumpster.py +35 -52
- bbot/modules/dnstlsrpt.py +0 -6
- bbot/modules/docker_pull.py +1 -1
- bbot/modules/emailformat.py +17 -1
- bbot/modules/filedownload.py +1 -1
- bbot/modules/git_clone.py +47 -22
- bbot/modules/gitdumper.py +4 -14
- bbot/modules/github_workflows.py +1 -1
- bbot/modules/gitlab_com.py +31 -0
- bbot/modules/gitlab_onprem.py +84 -0
- bbot/modules/gowitness.py +0 -6
- bbot/modules/graphql_introspection.py +5 -2
- bbot/modules/httpx.py +2 -0
- bbot/modules/iis_shortnames.py +0 -7
- bbot/modules/internal/unarchive.py +9 -3
- bbot/modules/lightfuzz/lightfuzz.py +5 -1
- bbot/modules/nuclei.py +1 -1
- bbot/modules/output/base.py +0 -5
- bbot/modules/postman_download.py +1 -1
- bbot/modules/retirejs.py +232 -0
- bbot/modules/securitytxt.py +0 -3
- bbot/modules/subdomaincenter.py +1 -16
- bbot/modules/telerik.py +6 -1
- bbot/modules/templates/gitlab.py +98 -0
- bbot/modules/trufflehog.py +1 -1
- bbot/scanner/manager.py +7 -4
- bbot/scanner/scanner.py +1 -1
- bbot/scripts/benchmark_report.py +433 -0
- bbot/test/benchmarks/__init__.py +2 -0
- bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
- bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
- bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
- bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
- bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
- bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
- bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
- bbot/test/test_step_1/test_events.py +0 -1
- bbot/test/test_step_1/test_scan.py +1 -8
- bbot/test/test_step_2/module_tests/base.py +6 -1
- bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
- bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
- bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
- bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
- bbot/test/test_step_2/module_tests/test_module_excavate.py +35 -6
- bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
- bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
- bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +2 -2
- bbot/test/test_step_2/module_tests/test_module_retirejs.py +159 -0
- bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/METADATA +7 -4
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/RECORD +70 -60
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/WHEEL +1 -1
- bbot/modules/censys.py +0 -98
- bbot/modules/gitlab.py +0 -141
- bbot/modules/zoomeye.py +0 -77
- bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
- bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/entry_points.txt +0 -0
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import random
|
|
3
|
+
from bbot.core.helpers.misc import closest_match
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestClosestMatchBenchmarks:
    """
    Benchmarks covering closest_match lookups.

    closest_match is used by BBOT's DNS brute forcing to pick the best matching
    parent event out of thousands of candidates, so its speed feeds directly
    into scan throughput and DNS mutation efficiency.
    """

    def setup_method(self):
        """Seed the RNG and build the candidate lists shared by the benchmarks."""
        # A fixed seed keeps the generated candidates identical between runs
        random.seed(42)

        # Both generators draw from the module-level RNG, so the large list
        # must be built first to keep the draw sequence stable
        self.large_closest_match_choices = self._generate_large_closest_match_choices()
        self.realistic_closest_match_choices = self._generate_realistic_closest_match_choices()

    def _generate_large_closest_match_choices(self):
        """Return 10,000 synthetic hostnames (stress test with many parent events)."""
        tld_pool = ["com", "net", "org", "io", "co", "dev"]
        # One random TLD is drawn per hostname, in index order
        return [
            f"subdomain{index}.example{index % 100}.{random.choice(tld_pool)}"
            for index in range(10000)
        ]

    def _generate_realistic_closest_match_choices(self):
        """Return 5,000 parent-event style hostnames of mixed depth (like actual BBOT usage)."""
        tld_pool = ["com", "net", "org", "io", "co", "dev", "test", "local"]
        hostnames = []

        for index in range(5000):
            # Every shape consumes exactly one random TLD
            suffix = random.choice(tld_pool)

            if index % 10 == 0:
                # Simple domain
                hostname = f"example{index}.{suffix}"
            elif index % 5 == 0:
                # Multi-level domain
                hostname = f"sub{index}.example{index}.{suffix}"
            else:
                # Complex domain
                hostname = f"level1{index}.level2{index}.example{index}.{suffix}"

            hostnames.append(hostname)

        return hostnames

    @pytest.mark.benchmark(group="closest_match")
    def test_large_closest_match_lookup(self, benchmark):
        """Time closest_match against the large candidate list (many parent events)."""

        def lookup():
            return closest_match("subdomain5678.example50.com", self.large_closest_match_choices)

        best = benchmark.pedantic(lookup, iterations=50, rounds=10)
        assert best is not None

    @pytest.mark.benchmark(group="closest_match")
    def test_realistic_closest_match_workload(self, benchmark):
        """Time closest_match against the realistic parent-event candidate list."""

        def lookup():
            return closest_match("subdomain123.example5.com", self.realistic_closest_match_choices)

        best = benchmark.pedantic(lookup, iterations=50, rounds=10)
        assert best is not None
|
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import random
|
|
3
|
+
import string
|
|
4
|
+
from bbot.scanner import Scanner
|
|
5
|
+
from bbot.core.event.base import make_event
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestEventValidationBenchmarks:
    """
    Benchmarks for event validation.

    Two paths are measured: building a Scanner from a list of targets and
    walking its seeds, and calling make_event directly with and without an
    explicit event type.
    """

    def setup_method(self):
        """Setup minimal scanner configuration for benchmarking event validation"""
        # Set deterministic random seed for reproducible benchmarks
        random.seed(42)

        # Create a minimal scanner with no modules to isolate event validation performance
        self.scanner_config = {
            "modules": None,  # No modules to avoid overhead
            "output_modules": None,  # No output modules
            "dns": {"disable": True},  # Disable DNS to avoid network calls
            "web": {"http_timeout": 1},  # Minimal timeouts
        }

    def _generate_diverse_inputs(self, count):
        """
        Build ``count`` unique strings covering the input shapes used by the benchmarks.

        The mix contains DNS names, IPv4/IPv6 addresses, CIDR ranges, http/https
        URLs, host:port pairs, email addresses and a fixed list of edge cases.
        A private ``random.Random(42)`` drives every random choice, and
        duplicates are removed in insertion order, so the returned list is
        identical on every call and in every process.

        Args:
            count: Number of unique strings to return.

        Returns:
            A list of exactly ``count`` unique strings (empty when ``count`` <= 0).
        """
        rng = random.Random(42)
        items = []

        subdomains = ["www", "api", "mail", "ftp", "admin", "test", "dev", "staging", "blog"]
        tlds = ["com", "org", "net", "io", "co.uk", "de", "fr", "jp"]

        def random_ipv4():
            # The four octets are drawn left to right
            return f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"

        def random_label():
            return "".join(rng.choices(string.ascii_lowercase, k=8))

        # DNS names (various formats)
        for _ in range(count // 10):
            # Standard domains
            items.append(
                f"{rng.choice(subdomains)}.{rng.choice(['example', 'test', 'evilcorp'])}.{rng.choice(tlds)}"
            )
            # Bare domains
            items.append(f"{rng.choice(['example', 'test', 'company'])}.{rng.choice(tlds)}")

        # IP addresses (IPv4 and IPv6)
        for _ in range(count // 15):
            items.append(random_ipv4())
            items.append(f"2001:db8::{rng.randint(1, 9999):x}:{rng.randint(1, 9999):x}")

        # IP ranges
        for _ in range(count // 20):
            items.append(f"192.168.{rng.randint(1, 254)}.0/24")
            items.append(f"10.0.{rng.randint(1, 254)}.0/24")

        # URLs (http and https only)
        url_schemes = ["http", "https"]
        url_paths = ["", "/", "/admin", "/api/v1", "/login.php", "/index.html"]
        for _ in range(count // 8):
            scheme = rng.choice(url_schemes)
            domain = f"{rng.choice(subdomains)}.example.{rng.choice(tlds)}"
            path = rng.choice(url_paths)
            port = rng.choice(["", ":8080", ":443", ":80", ":8443"])
            items.append(f"{scheme}://{domain}{port}{path}")

        # Open ports
        ports = [80, 443, 22, 21, 25, 53, 110, 143, 993, 995, 8080, 8443, 3389]
        for _ in range(count // 12):
            domain = f"example.{rng.choice(tlds)}"
            port = rng.choice(ports)
            items.append(f"{domain}:{port}")
            # IPv4 with the same port
            items.append(f"{random_ipv4()}:{port}")

        # Email addresses
        email_domains = ["example.com", "test.org", "company.net"]
        email_users = ["admin", "test", "info", "contact", "support", "sales"]
        for _ in range(count // 15):
            user = rng.choice(email_users)
            domain = rng.choice(email_domains)
            items.append(f"{user}@{domain}")
            # Plus addressing
            items.append(f"{user}+{rng.randint(1, 999)}@{domain}")

        # Mixed/edge cases that exercise the auto-detection logic
        items.extend(
            [
                # Localhost variants
                "localhost",
                "127.0.0.1",
                "::1",
                # Punycode domains
                "xn--e1afmkfd.xn--p1ai",
                "xn--fiqs8s.xn--0zwm56d",
                # Long domain
                "very-long-subdomain-name-for-testing.test.com",
                # IP with and without port
                "192.168.1.1",
                "10.0.0.1:80",
                # URLs with parameters
                "https://example.com/search?q=test&limit=10",
                "http://api.example.com:8080/v1/users?format=json",
                # Standard domains
                "api.test.com",
                "mail.example.org",
                "secure.company.net",
            ]
        )

        # Fill the remainder with random variations (range() is empty when there is no shortfall)
        for _ in range(count - len(items)):
            choice = rng.randint(1, 4)
            if choice == 1:
                # Random domain
                items.append(f"{random_label()}.com")
            elif choice == 2:
                # Random IP
                items.append(random_ipv4())
            elif choice == 3:
                # Random URL
                items.append(f"https://{random_label()}.com/path")
            else:
                # Random email
                items.append(f"{random_label()}@example.com")

        # De-duplicate while keeping insertion order. list(set(...)) would
        # reorder the items differently in every process because string
        # hashing is randomised, which defeats the fixed seed above.
        unique_items = list(dict.fromkeys(items))
        seen = set(unique_items)

        # Top up with numbered filler hostnames until enough unique entries exist.
        # The index always advances, so a name collision cannot stall the loop.
        filler_index = len(unique_items)
        while len(unique_items) < count:
            filler = f"filler{filler_index}.example.com"
            filler_index += 1
            if filler not in seen:
                seen.add(filler)
                unique_items.append(filler)

        # Return exactly the requested number of unique items
        return unique_items[:count]

    def _generate_diverse_targets(self, count=1000):
        """Generate a diverse set of targets that will trigger different event type auto-detection"""
        return self._generate_diverse_inputs(count)

    def _generate_diverse_event_data(self, count=1000):
        """Generate diverse event data that will trigger different auto-detection paths in make_event"""
        return self._generate_diverse_inputs(count)

    @pytest.mark.benchmark(group="event_validation_scan_startup_small")
    def test_event_validation_full_scan_startup_small_batch(self, benchmark):
        """Benchmark full scan startup event validation with small batch (100 targets) for quick iteration"""
        targets = self._generate_diverse_targets(100)

        def validate_event_batch():
            scan = Scanner(*targets, config=self.scanner_config)

            # Count the seeds the scanner produced, grouped by detected type
            event_counts = {}
            total_events = 0

            for event_seed in scan.target.seeds:
                event_type = event_seed.type
                event_counts[event_type] = event_counts.get(event_type, 0) + 1
                total_events += 1

            return {
                "total_events_processed": total_events,
                "unique_event_types": len(event_counts),
                "event_type_breakdown": event_counts,
                "targets_input": len(targets),
            }

        result = benchmark(validate_event_batch)
        assert result["total_events_processed"] == result["targets_input"]  # Should process ALL targets
        assert result["unique_event_types"] >= 3  # Should detect at least DNS_NAME, IP_ADDRESS, URL

    @pytest.mark.benchmark(group="event_validation_scan_startup_large")
    def test_event_validation_full_scan_startup_large_batch(self, benchmark):
        """Benchmark full scan startup event validation with large batch (1000 targets) for comprehensive testing"""
        targets = self._generate_diverse_targets(1000)

        def validate_large_batch():
            scan = Scanner(*targets, config=self.scanner_config)

            validation_metrics = {
                "targets_input": len(targets),
                "events_created": 0,
                "validation_errors": 0,
                "auto_detection_success": 0,
                "type_distribution": {},
                "processing_efficiency": 0.0,
            }
            type_distribution = validation_metrics["type_distribution"]

            # A failure while walking the seeds is recorded rather than raised;
            # the assertions below then fail on the resulting event shortfall.
            try:
                for event_seed in scan.target.seeds:
                    validation_metrics["events_created"] += 1
                    event_type = event_seed.type
                    type_distribution[event_type] = type_distribution.get(event_type, 0) + 1

                    # If we got a valid event type, auto-detection succeeded
                    if event_type and event_type != "UNKNOWN":
                        validation_metrics["auto_detection_success"] += 1
            except Exception:
                validation_metrics["validation_errors"] += 1

            # Ratio of seeds produced to targets supplied
            if validation_metrics["targets_input"] > 0:
                validation_metrics["processing_efficiency"] = (
                    validation_metrics["events_created"] / validation_metrics["targets_input"]
                )

            return validation_metrics

        result = benchmark(validate_large_batch)
        assert result["events_created"] == result["targets_input"]  # Should process ALL targets successfully
        assert result["processing_efficiency"] == 1.0  # 100% success rate
        assert len(result["type_distribution"]) >= 5  # Should detect multiple event types

    @pytest.mark.benchmark(group="make_event_small")
    def test_make_event_autodetection_small(self, benchmark):
        """Benchmark make_event with auto-detection for small batch (100 items)"""
        event_data = self._generate_diverse_event_data(100)

        def create_events_with_autodetection():
            events_created = []
            type_distribution = {}
            validation_errors = 0

            for data in event_data:
                try:
                    # No event_type is passed, so make_event has to detect it
                    event = make_event(data, dummy=True)
                    events_created.append(event)

                    event_type = event.type
                    type_distribution[event_type] = type_distribution.get(event_type, 0) + 1
                except Exception:
                    # Counted here and asserted to be zero below
                    validation_errors += 1

            return {
                "events_created": len(events_created),
                "type_distribution": type_distribution,
                "validation_errors": validation_errors,
                "autodetection_success_rate": len(events_created) / len(event_data) if event_data else 0,
            }

        result = benchmark.pedantic(create_events_with_autodetection, iterations=50, rounds=10)
        assert result["events_created"] == len(event_data)  # Should create events for all data
        assert result["validation_errors"] == 0  # Should have no validation errors
        assert len(result["type_distribution"]) >= 3  # Should detect multiple event types
        assert result["autodetection_success_rate"] == 1.0  # 100% success rate

    @pytest.mark.benchmark(group="make_event_large")
    def test_make_event_autodetection_large(self, benchmark):
        """Benchmark make_event with auto-detection for large batch (1000 items)"""
        event_data = self._generate_diverse_event_data(1000)

        def create_large_event_batch():
            performance_metrics = {
                "total_processed": len(event_data),
                "events_created": 0,
                "autodetection_failures": 0,
                "type_distribution": {},
                "processing_efficiency": 0.0,
            }
            type_distribution = performance_metrics["type_distribution"]

            for data in event_data:
                try:
                    # Use dummy=True for performance (no scan/parent validation)
                    event = make_event(data, dummy=True)
                    performance_metrics["events_created"] += 1

                    event_type = event.type
                    type_distribution[event_type] = type_distribution.get(event_type, 0) + 1
                except Exception:
                    # Counted here and asserted to be zero below
                    performance_metrics["autodetection_failures"] += 1

            # Ratio of events created to inputs supplied
            performance_metrics["processing_efficiency"] = (
                performance_metrics["events_created"] / performance_metrics["total_processed"]
            )

            return performance_metrics

        result = benchmark.pedantic(create_large_event_batch, iterations=50, rounds=10)
        assert result["events_created"] == result["total_processed"]  # Should process all successfully
        assert result["autodetection_failures"] == 0  # Should have no failures
        assert result["processing_efficiency"] == 1.0  # 100% efficiency
        assert len(result["type_distribution"]) >= 5  # Should detect multiple event types

    @pytest.mark.benchmark(group="make_event_explicit_types")
    def test_make_event_explicit_types(self, benchmark):
        """Benchmark make_event when event types are explicitly provided (no auto-detection)"""
        # Five (data, type) pairs repeated 20 times: 100 cases in total
        test_cases = [
            ("example.com", "DNS_NAME"),
            ("192.168.1.1", "IP_ADDRESS"),
            ("https://example.com", "URL"),
            ("admin@example.com", "EMAIL_ADDRESS"),
            ("example.com:80", "OPEN_TCP_PORT"),
        ] * 20

        def create_events_explicit_types():
            events_created = []
            type_distribution = {}

            for data, event_type in test_cases:
                # Explicitly provide event_type to skip auto-detection
                event = make_event(data, event_type=event_type, dummy=True)
                events_created.append(event)

                type_distribution[event_type] = type_distribution.get(event_type, 0) + 1

            return {
                "events_created": len(events_created),
                "type_distribution": type_distribution,
                "bypass_autodetection": True,
            }

        result = benchmark.pedantic(create_events_explicit_types, iterations=50, rounds=10)
        assert result["events_created"] == len(test_cases)  # Should create all events
        assert result["bypass_autodetection"]  # Confirms we bypassed auto-detection
        assert len(result["type_distribution"]) == 5  # Should have exactly 5 types
|