bbot 2.6.0.6879rc0__py3-none-any.whl → 2.7.2.7254rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bbot might be problematic. Click here for more details.
- bbot/__init__.py +1 -1
- bbot/core/engine.py +1 -1
- bbot/core/flags.py +1 -0
- bbot/core/helpers/bloom.py +6 -7
- bbot/core/helpers/dns/dns.py +0 -1
- bbot/core/helpers/dns/engine.py +0 -2
- bbot/core/helpers/files.py +2 -2
- bbot/core/helpers/git.py +17 -0
- bbot/core/helpers/misc.py +1 -0
- bbot/core/helpers/ntlm.py +0 -2
- bbot/core/helpers/regex.py +1 -1
- bbot/core/modules.py +0 -54
- bbot/defaults.yml +4 -2
- bbot/modules/apkpure.py +1 -1
- bbot/modules/base.py +11 -5
- bbot/modules/dnsbimi.py +1 -4
- bbot/modules/dnsdumpster.py +35 -52
- bbot/modules/dnstlsrpt.py +0 -6
- bbot/modules/docker_pull.py +1 -1
- bbot/modules/emailformat.py +17 -1
- bbot/modules/filedownload.py +1 -1
- bbot/modules/git_clone.py +47 -22
- bbot/modules/gitdumper.py +4 -14
- bbot/modules/github_workflows.py +1 -1
- bbot/modules/gitlab_com.py +31 -0
- bbot/modules/gitlab_onprem.py +84 -0
- bbot/modules/gowitness.py +0 -6
- bbot/modules/graphql_introspection.py +5 -2
- bbot/modules/httpx.py +2 -0
- bbot/modules/iis_shortnames.py +0 -7
- bbot/modules/internal/unarchive.py +9 -3
- bbot/modules/lightfuzz/lightfuzz.py +5 -1
- bbot/modules/nuclei.py +1 -1
- bbot/modules/output/base.py +0 -5
- bbot/modules/postman_download.py +1 -1
- bbot/modules/retirejs.py +232 -0
- bbot/modules/securitytxt.py +0 -3
- bbot/modules/subdomaincenter.py +1 -16
- bbot/modules/telerik.py +6 -1
- bbot/modules/templates/gitlab.py +98 -0
- bbot/modules/trufflehog.py +1 -1
- bbot/scanner/manager.py +7 -4
- bbot/scanner/scanner.py +1 -1
- bbot/scripts/benchmark_report.py +433 -0
- bbot/test/benchmarks/__init__.py +2 -0
- bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
- bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
- bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
- bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
- bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
- bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
- bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
- bbot/test/test_step_1/test_events.py +0 -1
- bbot/test/test_step_1/test_scan.py +1 -8
- bbot/test/test_step_2/module_tests/base.py +6 -1
- bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
- bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
- bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
- bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
- bbot/test/test_step_2/module_tests/test_module_excavate.py +35 -6
- bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
- bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
- bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +2 -2
- bbot/test/test_step_2/module_tests/test_module_retirejs.py +159 -0
- bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/METADATA +7 -4
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/RECORD +70 -60
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/WHEEL +1 -1
- bbot/modules/censys.py +0 -98
- bbot/modules/gitlab.py +0 -141
- bbot/modules/zoomeye.py +0 -77
- bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
- bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/entry_points.txt +0 -0
- {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import asyncio
|
|
3
|
+
from bbot.scanner import Scanner
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestExcavateDirectBenchmarks:
    """
    Direct benchmark tests for Excavate module operations.

    These tests measure the performance of excavate's core YARA processing
    by calling the excavate.search() method directly with specific text sizes
    in both single-threaded and parallel asyncio tasks to test the GIL sidestep feature of YARA.

    Every benchmark round builds a fresh Scanner, preps it, and feeds
    TEXT_SEGMENTS_COUNT synthetic HTML bodies of a fixed size to excavate.
    """

    # Number of text segments per test
    TEXT_SEGMENTS_COUNT = 100

    # Prescribed sizes for deterministic benchmarking (in bytes)
    SMALL_SIZE = 4096  # 4KB
    LARGE_SIZE = 5242880  # 5MB

    # Deterministic filler used to pad each segment up to its target size.
    # Kept at class level so it is not rebuilt on every loop iteration.
    _PADDING_PATTERN = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "

    # (label printed in the report, event type) pairs, in report order.
    _REPORTED_EVENT_TYPES = (
        ("URL", "URL_UNVERIFIED"),
        ("DNS", "DNS_NAME"),
        ("Email", "EMAIL_ADDRESS"),
        ("Protocol", "PROTOCOL"),
        ("Finding", "FINDING"),
    )

    def _generate_text_segments(self, target_size, count):
        """
        Generate a list of text segments of the specified size.

        Args:
            target_size: Exact length (in characters) of every returned segment.
            count: Number of segments to generate.

        Returns:
            A list of ``count`` strings, each exactly ``target_size`` characters
            long: the realistic HTML for that index, padded with filler text or
            truncated as needed.
        """
        pattern = self._PADDING_PATTERN
        segments = []

        for i in range(count):
            # Generate realistic content that excavate can work with
            base_content = self._generate_realistic_content(i)

            # Pad to the exact target size with deterministic content
            remaining_size = target_size - len(base_content)
            if remaining_size > 0:
                # One extra repeat guarantees the slice below is long enough.
                padding_repeats = (remaining_size // len(pattern)) + 1
                content = base_content + (pattern * padding_repeats)[:remaining_size]
            else:
                content = base_content[:target_size]

            segments.append(content)

        return segments

    def _generate_realistic_content(self, index):
        """
        Generate realistic content that excavate can extract from.

        The returned HTML embeds URLs, form parameters, API endpoints, other
        protocol URIs, an email address and a JSON blob, all made unique by
        interpolating ``index``.

        NOTE(review): the leading whitespace inside the template is a
        formatting choice; callers pad/truncate the result to a fixed size, so
        it does not change segment length.
        """
        return f"""
        <html>
        <head>
            <title>Test Content {index}</title>
            <script src="https://api{index}.example.com/js/app.js"></script>
        </head>
        <body>
            <h1>Page {index}</h1>

            <!-- URLs and subdomains -->
            <a href="https://www{index}.example.com/page{index}">Link {index}</a>
            <a href="https://cdn{index}.example.com/assets/">CDN {index}</a>
            <img src="https://img{index}.example.com/photo{index}.jpg" />

            <!-- Forms with parameters -->
            <form action="/search{index}" method="GET">
                <input type="text" name="query{index}" value="test{index}">
                <input type="hidden" name="token{index}" value="abc123{index}">
                <button type="submit">Search</button>
            </form>

            <!-- API endpoints -->
            <script>
                fetch('https://api{index}.example.com/v1/users/{index}')
                    .then(response => response.json())
                    .then(data => console.log(data));

                // WebSocket connection
                const ws = new WebSocket('wss://realtime{index}.example.com/socket');
            </script>

            <!-- Various protocols -->
            <p>FTP: ftp://ftp{index}.example.com:21/files/</p>
            <p>SSH: ssh://server{index}.example.com:22/</p>
            <p>Email: contact{index}@example.com</p>

            <!-- JSON data -->
            <script type="application/json">
            {{
                "apiEndpoint{index}": "https://api{index}.example.com/data",
                "parameter{index}": "value{index}",
                "secretKey{index}": "sk_test_{index}_abcdef123456"
            }}
            </script>

            <!-- Comments with URLs -->
            <!-- https://hidden{index}.example.com/admin -->
            <!-- TODO: Check https://internal{index}.example.com/debug -->
        </body>
        </html>
        """

    async def _prepare_excavate(self):
        """
        Create a prepped scanner and return it with its excavate module.

        Returns:
            A ``(scan, excavate_module)`` tuple.

        Raises:
            RuntimeError: If the prepped scan exposes no "excavate" module.

        NOTE(review): the scan is prepped but never cleaned up here, so each
        benchmark round leaves a prepped Scanner behind. Confirm whether a
        cleanup step should be added outside the timed section.
        """
        scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
        await scan._prep()
        excavate_module = scan.modules.get("excavate")

        if not excavate_module:
            raise RuntimeError("Excavate module not found")

        return scan, excavate_module

    def _make_http_response_event(self, scan, url, body):
        """Build a mock HTTP_RESPONSE event for ``url`` carrying ``body`` as HTML."""
        return scan.make_event(
            {
                "url": url,
                "method": "GET",
                "body": body,
                "header-dict": {"Content-Type": ["text/html"]},
                "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
                "status_code": 200,
            },
            "HTTP_RESPONSE",
            parent=scan.root_event,
        )

    async def _run_excavate_single_thread(self, text_segments):
        """
        Run excavate processing in single thread.

        Segments are awaited one after another. The module's ``emit_event`` is
        replaced with a recorder so emitted events can be inspected.

        Returns:
            A ``(results, emitted_events)`` tuple: one ``"processed_<i>"``
            marker per segment, and every object passed to ``emit_event``.
        """
        scan, excavate_module = await self._prepare_excavate()

        # Track events emitted by excavate
        emitted_events = []

        async def track_emit_event(event_data, *args, **kwargs):
            emitted_events.append(event_data)

        excavate_module.emit_event = track_emit_event

        # Process all text segments sequentially
        results = []
        for i, text_segment in enumerate(text_segments):
            mock_event = self._make_http_response_event(scan, f"https://example.com/test/{i}", text_segment)

            # Process with excavate
            await excavate_module.search(text_segment, mock_event, "text/html", f"Single thread benchmark {i}")
            results.append(f"processed_{i}")

        return results, emitted_events

    async def _run_excavate_parallel_tasks(self, text_segments):
        """
        Run excavate processing with parallel asyncio tasks.

        Unlike the single-thread runner, ``emit_event`` is left untouched and
        emitted events are not collected.

        Returns:
            A list with one ``"processed_<i>"`` marker per segment, in input order.
        """
        scan, excavate_module = await self._prepare_excavate()

        # Define async task to process a single text segment
        async def process_segment(segment_index, text_segment):
            mock_event = self._make_http_response_event(
                scan, f"https://example.com/parallel/{segment_index}", text_segment
            )

            await excavate_module.search(
                text_segment, mock_event, "text/html", f"Parallel benchmark task {segment_index}"
            )
            return f"processed_{segment_index}"

        # Create all tasks and run them concurrently
        tasks = [process_segment(i, text_segment) for i, text_segment in enumerate(text_segments)]

        # Run all tasks in parallel
        results = await asyncio.gather(*tasks)
        return results

    def _report_and_validate_events(self, events):
        """
        Print per-type event counts and assert that excavate found something.

        Args:
            events: Objects recorded from ``emit_event``; each must expose ``.type``.

        Raises:
            AssertionError: If no events were emitted, or none of them is a
                URL_UNVERIFIED, DNS_NAME or PROTOCOL event.
        """
        # Count events by type
        counts = {event_type: 0 for _, event_type in self._REPORTED_EVENT_TYPES}
        for event in events:
            if event.type in counts:
                counts[event.type] += 1

        total_events = len(events)
        print(f"📊 Total events: {total_events}")
        for label, event_type in self._REPORTED_EVENT_TYPES:
            print(f"📊 {label} events: {counts[event_type]}")

        # Validate that excavate actually found and processed content
        assert total_events > 0, "Expected to find some events from excavate"
        assert counts["URL_UNVERIFIED"] > 0 or counts["DNS_NAME"] > 0 or counts["PROTOCOL"] > 0, (
            "Expected excavate to find URLs, DNS names, or protocols"
        )

    # Single Thread Tests
    @pytest.mark.benchmark(group="excavate_single_small")
    def test_excavate_single_thread_small(self, benchmark):
        """Benchmark excavate single thread processing with small (4KB) segments"""
        text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_single_thread(text_segments))

        result, events = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)

        print("\n✅ Single-thread small segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        self._report_and_validate_events(events)

    @pytest.mark.benchmark(group="excavate_single_large")
    def test_excavate_single_thread_large(self, benchmark):
        """Benchmark excavate single thread processing with large (5MB) segments"""
        text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_single_thread(text_segments))

        result, events = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)

        print("\n✅ Single-thread large segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        self._report_and_validate_events(events)

    # Parallel Tests
    @pytest.mark.benchmark(group="excavate_parallel_small")
    def test_excavate_parallel_tasks_small(self, benchmark):
        """Benchmark excavate parallel processing with small (4KB) segments"""
        text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_parallel_tasks(text_segments))

        result = benchmark(run_test)

        # Only completion is checked here: the parallel runner does not record
        # emitted events, so extraction results are not validated in this test.
        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        print("\n✅ Parallel small segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each in parallel")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        print("📊 Tasks executed concurrently to test YARA GIL sidestep")

    @pytest.mark.benchmark(group="excavate_parallel_large")
    def test_excavate_parallel_tasks_large(self, benchmark):
        """Benchmark excavate parallel processing with large (5MB) segments to test YARA GIL sidestep"""
        text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_parallel_tasks(text_segments))

        result = benchmark(run_test)

        # Only completion is checked here: the parallel runner does not record
        # emitted events, so extraction results are not validated in this test.
        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        print("\n✅ Parallel large segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each in parallel")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        print("📊 Tasks executed concurrently to test YARA GIL sidestep")
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import random
|
|
3
|
+
import string
|
|
4
|
+
from bbot.core.helpers.misc import make_ip_type, is_ip
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TestIPAddressBenchmarks:
    """
    Benchmarks for BBOT's IP helper functions (``is_ip`` and ``make_ip_type``).

    All input data is produced from a fixed random seed in ``setup_method`` so
    that every run benchmarks the same strings.
    """

    def setup_method(self):
        """Seed the RNG and build the valid, invalid and mixed input lists."""
        # Fixed seed for reproducible results
        random.seed(42)

        # Order matters: each generator consumes RNG state, and the mixed list
        # is derived from the two lists built before it.
        self.valid_ips = self._generate_valid_ips()
        self.invalid_ips = self._generate_invalid_ips()
        self.mixed_data = self._generate_mixed_data()

    def _generate_valid_ips(self):
        """
        Build the list of well-formed inputs.

        Layout of the returned list: 1000 IPv4 addresses, then 500 IPv6
        addresses, then 500 IPv4 CIDR networks, then 200 "start-end" ranges.
        """
        # (low, high) bounds for the first three octets shared by every IPv4 shape
        leading_octets = ((1, 223), (0, 255), (0, 255))

        def dotted(bounds):
            # Octets are drawn left to right, one randint() call per octet.
            return ".".join(str(random.randint(low, high)) for low, high in bounds)

        # IPv4 addresses
        addresses = [dotted(leading_octets + ((1, 254),)) for _ in range(1000)]

        # IPv6 addresses: eight hex groups
        addresses.extend(":".join(format(random.randint(0, 65535), "x") for _ in range(8)) for _ in range(500))

        # Network addresses: host part is always .0, prefix length drawn last
        for _ in range(500):
            network_base = dotted(leading_octets) + ".0"
            prefix_length = random.randint(8, 30)
            addresses.append(f"{network_base}/{prefix_length}")

        # IP ranges. NOTE: the two endpoints are drawn independently, so the
        # start is not guaranteed to be numerically below the end.
        for _ in range(200):
            range_start = dotted(leading_octets + ((1, 200),))
            range_end = dotted(leading_octets + ((201, 254),))
            addresses.append(f"{range_start}-{range_end}")

        return addresses

    def _generate_invalid_ips(self):
        """
        Build the list of malformed inputs.

        Layout of the returned list: 1500 malformed IPv4 strings (three per
        iteration), then 300 colon-joined hex strings, then 200 random
        alphanumeric strings.
        """

        def octets(how_many):
            return [str(random.randint(0, 255)) for _ in range(how_many)]

        rejects = []

        # Malformed IPv4
        for _ in range(500):
            # first octet out of range
            oversized_first = str(random.randint(256, 999))
            rejects.append(".".join([oversized_first] + octets(3)))
            # too few octets
            rejects.append(".".join(octets(3)))
            # too many octets
            rejects.append(".".join(octets(5)))

        # Malformed IPv6: 5-10 hex groups. NOTE: randint(5, 10) is inclusive
        # and can return 8, so some of these entries have the correct number
        # of groups.
        for _ in range(300):
            group_count = random.randint(5, 10)
            hex_groups = [format(random.randint(0, 65535), "x") for _ in range(group_count)]
            rejects.append(":".join(hex_groups))

        # Random strings
        alphabet = string.ascii_letters + string.digits
        for _ in range(200):
            size = random.randint(5, 20)
            rejects.append("".join(random.choices(alphabet, k=size)))

        return rejects

    def _generate_mixed_data(self):
        """
        Combine the first 500 valid and first 500 invalid entries, shuffled.

        Given the layouts of the source lists, the valid half consists only of
        plain IPv4 addresses and the invalid half only of malformed IPv4 strings.
        """
        combined = self.valid_ips[:500] + self.invalid_ips[:500]
        # Reset seed before shuffle so the ordering is reproducible
        random.seed(42)
        random.shuffle(combined)
        return combined

    @pytest.mark.benchmark(group="ip_validation")
    def test_is_ip_performance(self, benchmark):
        """Benchmark ``is_ip`` over the mixed valid/invalid data."""

        def validate_ips():
            return sum(1 for candidate in self.mixed_data if is_ip(candidate))

        accepted = benchmark(validate_ips)
        assert accepted > 0

    @pytest.mark.benchmark(group="ip_type_detection")
    def test_make_ip_type_performance(self, benchmark):
        """Benchmark ``make_ip_type`` over the full valid list."""

        def detect_ip_types():
            converted = 0
            for candidate in self.valid_ips:
                try:
                    make_ip_type(candidate)
                except Exception:
                    # Entries that fail to convert are skipped, not counted.
                    continue
                converted += 1
            return converted

        converted_total = benchmark(detect_ip_types)
        assert converted_total > 0

    @pytest.mark.benchmark(group="ip_processing")
    def test_mixed_ip_operations(self, benchmark):
        """Benchmark ``is_ip`` followed by ``make_ip_type`` on the mixed data."""

        def process_ips():
            handled = 0
            for candidate in self.mixed_data:
                if not is_ip(candidate):
                    continue
                try:
                    make_ip_type(candidate)
                except Exception:
                    # Entries that fail to convert are skipped, not counted.
                    continue
                handled += 1
            return handled

        handled_total = benchmark(process_ips)
        assert handled_total > 0
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import random
|
|
3
|
+
from bbot.core.helpers.misc import weighted_shuffle
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestWeightedShuffleBenchmarks:
    """
    Benchmarks for the ``weighted_shuffle`` helper.

    Per the original author's note, this function is used by BBOT's queue
    management to shuffle incoming queues by module priority weight, so its
    speed affects scan throughput.
    """

    def setup_method(self):
        """Seed the RNG and build every dataset used by the benchmarks."""
        # Fixed seed for reproducible results
        random.seed(42)

        # Generation order is kept stable because the medium and large
        # datasets both draw their weights from the seeded RNG.
        self.small_data = self._generate_small_dataset()
        self.medium_data = self._generate_medium_dataset()
        self.large_data = self._generate_large_dataset()
        self.priority_weights = self._generate_priority_weights()

    def _generate_small_dataset(self):
        """Return three items with fixed weights (a handful of modules)."""
        return {
            "items": ["module_a", "module_b", "module_c"],
            "weights": [0.6, 0.3, 0.1],
        }

    def _generate_medium_dataset(self):
        """Return 20 items with random weights in [0.1, 1.0] (a typical scan)."""
        size = 20
        names = [f"module_{n}" for n in range(size)]
        weights = [random.uniform(0.1, 1.0) for _ in range(size)]
        return {"items": names, "weights": weights}

    def _generate_large_dataset(self):
        """Return 100 items with random weights in [0.1, 1.0] (a complex scan)."""
        size = 100
        names = [f"module_{n}" for n in range(size)]
        weights = [random.uniform(0.1, 1.0) for _ in range(size)]
        return {"items": names, "weights": weights}

    def _generate_priority_weights(self):
        """
        Return queue names with integer weights modelled on module priorities.

        Per the original author's note, BBOT priorities run 1-5 with a lower
        priority number meaning a higher weight, computed as
        ``[5] + [6 - m.priority for m in modules]``.
        """
        # One leading weight of 5, then the 5,4,3,2,1 pattern repeated 20
        # times: 1 + 5*20 = 101 items in total.
        per_priority = [6 - priority for priority in range(1, 6)]
        weights = [5] + per_priority * 20
        names = [f"queue_{n}" for n in range(len(weights))]
        return {"items": names, "weights": weights}

    @pytest.mark.benchmark(group="weighted_shuffle")
    def test_typical_queue_shuffle(self, benchmark):
        """Benchmark weighted shuffle on the 20-item medium dataset."""

        def shuffle_typical():
            dataset = self.medium_data
            return weighted_shuffle(dataset["items"], dataset["weights"])

        shuffled = benchmark(shuffle_typical)
        assert len(shuffled) == 20
        # Every input item must still be present after shuffling
        missing = [name for name in self.medium_data["items"] if name not in shuffled]
        assert not missing

    @pytest.mark.benchmark(group="weighted_shuffle")
    def test_priority_queue_shuffle(self, benchmark):
        """Benchmark weighted shuffle on the priority-derived integer weights."""

        def shuffle_priorities():
            dataset = self.priority_weights
            return weighted_shuffle(dataset["items"], dataset["weights"])

        shuffled = benchmark(shuffle_priorities)
        expected_items = self.priority_weights["items"]
        assert len(shuffled) == len(expected_items)
        # Every input item must still be present after shuffling
        missing = [name for name in expected_items if name not in shuffled]
        assert not missing
|
|
@@ -22,8 +22,8 @@ def test_bbot_multiprocess(bbot_httpserver):
|
|
|
22
22
|
queue = multiprocessing.Queue()
|
|
23
23
|
events_process = multiprocessing.Process(target=run_bbot_multiprocess, args=(queue,))
|
|
24
24
|
events_process.start()
|
|
25
|
-
events_process.join()
|
|
26
|
-
events = queue.get()
|
|
25
|
+
events_process.join(timeout=300)
|
|
26
|
+
events = queue.get(timeout=10)
|
|
27
27
|
assert len(events) >= 3
|
|
28
28
|
scan_events = [e for e in events if e["type"] == "SCAN"]
|
|
29
29
|
assert len(scan_events) == 2
|
|
@@ -209,7 +209,6 @@ async def test_events(events, helpers):
|
|
|
209
209
|
javascript_event = scan.make_event("http://evilcorp.com/asdf/a.js?b=c#d", "URL_UNVERIFIED", parent=scan.root_event)
|
|
210
210
|
assert "extension-js" in javascript_event.tags
|
|
211
211
|
await scan.ingress_module.handle_event(javascript_event)
|
|
212
|
-
assert "httpx-only" in javascript_event.tags
|
|
213
212
|
|
|
214
213
|
# scope distance
|
|
215
214
|
event1 = scan.make_event("1.2.3.4", dummy=True)
|
|
@@ -111,7 +111,6 @@ async def test_task_scan_handle_event_timeout(bbot_scanner):
|
|
|
111
111
|
class LongBatchModule(BaseModule):
|
|
112
112
|
watched_events = ["IP_ADDRESS"]
|
|
113
113
|
handled_event = False
|
|
114
|
-
canceled = False
|
|
115
114
|
_name = "long_batch"
|
|
116
115
|
_batch_size = 2
|
|
117
116
|
|
|
@@ -147,24 +146,18 @@ async def test_task_scan_handle_event_timeout(bbot_scanner):
|
|
|
147
146
|
|
|
148
147
|
@pytest.mark.asyncio
|
|
149
148
|
async def test_url_extension_handling(bbot_scanner):
|
|
150
|
-
scan = bbot_scanner(config={"url_extension_blacklist": ["css"]
|
|
149
|
+
scan = bbot_scanner(config={"url_extension_blacklist": ["css"]})
|
|
151
150
|
await scan._prep()
|
|
152
151
|
assert scan.url_extension_blacklist == {"css"}
|
|
153
|
-
assert scan.url_extension_httpx_only == {"js"}
|
|
154
152
|
good_event = scan.make_event("https://evilcorp.com/a.txt", "URL", tags=["status-200"], parent=scan.root_event)
|
|
155
153
|
bad_event = scan.make_event("https://evilcorp.com/a.css", "URL", tags=["status-200"], parent=scan.root_event)
|
|
156
|
-
httpx_event = scan.make_event("https://evilcorp.com/a.js", "URL", tags=["status-200"], parent=scan.root_event)
|
|
157
154
|
assert "blacklisted" not in bad_event.tags
|
|
158
|
-
assert "httpx-only" not in httpx_event.tags
|
|
159
155
|
result = await scan.ingress_module.handle_event(good_event)
|
|
160
156
|
assert result is None
|
|
161
157
|
result, reason = await scan.ingress_module.handle_event(bad_event)
|
|
162
158
|
assert result is False
|
|
163
159
|
assert reason == "event is blacklisted"
|
|
164
160
|
assert "blacklisted" in bad_event.tags
|
|
165
|
-
result = await scan.ingress_module.handle_event(httpx_event)
|
|
166
|
-
assert result is None
|
|
167
|
-
assert "httpx-only" in httpx_event.tags
|
|
168
161
|
|
|
169
162
|
await scan._cleanup()
|
|
170
163
|
|
|
@@ -61,6 +61,7 @@ class ModuleTestBase:
|
|
|
61
61
|
config=self.config,
|
|
62
62
|
whitelist=module_test_base.whitelist,
|
|
63
63
|
blacklist=module_test_base.blacklist,
|
|
64
|
+
force_start=getattr(module_test_base, "force_start", False),
|
|
64
65
|
)
|
|
65
66
|
self.events = []
|
|
66
67
|
self.log = logging.getLogger(f"bbot.test.{module_test_base.name}")
|
|
@@ -108,10 +109,14 @@ class ModuleTestBase:
|
|
|
108
109
|
self.log.debug("Executing setup_after_prep()")
|
|
109
110
|
await self.setup_after_prep(module_test)
|
|
110
111
|
self.log.debug("Starting scan")
|
|
111
|
-
|
|
112
|
+
await self._execute_scan(module_test)
|
|
112
113
|
self.log.debug(f"Finished {module_test.name} module test")
|
|
113
114
|
yield module_test
|
|
114
115
|
|
|
116
|
+
async def _execute_scan(self, module_test):
|
|
117
|
+
"""Execute the scan and collect events. Can be overridden by benchmark classes."""
|
|
118
|
+
module_test.events = [e async for e in module_test.scan.async_start()]
|
|
119
|
+
|
|
115
120
|
@pytest.mark.asyncio
|
|
116
121
|
async def test_module_run(self, module_test):
|
|
117
122
|
from bbot.core.helpers.misc import execute_sync_or_async
|
|
@@ -6,11 +6,12 @@ raw_bimi_txt_default = (
|
|
|
6
6
|
raw_bimi_txt_nondefault = '"v=BIMI1; l=https://nondefault.thirdparty.tld/brand/logo.svg;a=https://nondefault.thirdparty.tld/brand/certificate.pem;"'
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class
|
|
9
|
+
class TestDnsbimi(ModuleTestBase):
|
|
10
10
|
targets = ["test.localdomain"]
|
|
11
11
|
modules_overrides = ["dnsbimi", "speculate"]
|
|
12
12
|
config_overrides = {
|
|
13
13
|
"modules": {"dnsbimi": {"emit_raw_dns_records": True, "selectors": "default,nondefault"}},
|
|
14
|
+
"omit_event_types": ["HTTP_RESPONSE", "RAW_TEXT", "DNS_NAME_UNRESOLVED", "FILESYSTEM", "WEB_PARAMETER"],
|
|
14
15
|
}
|
|
15
16
|
|
|
16
17
|
async def setup_after_prep(self, module_test):
|