bbot 2.6.0.6879rc0__py3-none-any.whl → 2.7.2.7254rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

Files changed (75)
  1. bbot/__init__.py +1 -1
  2. bbot/core/engine.py +1 -1
  3. bbot/core/flags.py +1 -0
  4. bbot/core/helpers/bloom.py +6 -7
  5. bbot/core/helpers/dns/dns.py +0 -1
  6. bbot/core/helpers/dns/engine.py +0 -2
  7. bbot/core/helpers/files.py +2 -2
  8. bbot/core/helpers/git.py +17 -0
  9. bbot/core/helpers/misc.py +1 -0
  10. bbot/core/helpers/ntlm.py +0 -2
  11. bbot/core/helpers/regex.py +1 -1
  12. bbot/core/modules.py +0 -54
  13. bbot/defaults.yml +4 -2
  14. bbot/modules/apkpure.py +1 -1
  15. bbot/modules/base.py +11 -5
  16. bbot/modules/dnsbimi.py +1 -4
  17. bbot/modules/dnsdumpster.py +35 -52
  18. bbot/modules/dnstlsrpt.py +0 -6
  19. bbot/modules/docker_pull.py +1 -1
  20. bbot/modules/emailformat.py +17 -1
  21. bbot/modules/filedownload.py +1 -1
  22. bbot/modules/git_clone.py +47 -22
  23. bbot/modules/gitdumper.py +4 -14
  24. bbot/modules/github_workflows.py +1 -1
  25. bbot/modules/gitlab_com.py +31 -0
  26. bbot/modules/gitlab_onprem.py +84 -0
  27. bbot/modules/gowitness.py +0 -6
  28. bbot/modules/graphql_introspection.py +5 -2
  29. bbot/modules/httpx.py +2 -0
  30. bbot/modules/iis_shortnames.py +0 -7
  31. bbot/modules/internal/unarchive.py +9 -3
  32. bbot/modules/lightfuzz/lightfuzz.py +5 -1
  33. bbot/modules/nuclei.py +1 -1
  34. bbot/modules/output/base.py +0 -5
  35. bbot/modules/postman_download.py +1 -1
  36. bbot/modules/retirejs.py +232 -0
  37. bbot/modules/securitytxt.py +0 -3
  38. bbot/modules/subdomaincenter.py +1 -16
  39. bbot/modules/telerik.py +6 -1
  40. bbot/modules/templates/gitlab.py +98 -0
  41. bbot/modules/trufflehog.py +1 -1
  42. bbot/scanner/manager.py +7 -4
  43. bbot/scanner/scanner.py +1 -1
  44. bbot/scripts/benchmark_report.py +433 -0
  45. bbot/test/benchmarks/__init__.py +2 -0
  46. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  47. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  48. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  49. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  50. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  51. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  52. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  53. bbot/test/test_step_1/test_events.py +0 -1
  54. bbot/test/test_step_1/test_scan.py +1 -8
  55. bbot/test/test_step_2/module_tests/base.py +6 -1
  56. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  57. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  58. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  59. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  60. bbot/test/test_step_2/module_tests/test_module_excavate.py +35 -6
  61. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  62. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  63. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +2 -2
  64. bbot/test/test_step_2/module_tests/test_module_retirejs.py +159 -0
  65. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  66. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/METADATA +7 -4
  67. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/RECORD +70 -60
  68. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/WHEEL +1 -1
  69. bbot/modules/censys.py +0 -98
  70. bbot/modules/gitlab.py +0 -141
  71. bbot/modules/zoomeye.py +0 -77
  72. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  73. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  74. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/entry_points.txt +0 -0
  75. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,291 @@
1
import pytest
import asyncio

from bbot.scanner import Scanner


class TestExcavateDirectBenchmarks:
    """
    Direct benchmark tests for Excavate module operations.

    These tests measure the performance of excavate's core YARA processing
    by calling the excavate.search() method directly with specific text sizes
    in both single-threaded and parallel asyncio tasks to test the GIL sidestep feature of YARA.
    """

    # Number of text segments per test
    TEXT_SEGMENTS_COUNT = 100

    # Prescribed sizes for deterministic benchmarking (in bytes)
    SMALL_SIZE = 4096  # 4KB
    LARGE_SIZE = 5242880  # 5MB

    def _generate_text_segments(self, target_size, count):
        """Generate a list of `count` text segments, each exactly `target_size` characters."""
        segments = []

        for i in range(count):
            # Generate realistic content that excavate can work with
            base_content = self._generate_realistic_content(i)

            # Pad to the exact target size with deterministic content
            remaining_size = target_size - len(base_content)
            if remaining_size > 0:
                # Use deterministic padding pattern so runs are reproducible
                padding_pattern = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
                padding_repeats = (remaining_size // len(padding_pattern)) + 1
                padding = (padding_pattern * padding_repeats)[:remaining_size]
                content = base_content + padding
            else:
                content = base_content[:target_size]

            segments.append(content)

        return segments

    def _generate_realistic_content(self, index):
        """Generate realistic HTML content that excavate can extract URLs, emails, and parameters from."""
        return f"""
        <html>
        <head>
            <title>Test Content {index}</title>
            <script src="https://api{index}.example.com/js/app.js"></script>
        </head>
        <body>
            <h1>Page {index}</h1>

            <!-- URLs and subdomains -->
            <a href="https://www{index}.example.com/page{index}">Link {index}</a>
            <a href="https://cdn{index}.example.com/assets/">CDN {index}</a>
            <img src="https://img{index}.example.com/photo{index}.jpg" />

            <!-- Forms with parameters -->
            <form action="/search{index}" method="GET">
                <input type="text" name="query{index}" value="test{index}">
                <input type="hidden" name="token{index}" value="abc123{index}">
                <button type="submit">Search</button>
            </form>

            <!-- API endpoints -->
            <script>
                fetch('https://api{index}.example.com/v1/users/{index}')
                    .then(response => response.json())
                    .then(data => console.log(data));

                // WebSocket connection
                const ws = new WebSocket('wss://realtime{index}.example.com/socket');
            </script>

            <!-- Various protocols -->
            <p>FTP: ftp://ftp{index}.example.com:21/files/</p>
            <p>SSH: ssh://server{index}.example.com:22/</p>
            <p>Email: contact{index}@example.com</p>

            <!-- JSON data -->
            <script type="application/json">
            {{
                "apiEndpoint{index}": "https://api{index}.example.com/data",
                "parameter{index}": "value{index}",
                "secretKey{index}": "sk_test_{index}_abcdef123456"
            }}
            </script>

            <!-- Comments with URLs -->
            <!-- https://hidden{index}.example.com/admin -->
            <!-- TODO: Check https://internal{index}.example.com/debug -->
        </body>
        </html>
        """

    @staticmethod
    def _make_http_response_event(scan, url, body):
        """Build a mock HTTP_RESPONSE event wrapping `body`, parented to the scan root event."""
        return scan.make_event(
            {
                "url": url,
                "method": "GET",
                "body": body,
                "header-dict": {"Content-Type": ["text/html"]},
                "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
                "status_code": 200,
            },
            "HTTP_RESPONSE",
            parent=scan.root_event,
        )

    @staticmethod
    def _count_events_by_type(events):
        """Tally emitted events by the event types the benchmarks assert on."""
        return {
            "total": len(events),
            "url": sum(1 for e in events if e.type == "URL_UNVERIFIED"),
            "dns": sum(1 for e in events if e.type == "DNS_NAME"),
            "email": sum(1 for e in events if e.type == "EMAIL_ADDRESS"),
            "protocol": sum(1 for e in events if e.type == "PROTOCOL"),
            "finding": sum(1 for e in events if e.type == "FINDING"),
        }

    def _report_single_thread_stats(self, header, result, events, segment_desc, total_size_mb):
        """Shared reporting + validation for the single-thread benchmarks.

        Prints the event-type breakdown and asserts excavate actually extracted
        content (URLs, DNS names, or protocols) from the generated segments.
        """
        counts = self._count_events_by_type(events)
        print(f"\n✅ {header}")
        print(f"📊 Processed {len(result):,} segments of {segment_desc} each")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        print(f"📊 Total events: {counts['total']}")
        print(f"📊 URL events: {counts['url']}")
        print(f"📊 DNS events: {counts['dns']}")
        print(f"📊 Email events: {counts['email']}")
        print(f"📊 Protocol events: {counts['protocol']}")
        print(f"📊 Finding events: {counts['finding']}")

        # Validate that excavate actually found and processed content
        assert counts["total"] > 0, "Expected to find some events from excavate"
        assert counts["url"] > 0 or counts["dns"] > 0 or counts["protocol"] > 0, (
            "Expected excavate to find URLs, DNS names, or protocols"
        )

    async def _run_excavate_single_thread(self, text_segments):
        """Run excavate processing sequentially; returns (results, emitted_events)."""
        # Create scanner and initialize excavate
        # NOTE(review): the scanner is never cleaned up (no scan._cleanup()) —
        # presumably acceptable for a short-lived benchmark process; confirm.
        scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
        await scan._prep()
        excavate_module = scan.modules.get("excavate")

        if not excavate_module:
            raise RuntimeError("Excavate module not found")

        # Track events emitted by excavate by monkeypatching emit_event
        emitted_events = []

        async def track_emit_event(event_data, *args, **kwargs):
            emitted_events.append(event_data)

        excavate_module.emit_event = track_emit_event

        # Process all text segments sequentially
        results = []
        for i, text_segment in enumerate(text_segments):
            mock_event = self._make_http_response_event(scan, f"https://example.com/test/{i}", text_segment)

            # Process with excavate
            await excavate_module.search(text_segment, mock_event, "text/html", f"Single thread benchmark {i}")
            results.append(f"processed_{i}")

        return results, emitted_events

    async def _run_excavate_parallel_tasks(self, text_segments):
        """Run excavate processing with parallel asyncio tasks; returns the per-task results."""
        # Create scanner and initialize excavate
        scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
        await scan._prep()
        excavate_module = scan.modules.get("excavate")

        if not excavate_module:
            raise RuntimeError("Excavate module not found")

        # Async task to process a single text segment
        async def process_segment(segment_index, text_segment):
            mock_event = self._make_http_response_event(
                scan, f"https://example.com/parallel/{segment_index}", text_segment
            )

            await excavate_module.search(
                text_segment, mock_event, "text/html", f"Parallel benchmark task {segment_index}"
            )
            return f"processed_{segment_index}"

        # Create all tasks and run them concurrently (exercises YARA's GIL sidestep)
        tasks = [process_segment(i, text_segment) for i, text_segment in enumerate(text_segments)]

        # Run all tasks in parallel
        results = await asyncio.gather(*tasks)
        return results

    # Single Thread Tests
    @pytest.mark.benchmark(group="excavate_single_small")
    def test_excavate_single_thread_small(self, benchmark):
        """Benchmark excavate single thread processing with small (4KB) segments"""
        text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_single_thread(text_segments))

        result, events = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        self._report_single_thread_stats(
            "Single-thread small segments benchmark completed",
            result,
            events,
            f"{self.SMALL_SIZE / 1024:.0f}KB",
            total_size_mb,
        )

    @pytest.mark.benchmark(group="excavate_single_large")
    def test_excavate_single_thread_large(self, benchmark):
        """Benchmark excavate single thread processing with large (5MB) segments"""
        # (docstring fixed: LARGE_SIZE is 5242880 bytes = 5MB, not 10MB)
        text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_single_thread(text_segments))

        result, events = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        self._report_single_thread_stats(
            "Single-thread large segments benchmark completed",
            result,
            events,
            f"{self.LARGE_SIZE / (1024 * 1024):.0f}MB",
            total_size_mb,
        )

    # Parallel Tests
    @pytest.mark.benchmark(group="excavate_parallel_small")
    def test_excavate_parallel_tasks_small(self, benchmark):
        """Benchmark excavate parallel processing with small (4KB) segments"""
        text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_parallel_tasks(text_segments))

        result = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        print("\n✅ Parallel small segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each in parallel")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        print("📊 Tasks executed concurrently to test YARA GIL sidestep")

        # Basic assertion that excavate is actually working (should find URLs in our test content)
        assert len(result) > 0, "Expected excavate to process all segments"

    @pytest.mark.benchmark(group="excavate_parallel_large")
    def test_excavate_parallel_tasks_large(self, benchmark):
        """Benchmark excavate parallel processing with large (5MB) segments to test YARA GIL sidestep"""
        # (docstring fixed: LARGE_SIZE is 5242880 bytes = 5MB, not 10MB)
        text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)

        def run_test():
            return asyncio.run(self._run_excavate_parallel_tasks(text_segments))

        result = benchmark(run_test)

        assert len(result) == self.TEXT_SEGMENTS_COUNT
        total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
        print("\n✅ Parallel large segments benchmark completed")
        print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each in parallel")
        print(f"📊 Total size processed: {total_size_mb:.1f} MB")
        print("📊 Tasks executed concurrently to test YARA GIL sidestep")

        # Basic assertion that excavate is actually working (should find URLs in our test content)
        assert len(result) > 0, "Expected excavate to process all segments"
@@ -0,0 +1,143 @@
1
import pytest
import random
import string
from bbot.core.helpers.misc import make_ip_type, is_ip


class TestIPAddressBenchmarks:
    """
    Benchmark tests for IP address processing operations.

    These tests measure the performance of BBOT-level IP functions which are
    critical for network scanning efficiency and could benefit from different
    underlying implementations.
    """

    def setup_method(self):
        """Build deterministic valid/invalid/mixed IP datasets before each test."""
        # Fixed seed keeps every benchmark run working on identical data.
        random.seed(42)

        self.valid_ips = self._generate_valid_ips()
        self.invalid_ips = self._generate_invalid_ips()
        self.mixed_data = self._generate_mixed_data()

    def _generate_valid_ips(self):
        """Return well-formed IPv4/IPv6 addresses, CIDR networks, and dashed IP ranges."""
        rnd = random.randint

        # 1000 plain IPv4 addresses
        pool = [f"{rnd(1, 223)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(1, 254)}" for _ in range(1000)]

        # 500 fully-expanded IPv6 addresses (8 hex groups)
        pool += [":".join(f"{rnd(0, 65535):x}" for _ in range(8)) for _ in range(500)]

        # 500 IPv4 networks in CIDR notation
        pool += [f"{rnd(1, 223)}.{rnd(0, 255)}.{rnd(0, 255)}.0/{rnd(8, 30)}" for _ in range(500)]

        # 200 dashed IPv4 ranges (start always below end in the last octet)
        pool += [
            f"{rnd(1, 223)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(1, 200)}"
            f"-{rnd(1, 223)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(201, 254)}"
            for _ in range(200)
        ]

        return pool

    def _generate_invalid_ips(self):
        """Return malformed IPv4/IPv6 strings plus random alphanumeric junk."""
        rnd = random.randint
        bogus = []

        # 500 rounds of malformed IPv4: bad first octet, too few octets, too many octets
        for _ in range(500):
            bogus.append(f"{rnd(256, 999)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(0, 255)}")
            bogus.append(f"{rnd(0, 255)}.{rnd(0, 255)}.{rnd(0, 255)}")
            bogus.append(f"{rnd(0, 255)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(0, 255)}.{rnd(0, 255)}")

        # 300 IPv6-like strings with the wrong number of groups
        for _ in range(300):
            group_count = rnd(5, 10)  # valid IPv6 needs exactly 8
            bogus.append(":".join(f"{rnd(0, 65535):x}" for _ in range(group_count)))

        # 200 random alphanumeric strings
        alphabet = string.ascii_letters + string.digits
        for _ in range(200):
            bogus.append("".join(random.choices(alphabet, k=rnd(5, 20))))

        return bogus

    def _generate_mixed_data(self):
        """Blend 500 valid and 500 invalid entries in a reproducible shuffled order."""
        blend = self.valid_ips[:500] + self.invalid_ips[:500]
        # Re-seed before shuffling so the interleaving is identical on every run.
        random.seed(42)
        random.shuffle(blend)
        return blend

    @pytest.mark.benchmark(group="ip_validation")
    def test_is_ip_performance(self, benchmark):
        """Benchmark IP validation performance with mixed data"""

        def validate_ips():
            # Count how many candidates is_ip() accepts.
            return sum(1 for candidate in self.mixed_data if is_ip(candidate))

        result = benchmark(validate_ips)
        assert result > 0

    @pytest.mark.benchmark(group="ip_type_detection")
    def test_make_ip_type_performance(self, benchmark):
        """Benchmark IP type detection performance"""

        def detect_ip_types():
            recognized = 0
            for candidate in self.valid_ips:
                try:
                    make_ip_type(candidate)
                except Exception:
                    # Entries make_ip_type cannot parse simply don't count.
                    continue
                recognized += 1
            return recognized

        result = benchmark(detect_ip_types)
        assert result > 0

    @pytest.mark.benchmark(group="ip_processing")
    def test_mixed_ip_operations(self, benchmark):
        """Benchmark combined IP validation + type detection"""

        def process_ips():
            handled = 0
            for candidate in self.mixed_data:
                if not is_ip(candidate):
                    continue
                try:
                    make_ip_type(candidate)
                except Exception:
                    continue
                handled += 1
            return handled

        result = benchmark(process_ips)
        assert result > 0
@@ -0,0 +1,70 @@
1
import pytest
import random
from bbot.core.helpers.misc import weighted_shuffle


class TestWeightedShuffleBenchmarks:
    """
    Benchmark tests for weighted_shuffle operations.

    This function is critical for BBOT's queue management, where it shuffles
    incoming queues based on module priority weights. Performance here directly
    impacts scan throughput and responsiveness.
    """

    def setup_method(self):
        """Setup common test data"""
        # Set deterministic seed for consistent benchmark results
        random.seed(42)  # Fixed seed for reproducible results

        # Generate test data of different sizes and complexity
        # NOTE(review): small_data and large_data are built but not used by any
        # benchmark below; kept for future size-scaling tests — consider removing.
        self.small_data = self._generate_small_dataset()
        self.medium_data = self._generate_medium_dataset()
        self.large_data = self._generate_large_dataset()
        self.priority_weights = self._generate_priority_weights()

    def _generate_small_dataset(self):
        """Generate small dataset (like few modules)"""
        return {"items": ["module_a", "module_b", "module_c"], "weights": [0.6, 0.3, 0.1]}

    def _generate_medium_dataset(self):
        """Generate medium dataset (like typical scan): 20 modules with random weights."""
        items = [f"module_{i}" for i in range(20)]
        weights = [random.uniform(0.1, 1.0) for _ in range(20)]
        return {"items": items, "weights": weights}

    def _generate_large_dataset(self):
        """Generate large dataset (like complex scan): 100 modules with random weights."""
        items = [f"module_{i}" for i in range(100)]
        weights = [random.uniform(0.1, 1.0) for _ in range(100)]
        return {"items": items, "weights": weights}

    def _generate_priority_weights(self):
        """Generate realistic priority weights (like BBOT module priorities)"""
        # BBOT uses priorities 1-5, where lower priority = higher weight.
        # Weights are calculated as [5] + [6 - m.priority for m in modules].
        # [5] contributes 1 item, the 5-element pattern repeats 20 times:
        # 1 + 5*20 = 101 items (original comment incorrectly said 105).
        priorities = [5] + [6 - p for p in [1, 2, 3, 4, 5]] * 20
        items = [f"queue_{i}" for i in range(len(priorities))]
        return {"items": items, "weights": priorities}

    @pytest.mark.benchmark(group="weighted_shuffle")
    def test_typical_queue_shuffle(self, benchmark):
        """Benchmark weighted shuffle with typical BBOT scan workload"""

        def shuffle_typical():
            return weighted_shuffle(self.medium_data["items"], self.medium_data["weights"])

        result = benchmark(shuffle_typical)
        # The shuffle must be a permutation: same length, same members.
        assert len(result) == 20
        assert all(item in result for item in self.medium_data["items"])

    @pytest.mark.benchmark(group="weighted_shuffle")
    def test_priority_queue_shuffle(self, benchmark):
        """Benchmark weighted shuffle with realistic BBOT priority weights"""

        def shuffle_priorities():
            return weighted_shuffle(self.priority_weights["items"], self.priority_weights["weights"])

        result = benchmark(shuffle_priorities)
        # Permutation check against the 101-item priority dataset.
        assert len(result) == len(self.priority_weights["items"])
        assert all(item in result for item in self.priority_weights["items"])
@@ -22,8 +22,8 @@ def test_bbot_multiprocess(bbot_httpserver):
22
22
  queue = multiprocessing.Queue()
23
23
  events_process = multiprocessing.Process(target=run_bbot_multiprocess, args=(queue,))
24
24
  events_process.start()
25
- events_process.join()
26
- events = queue.get()
25
+ events_process.join(timeout=300)
26
+ events = queue.get(timeout=10)
27
27
  assert len(events) >= 3
28
28
  scan_events = [e for e in events if e["type"] == "SCAN"]
29
29
  assert len(scan_events) == 2
@@ -209,7 +209,6 @@ async def test_events(events, helpers):
209
209
  javascript_event = scan.make_event("http://evilcorp.com/asdf/a.js?b=c#d", "URL_UNVERIFIED", parent=scan.root_event)
210
210
  assert "extension-js" in javascript_event.tags
211
211
  await scan.ingress_module.handle_event(javascript_event)
212
- assert "httpx-only" in javascript_event.tags
213
212
 
214
213
  # scope distance
215
214
  event1 = scan.make_event("1.2.3.4", dummy=True)
@@ -111,7 +111,6 @@ async def test_task_scan_handle_event_timeout(bbot_scanner):
111
111
  class LongBatchModule(BaseModule):
112
112
  watched_events = ["IP_ADDRESS"]
113
113
  handled_event = False
114
- canceled = False
115
114
  _name = "long_batch"
116
115
  _batch_size = 2
117
116
 
@@ -147,24 +146,18 @@ async def test_task_scan_handle_event_timeout(bbot_scanner):
147
146
 
148
147
  @pytest.mark.asyncio
149
148
  async def test_url_extension_handling(bbot_scanner):
150
- scan = bbot_scanner(config={"url_extension_blacklist": ["css"], "url_extension_httpx_only": ["js"]})
149
+ scan = bbot_scanner(config={"url_extension_blacklist": ["css"]})
151
150
  await scan._prep()
152
151
  assert scan.url_extension_blacklist == {"css"}
153
- assert scan.url_extension_httpx_only == {"js"}
154
152
  good_event = scan.make_event("https://evilcorp.com/a.txt", "URL", tags=["status-200"], parent=scan.root_event)
155
153
  bad_event = scan.make_event("https://evilcorp.com/a.css", "URL", tags=["status-200"], parent=scan.root_event)
156
- httpx_event = scan.make_event("https://evilcorp.com/a.js", "URL", tags=["status-200"], parent=scan.root_event)
157
154
  assert "blacklisted" not in bad_event.tags
158
- assert "httpx-only" not in httpx_event.tags
159
155
  result = await scan.ingress_module.handle_event(good_event)
160
156
  assert result is None
161
157
  result, reason = await scan.ingress_module.handle_event(bad_event)
162
158
  assert result is False
163
159
  assert reason == "event is blacklisted"
164
160
  assert "blacklisted" in bad_event.tags
165
- result = await scan.ingress_module.handle_event(httpx_event)
166
- assert result is None
167
- assert "httpx-only" in httpx_event.tags
168
161
 
169
162
  await scan._cleanup()
170
163
 
@@ -61,6 +61,7 @@ class ModuleTestBase:
61
61
  config=self.config,
62
62
  whitelist=module_test_base.whitelist,
63
63
  blacklist=module_test_base.blacklist,
64
+ force_start=getattr(module_test_base, "force_start", False),
64
65
  )
65
66
  self.events = []
66
67
  self.log = logging.getLogger(f"bbot.test.{module_test_base.name}")
@@ -108,10 +109,14 @@ class ModuleTestBase:
108
109
  self.log.debug("Executing setup_after_prep()")
109
110
  await self.setup_after_prep(module_test)
110
111
  self.log.debug("Starting scan")
111
- module_test.events = [e async for e in module_test.scan.async_start()]
112
+ await self._execute_scan(module_test)
112
113
  self.log.debug(f"Finished {module_test.name} module test")
113
114
  yield module_test
114
115
 
116
+ async def _execute_scan(self, module_test):
117
+ """Execute the scan and collect events. Can be overridden by benchmark classes."""
118
+ module_test.events = [e async for e in module_test.scan.async_start()]
119
+
115
120
  @pytest.mark.asyncio
116
121
  async def test_module_run(self, module_test):
117
122
  from bbot.core.helpers.misc import execute_sync_or_async
@@ -6,11 +6,12 @@ raw_bimi_txt_default = (
6
6
  raw_bimi_txt_nondefault = '"v=BIMI1; l=https://nondefault.thirdparty.tld/brand/logo.svg;a=https://nondefault.thirdparty.tld/brand/certificate.pem;"'
7
7
 
8
8
 
9
- class TestBIMI(ModuleTestBase):
9
+ class TestDnsbimi(ModuleTestBase):
10
10
  targets = ["test.localdomain"]
11
11
  modules_overrides = ["dnsbimi", "speculate"]
12
12
  config_overrides = {
13
13
  "modules": {"dnsbimi": {"emit_raw_dns_records": True, "selectors": "default,nondefault"}},
14
+ "omit_event_types": ["HTTP_RESPONSE", "RAW_TEXT", "DNS_NAME_UNRESOLVED", "FILESYSTEM", "WEB_PARAMETER"],
14
15
  }
15
16
 
16
17
  async def setup_after_prep(self, module_test):