bbot 2.5.0__py3-none-any.whl → 2.7.2.7424rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +22 -8
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/__init__.py +2 -2
  5. bbot/core/event/base.py +138 -110
  6. bbot/core/flags.py +1 -0
  7. bbot/core/helpers/bloom.py +6 -7
  8. bbot/core/helpers/command.py +5 -2
  9. bbot/core/helpers/depsinstaller/installer.py +78 -7
  10. bbot/core/helpers/dns/dns.py +0 -1
  11. bbot/core/helpers/dns/engine.py +0 -2
  12. bbot/core/helpers/files.py +2 -2
  13. bbot/core/helpers/git.py +17 -0
  14. bbot/core/helpers/helper.py +6 -5
  15. bbot/core/helpers/misc.py +15 -28
  16. bbot/core/helpers/names_generator.py +5 -0
  17. bbot/core/helpers/ntlm.py +0 -2
  18. bbot/core/helpers/regex.py +1 -1
  19. bbot/core/helpers/regexes.py +25 -8
  20. bbot/core/helpers/web/engine.py +1 -1
  21. bbot/core/helpers/web/web.py +2 -1
  22. bbot/core/modules.py +22 -60
  23. bbot/core/shared_deps.py +38 -0
  24. bbot/defaults.yml +4 -2
  25. bbot/modules/apkpure.py +2 -2
  26. bbot/modules/aspnet_bin_exposure.py +80 -0
  27. bbot/modules/baddns.py +1 -1
  28. bbot/modules/baddns_direct.py +1 -1
  29. bbot/modules/baddns_zone.py +1 -1
  30. bbot/modules/badsecrets.py +1 -1
  31. bbot/modules/base.py +129 -40
  32. bbot/modules/bucket_amazon.py +1 -1
  33. bbot/modules/bucket_digitalocean.py +1 -1
  34. bbot/modules/bucket_firebase.py +1 -1
  35. bbot/modules/bucket_google.py +1 -1
  36. bbot/modules/{bucket_azure.py → bucket_microsoft.py} +2 -2
  37. bbot/modules/builtwith.py +4 -2
  38. bbot/modules/c99.py +1 -1
  39. bbot/modules/dnsbimi.py +1 -4
  40. bbot/modules/dnsbrute.py +6 -1
  41. bbot/modules/dnscommonsrv.py +1 -0
  42. bbot/modules/dnsdumpster.py +35 -52
  43. bbot/modules/dnstlsrpt.py +0 -6
  44. bbot/modules/docker_pull.py +2 -2
  45. bbot/modules/emailformat.py +17 -1
  46. bbot/modules/ffuf.py +4 -1
  47. bbot/modules/ffuf_shortnames.py +6 -3
  48. bbot/modules/filedownload.py +8 -5
  49. bbot/modules/fullhunt.py +1 -1
  50. bbot/modules/git_clone.py +47 -22
  51. bbot/modules/gitdumper.py +5 -15
  52. bbot/modules/github_workflows.py +6 -5
  53. bbot/modules/gitlab_com.py +31 -0
  54. bbot/modules/gitlab_onprem.py +84 -0
  55. bbot/modules/gowitness.py +60 -30
  56. bbot/modules/graphql_introspection.py +145 -0
  57. bbot/modules/httpx.py +2 -0
  58. bbot/modules/hunt.py +10 -3
  59. bbot/modules/iis_shortnames.py +16 -7
  60. bbot/modules/internal/cloudcheck.py +65 -72
  61. bbot/modules/internal/unarchive.py +9 -3
  62. bbot/modules/lightfuzz/lightfuzz.py +6 -2
  63. bbot/modules/lightfuzz/submodules/esi.py +42 -0
  64. bbot/modules/{deadly/medusa.py → medusa.py} +4 -7
  65. bbot/modules/nuclei.py +2 -2
  66. bbot/modules/otx.py +9 -2
  67. bbot/modules/output/base.py +3 -11
  68. bbot/modules/paramminer_headers.py +10 -7
  69. bbot/modules/passivetotal.py +1 -1
  70. bbot/modules/portfilter.py +2 -0
  71. bbot/modules/portscan.py +1 -1
  72. bbot/modules/postman_download.py +2 -2
  73. bbot/modules/retirejs.py +232 -0
  74. bbot/modules/securitytxt.py +0 -3
  75. bbot/modules/sslcert.py +2 -2
  76. bbot/modules/subdomaincenter.py +1 -16
  77. bbot/modules/telerik.py +7 -2
  78. bbot/modules/templates/bucket.py +24 -4
  79. bbot/modules/templates/gitlab.py +98 -0
  80. bbot/modules/trufflehog.py +7 -4
  81. bbot/modules/wafw00f.py +2 -2
  82. bbot/presets/web/dotnet-audit.yml +1 -0
  83. bbot/presets/web/lightfuzz-heavy.yml +1 -1
  84. bbot/presets/web/lightfuzz-medium.yml +1 -1
  85. bbot/presets/web/lightfuzz-superheavy.yml +1 -1
  86. bbot/scanner/manager.py +44 -37
  87. bbot/scanner/scanner.py +17 -4
  88. bbot/scripts/benchmark_report.py +433 -0
  89. bbot/test/benchmarks/__init__.py +2 -0
  90. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  91. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  92. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  93. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  94. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  95. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  96. bbot/test/conftest.py +1 -1
  97. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  98. bbot/test/test_step_1/test_events.py +22 -21
  99. bbot/test/test_step_1/test_helpers.py +20 -0
  100. bbot/test/test_step_1/test_manager_scope_accuracy.py +45 -0
  101. bbot/test/test_step_1/test_modules_basic.py +40 -15
  102. bbot/test/test_step_1/test_python_api.py +2 -2
  103. bbot/test/test_step_1/test_regexes.py +21 -4
  104. bbot/test/test_step_1/test_scan.py +7 -8
  105. bbot/test/test_step_1/test_web.py +46 -0
  106. bbot/test/test_step_2/module_tests/base.py +6 -1
  107. bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py +73 -0
  108. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +52 -18
  109. bbot/test/test_step_2/module_tests/test_module_bucket_google.py +1 -1
  110. bbot/test/test_step_2/module_tests/{test_module_bucket_azure.py → test_module_bucket_microsoft.py} +7 -5
  111. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +19 -31
  112. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  113. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  114. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  115. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  116. bbot/test/test_step_2/module_tests/test_module_excavate.py +64 -5
  117. bbot/test/test_step_2/module_tests/test_module_extractous.py +13 -1
  118. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +10 -1
  119. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  120. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  121. bbot/test/test_step_2/module_tests/test_module_gowitness.py +5 -5
  122. bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py +34 -0
  123. bbot/test/test_step_2/module_tests/test_module_iis_shortnames.py +46 -1
  124. bbot/test/test_step_2/module_tests/test_module_jadx.py +9 -0
  125. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +71 -3
  126. bbot/test/test_step_2/module_tests/test_module_nuclei.py +8 -6
  127. bbot/test/test_step_2/module_tests/test_module_otx.py +3 -0
  128. bbot/test/test_step_2/module_tests/test_module_portfilter.py +2 -0
  129. bbot/test/test_step_2/module_tests/test_module_retirejs.py +161 -0
  130. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  131. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +10 -1
  132. bbot/test/test_step_2/module_tests/test_module_unarchive.py +9 -0
  133. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/METADATA +12 -9
  134. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/RECORD +137 -124
  135. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/WHEEL +1 -1
  136. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info/licenses}/LICENSE +98 -58
  137. bbot/modules/binaryedge.py +0 -42
  138. bbot/modules/censys.py +0 -98
  139. bbot/modules/gitlab.py +0 -141
  140. bbot/modules/zoomeye.py +0 -77
  141. bbot/test/test_step_2/module_tests/test_module_binaryedge.py +0 -33
  142. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  143. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  144. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/entry_points.txt +0 -0
bbot/test/benchmarks/test_excavate_benchmarks.py ADDED
@@ -0,0 +1,291 @@
+ import pytest
+ import asyncio
+ from bbot.scanner import Scanner
+
+
+ class TestExcavateDirectBenchmarks:
+     """
+     Direct benchmark tests for Excavate module operations.
+
+     These tests measure the performance of excavate's core YARA processing
+     by calling the excavate.search() method directly with specific text sizes,
+     in both a single-threaded loop and parallel asyncio tasks, to test the
+     GIL-sidestep feature of YARA.
+     """
+
+     # Number of text segments per test
+     TEXT_SEGMENTS_COUNT = 100
+
+     # Prescribed sizes for deterministic benchmarking (in bytes)
+     SMALL_SIZE = 4096  # 4KB
+     LARGE_SIZE = 5242880  # 5MB
+
+     def _generate_text_segments(self, target_size, count):
+         """Generate a list of text segments of the specified size"""
+         segments = []
+
+         for i in range(count):
+             # Generate realistic content that excavate can work with
+             base_content = self._generate_realistic_content(i)
+
+             # Pad to the exact target size with deterministic content
+             remaining_size = target_size - len(base_content)
+             if remaining_size > 0:
+                 # Use deterministic padding pattern
+                 padding_pattern = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+                 padding_repeats = (remaining_size // len(padding_pattern)) + 1
+                 padding = (padding_pattern * padding_repeats)[:remaining_size]
+                 content = base_content + padding
+             else:
+                 content = base_content[:target_size]
+
+             segments.append(content)
+
+         return segments
+
+     def _generate_realistic_content(self, index):
+         """Generate realistic content that excavate can extract from"""
+         return f"""
+         <html>
+         <head>
+             <title>Test Content {index}</title>
+             <script src="https://api{index}.example.com/js/app.js"></script>
+         </head>
+         <body>
+             <h1>Page {index}</h1>
+
+             <!-- URLs and subdomains -->
+             <a href="https://www{index}.example.com/page{index}">Link {index}</a>
+             <a href="https://cdn{index}.example.com/assets/">CDN {index}</a>
+             <img src="https://img{index}.example.com/photo{index}.jpg" />
+
+             <!-- Forms with parameters -->
+             <form action="/search{index}" method="GET">
+                 <input type="text" name="query{index}" value="test{index}">
+                 <input type="hidden" name="token{index}" value="abc123{index}">
+                 <button type="submit">Search</button>
+             </form>
+
+             <!-- API endpoints -->
+             <script>
+                 fetch('https://api{index}.example.com/v1/users/{index}')
+                     .then(response => response.json())
+                     .then(data => console.log(data));
+
+                 // WebSocket connection
+                 const ws = new WebSocket('wss://realtime{index}.example.com/socket');
+             </script>
+
+             <!-- Various protocols -->
+             <p>FTP: ftp://ftp{index}.example.com:21/files/</p>
+             <p>SSH: ssh://server{index}.example.com:22/</p>
+             <p>Email: contact{index}@example.com</p>
+
+             <!-- JSON data -->
+             <script type="application/json">
+                 {{
+                     "apiEndpoint{index}": "https://api{index}.example.com/data",
+                     "parameter{index}": "value{index}",
+                     "secretKey{index}": "sk_test_{index}_abcdef123456"
+                 }}
+             </script>
+
+             <!-- Comments with URLs -->
+             <!-- https://hidden{index}.example.com/admin -->
+             <!-- TODO: Check https://internal{index}.example.com/debug -->
+         </body>
+         </html>
+         """
+
+     async def _run_excavate_single_thread(self, text_segments):
+         """Run excavate processing in a single thread"""
+         # Create scanner and initialize excavate
+         scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
+         await scan._prep()
+         excavate_module = scan.modules.get("excavate")
+
+         if not excavate_module:
+             raise RuntimeError("Excavate module not found")
+
+         # Track events emitted by excavate
+         emitted_events = []
+
+         async def track_emit_event(event_data, *args, **kwargs):
+             emitted_events.append(event_data)
+
+         excavate_module.emit_event = track_emit_event
+
+         # Process all text segments sequentially
+         results = []
+         for i, text_segment in enumerate(text_segments):
+             # Create a mock HTTP_RESPONSE event
+             mock_event = scan.make_event(
+                 {
+                     "url": f"https://example.com/test/{i}",
+                     "method": "GET",
+                     "body": text_segment,
+                     "header-dict": {"Content-Type": ["text/html"]},
+                     "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
+                     "status_code": 200,
+                 },
+                 "HTTP_RESPONSE",
+                 parent=scan.root_event,
+             )
+
+             # Process with excavate
+             await excavate_module.search(text_segment, mock_event, "text/html", f"Single thread benchmark {i}")
+             results.append(f"processed_{i}")
+
+         return results, emitted_events
+
+     async def _run_excavate_parallel_tasks(self, text_segments):
+         """Run excavate processing with parallel asyncio tasks"""
+         # Create scanner and initialize excavate
+         scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
+         await scan._prep()
+         excavate_module = scan.modules.get("excavate")
+
+         if not excavate_module:
+             raise RuntimeError("Excavate module not found")
+
+         # Define async task to process a single text segment
+         async def process_segment(segment_index, text_segment):
+             mock_event = scan.make_event(
+                 {
+                     "url": f"https://example.com/parallel/{segment_index}",
+                     "method": "GET",
+                     "body": text_segment,
+                     "header-dict": {"Content-Type": ["text/html"]},
+                     "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
+                     "status_code": 200,
+                 },
+                 "HTTP_RESPONSE",
+                 parent=scan.root_event,
+             )
+
+             await excavate_module.search(
+                 text_segment, mock_event, "text/html", f"Parallel benchmark task {segment_index}"
+             )
+             return f"processed_{segment_index}"
+
+         # Create all tasks and run them concurrently
+         tasks = [process_segment(i, text_segment) for i, text_segment in enumerate(text_segments)]
+
+         # Run all tasks in parallel
+         results = await asyncio.gather(*tasks)
+         return results
+
+     # Single Thread Tests
+     @pytest.mark.benchmark(group="excavate_single_small")
+     def test_excavate_single_thread_small(self, benchmark):
+         """Benchmark excavate single-thread processing with small (4KB) segments"""
+         text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_single_thread(text_segments))
+
+         result, events = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+
+         # Count events by type
+         total_events = len(events)
+         url_events = len([e for e in events if e.type == "URL_UNVERIFIED"])
+         dns_events = len([e for e in events if e.type == "DNS_NAME"])
+         email_events = len([e for e in events if e.type == "EMAIL_ADDRESS"])
+         protocol_events = len([e for e in events if e.type == "PROTOCOL"])
+         finding_events = len([e for e in events if e.type == "FINDING"])
+
+         print("\n✅ Single-thread small segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print(f"📊 Total events: {total_events}")
+         print(f"📊 URL events: {url_events}")
+         print(f"📊 DNS events: {dns_events}")
+         print(f"📊 Email events: {email_events}")
+         print(f"📊 Protocol events: {protocol_events}")
+         print(f"📊 Finding events: {finding_events}")
+
+         # Validate that excavate actually found and processed content
+         assert total_events > 0, "Expected to find some events from excavate"
+         assert url_events > 0 or dns_events > 0 or protocol_events > 0, (
+             "Expected excavate to find URLs, DNS names, or protocols"
+         )
+
+     @pytest.mark.benchmark(group="excavate_single_large")
+     def test_excavate_single_thread_large(self, benchmark):
+         """Benchmark excavate single-thread processing with large (5MB) segments"""
+         text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_single_thread(text_segments))
+
+         result, events = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+
+         # Count events by type
+         total_events = len(events)
+         url_events = len([e for e in events if e.type == "URL_UNVERIFIED"])
+         dns_events = len([e for e in events if e.type == "DNS_NAME"])
+         email_events = len([e for e in events if e.type == "EMAIL_ADDRESS"])
+         protocol_events = len([e for e in events if e.type == "PROTOCOL"])
+         finding_events = len([e for e in events if e.type == "FINDING"])
+
+         print("\n✅ Single-thread large segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print(f"📊 Total events: {total_events}")
+         print(f"📊 URL events: {url_events}")
+         print(f"📊 DNS events: {dns_events}")
+         print(f"📊 Email events: {email_events}")
+         print(f"📊 Protocol events: {protocol_events}")
+         print(f"📊 Finding events: {finding_events}")
+
+         # Validate that excavate actually found and processed content
+         assert total_events > 0, "Expected to find some events from excavate"
+         assert url_events > 0 or dns_events > 0 or protocol_events > 0, (
+             "Expected excavate to find URLs, DNS names, or protocols"
+         )
+
+     # Parallel Tests
+     @pytest.mark.benchmark(group="excavate_parallel_small")
+     def test_excavate_parallel_tasks_small(self, benchmark):
+         """Benchmark excavate parallel processing with small (4KB) segments"""
+         text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_parallel_tasks(text_segments))
+
+         result = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+         print("\n✅ Parallel small segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each in parallel")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print("📊 Tasks executed concurrently to test YARA GIL sidestep")
+
+         # Basic assertion that excavate is actually working (should find URLs in our test content)
+         assert len(result) > 0, "Expected excavate to process all segments"
+
+     @pytest.mark.benchmark(group="excavate_parallel_large")
+     def test_excavate_parallel_tasks_large(self, benchmark):
+         """Benchmark excavate parallel processing with large (5MB) segments to test YARA GIL sidestep"""
+         text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_parallel_tasks(text_segments))
+
+         result = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+         print("\n✅ Parallel large segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each in parallel")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print("📊 Tasks executed concurrently to test YARA GIL sidestep")
+
+         # Basic assertion that excavate is actually working (should find URLs in our test content)
+         assert len(result) > 0, "Expected excavate to process all segments"
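A note on the "GIL sidestep" these benchmarks exercise: yara-python releases the GIL while libyara scans a buffer, which is what allows the parallel variants to overlap CPU-bound matching. The sketch below shows that pattern in isolation; the rule and all names are invented for the example and are not excavate's actual internals.

    import asyncio
    import yara  # pip install yara-python

    # hypothetical single-rule ruleset standing in for excavate's compiled rules
    RULES = yara.compile(source='rule url_hit { strings: $u = "https://" condition: $u }')

    async def scan_blobs(blobs):
        # rules.match() releases the GIL inside libyara, so asyncio.to_thread()
        # lets several large buffers be scanned truly in parallel
        return await asyncio.gather(*(asyncio.to_thread(RULES.match, data=b) for b in blobs))

    if __name__ == "__main__":
        blobs = [("visit https://example.com " * 50000).encode() for _ in range(8)]
        matches = asyncio.run(scan_blobs(blobs))
        print(f"{sum(len(m) for m in matches)} rule hits across {len(blobs)} blobs")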
bbot/test/benchmarks/test_ipaddress_benchmarks.py ADDED
@@ -0,0 +1,143 @@
+ import pytest
+ import random
+ import string
+ from bbot.core.helpers.misc import make_ip_type, is_ip
+
+
+ class TestIPAddressBenchmarks:
+     """
+     Benchmark tests for IP address processing operations.
+
+     These tests measure the performance of BBOT-level IP functions which are
+     critical for network scanning efficiency and could benefit from different
+     underlying implementations.
+     """
+
+     def setup_method(self):
+         """Setup common test data"""
+         # Set deterministic seed for consistent benchmark results
+         random.seed(42)  # Fixed seed for reproducible results
+
+         # Generate test data of different types and sizes
+         self.valid_ips = self._generate_valid_ips()
+         self.invalid_ips = self._generate_invalid_ips()
+         self.mixed_data = self._generate_mixed_data()
+
+     def _generate_valid_ips(self):
+         """Generate valid IP addresses for testing"""
+         valid_ips = []
+
+         # IPv4 addresses
+         for i in range(1000):
+             valid_ips.append(
+                 f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+             )
+
+         # IPv6 addresses
+         for i in range(500):
+             ipv6_parts = []
+             for j in range(8):
+                 ipv6_parts.append(f"{random.randint(0, 65535):x}")
+             valid_ips.append(":".join(ipv6_parts))
+
+         # Network addresses
+         for i in range(500):
+             base_ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.0"
+             valid_ips.append(f"{base_ip}/{random.randint(8, 30)}")
+
+         # IP ranges
+         for i in range(200):
+             start_ip = (
+                 f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 200)}"
+             )
+             end_ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(201, 254)}"
+             valid_ips.append(f"{start_ip}-{end_ip}")
+
+         return valid_ips
+
+     def _generate_invalid_ips(self):
+         """Generate invalid IP addresses for testing"""
+         invalid_ips = []
+
+         # Malformed IPv4
+         for i in range(500):
+             invalid_ips.append(
+                 f"{random.randint(256, 999)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
+             )
+             invalid_ips.append(f"{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}")
+             invalid_ips.append(
+                 f"{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
+             )
+
+         # Malformed IPv6
+         for i in range(300):
+             ipv6_parts = []
+             for j in range(random.randint(5, 10)):  # Wrong number of parts
+                 ipv6_parts.append(f"{random.randint(0, 65535):x}")
+             invalid_ips.append(":".join(ipv6_parts))
+
+         # Random strings
+         for i in range(200):
+             length = random.randint(5, 20)
+             invalid_ips.append("".join(random.choices(string.ascii_letters + string.digits, k=length)))
+
+         return invalid_ips
+
+     def _generate_mixed_data(self):
+         """Generate mixed valid/invalid data for realistic testing"""
+         mixed = []
+         mixed.extend(self.valid_ips[:500])  # First 500 valid
+         mixed.extend(self.invalid_ips[:500])  # First 500 invalid
+         # Use deterministic shuffle with fixed seed for consistent results
+         random.seed(42)  # Reset seed before shuffle
+         random.shuffle(mixed)  # Shuffle for realistic distribution
+         return mixed
+
+     @pytest.mark.benchmark(group="ip_validation")
+     def test_is_ip_performance(self, benchmark):
+         """Benchmark IP validation performance with mixed data"""
+
+         def validate_ips():
+             valid_count = 0
+             for ip in self.mixed_data:
+                 if is_ip(ip):
+                     valid_count += 1
+             return valid_count
+
+         result = benchmark(validate_ips)
+         assert result > 0
+
+     @pytest.mark.benchmark(group="ip_type_detection")
+     def test_make_ip_type_performance(self, benchmark):
+         """Benchmark IP type detection performance"""
+
+         def detect_ip_types():
+             type_count = 0
+             for ip in self.valid_ips:
+                 try:
+                     make_ip_type(ip)
+                     type_count += 1
+                 except Exception:
+                     pass
+             return type_count
+
+         result = benchmark(detect_ip_types)
+         assert result > 0
+
+     @pytest.mark.benchmark(group="ip_processing")
+     def test_mixed_ip_operations(self, benchmark):
+         """Benchmark combined IP validation + type detection"""
+
+         def process_ips():
+             processed = 0
+             for ip in self.mixed_data:
+                 if is_ip(ip):
+                     try:
+                         make_ip_type(ip)
+                         processed += 1
+                     except Exception:
+                         pass
+             return processed
+
+         result = benchmark(process_ips)
+         assert result > 0
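For reference, the helpers being timed can be approximated with the stdlib ipaddress module. The sketch below is a rough functional stand-in only; BBOT's real is_ip and make_ip_type handle more input shapes, such as the dash-separated ranges generated above.

    import ipaddress

    def is_ip_sketch(value):
        # hypothetical stand-in for is_ip(): does the value parse as an address?
        try:
            ipaddress.ip_address(value)
            return True
        except ValueError:
            return False

    def make_ip_type_sketch(value):
        # hypothetical stand-in for make_ip_type(): upgrade strings to IP objects,
        # falling back to the original string when nothing parses
        for parse in (ipaddress.ip_address, lambda v: ipaddress.ip_network(v, strict=False)):
            try:
                return parse(value)
            except ValueError:
                continue
        return value

    assert is_ip_sketch("8.8.8.8") and not is_ip_sketch("999.1.1.1")
    assert isinstance(make_ip_type_sketch("10.0.0.0/8"), ipaddress.IPv4Network)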
bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py ADDED
@@ -0,0 +1,70 @@
+ import pytest
+ import random
+ from bbot.core.helpers.misc import weighted_shuffle
+
+
+ class TestWeightedShuffleBenchmarks:
+     """
+     Benchmark tests for weighted_shuffle operations.
+
+     This function is critical for BBOT's queue management, where it shuffles
+     incoming queues based on module priority weights. Performance here directly
+     impacts scan throughput and responsiveness.
+     """
+
+     def setup_method(self):
+         """Setup common test data"""
+         # Set deterministic seed for consistent benchmark results
+         random.seed(42)  # Fixed seed for reproducible results
+
+         # Generate test data of different sizes and complexity
+         self.small_data = self._generate_small_dataset()
+         self.medium_data = self._generate_medium_dataset()
+         self.large_data = self._generate_large_dataset()
+         self.priority_weights = self._generate_priority_weights()
+
+     def _generate_small_dataset(self):
+         """Generate small dataset (like a scan with only a few modules)"""
+         return {"items": ["module_a", "module_b", "module_c"], "weights": [0.6, 0.3, 0.1]}
+
+     def _generate_medium_dataset(self):
+         """Generate medium dataset (like a typical scan)"""
+         items = [f"module_{i}" for i in range(20)]
+         weights = [random.uniform(0.1, 1.0) for _ in range(20)]
+         return {"items": items, "weights": weights}
+
+     def _generate_large_dataset(self):
+         """Generate large dataset (like a complex scan with many modules)"""
+         items = [f"module_{i}" for i in range(100)]
+         weights = [random.uniform(0.1, 1.0) for _ in range(100)]
+         return {"items": items, "weights": weights}
+
+     def _generate_priority_weights(self):
+         """Generate realistic priority weights (like BBOT module priorities)"""
+         # BBOT uses priorities 1-5, where a lower priority number = a higher weight
+         # Weights are calculated as [5] + [6 - m.priority for m in modules]
+         priorities = [5] + [6 - p for p in [1, 2, 3, 4, 5]] * 20  # 1 + 5*20 = 101 items
+         items = [f"queue_{i}" for i in range(len(priorities))]
+         return {"items": items, "weights": priorities}
+
+     @pytest.mark.benchmark(group="weighted_shuffle")
+     def test_typical_queue_shuffle(self, benchmark):
+         """Benchmark weighted shuffle with typical BBOT scan workload"""
+
+         def shuffle_typical():
+             return weighted_shuffle(self.medium_data["items"], self.medium_data["weights"])
+
+         result = benchmark(shuffle_typical)
+         assert len(result) == 20
+         assert all(item in result for item in self.medium_data["items"])
+
+     @pytest.mark.benchmark(group="weighted_shuffle")
+     def test_priority_queue_shuffle(self, benchmark):
+         """Benchmark weighted shuffle with realistic BBOT priority weights"""
+
+         def shuffle_priorities():
+             return weighted_shuffle(self.priority_weights["items"], self.priority_weights["weights"])
+
+         result = benchmark(shuffle_priorities)
+         assert len(result) == len(self.priority_weights["items"])
+         assert all(item in result for item in self.priority_weights["items"])
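As a reference point for what these benchmarks time, a weighted shuffle can be implemented as repeated weighted draws without replacement. This sketch satisfies the same contract the tests assert (a permutation of the input, biased toward higher weights); it is not necessarily BBOT's implementation.

    import random

    def weighted_shuffle_sketch(items, weights):
        # draw one item at a time with probability proportional to its weight,
        # removing each winner from the pool (written for clarity, not speed)
        pool = list(zip(items, weights))
        shuffled = []
        while pool:
            total = sum(w for _, w in pool)
            pick = random.uniform(0, total)
            cumulative = 0.0
            for i, (item, weight) in enumerate(pool):
                cumulative += weight
                if pick <= cumulative:
                    shuffled.append(item)
                    pool.pop(i)
                    break
        return shuffled

    result = weighted_shuffle_sketch(["a", "b", "c"], [0.6, 0.3, 0.1])
    assert sorted(result) == ["a", "b", "c"]  # always a permutation of the input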
bbot/test/conftest.py CHANGED
@@ -1,8 +1,8 @@
  import os
  import ssl
  import time
- import shutil
  import pytest
+ import shutil
  import asyncio
  import logging
  from pathlib import Path
bbot/test/test_step_1/test_bbot_fastapi.py CHANGED
@@ -22,8 +22,8 @@ def test_bbot_multiprocess(bbot_httpserver):
      queue = multiprocessing.Queue()
      events_process = multiprocessing.Process(target=run_bbot_multiprocess, args=(queue,))
      events_process.start()
-     events_process.join()
-     events = queue.get()
+     events_process.join(timeout=300)
+     events = queue.get(timeout=10)
      assert len(events) >= 3
      scan_events = [e for e in events if e["type"] == "SCAN"]
      assert len(scan_events) == 2
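The added timeouts guard against a well-known multiprocessing pitfall: a child that has put a large payload on a Queue can block in its feeder thread until the parent drains the queue, so an unbounded join() may hang the test run indefinitely. A standalone illustration of the bounded pattern (not the test itself):

    import multiprocessing

    def producer(queue):
        queue.put(["event"] * 100_000)  # large payload keeps the feeder thread busy

    if __name__ == "__main__":
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=producer, args=(queue,))
        proc.start()
        events = queue.get(timeout=10)  # drain with a bounded wait...
        proc.join(timeout=300)          # ...so a wedged child cannot hang CI forever
        print(f"received {len(events)} events")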
bbot/test/test_step_1/test_events.py CHANGED
@@ -42,7 +42,7 @@ async def test_events(events, helpers):
      assert events.ipv4 == scan.make_event("8.8.8.8", dummy=True)
      assert "8.8.8.8" in events.ipv4
      assert events.ipv4.host_filterable == "8.8.8.8"
-     assert "8.8.8.8" == events.ipv4
+     assert events.ipv4.data == "8.8.8.8"
      assert "8.8.8.8" in events.netv4
      assert "8.8.8.9" not in events.ipv4
      assert "8.8.9.8" not in events.netv4
@@ -60,7 +60,7 @@ async def test_events(events, helpers):
      assert events.emoji not in events.netv6
      assert events.netv6 not in events.emoji
      ipv6_event = scan.make_event(" [DEaD::c0De]:88", "DNS_NAME", dummy=True)
-     assert "dead::c0de" == ipv6_event
+     assert ipv6_event.data == "dead::c0de"
      assert ipv6_event.host_filterable == "dead::c0de"
      range_to_ip = scan.make_event("1.2.3.4/32", dummy=True)
      assert range_to_ip.type == "IP_ADDRESS"
@@ -87,7 +87,7 @@ async def test_events(events, helpers):
      open_port_event = scan.make_event(" eViLcorp.COM.:88", "DNS_NAME", dummy=True)
      dns_event = scan.make_event("evilcorp.com.", "DNS_NAME", dummy=True)
      for e in (open_port_event, dns_event):
-         assert "evilcorp.com" == e
+         assert e.data == "evilcorp.com"
          assert e.netloc == "evilcorp.com"
          assert e.json()["netloc"] == "evilcorp.com"
          assert e.port is None
@@ -117,17 +117,19 @@ async def test_events(events, helpers):
      assert events.emoji not in events.url_unverified
      assert events.emoji not in events.ipv6_url_unverified
      assert events.url_unverified not in events.emoji
-     assert "https://evilcorp.com" == scan.make_event("https://evilcorp.com:443", dummy=True)
-     assert "http://evilcorp.com" == scan.make_event("http://evilcorp.com:80", dummy=True)
+
+     # URL normalization tests – compare against normalized event.data / .with_port().geturl()
+     assert scan.make_event("https://evilcorp.com:443", dummy=True).data == "https://evilcorp.com/"
+     assert scan.make_event("http://evilcorp.com:80", dummy=True).data == "http://evilcorp.com/"
      assert "http://evilcorp.com:80/asdf.js" in scan.make_event("http://evilcorp.com/asdf.js", dummy=True)
      assert "http://evilcorp.com/asdf.js" in scan.make_event("http://evilcorp.com:80/asdf.js", dummy=True)
-     assert "https://evilcorp.com:443" == scan.make_event("https://evilcorp.com", dummy=True)
-     assert "http://evilcorp.com:80" == scan.make_event("http://evilcorp.com", dummy=True)
-     assert "https://evilcorp.com:80" == scan.make_event("https://evilcorp.com:80", dummy=True)
-     assert "http://evilcorp.com:443" == scan.make_event("http://evilcorp.com:443", dummy=True)
+     assert scan.make_event("https://evilcorp.com", dummy=True).data == "https://evilcorp.com/"
+     assert scan.make_event("http://evilcorp.com", dummy=True).data == "http://evilcorp.com/"
+     assert scan.make_event("https://evilcorp.com:80", dummy=True).data == "https://evilcorp.com:80/"
+     assert scan.make_event("http://evilcorp.com:443", dummy=True).data == "http://evilcorp.com:443/"
      assert scan.make_event("https://evilcorp.com", dummy=True).with_port().geturl() == "https://evilcorp.com:443/"
      assert scan.make_event("https://evilcorp.com:666", dummy=True).with_port().geturl() == "https://evilcorp.com:666/"
-     assert scan.make_event("https://evilcorp.com.:666", dummy=True) == "https://evilcorp.com:666/"
+     assert scan.make_event("https://evilcorp.com.:666", dummy=True).data == "https://evilcorp.com:666/"
      assert scan.make_event("https://[bad::c0de]", dummy=True).with_port().geturl() == "https://[bad::c0de]:443/"
      assert scan.make_event("https://[bad::c0de]:666", dummy=True).with_port().geturl() == "https://[bad::c0de]:666/"
      url_event = scan.make_event("https://evilcorp.com", "URL", events.ipv4_url, tags=["status-200"])
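The expected values above encode the normalization rules BBOT applies to URLs: default ports are dropped, non-default ports are kept, trailing dots on the host are stripped, and an empty path becomes "/". A rough sketch of those rules as a hypothetical helper (it ignores IPv6 re-bracketing and is not BBOT's actual code):

    from urllib.parse import urlparse, urlunparse

    DEFAULT_PORTS = {"http": 80, "https": 443}

    def normalize_url_sketch(url):
        parsed = urlparse(url)
        host = (parsed.hostname or "").rstrip(".")  # "evilcorp.com." -> "evilcorp.com"
        if parsed.port and parsed.port != DEFAULT_PORTS.get(parsed.scheme):
            host = f"{host}:{parsed.port}"          # keep only non-default ports
        return urlunparse((parsed.scheme, host, parsed.path or "/", "", parsed.query, ""))

    assert normalize_url_sketch("https://evilcorp.com:443") == "https://evilcorp.com/"
    assert normalize_url_sketch("https://evilcorp.com:80") == "https://evilcorp.com:80/"
    assert normalize_url_sketch("https://evilcorp.com.:666") == "https://evilcorp.com:666/"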
@@ -209,7 +211,6 @@ async def test_events(events, helpers):
      javascript_event = scan.make_event("http://evilcorp.com/asdf/a.js?b=c#d", "URL_UNVERIFIED", parent=scan.root_event)
      assert "extension-js" in javascript_event.tags
      await scan.ingress_module.handle_event(javascript_event)
-     assert "httpx-only" in javascript_event.tags

      # scope distance
      event1 = scan.make_event("1.2.3.4", dummy=True)
@@ -261,21 +262,21 @@ async def test_events(events, helpers):
      )
      assert event.discovery_context == "something discovered IP_ADDRESS: 127.0.0.1"

-     # updating an already-created event with make_event()
+     # updating an already-created event with update_event()
      # updating tags
      event1 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event1, tags="asdf")
-     assert "asdf" not in event1.tags
+     updated_event = scan.update_event(event1, tags="asdf")
+     # assert "asdf" not in event1.tags  # why was this test added? why is it important the original event stays untouched? 🤔
      assert "asdf" in updated_event.tags
      # updating parent
      event2 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event2, parent=event1)
-     assert event2.parent == scan.root_event
+     updated_event = scan.update_event(event2, parent=event1)
+     # assert event2.parent == scan.root_event
      assert updated_event.parent == event1
-     # updating module
+     # updating module/internal flag
      event3 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event3, internal=True)
-     assert event3.internal is False
+     updated_event = scan.update_event(event3, internal=True)
+     # assert event3.internal is False
      assert updated_event.internal is True

      # event sorting
@@ -1056,13 +1057,13 @@ async def test_mobile_app():

  @pytest.mark.asyncio
  async def test_filesystem():
-     scan = Scanner("FILESYSTEM:/tmp/asdf")
+     scan = Scanner("FILESYSTEM:/tmp/asdfasdgasdfasdfddsdf")
      events = [e async for e in scan.async_start()]
      assert len(events) == 3
      filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
      assert len(filesystem_events) == 1
      assert filesystem_events[0].type == "FILESYSTEM"
-     assert filesystem_events[0].data == {"path": "/tmp/asdf"}
+     assert filesystem_events[0].data == {"path": "/tmp/asdfasdgasdfasdfddsdf"}


  def test_event_hashing():
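The new target swaps /tmp/asdf for a path that is very unlikely to exist on the test host, so the FILESYSTEM event count stays deterministic. An aside on the design choice: where the target syntax permits, generating the path with tempfile sidesteps the collision entirely (a sketch, not part of the diff):

    import tempfile
    from pathlib import Path

    # build a FILESYSTEM target from a directory that is guaranteed fresh
    with tempfile.TemporaryDirectory(prefix="bbot-test-") as tmp:
        target = f"FILESYSTEM:{Path(tmp) / 'payload'}"
        print(target)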
bbot/test/test_step_1/test_helpers.py CHANGED
@@ -155,6 +155,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver):
      assert helpers.extract_host("https://[dead::beef]:22?a=b") == ("dead::beef", "https://[", "]:22?a=b")
      assert helpers.extract_host("https://[dead::beef]/?a=b") == ("dead::beef", "https://[", "]/?a=b")
      assert helpers.extract_host("https://[dead::beef]?a=b") == ("dead::beef", "https://[", "]?a=b")
+     assert helpers.extract_host("https://[::1]") == ("::1", "https://[", "]")
      assert helpers.extract_host("ftp://username:password@my-ftp.com/my-file.csv") == (
          "my-ftp.com",
          "ftp://username:password@",
@@ -954,3 +955,22 @@
          assert p in cookie_valid_params and p not in cookie_invalid_params
      else:
          assert p in cookie_invalid_params and p not in cookie_valid_params


+ @pytest.mark.asyncio
+ async def test_rm_temp_dir_at_exit(helpers):
+     from bbot.scanner import Scanner
+
+     scan = Scanner("127.0.0.1", modules=["httpx"])
+     await scan._prep()
+
+     temp_dir = scan.home / "temp"
+
+     # temp dir should exist
+     assert temp_dir.exists()
+
+     events = [e async for e in scan.async_start()]
+     assert events
+
+     # temp dir should be removed
+     assert not temp_dir.exists()
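The new test pins down a lifecycle guarantee: the scanner's temp directory exists after _prep() and is gone once the scan completes. One common way to provide such a guarantee is an atexit hook, shown here as a sketch only (BBOT may instead hook cleanup into scan teardown rather than interpreter exit):

    import atexit
    import shutil
    import tempfile
    from pathlib import Path

    temp_dir = Path(tempfile.mkdtemp(prefix="bbot-temp-"))
    # best-effort removal when the process exits; ignore_errors prevents a
    # half-deleted directory from raising during interpreter shutdown
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
    print(f"scratch space at {temp_dir}")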