bbot 2.6.0.6879rc0__py3-none-any.whl → 2.7.2.7254rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbot might be problematic. Click here for more details.

Files changed (75)
  1. bbot/__init__.py +1 -1
  2. bbot/core/engine.py +1 -1
  3. bbot/core/flags.py +1 -0
  4. bbot/core/helpers/bloom.py +6 -7
  5. bbot/core/helpers/dns/dns.py +0 -1
  6. bbot/core/helpers/dns/engine.py +0 -2
  7. bbot/core/helpers/files.py +2 -2
  8. bbot/core/helpers/git.py +17 -0
  9. bbot/core/helpers/misc.py +1 -0
  10. bbot/core/helpers/ntlm.py +0 -2
  11. bbot/core/helpers/regex.py +1 -1
  12. bbot/core/modules.py +0 -54
  13. bbot/defaults.yml +4 -2
  14. bbot/modules/apkpure.py +1 -1
  15. bbot/modules/base.py +11 -5
  16. bbot/modules/dnsbimi.py +1 -4
  17. bbot/modules/dnsdumpster.py +35 -52
  18. bbot/modules/dnstlsrpt.py +0 -6
  19. bbot/modules/docker_pull.py +1 -1
  20. bbot/modules/emailformat.py +17 -1
  21. bbot/modules/filedownload.py +1 -1
  22. bbot/modules/git_clone.py +47 -22
  23. bbot/modules/gitdumper.py +4 -14
  24. bbot/modules/github_workflows.py +1 -1
  25. bbot/modules/gitlab_com.py +31 -0
  26. bbot/modules/gitlab_onprem.py +84 -0
  27. bbot/modules/gowitness.py +0 -6
  28. bbot/modules/graphql_introspection.py +5 -2
  29. bbot/modules/httpx.py +2 -0
  30. bbot/modules/iis_shortnames.py +0 -7
  31. bbot/modules/internal/unarchive.py +9 -3
  32. bbot/modules/lightfuzz/lightfuzz.py +5 -1
  33. bbot/modules/nuclei.py +1 -1
  34. bbot/modules/output/base.py +0 -5
  35. bbot/modules/postman_download.py +1 -1
  36. bbot/modules/retirejs.py +232 -0
  37. bbot/modules/securitytxt.py +0 -3
  38. bbot/modules/subdomaincenter.py +1 -16
  39. bbot/modules/telerik.py +6 -1
  40. bbot/modules/templates/gitlab.py +98 -0
  41. bbot/modules/trufflehog.py +1 -1
  42. bbot/scanner/manager.py +7 -4
  43. bbot/scanner/scanner.py +1 -1
  44. bbot/scripts/benchmark_report.py +433 -0
  45. bbot/test/benchmarks/__init__.py +2 -0
  46. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  47. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  48. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  49. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  50. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  51. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  52. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  53. bbot/test/test_step_1/test_events.py +0 -1
  54. bbot/test/test_step_1/test_scan.py +1 -8
  55. bbot/test/test_step_2/module_tests/base.py +6 -1
  56. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  57. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  58. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  59. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  60. bbot/test/test_step_2/module_tests/test_module_excavate.py +35 -6
  61. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  62. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  63. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +2 -2
  64. bbot/test/test_step_2/module_tests/test_module_retirejs.py +159 -0
  65. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  66. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/METADATA +7 -4
  67. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/RECORD +70 -60
  68. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/WHEEL +1 -1
  69. bbot/modules/censys.py +0 -98
  70. bbot/modules/gitlab.py +0 -141
  71. bbot/modules/zoomeye.py +0 -77
  72. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  73. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  74. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info}/entry_points.txt +0 -0
  75. {bbot-2.6.0.6879rc0.dist-info → bbot-2.7.2.7254rc0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,76 @@
1
+ import pytest
2
+ import random
3
+ from bbot.core.helpers.misc import closest_match
4
+
5
+
6
+ class TestClosestMatchBenchmarks:
7
+ """
8
+ Benchmark tests for closest_match operations.
9
+
10
+ This function is critical for BBOT's DNS brute forcing, where it finds the best
11
+ matching parent event among thousands of choices. Performance here directly impacts
12
+ scan throughput and DNS mutation efficiency.
13
+ """
14
+
15
+ def setup_method(self):
16
+ """Setup common test data"""
17
+ # Set deterministic seed for consistent benchmark results
18
+ random.seed(42) # Fixed seed for reproducible results
19
+
20
+ # Generate test data for benchmarks
21
+ self.large_closest_match_choices = self._generate_large_closest_match_choices()
22
+ self.realistic_closest_match_choices = self._generate_realistic_closest_match_choices()
23
+
24
+ def _generate_large_closest_match_choices(self):
25
+ """Generate large closest match dataset (stress test with many parent events)"""
26
+ choices = []
27
+ for i in range(10000):
28
+ # Generate realistic domain names with more variety
29
+ tld = random.choice(["com", "net", "org", "io", "co", "dev"])
30
+ domain = f"subdomain{i}.example{i % 100}.{tld}"
31
+ choices.append(domain)
32
+ return choices
33
+
34
+ def _generate_realistic_closest_match_choices(self):
35
+ """Generate realistic closest match parent event choices (like actual BBOT usage)"""
36
+ choices = []
37
+
38
+ # Common TLDs
39
+ tlds = ["com", "net", "org", "io", "co", "dev", "test", "local"]
40
+
41
+ # Generate parent domains with realistic patterns
42
+ for i in range(5000):
43
+ # Base domain patterns
44
+ if i % 10 == 0:
45
+ # Simple domains
46
+ domain = f"example{i}.{random.choice(tlds)}"
47
+ elif i % 5 == 0:
48
+ # Multi-level domains
49
+ domain = f"sub{i}.example{i}.{random.choice(tlds)}"
50
+ else:
51
+ # Complex domains
52
+ domain = f"level1{i}.level2{i}.example{i}.{random.choice(tlds)}"
53
+
54
+ choices.append(domain)
55
+
56
+ return choices
57
+
58
+ @pytest.mark.benchmark(group="closest_match")
59
+ def test_large_closest_match_lookup(self, benchmark):
60
+ """Benchmark closest_match with large closest match workload (many parent events)"""
61
+
62
+ def find_large_closest_match():
63
+ return closest_match("subdomain5678.example50.com", self.large_closest_match_choices)
64
+
65
+ result = benchmark.pedantic(find_large_closest_match, iterations=50, rounds=10)
66
+ assert result is not None
67
+
68
+ @pytest.mark.benchmark(group="closest_match")
69
+ def test_realistic_closest_match_workload(self, benchmark):
70
+ """Benchmark closest_match with realistic BBOT closest match parent event choices"""
71
+
72
+ def find_realistic_closest_match():
73
+ return closest_match("subdomain123.example5.com", self.realistic_closest_match_choices)
74
+
75
+ result = benchmark.pedantic(find_realistic_closest_match, iterations=50, rounds=10)
76
+ assert result is not None
@@ -0,0 +1,438 @@
1
+ import pytest
2
+ import random
3
+ import string
4
+ from bbot.scanner import Scanner
5
+ from bbot.core.event.base import make_event
6
+
7
+
8
+ class TestEventValidationBenchmarks:
9
+ def setup_method(self):
10
+ """Setup minimal scanner configuration for benchmarking event validation"""
11
+ # Set deterministic random seed for reproducible benchmarks
12
+ random.seed(42)
13
+
14
+ # Create a minimal scanner with no modules to isolate event validation performance
15
+ self.scanner_config = {
16
+ "modules": None, # No modules to avoid overhead
17
+ "output_modules": None, # No output modules
18
+ "dns": {"disable": True}, # Disable DNS to avoid network calls
19
+ "web": {"http_timeout": 1}, # Minimal timeouts
20
+ }
21
+
22
+ def _generate_diverse_targets(self, count=1000):
23
+ """Generate a diverse set of targets that will trigger different event type auto-detection"""
24
+ # Use deterministic random state for reproducible target generation
25
+ rng = random.Random(42)
26
+ targets = []
27
+
28
+ # DNS Names (various formats)
29
+ subdomains = ["www", "api", "mail", "ftp", "admin", "test", "dev", "staging", "blog"]
30
+ tlds = ["com", "org", "net", "io", "co.uk", "de", "fr", "jp"]
31
+
32
+ for _ in range(count // 10):
33
+ # Standard domains
34
+ targets.append(
35
+ f"{rng.choice(subdomains)}.{rng.choice(['example', 'test', 'evilcorp'])}.{rng.choice(tlds)}"
36
+ )
37
+ # Bare domains
38
+ targets.append(f"{rng.choice(['example', 'test', 'company'])}.{rng.choice(tlds)}")
39
+
40
+ # IP Addresses (IPv4 and IPv6)
41
+ for _ in range(count // 15):
42
+ # IPv4
43
+ targets.append(f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}")
44
+ # IPv6
45
+ targets.append(f"2001:db8::{rng.randint(1, 9999):x}:{rng.randint(1, 9999):x}")
46
+
47
+ # IP Ranges
48
+ for _ in range(count // 20):
49
+ targets.append(f"192.168.{rng.randint(1, 254)}.0/24")
50
+ targets.append(f"10.0.{rng.randint(1, 254)}.0/24")
51
+
52
+ # URLs (only supported schemes: http, https)
53
+ url_schemes = ["http", "https"] # Only schemes supported by BBOT auto-detection
54
+ url_paths = ["", "/", "/admin", "/api/v1", "/login.php", "/index.html"]
55
+ for _ in range(count // 8):
56
+ scheme = rng.choice(url_schemes)
57
+ domain = f"{rng.choice(subdomains)}.example.{rng.choice(tlds)}"
58
+ path = rng.choice(url_paths)
59
+ port = rng.choice(["", ":8080", ":443", ":80", ":8443"])
60
+ targets.append(f"{scheme}://{domain}{port}{path}")
61
+
62
+ # Open Ports
63
+ ports = [80, 443, 22, 21, 25, 53, 110, 143, 993, 995, 8080, 8443, 3389]
64
+ for _ in range(count // 12):
65
+ domain = f"example.{rng.choice(tlds)}"
66
+ port = rng.choice(ports)
67
+ targets.append(f"{domain}:{port}")
68
+ # IPv4 with port
69
+ ip = f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"
70
+ targets.append(f"{ip}:{port}")
71
+
72
+ # Email Addresses
73
+ email_domains = ["example.com", "test.org", "company.net"]
74
+ email_users = ["admin", "test", "info", "contact", "support", "sales"]
75
+ for _ in range(count // 15):
76
+ user = rng.choice(email_users)
77
+ domain = rng.choice(email_domains)
78
+ targets.append(f"{user}@{domain}")
79
+ # Plus addressing
80
+ targets.append(f"{user}+{rng.randint(1, 999)}@{domain}")
81
+
82
+ # Mixed/Edge cases that should trigger auto-detection logic
83
+ edge_cases = [
84
+ # Localhost variants
85
+ "localhost",
86
+ "127.0.0.1",
87
+ "::1",
88
+ # Punycode domains
89
+ "xn--e1afmkfd.xn--p1ai",
90
+ "xn--fiqs8s.xn--0zwm56d",
91
+ # Long domains (shortened to avoid issues)
92
+ "very-long-subdomain-name-for-testing.test.com",
93
+ # IP with ports
94
+ "192.168.1.1",
95
+ "10.0.0.1:80",
96
+ # URLs with parameters
97
+ "https://example.com/search?q=test&limit=10",
98
+ "http://api.example.com:8080/v1/users?format=json",
99
+ # More standard domains for better compatibility
100
+ "api.test.com",
101
+ "mail.example.org",
102
+ "secure.company.net",
103
+ ]
104
+ targets.extend(edge_cases)
105
+
106
+ # Fill remainder with random variations
107
+ remaining = count - len(targets)
108
+ if remaining > 0:
109
+ for _ in range(remaining):
110
+ choice = rng.randint(1, 4)
111
+ if choice == 1:
112
+ # Random domain
113
+ targets.append(f"{''.join(rng.choices(string.ascii_lowercase, k=8))}.com")
114
+ elif choice == 2:
115
+ # Random IP
116
+ targets.append(
117
+ f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"
118
+ )
119
+ elif choice == 3:
120
+ # Random URL
121
+ targets.append(f"https://{''.join(rng.choices(string.ascii_lowercase, k=8))}.com/path")
122
+ else:
123
+ # Random email
124
+ targets.append(f"{''.join(rng.choices(string.ascii_lowercase, k=8))}@example.com")
125
+
126
+ # Ensure we have exactly the requested count by removing duplicates and filling as needed
127
+ unique_targets = list(set(targets))
128
+
129
+ # If we have too few unique targets, generate more
130
+ while len(unique_targets) < count:
131
+ additional_target = f"filler{len(unique_targets)}.example.com"
132
+ if additional_target not in unique_targets:
133
+ unique_targets.append(additional_target)
134
+
135
+ # Return exactly the requested number of unique targets
136
+ return unique_targets[:count]
137
+
138
+ def _generate_diverse_event_data(self, count=1000):
139
+ """Generate diverse event data that will trigger different auto-detection paths in make_event"""
140
+ # Use deterministic random state for reproducible data generation
141
+ rng = random.Random(42)
142
+ event_data = []
143
+
144
+ # DNS Names (various formats)
145
+ subdomains = ["www", "api", "mail", "ftp", "admin", "test", "dev", "staging", "blog"]
146
+ tlds = ["com", "org", "net", "io", "co.uk", "de", "fr", "jp"]
147
+
148
+ for _ in range(count // 10):
149
+ # Standard domains
150
+ event_data.append(
151
+ f"{rng.choice(subdomains)}.{rng.choice(['example', 'test', 'evilcorp'])}.{rng.choice(tlds)}"
152
+ )
153
+ # Bare domains
154
+ event_data.append(f"{rng.choice(['example', 'test', 'company'])}.{rng.choice(tlds)}")
155
+
156
+ # IP Addresses (IPv4 and IPv6)
157
+ for _ in range(count // 15):
158
+ # IPv4
159
+ event_data.append(
160
+ f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"
161
+ )
162
+ # IPv6
163
+ event_data.append(f"2001:db8::{rng.randint(1, 9999):x}:{rng.randint(1, 9999):x}")
164
+
165
+ # IP Ranges
166
+ for _ in range(count // 20):
167
+ event_data.append(f"192.168.{rng.randint(1, 254)}.0/24")
168
+ event_data.append(f"10.0.{rng.randint(1, 254)}.0/24")
169
+
170
+ # URLs (HTTP/HTTPS)
171
+ url_schemes = ["http", "https"]
172
+ url_paths = ["", "/", "/admin", "/api/v1", "/login.php", "/index.html"]
173
+ for _ in range(count // 8):
174
+ scheme = rng.choice(url_schemes)
175
+ domain = f"{rng.choice(subdomains)}.example.{rng.choice(tlds)}"
176
+ path = rng.choice(url_paths)
177
+ port = rng.choice(["", ":8080", ":443", ":80", ":8443"])
178
+ event_data.append(f"{scheme}://{domain}{port}{path}")
179
+
180
+ # Open Ports
181
+ ports = [80, 443, 22, 21, 25, 53, 110, 143, 993, 995, 8080, 8443, 3389]
182
+ for _ in range(count // 12):
183
+ domain = f"example.{rng.choice(tlds)}"
184
+ port = rng.choice(ports)
185
+ event_data.append(f"{domain}:{port}")
186
+ # IPv4 with port
187
+ ip = f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"
188
+ event_data.append(f"{ip}:{port}")
189
+
190
+ # Email Addresses
191
+ email_domains = ["example.com", "test.org", "company.net"]
192
+ email_users = ["admin", "test", "info", "contact", "support", "sales"]
193
+ for _ in range(count // 15):
194
+ user = rng.choice(email_users)
195
+ domain = rng.choice(email_domains)
196
+ event_data.append(f"{user}@{domain}")
197
+ # Plus addressing
198
+ event_data.append(f"{user}+{rng.randint(1, 999)}@{domain}")
199
+
200
+ # Mixed/Edge cases that test auto-detection logic
201
+ edge_cases = [
202
+ # Localhost variants
203
+ "localhost",
204
+ "127.0.0.1",
205
+ "::1",
206
+ # Punycode domains
207
+ "xn--e1afmkfd.xn--p1ai",
208
+ "xn--fiqs8s.xn--0zwm56d",
209
+ # Long domains
210
+ "very-long-subdomain-name-for-testing.test.com",
211
+ # IP with ports
212
+ "192.168.1.1",
213
+ "10.0.0.1:80",
214
+ # URLs with parameters
215
+ "https://example.com/search?q=test&limit=10",
216
+ "http://api.example.com:8080/v1/users?format=json",
217
+ # Standard domains for better compatibility
218
+ "api.test.com",
219
+ "mail.example.org",
220
+ "secure.company.net",
221
+ ]
222
+ event_data.extend(edge_cases)
223
+
224
+ # Fill remainder with random variations
225
+ remaining = count - len(event_data)
226
+ if remaining > 0:
227
+ for _ in range(remaining):
228
+ choice = rng.randint(1, 4)
229
+ if choice == 1:
230
+ # Random domain
231
+ event_data.append(f"{''.join(rng.choices(string.ascii_lowercase, k=8))}.com")
232
+ elif choice == 2:
233
+ # Random IP
234
+ event_data.append(
235
+ f"{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}.{rng.randint(1, 254)}"
236
+ )
237
+ elif choice == 3:
238
+ # Random URL
239
+ event_data.append(f"https://{''.join(rng.choices(string.ascii_lowercase, k=8))}.com/path")
240
+ else:
241
+ # Random email
242
+ event_data.append(f"{''.join(rng.choices(string.ascii_lowercase, k=8))}@example.com")
243
+
244
+ # Ensure we have exactly the requested count by removing duplicates and filling as needed
245
+ unique_data = list(set(event_data))
246
+
247
+ # If we have too few unique entries, generate more
248
+ while len(unique_data) < count:
249
+ additional_data = f"filler{len(unique_data)}.example.com"
250
+ if additional_data not in unique_data:
251
+ unique_data.append(additional_data)
252
+
253
+ # Return exactly the requested number of unique data items
254
+ return unique_data[:count]
255
+
256
+ @pytest.mark.benchmark(group="event_validation_scan_startup_small")
257
+ def test_event_validation_full_scan_startup_small_batch(self, benchmark):
258
+ """Benchmark full scan startup event validation with small batch (100 targets) for quick iteration"""
259
+ targets = self._generate_diverse_targets(100)
260
+
261
+ def validate_event_batch():
262
+ scan = Scanner(*targets, config=self.scanner_config)
263
+ # Count successful event creations and types detected
264
+ event_counts = {}
265
+ total_events = 0
266
+
267
+ for event_seed in scan.target.seeds:
268
+ event_type = event_seed.type
269
+ event_counts[event_type] = event_counts.get(event_type, 0) + 1
270
+ total_events += 1
271
+
272
+ return {
273
+ "total_events_processed": total_events,
274
+ "unique_event_types": len(event_counts),
275
+ "event_type_breakdown": event_counts,
276
+ "targets_input": len(targets),
277
+ }
278
+
279
+ result = benchmark(validate_event_batch)
280
+ assert result["total_events_processed"] == result["targets_input"] # Should process ALL targets
281
+ assert result["unique_event_types"] >= 3 # Should detect at least DNS_NAME, IP_ADDRESS, URL
282
+
283
+ @pytest.mark.benchmark(group="event_validation_scan_startup_large")
284
+ def test_event_validation_full_scan_startup_large_batch(self, benchmark):
285
+ """Benchmark full scan startup event validation with large batch (1000 targets) for comprehensive testing"""
286
+ targets = self._generate_diverse_targets(1000)
287
+
288
+ def validate_large_batch():
289
+ scan = Scanner(*targets, config=self.scanner_config)
290
+
291
+ # Comprehensive analysis of validation pipeline performance
292
+ validation_metrics = {
293
+ "targets_input": len(targets),
294
+ "events_created": 0,
295
+ "validation_errors": 0,
296
+ "auto_detection_success": 0,
297
+ "type_distribution": {},
298
+ "processing_efficiency": 0.0,
299
+ }
300
+
301
+ try:
302
+ for event_seed in scan.target.seeds:
303
+ validation_metrics["events_created"] += 1
304
+ event_type = event_seed.type
305
+
306
+ if event_type not in validation_metrics["type_distribution"]:
307
+ validation_metrics["type_distribution"][event_type] = 0
308
+ validation_metrics["type_distribution"][event_type] += 1
309
+
310
+ # If we got a valid event type, auto-detection succeeded
311
+ if event_type and event_type != "UNKNOWN":
312
+ validation_metrics["auto_detection_success"] += 1
313
+
314
+ except Exception:
315
+ validation_metrics["validation_errors"] += 1
316
+
317
+ # Calculate efficiency ratio
318
+ if validation_metrics["targets_input"] > 0:
319
+ validation_metrics["processing_efficiency"] = (
320
+ validation_metrics["events_created"] / validation_metrics["targets_input"]
321
+ )
322
+
323
+ return validation_metrics
324
+
325
+ result = benchmark(validate_large_batch)
326
+ assert result["events_created"] == result["targets_input"] # Should process ALL targets successfully
327
+ assert result["processing_efficiency"] == 1.0 # 100% success rate
328
+ assert len(result["type_distribution"]) >= 5 # Should detect multiple event types
329
+
330
+ @pytest.mark.benchmark(group="make_event_small")
331
+ def test_make_event_autodetection_small(self, benchmark):
332
+ """Benchmark make_event with auto-detection for small batch (100 items)"""
333
+ event_data = self._generate_diverse_event_data(100)
334
+
335
+ def create_events_with_autodetection():
336
+ events_created = []
337
+ type_distribution = {}
338
+ validation_errors = 0
339
+
340
+ for data in event_data:
341
+ try:
342
+ # Test auto-detection by not providing event_type
343
+ event = make_event(data, dummy=True)
344
+ events_created.append(event)
345
+
346
+ event_type = event.type
347
+ type_distribution[event_type] = type_distribution.get(event_type, 0) + 1
348
+
349
+ except Exception:
350
+ validation_errors += 1
351
+
352
+ return {
353
+ "events_created": len(events_created),
354
+ "type_distribution": type_distribution,
355
+ "validation_errors": validation_errors,
356
+ "autodetection_success_rate": len(events_created) / len(event_data) if event_data else 0,
357
+ }
358
+
359
+ result = benchmark.pedantic(create_events_with_autodetection, iterations=50, rounds=10)
360
+ assert result["events_created"] == len(event_data) # Should create events for all data
361
+ assert result["validation_errors"] == 0 # Should have no validation errors
362
+ assert len(result["type_distribution"]) >= 3 # Should detect multiple event types
363
+ assert result["autodetection_success_rate"] == 1.0 # 100% success rate
364
+
365
+ @pytest.mark.benchmark(group="make_event_large")
366
+ def test_make_event_autodetection_large(self, benchmark):
367
+ """Benchmark make_event with auto-detection for large batch (1000 items)"""
368
+ event_data = self._generate_diverse_event_data(1000)
369
+
370
+ def create_large_event_batch():
371
+ performance_metrics = {
372
+ "total_processed": len(event_data),
373
+ "events_created": 0,
374
+ "autodetection_failures": 0,
375
+ "type_distribution": {},
376
+ "processing_efficiency": 0.0,
377
+ }
378
+
379
+ for data in event_data:
380
+ try:
381
+ # Use dummy=True for performance (no scan/parent validation)
382
+ event = make_event(data, dummy=True)
383
+ performance_metrics["events_created"] += 1
384
+
385
+ event_type = event.type
386
+ if event_type not in performance_metrics["type_distribution"]:
387
+ performance_metrics["type_distribution"][event_type] = 0
388
+ performance_metrics["type_distribution"][event_type] += 1
389
+
390
+ except Exception:
391
+ performance_metrics["autodetection_failures"] += 1
392
+
393
+ # Calculate efficiency ratio
394
+ performance_metrics["processing_efficiency"] = (
395
+ performance_metrics["events_created"] / performance_metrics["total_processed"]
396
+ )
397
+
398
+ return performance_metrics
399
+
400
+ result = benchmark.pedantic(create_large_event_batch, iterations=50, rounds=10)
401
+ assert result["events_created"] == result["total_processed"] # Should process all successfully
402
+ assert result["autodetection_failures"] == 0 # Should have no failures
403
+ assert result["processing_efficiency"] == 1.0 # 100% efficiency
404
+ assert len(result["type_distribution"]) >= 5 # Should detect multiple event types
405
+
406
+ @pytest.mark.benchmark(group="make_event_explicit_types")
407
+ def test_make_event_explicit_types(self, benchmark):
408
+ """Benchmark make_event when event types are explicitly provided (no auto-detection)"""
409
+ # Create data with explicit type mappings to bypass auto-detection
410
+ test_cases = [
411
+ ("example.com", "DNS_NAME"),
412
+ ("192.168.1.1", "IP_ADDRESS"),
413
+ ("https://example.com", "URL"),
414
+ ("admin@example.com", "EMAIL_ADDRESS"),
415
+ ("example.com:80", "OPEN_TCP_PORT"),
416
+ ] * 20 # 100 total cases
417
+
418
+ def create_events_explicit_types():
419
+ events_created = []
420
+ type_distribution = {}
421
+
422
+ for data, event_type in test_cases:
423
+ # Explicitly provide event_type to skip auto-detection
424
+ event = make_event(data, event_type=event_type, dummy=True)
425
+ events_created.append(event)
426
+
427
+ type_distribution[event_type] = type_distribution.get(event_type, 0) + 1
428
+
429
+ return {
430
+ "events_created": len(events_created),
431
+ "type_distribution": type_distribution,
432
+ "bypass_autodetection": True,
433
+ }
434
+
435
+ result = benchmark.pedantic(create_events_explicit_types, iterations=50, rounds=10)
436
+ assert result["events_created"] == len(test_cases) # Should create all events
437
+ assert result["bypass_autodetection"] # Confirms we bypassed auto-detection
438
+ assert len(result["type_distribution"]) == 5 # Should have exactly 5 types