bbot 2.5.0__py3-none-any.whl → 2.7.2.7424rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. bbot/__init__.py +1 -1
  2. bbot/cli.py +22 -8
  3. bbot/core/engine.py +1 -1
  4. bbot/core/event/__init__.py +2 -2
  5. bbot/core/event/base.py +138 -110
  6. bbot/core/flags.py +1 -0
  7. bbot/core/helpers/bloom.py +6 -7
  8. bbot/core/helpers/command.py +5 -2
  9. bbot/core/helpers/depsinstaller/installer.py +78 -7
  10. bbot/core/helpers/dns/dns.py +0 -1
  11. bbot/core/helpers/dns/engine.py +0 -2
  12. bbot/core/helpers/files.py +2 -2
  13. bbot/core/helpers/git.py +17 -0
  14. bbot/core/helpers/helper.py +6 -5
  15. bbot/core/helpers/misc.py +15 -28
  16. bbot/core/helpers/names_generator.py +5 -0
  17. bbot/core/helpers/ntlm.py +0 -2
  18. bbot/core/helpers/regex.py +1 -1
  19. bbot/core/helpers/regexes.py +25 -8
  20. bbot/core/helpers/web/engine.py +1 -1
  21. bbot/core/helpers/web/web.py +2 -1
  22. bbot/core/modules.py +22 -60
  23. bbot/core/shared_deps.py +38 -0
  24. bbot/defaults.yml +4 -2
  25. bbot/modules/apkpure.py +2 -2
  26. bbot/modules/aspnet_bin_exposure.py +80 -0
  27. bbot/modules/baddns.py +1 -1
  28. bbot/modules/baddns_direct.py +1 -1
  29. bbot/modules/baddns_zone.py +1 -1
  30. bbot/modules/badsecrets.py +1 -1
  31. bbot/modules/base.py +129 -40
  32. bbot/modules/bucket_amazon.py +1 -1
  33. bbot/modules/bucket_digitalocean.py +1 -1
  34. bbot/modules/bucket_firebase.py +1 -1
  35. bbot/modules/bucket_google.py +1 -1
  36. bbot/modules/{bucket_azure.py → bucket_microsoft.py} +2 -2
  37. bbot/modules/builtwith.py +4 -2
  38. bbot/modules/c99.py +1 -1
  39. bbot/modules/dnsbimi.py +1 -4
  40. bbot/modules/dnsbrute.py +6 -1
  41. bbot/modules/dnscommonsrv.py +1 -0
  42. bbot/modules/dnsdumpster.py +35 -52
  43. bbot/modules/dnstlsrpt.py +0 -6
  44. bbot/modules/docker_pull.py +2 -2
  45. bbot/modules/emailformat.py +17 -1
  46. bbot/modules/ffuf.py +4 -1
  47. bbot/modules/ffuf_shortnames.py +6 -3
  48. bbot/modules/filedownload.py +8 -5
  49. bbot/modules/fullhunt.py +1 -1
  50. bbot/modules/git_clone.py +47 -22
  51. bbot/modules/gitdumper.py +5 -15
  52. bbot/modules/github_workflows.py +6 -5
  53. bbot/modules/gitlab_com.py +31 -0
  54. bbot/modules/gitlab_onprem.py +84 -0
  55. bbot/modules/gowitness.py +60 -30
  56. bbot/modules/graphql_introspection.py +145 -0
  57. bbot/modules/httpx.py +2 -0
  58. bbot/modules/hunt.py +10 -3
  59. bbot/modules/iis_shortnames.py +16 -7
  60. bbot/modules/internal/cloudcheck.py +65 -72
  61. bbot/modules/internal/unarchive.py +9 -3
  62. bbot/modules/lightfuzz/lightfuzz.py +6 -2
  63. bbot/modules/lightfuzz/submodules/esi.py +42 -0
  64. bbot/modules/{deadly/medusa.py → medusa.py} +4 -7
  65. bbot/modules/nuclei.py +2 -2
  66. bbot/modules/otx.py +9 -2
  67. bbot/modules/output/base.py +3 -11
  68. bbot/modules/paramminer_headers.py +10 -7
  69. bbot/modules/passivetotal.py +1 -1
  70. bbot/modules/portfilter.py +2 -0
  71. bbot/modules/portscan.py +1 -1
  72. bbot/modules/postman_download.py +2 -2
  73. bbot/modules/retirejs.py +232 -0
  74. bbot/modules/securitytxt.py +0 -3
  75. bbot/modules/sslcert.py +2 -2
  76. bbot/modules/subdomaincenter.py +1 -16
  77. bbot/modules/telerik.py +7 -2
  78. bbot/modules/templates/bucket.py +24 -4
  79. bbot/modules/templates/gitlab.py +98 -0
  80. bbot/modules/trufflehog.py +7 -4
  81. bbot/modules/wafw00f.py +2 -2
  82. bbot/presets/web/dotnet-audit.yml +1 -0
  83. bbot/presets/web/lightfuzz-heavy.yml +1 -1
  84. bbot/presets/web/lightfuzz-medium.yml +1 -1
  85. bbot/presets/web/lightfuzz-superheavy.yml +1 -1
  86. bbot/scanner/manager.py +44 -37
  87. bbot/scanner/scanner.py +17 -4
  88. bbot/scripts/benchmark_report.py +433 -0
  89. bbot/test/benchmarks/__init__.py +2 -0
  90. bbot/test/benchmarks/test_bloom_filter_benchmarks.py +105 -0
  91. bbot/test/benchmarks/test_closest_match_benchmarks.py +76 -0
  92. bbot/test/benchmarks/test_event_validation_benchmarks.py +438 -0
  93. bbot/test/benchmarks/test_excavate_benchmarks.py +291 -0
  94. bbot/test/benchmarks/test_ipaddress_benchmarks.py +143 -0
  95. bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py +70 -0
  96. bbot/test/conftest.py +1 -1
  97. bbot/test/test_step_1/test_bbot_fastapi.py +2 -2
  98. bbot/test/test_step_1/test_events.py +22 -21
  99. bbot/test/test_step_1/test_helpers.py +20 -0
  100. bbot/test/test_step_1/test_manager_scope_accuracy.py +45 -0
  101. bbot/test/test_step_1/test_modules_basic.py +40 -15
  102. bbot/test/test_step_1/test_python_api.py +2 -2
  103. bbot/test/test_step_1/test_regexes.py +21 -4
  104. bbot/test/test_step_1/test_scan.py +7 -8
  105. bbot/test/test_step_1/test_web.py +46 -0
  106. bbot/test/test_step_2/module_tests/base.py +6 -1
  107. bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py +73 -0
  108. bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +52 -18
  109. bbot/test/test_step_2/module_tests/test_module_bucket_google.py +1 -1
  110. bbot/test/test_step_2/module_tests/{test_module_bucket_azure.py → test_module_bucket_microsoft.py} +7 -5
  111. bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +19 -31
  112. bbot/test/test_step_2/module_tests/test_module_dnsbimi.py +2 -1
  113. bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +3 -5
  114. bbot/test/test_step_2/module_tests/test_module_emailformat.py +1 -1
  115. bbot/test/test_step_2/module_tests/test_module_emails.py +2 -2
  116. bbot/test/test_step_2/module_tests/test_module_excavate.py +64 -5
  117. bbot/test/test_step_2/module_tests/test_module_extractous.py +13 -1
  118. bbot/test/test_step_2/module_tests/test_module_github_workflows.py +10 -1
  119. bbot/test/test_step_2/module_tests/test_module_gitlab_com.py +66 -0
  120. bbot/test/test_step_2/module_tests/{test_module_gitlab.py → test_module_gitlab_onprem.py} +4 -69
  121. bbot/test/test_step_2/module_tests/test_module_gowitness.py +5 -5
  122. bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py +34 -0
  123. bbot/test/test_step_2/module_tests/test_module_iis_shortnames.py +46 -1
  124. bbot/test/test_step_2/module_tests/test_module_jadx.py +9 -0
  125. bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +71 -3
  126. bbot/test/test_step_2/module_tests/test_module_nuclei.py +8 -6
  127. bbot/test/test_step_2/module_tests/test_module_otx.py +3 -0
  128. bbot/test/test_step_2/module_tests/test_module_portfilter.py +2 -0
  129. bbot/test/test_step_2/module_tests/test_module_retirejs.py +161 -0
  130. bbot/test/test_step_2/module_tests/test_module_telerik.py +1 -1
  131. bbot/test/test_step_2/module_tests/test_module_trufflehog.py +10 -1
  132. bbot/test/test_step_2/module_tests/test_module_unarchive.py +9 -0
  133. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/METADATA +12 -9
  134. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/RECORD +137 -124
  135. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/WHEEL +1 -1
  136. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info/licenses}/LICENSE +98 -58
  137. bbot/modules/binaryedge.py +0 -42
  138. bbot/modules/censys.py +0 -98
  139. bbot/modules/gitlab.py +0 -141
  140. bbot/modules/zoomeye.py +0 -77
  141. bbot/test/test_step_2/module_tests/test_module_binaryedge.py +0 -33
  142. bbot/test/test_step_2/module_tests/test_module_censys.py +0 -83
  143. bbot/test/test_step_2/module_tests/test_module_zoomeye.py +0 -35
  144. {bbot-2.5.0.dist-info → bbot-2.7.2.7424rc0.dist-info}/entry_points.txt +0 -0
bbot/test/benchmarks/test_excavate_benchmarks.py ADDED
@@ -0,0 +1,291 @@
+ import pytest
+ import asyncio
+ from bbot.scanner import Scanner
+
+
+ class TestExcavateDirectBenchmarks:
+     """
+     Direct benchmark tests for Excavate module operations.
+
+     These tests measure the performance of excavate's core YARA processing
+     by calling the excavate.search() method directly with specific text sizes,
+     in both a single-threaded loop and parallel asyncio tasks, to test the
+     GIL-sidestep feature of YARA.
+     """
+
+     # Number of text segments per test
+     TEXT_SEGMENTS_COUNT = 100
+
+     # Prescribed sizes for deterministic benchmarking (in bytes)
+     SMALL_SIZE = 4096  # 4KB
+     LARGE_SIZE = 5242880  # 5MB
+
+     def _generate_text_segments(self, target_size, count):
+         """Generate a list of text segments of the specified size"""
+         segments = []
+
+         for i in range(count):
+             # Generate realistic content that excavate can work with
+             base_content = self._generate_realistic_content(i)
+
+             # Pad to the exact target size with deterministic content
+             remaining_size = target_size - len(base_content)
+             if remaining_size > 0:
+                 # Use deterministic padding pattern
+                 padding_pattern = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+                 padding_repeats = (remaining_size // len(padding_pattern)) + 1
+                 padding = (padding_pattern * padding_repeats)[:remaining_size]
+                 content = base_content + padding
+             else:
+                 content = base_content[:target_size]
+
+             segments.append(content)
+
+         return segments
+
+     def _generate_realistic_content(self, index):
+         """Generate realistic content that excavate can extract from"""
+         return f"""
+         <html>
+         <head>
+             <title>Test Content {index}</title>
+             <script src="https://api{index}.example.com/js/app.js"></script>
+         </head>
+         <body>
+             <h1>Page {index}</h1>
+
+             <!-- URLs and subdomains -->
+             <a href="https://www{index}.example.com/page{index}">Link {index}</a>
+             <a href="https://cdn{index}.example.com/assets/">CDN {index}</a>
+             <img src="https://img{index}.example.com/photo{index}.jpg" />
+
+             <!-- Forms with parameters -->
+             <form action="/search{index}" method="GET">
+                 <input type="text" name="query{index}" value="test{index}">
+                 <input type="hidden" name="token{index}" value="abc123{index}">
+                 <button type="submit">Search</button>
+             </form>
+
+             <!-- API endpoints -->
+             <script>
+                 fetch('https://api{index}.example.com/v1/users/{index}')
+                     .then(response => response.json())
+                     .then(data => console.log(data));
+
+                 // WebSocket connection
+                 const ws = new WebSocket('wss://realtime{index}.example.com/socket');
+             </script>
+
+             <!-- Various protocols -->
+             <p>FTP: ftp://ftp{index}.example.com:21/files/</p>
+             <p>SSH: ssh://server{index}.example.com:22/</p>
+             <p>Email: contact{index}@example.com</p>
+
+             <!-- JSON data -->
+             <script type="application/json">
+                 {{
+                     "apiEndpoint{index}": "https://api{index}.example.com/data",
+                     "parameter{index}": "value{index}",
+                     "secretKey{index}": "sk_test_{index}_abcdef123456"
+                 }}
+             </script>
+
+             <!-- Comments with URLs -->
+             <!-- https://hidden{index}.example.com/admin -->
+             <!-- TODO: Check https://internal{index}.example.com/debug -->
+         </body>
+         </html>
+         """
+
+     async def _run_excavate_single_thread(self, text_segments):
+         """Run excavate processing in a single thread"""
+         # Create scanner and initialize excavate
+         scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
+         await scan._prep()
+         excavate_module = scan.modules.get("excavate")
+
+         if not excavate_module:
+             raise RuntimeError("Excavate module not found")
+
+         # Track events emitted by excavate
+         emitted_events = []
+
+         async def track_emit_event(event_data, *args, **kwargs):
+             emitted_events.append(event_data)
+
+         excavate_module.emit_event = track_emit_event
+
+         # Process all text segments sequentially
+         results = []
+         for i, text_segment in enumerate(text_segments):
+             # Create a mock HTTP_RESPONSE event
+             mock_event = scan.make_event(
+                 {
+                     "url": f"https://example.com/test/{i}",
+                     "method": "GET",
+                     "body": text_segment,
+                     "header-dict": {"Content-Type": ["text/html"]},
+                     "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
+                     "status_code": 200,
+                 },
+                 "HTTP_RESPONSE",
+                 parent=scan.root_event,
+             )
+
+             # Process with excavate
+             await excavate_module.search(text_segment, mock_event, "text/html", f"Single thread benchmark {i}")
+             results.append(f"processed_{i}")
+
+         return results, emitted_events
+
+     async def _run_excavate_parallel_tasks(self, text_segments):
+         """Run excavate processing with parallel asyncio tasks"""
+         # Create scanner and initialize excavate
+         scan = Scanner("example.com", modules=["httpx"], config={"excavate": True})
+         await scan._prep()
+         excavate_module = scan.modules.get("excavate")
+
+         if not excavate_module:
+             raise RuntimeError("Excavate module not found")
+
+         # Define async task to process a single text segment
+         async def process_segment(segment_index, text_segment):
+             mock_event = scan.make_event(
+                 {
+                     "url": f"https://example.com/parallel/{segment_index}",
+                     "method": "GET",
+                     "body": text_segment,
+                     "header-dict": {"Content-Type": ["text/html"]},
+                     "raw_header": "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n",
+                     "status_code": 200,
+                 },
+                 "HTTP_RESPONSE",
+                 parent=scan.root_event,
+             )
+
+             await excavate_module.search(
+                 text_segment, mock_event, "text/html", f"Parallel benchmark task {segment_index}"
+             )
+             return f"processed_{segment_index}"
+
+         # Create all tasks and run them concurrently
+         tasks = [process_segment(i, text_segment) for i, text_segment in enumerate(text_segments)]
+
+         # Run all tasks in parallel
+         results = await asyncio.gather(*tasks)
+         return results
+
+     # Single Thread Tests
+     @pytest.mark.benchmark(group="excavate_single_small")
+     def test_excavate_single_thread_small(self, benchmark):
+         """Benchmark excavate single-thread processing with small (4KB) segments"""
+         text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_single_thread(text_segments))
+
+         result, events = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+
+         # Count events by type
+         total_events = len(events)
+         url_events = len([e for e in events if e.type == "URL_UNVERIFIED"])
+         dns_events = len([e for e in events if e.type == "DNS_NAME"])
+         email_events = len([e for e in events if e.type == "EMAIL_ADDRESS"])
+         protocol_events = len([e for e in events if e.type == "PROTOCOL"])
+         finding_events = len([e for e in events if e.type == "FINDING"])
+
+         print("\n✅ Single-thread small segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print(f"📊 Total events: {total_events}")
+         print(f"📊 URL events: {url_events}")
+         print(f"📊 DNS events: {dns_events}")
+         print(f"📊 Email events: {email_events}")
+         print(f"📊 Protocol events: {protocol_events}")
+         print(f"📊 Finding events: {finding_events}")
+
+         # Validate that excavate actually found and processed content
+         assert total_events > 0, "Expected to find some events from excavate"
+         assert url_events > 0 or dns_events > 0 or protocol_events > 0, (
+             "Expected excavate to find URLs, DNS names, or protocols"
+         )
+
+     @pytest.mark.benchmark(group="excavate_single_large")
+     def test_excavate_single_thread_large(self, benchmark):
+         """Benchmark excavate single-thread processing with large (5MB) segments"""
+         text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_single_thread(text_segments))
+
+         result, events = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+
+         # Count events by type
+         total_events = len(events)
+         url_events = len([e for e in events if e.type == "URL_UNVERIFIED"])
+         dns_events = len([e for e in events if e.type == "DNS_NAME"])
+         email_events = len([e for e in events if e.type == "EMAIL_ADDRESS"])
+         protocol_events = len([e for e in events if e.type == "PROTOCOL"])
+         finding_events = len([e for e in events if e.type == "FINDING"])
+
+         print("\n✅ Single-thread large segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print(f"📊 Total events: {total_events}")
+         print(f"📊 URL events: {url_events}")
+         print(f"📊 DNS events: {dns_events}")
+         print(f"📊 Email events: {email_events}")
+         print(f"📊 Protocol events: {protocol_events}")
+         print(f"📊 Finding events: {finding_events}")
+
+         # Validate that excavate actually found and processed content
+         assert total_events > 0, "Expected to find some events from excavate"
+         assert url_events > 0 or dns_events > 0 or protocol_events > 0, (
+             "Expected excavate to find URLs, DNS names, or protocols"
+         )
+
+     # Parallel Tests
+     @pytest.mark.benchmark(group="excavate_parallel_small")
+     def test_excavate_parallel_tasks_small(self, benchmark):
+         """Benchmark excavate parallel processing with small (4KB) segments"""
+         text_segments = self._generate_text_segments(self.SMALL_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_parallel_tasks(text_segments))
+
+         result = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.SMALL_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+         print("\n✅ Parallel small segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.SMALL_SIZE / 1024:.0f}KB each in parallel")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print("📊 Tasks executed concurrently to test YARA GIL sidestep")
+
+         # Basic assertion that excavate is actually working (should find URLs in our test content)
+         assert len(result) > 0, "Expected excavate to process all segments"
+
+     @pytest.mark.benchmark(group="excavate_parallel_large")
+     def test_excavate_parallel_tasks_large(self, benchmark):
+         """Benchmark excavate parallel processing with large (5MB) segments to test YARA GIL sidestep"""
+         text_segments = self._generate_text_segments(self.LARGE_SIZE, self.TEXT_SEGMENTS_COUNT)
+
+         def run_test():
+             return asyncio.run(self._run_excavate_parallel_tasks(text_segments))
+
+         result = benchmark(run_test)
+
+         assert len(result) == self.TEXT_SEGMENTS_COUNT
+         total_size_mb = (self.LARGE_SIZE * self.TEXT_SEGMENTS_COUNT) / (1024 * 1024)
+         print("\n✅ Parallel large segments benchmark completed")
+         print(f"📊 Processed {len(result):,} segments of {self.LARGE_SIZE / (1024 * 1024):.0f}MB each in parallel")
+         print(f"📊 Total size processed: {total_size_mb:.1f} MB")
+         print("📊 Tasks executed concurrently to test YARA GIL sidestep")
+
+         # Basic assertion that excavate is actually working (should find URLs in our test content)
+         assert len(result) > 0, "Expected excavate to process all segments"
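A note on the "GIL sidestep" these benchmarks exercise: yara-python releases the GIL while libyara scans a buffer, which is what allows the parallel variants to overlap CPU-bound matching. The sketch below shows that pattern in isolation; the rule and all names are invented for the example and are not excavate's actual internals.

    import asyncio
    import yara  # pip install yara-python

    # hypothetical single-rule ruleset standing in for excavate's compiled rules
    RULES = yara.compile(source='rule url_hit { strings: $u = "https://" condition: $u }')

    async def scan_blobs(blobs):
        # rules.match() releases the GIL inside libyara, so asyncio.to_thread()
        # lets several large buffers be scanned truly in parallel
        return await asyncio.gather(*(asyncio.to_thread(RULES.match, data=b) for b in blobs))

    if __name__ == "__main__":
        blobs = [("visit https://example.com " * 50000).encode() for _ in range(8)]
        matches = asyncio.run(scan_blobs(blobs))
        print(f"{sum(len(m) for m in matches)} rule hits across {len(blobs)} blobs")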
bbot/test/benchmarks/test_ipaddress_benchmarks.py ADDED
@@ -0,0 +1,143 @@
+ import pytest
+ import random
+ import string
+ from bbot.core.helpers.misc import make_ip_type, is_ip
+
+
+ class TestIPAddressBenchmarks:
+     """
+     Benchmark tests for IP address processing operations.
+
+     These tests measure the performance of BBOT-level IP functions which are
+     critical for network scanning efficiency and could benefit from different
+     underlying implementations.
+     """
+
+     def setup_method(self):
+         """Setup common test data"""
+         # Set deterministic seed for consistent benchmark results
+         random.seed(42)  # Fixed seed for reproducible results
+
+         # Generate test data of different types and sizes
+         self.valid_ips = self._generate_valid_ips()
+         self.invalid_ips = self._generate_invalid_ips()
+         self.mixed_data = self._generate_mixed_data()
+
+     def _generate_valid_ips(self):
+         """Generate valid IP addresses for testing"""
+         valid_ips = []
+
+         # IPv4 addresses
+         for i in range(1000):
+             valid_ips.append(
+                 f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
+             )
+
+         # IPv6 addresses
+         for i in range(500):
+             ipv6_parts = []
+             for j in range(8):
+                 ipv6_parts.append(f"{random.randint(0, 65535):x}")
+             valid_ips.append(":".join(ipv6_parts))
+
+         # Network addresses
+         for i in range(500):
+             base_ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.0"
+             valid_ips.append(f"{base_ip}/{random.randint(8, 30)}")
+
+         # IP ranges
+         for i in range(200):
+             start_ip = (
+                 f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 200)}"
+             )
+             end_ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(201, 254)}"
+             valid_ips.append(f"{start_ip}-{end_ip}")
+
+         return valid_ips
+
+     def _generate_invalid_ips(self):
+         """Generate invalid IP addresses for testing"""
+         invalid_ips = []
+
+         # Malformed IPv4
+         for i in range(500):
+             invalid_ips.append(
+                 f"{random.randint(256, 999)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
+             )
+             invalid_ips.append(f"{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}")
+             invalid_ips.append(
+                 f"{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
+             )
+
+         # Malformed IPv6
+         for i in range(300):
+             ipv6_parts = []
+             for j in range(random.randint(5, 10)):  # Wrong number of parts
+                 ipv6_parts.append(f"{random.randint(0, 65535):x}")
+             invalid_ips.append(":".join(ipv6_parts))
+
+         # Random strings
+         for i in range(200):
+             length = random.randint(5, 20)
+             invalid_ips.append("".join(random.choices(string.ascii_letters + string.digits, k=length)))
+
+         return invalid_ips
+
+     def _generate_mixed_data(self):
+         """Generate mixed valid/invalid data for realistic testing"""
+         mixed = []
+         mixed.extend(self.valid_ips[:500])  # First 500 valid
+         mixed.extend(self.invalid_ips[:500])  # First 500 invalid
+         # Use deterministic shuffle with fixed seed for consistent results
+         random.seed(42)  # Reset seed before shuffle
+         random.shuffle(mixed)  # Shuffle for realistic distribution
+         return mixed
+
+     @pytest.mark.benchmark(group="ip_validation")
+     def test_is_ip_performance(self, benchmark):
+         """Benchmark IP validation performance with mixed data"""
+
+         def validate_ips():
+             valid_count = 0
+             for ip in self.mixed_data:
+                 if is_ip(ip):
+                     valid_count += 1
+             return valid_count
+
+         result = benchmark(validate_ips)
+         assert result > 0
+
+     @pytest.mark.benchmark(group="ip_type_detection")
+     def test_make_ip_type_performance(self, benchmark):
+         """Benchmark IP type detection performance"""
+
+         def detect_ip_types():
+             type_count = 0
+             for ip in self.valid_ips:
+                 try:
+                     make_ip_type(ip)
+                     type_count += 1
+                 except Exception:
+                     pass
+             return type_count
+
+         result = benchmark(detect_ip_types)
+         assert result > 0
+
+     @pytest.mark.benchmark(group="ip_processing")
+     def test_mixed_ip_operations(self, benchmark):
+         """Benchmark combined IP validation + type detection"""
+
+         def process_ips():
+             processed = 0
+             for ip in self.mixed_data:
+                 if is_ip(ip):
+                     try:
+                         make_ip_type(ip)
+                         processed += 1
+                     except Exception:
+                         pass
+             return processed
+
+         result = benchmark(process_ips)
+         assert result > 0
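For reference, the helpers being timed can be approximated with the stdlib ipaddress module. The sketch below is a rough functional stand-in only; BBOT's real is_ip and make_ip_type handle more input shapes, such as the dash-separated ranges generated above.

    import ipaddress

    def is_ip_sketch(value):
        # hypothetical stand-in for is_ip(): does the value parse as an address?
        try:
            ipaddress.ip_address(value)
            return True
        except ValueError:
            return False

    def make_ip_type_sketch(value):
        # hypothetical stand-in for make_ip_type(): upgrade strings to IP objects,
        # falling back to the original string when nothing parses
        for parse in (ipaddress.ip_address, lambda v: ipaddress.ip_network(v, strict=False)):
            try:
                return parse(value)
            except ValueError:
                continue
        return value

    assert is_ip_sketch("8.8.8.8") and not is_ip_sketch("999.1.1.1")
    assert isinstance(make_ip_type_sketch("10.0.0.0/8"), ipaddress.IPv4Network)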
bbot/test/benchmarks/test_weighted_shuffle_benchmarks.py ADDED
@@ -0,0 +1,70 @@
+ import pytest
+ import random
+ from bbot.core.helpers.misc import weighted_shuffle
+
+
+ class TestWeightedShuffleBenchmarks:
+     """
+     Benchmark tests for weighted_shuffle operations.
+
+     This function is critical for BBOT's queue management, where it shuffles
+     incoming queues based on module priority weights. Performance here directly
+     impacts scan throughput and responsiveness.
+     """
+
+     def setup_method(self):
+         """Setup common test data"""
+         # Set deterministic seed for consistent benchmark results
+         random.seed(42)  # Fixed seed for reproducible results
+
+         # Generate test data of different sizes and complexity
+         self.small_data = self._generate_small_dataset()
+         self.medium_data = self._generate_medium_dataset()
+         self.large_data = self._generate_large_dataset()
+         self.priority_weights = self._generate_priority_weights()
+
+     def _generate_small_dataset(self):
+         """Generate small dataset (like a scan with only a few modules)"""
+         return {"items": ["module_a", "module_b", "module_c"], "weights": [0.6, 0.3, 0.1]}
+
+     def _generate_medium_dataset(self):
+         """Generate medium dataset (like a typical scan)"""
+         items = [f"module_{i}" for i in range(20)]
+         weights = [random.uniform(0.1, 1.0) for _ in range(20)]
+         return {"items": items, "weights": weights}
+
+     def _generate_large_dataset(self):
+         """Generate large dataset (like a complex scan with many modules)"""
+         items = [f"module_{i}" for i in range(100)]
+         weights = [random.uniform(0.1, 1.0) for _ in range(100)]
+         return {"items": items, "weights": weights}
+
+     def _generate_priority_weights(self):
+         """Generate realistic priority weights (like BBOT module priorities)"""
+         # BBOT uses priorities 1-5, where a lower priority number = a higher weight
+         # Weights are calculated as [5] + [6 - m.priority for m in modules]
+         priorities = [5] + [6 - p for p in [1, 2, 3, 4, 5]] * 20  # 1 + 5*20 = 101 items
+         items = [f"queue_{i}" for i in range(len(priorities))]
+         return {"items": items, "weights": priorities}
+
+     @pytest.mark.benchmark(group="weighted_shuffle")
+     def test_typical_queue_shuffle(self, benchmark):
+         """Benchmark weighted shuffle with typical BBOT scan workload"""
+
+         def shuffle_typical():
+             return weighted_shuffle(self.medium_data["items"], self.medium_data["weights"])
+
+         result = benchmark(shuffle_typical)
+         assert len(result) == 20
+         assert all(item in result for item in self.medium_data["items"])
+
+     @pytest.mark.benchmark(group="weighted_shuffle")
+     def test_priority_queue_shuffle(self, benchmark):
+         """Benchmark weighted shuffle with realistic BBOT priority weights"""
+
+         def shuffle_priorities():
+             return weighted_shuffle(self.priority_weights["items"], self.priority_weights["weights"])
+
+         result = benchmark(shuffle_priorities)
+         assert len(result) == len(self.priority_weights["items"])
+         assert all(item in result for item in self.priority_weights["items"])
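As a reference point for what these benchmarks time, a weighted shuffle can be implemented as repeated weighted draws without replacement. This sketch satisfies the same contract the tests assert (a permutation of the input, biased toward higher weights); it is not necessarily BBOT's implementation.

    import random

    def weighted_shuffle_sketch(items, weights):
        # draw one item at a time with probability proportional to its weight,
        # removing each winner from the pool (written for clarity, not speed)
        pool = list(zip(items, weights))
        shuffled = []
        while pool:
            total = sum(w for _, w in pool)
            pick = random.uniform(0, total)
            cumulative = 0.0
            for i, (item, weight) in enumerate(pool):
                cumulative += weight
                if pick <= cumulative:
                    shuffled.append(item)
                    pool.pop(i)
                    break
        return shuffled

    result = weighted_shuffle_sketch(["a", "b", "c"], [0.6, 0.3, 0.1])
    assert sorted(result) == ["a", "b", "c"]  # always a permutation of the input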
bbot/test/conftest.py CHANGED
@@ -1,8 +1,8 @@
  import os
  import ssl
  import time
- import shutil
  import pytest
+ import shutil
  import asyncio
  import logging
  from pathlib import Path
bbot/test/test_step_1/test_bbot_fastapi.py CHANGED
@@ -22,8 +22,8 @@ def test_bbot_multiprocess(bbot_httpserver):
      queue = multiprocessing.Queue()
      events_process = multiprocessing.Process(target=run_bbot_multiprocess, args=(queue,))
      events_process.start()
-     events_process.join()
-     events = queue.get()
+     events_process.join(timeout=300)
+     events = queue.get(timeout=10)
      assert len(events) >= 3
      scan_events = [e for e in events if e["type"] == "SCAN"]
      assert len(scan_events) == 2
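The added timeouts guard against a well-known multiprocessing pitfall: a child that has put a large payload on a Queue can block in its feeder thread until the parent drains the queue, so an unbounded join() may hang the test run indefinitely. A standalone illustration of the bounded pattern (not the test itself):

    import multiprocessing

    def producer(queue):
        queue.put(["event"] * 100_000)  # large payload keeps the feeder thread busy

    if __name__ == "__main__":
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=producer, args=(queue,))
        proc.start()
        events = queue.get(timeout=10)  # drain with a bounded wait...
        proc.join(timeout=300)          # ...so a wedged child cannot hang CI forever
        print(f"received {len(events)} events")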
bbot/test/test_step_1/test_events.py CHANGED
@@ -42,7 +42,7 @@ async def test_events(events, helpers):
      assert events.ipv4 == scan.make_event("8.8.8.8", dummy=True)
      assert "8.8.8.8" in events.ipv4
      assert events.ipv4.host_filterable == "8.8.8.8"
-     assert "8.8.8.8" == events.ipv4
+     assert events.ipv4.data == "8.8.8.8"
      assert "8.8.8.8" in events.netv4
      assert "8.8.8.9" not in events.ipv4
      assert "8.8.9.8" not in events.netv4
@@ -60,7 +60,7 @@ async def test_events(events, helpers):
      assert events.emoji not in events.netv6
      assert events.netv6 not in events.emoji
      ipv6_event = scan.make_event(" [DEaD::c0De]:88", "DNS_NAME", dummy=True)
-     assert "dead::c0de" == ipv6_event
+     assert ipv6_event.data == "dead::c0de"
      assert ipv6_event.host_filterable == "dead::c0de"
      range_to_ip = scan.make_event("1.2.3.4/32", dummy=True)
      assert range_to_ip.type == "IP_ADDRESS"
@@ -87,7 +87,7 @@ async def test_events(events, helpers):
      open_port_event = scan.make_event(" eViLcorp.COM.:88", "DNS_NAME", dummy=True)
      dns_event = scan.make_event("evilcorp.com.", "DNS_NAME", dummy=True)
      for e in (open_port_event, dns_event):
-         assert "evilcorp.com" == e
+         assert e.data == "evilcorp.com"
          assert e.netloc == "evilcorp.com"
          assert e.json()["netloc"] == "evilcorp.com"
          assert e.port is None
@@ -117,17 +117,19 @@ async def test_events(events, helpers):
      assert events.emoji not in events.url_unverified
      assert events.emoji not in events.ipv6_url_unverified
      assert events.url_unverified not in events.emoji
-     assert "https://evilcorp.com" == scan.make_event("https://evilcorp.com:443", dummy=True)
-     assert "http://evilcorp.com" == scan.make_event("http://evilcorp.com:80", dummy=True)
+
+     # URL normalization tests – compare against normalized event.data / .with_port().geturl()
+     assert scan.make_event("https://evilcorp.com:443", dummy=True).data == "https://evilcorp.com/"
+     assert scan.make_event("http://evilcorp.com:80", dummy=True).data == "http://evilcorp.com/"
      assert "http://evilcorp.com:80/asdf.js" in scan.make_event("http://evilcorp.com/asdf.js", dummy=True)
      assert "http://evilcorp.com/asdf.js" in scan.make_event("http://evilcorp.com:80/asdf.js", dummy=True)
-     assert "https://evilcorp.com:443" == scan.make_event("https://evilcorp.com", dummy=True)
-     assert "http://evilcorp.com:80" == scan.make_event("http://evilcorp.com", dummy=True)
-     assert "https://evilcorp.com:80" == scan.make_event("https://evilcorp.com:80", dummy=True)
-     assert "http://evilcorp.com:443" == scan.make_event("http://evilcorp.com:443", dummy=True)
+     assert scan.make_event("https://evilcorp.com", dummy=True).data == "https://evilcorp.com/"
+     assert scan.make_event("http://evilcorp.com", dummy=True).data == "http://evilcorp.com/"
+     assert scan.make_event("https://evilcorp.com:80", dummy=True).data == "https://evilcorp.com:80/"
+     assert scan.make_event("http://evilcorp.com:443", dummy=True).data == "http://evilcorp.com:443/"
      assert scan.make_event("https://evilcorp.com", dummy=True).with_port().geturl() == "https://evilcorp.com:443/"
      assert scan.make_event("https://evilcorp.com:666", dummy=True).with_port().geturl() == "https://evilcorp.com:666/"
-     assert scan.make_event("https://evilcorp.com.:666", dummy=True) == "https://evilcorp.com:666/"
+     assert scan.make_event("https://evilcorp.com.:666", dummy=True).data == "https://evilcorp.com:666/"
      assert scan.make_event("https://[bad::c0de]", dummy=True).with_port().geturl() == "https://[bad::c0de]:443/"
      assert scan.make_event("https://[bad::c0de]:666", dummy=True).with_port().geturl() == "https://[bad::c0de]:666/"
      url_event = scan.make_event("https://evilcorp.com", "URL", events.ipv4_url, tags=["status-200"])
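The expected values above encode the normalization rules BBOT applies to URLs: default ports are dropped, non-default ports are kept, trailing dots on the host are stripped, and an empty path becomes "/". A rough sketch of those rules as a hypothetical helper (it ignores IPv6 re-bracketing and is not BBOT's actual code):

    from urllib.parse import urlparse, urlunparse

    DEFAULT_PORTS = {"http": 80, "https": 443}

    def normalize_url_sketch(url):
        parsed = urlparse(url)
        host = (parsed.hostname or "").rstrip(".")  # "evilcorp.com." -> "evilcorp.com"
        if parsed.port and parsed.port != DEFAULT_PORTS.get(parsed.scheme):
            host = f"{host}:{parsed.port}"          # keep only non-default ports
        return urlunparse((parsed.scheme, host, parsed.path or "/", "", parsed.query, ""))

    assert normalize_url_sketch("https://evilcorp.com:443") == "https://evilcorp.com/"
    assert normalize_url_sketch("https://evilcorp.com:80") == "https://evilcorp.com:80/"
    assert normalize_url_sketch("https://evilcorp.com.:666") == "https://evilcorp.com:666/"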
@@ -209,7 +211,6 @@ async def test_events(events, helpers):
      javascript_event = scan.make_event("http://evilcorp.com/asdf/a.js?b=c#d", "URL_UNVERIFIED", parent=scan.root_event)
      assert "extension-js" in javascript_event.tags
      await scan.ingress_module.handle_event(javascript_event)
-     assert "httpx-only" in javascript_event.tags

      # scope distance
      event1 = scan.make_event("1.2.3.4", dummy=True)
@@ -261,21 +262,21 @@ async def test_events(events, helpers):
      )
      assert event.discovery_context == "something discovered IP_ADDRESS: 127.0.0.1"

-     # updating an already-created event with make_event()
+     # updating an already-created event with update_event()
      # updating tags
      event1 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event1, tags="asdf")
-     assert "asdf" not in event1.tags
+     updated_event = scan.update_event(event1, tags="asdf")
+     # assert "asdf" not in event1.tags  # why was this test added? why is it important the original event stays untouched? 🤔
      assert "asdf" in updated_event.tags
      # updating parent
      event2 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event2, parent=event1)
-     assert event2.parent == scan.root_event
+     updated_event = scan.update_event(event2, parent=event1)
+     # assert event2.parent == scan.root_event
      assert updated_event.parent == event1
-     # updating module
+     # updating module/internal flag
      event3 = scan.make_event("127.0.0.1", parent=scan.root_event)
-     updated_event = scan.make_event(event3, internal=True)
-     assert event3.internal is False
+     updated_event = scan.update_event(event3, internal=True)
+     # assert event3.internal is False
      assert updated_event.internal is True

      # event sorting
@@ -1056,13 +1057,13 @@ async def test_mobile_app():

  @pytest.mark.asyncio
  async def test_filesystem():
-     scan = Scanner("FILESYSTEM:/tmp/asdf")
+     scan = Scanner("FILESYSTEM:/tmp/asdfasdgasdfasdfddsdf")
      events = [e async for e in scan.async_start()]
      assert len(events) == 3
      filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
      assert len(filesystem_events) == 1
      assert filesystem_events[0].type == "FILESYSTEM"
-     assert filesystem_events[0].data == {"path": "/tmp/asdf"}
+     assert filesystem_events[0].data == {"path": "/tmp/asdfasdgasdfasdfddsdf"}


  def test_event_hashing():
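The new target swaps /tmp/asdf for a path that is very unlikely to exist on the test host, so the FILESYSTEM event count stays deterministic. An aside on the design choice: where the target syntax permits, generating the path with tempfile sidesteps the collision entirely (a sketch, not part of the diff):

    import tempfile
    from pathlib import Path

    # build a FILESYSTEM target from a directory that is guaranteed fresh
    with tempfile.TemporaryDirectory(prefix="bbot-test-") as tmp:
        target = f"FILESYSTEM:{Path(tmp) / 'payload'}"
        print(target)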
bbot/test/test_step_1/test_helpers.py CHANGED
@@ -155,6 +155,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver):
      assert helpers.extract_host("https://[dead::beef]:22?a=b") == ("dead::beef", "https://[", "]:22?a=b")
      assert helpers.extract_host("https://[dead::beef]/?a=b") == ("dead::beef", "https://[", "]/?a=b")
      assert helpers.extract_host("https://[dead::beef]?a=b") == ("dead::beef", "https://[", "]?a=b")
+     assert helpers.extract_host("https://[::1]") == ("::1", "https://[", "]")
      assert helpers.extract_host("ftp://username:password@my-ftp.com/my-file.csv") == (
          "my-ftp.com",
          "ftp://username:password@",
@@ -954,3 +955,22 @@
          assert p in cookie_valid_params and p not in cookie_invalid_params
      else:
          assert p in cookie_invalid_params and p not in cookie_valid_params


+ @pytest.mark.asyncio
+ async def test_rm_temp_dir_at_exit(helpers):
+     from bbot.scanner import Scanner
+
+     scan = Scanner("127.0.0.1", modules=["httpx"])
+     await scan._prep()
+
+     temp_dir = scan.home / "temp"
+
+     # temp dir should exist
+     assert temp_dir.exists()
+
+     events = [e async for e in scan.async_start()]
+     assert events
+
+     # temp dir should be removed
+     assert not temp_dir.exists()
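The new test pins down a lifecycle guarantee: the scanner's temp directory exists after _prep() and is gone once the scan completes. One common way to provide such a guarantee is an atexit hook, shown here as a sketch only (BBOT may instead hook cleanup into scan teardown rather than interpreter exit):

    import atexit
    import shutil
    import tempfile
    from pathlib import Path

    temp_dir = Path(tempfile.mkdtemp(prefix="bbot-temp-"))
    # best-effort removal when the process exits; ignore_errors prevents a
    # half-deleted directory from raising during interpreter shutdown
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
    print(f"scratch space at {temp_dir}")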