PyPI - changedetection.io-osint-processor - Versions diffs - 0.0.1__py3-none-any.whl - Mend

changedetection.io-osint-processor 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

changedetection_io_osint_processor-0.0.1.dist-info/METADATA +274 -0
changedetection_io_osint_processor-0.0.1.dist-info/RECORD +29 -0
changedetection_io_osint_processor-0.0.1.dist-info/WHEEL +5 -0
changedetection_io_osint_processor-0.0.1.dist-info/entry_points.txt +2 -0
changedetection_io_osint_processor-0.0.1.dist-info/licenses/LICENSE +661 -0
changedetection_io_osint_processor-0.0.1.dist-info/top_level.txt +1 -0
changedetectionio_osint/__init__.py +22 -0
changedetectionio_osint/forms.py +289 -0
changedetectionio_osint/plugin.py +37 -0
changedetectionio_osint/processor.py +655 -0
changedetectionio_osint/steps/__init__.py +4 -0
changedetectionio_osint/steps/base.py +76 -0
changedetectionio_osint/steps/bgp.py +88 -0
changedetectionio_osint/steps/dns.py +147 -0
changedetectionio_osint/steps/dns_scan.py +88 -0
changedetectionio_osint/steps/dnssec.py +260 -0
changedetectionio_osint/steps/email_security.py +236 -0
changedetectionio_osint/steps/http_fingerprint.py +359 -0
changedetectionio_osint/steps/http_scan.py +31 -0
changedetectionio_osint/steps/mac_lookup.py +209 -0
changedetectionio_osint/steps/os_detection.py +245 -0
changedetectionio_osint/steps/portscan.py +113 -0
changedetectionio_osint/steps/registry.py +49 -0
changedetectionio_osint/steps/smtp_fingerprint.py +517 -0
changedetectionio_osint/steps/ssh_fingerprint.py +310 -0
changedetectionio_osint/steps/tls_analysis.py +332 -0
changedetectionio_osint/steps/traceroute.py +127 -0
changedetectionio_osint/steps/whois_lookup.py +125 -0
changedetectionio_osint/steps/whois_scan.py +123 -0

changedetectionio_osint/steps/email_security.py ADDED Viewed

@@ -0,0 +1,236 @@
+"""
+Email Security Reconnaissance Step
+Analyzes SPF, DMARC, and DKIM records for email authentication and anti-spoofing
+"""
+import asyncio
+# SOCKS5 proxy support: Requires DNS-over-TCP implementation (TODO: use dns.query.tcp with SOCKS5 socket)
+supports_socks5 = False
+import re
+from loguru import logger
# Maps the qualifier in front of the SPF "all" mechanism to the reported policy label.
_SPF_ALL_POLICIES = {
    '-': 'strict (-all)',
    '~': 'softfail (~all)',
    '+': 'permissive (+all)',
    '?': 'neutral (?all)',
}

# DKIM selectors are arbitrary, so only a fixed list of commonly used names is probed.
_COMMON_DKIM_SELECTORS = (
    'default', 'google', 'k1', 'k2', 'k3', 'dkim', 'selector1', 'selector2',
    's1', 's2', 'mail', 'email', 'mx', 'smtp', 'mta', 'key1', 'key2',
)


def _txt_record_text(rdata):
    """Return the logical text of a TXT rdata, joining multi-string records."""
    # A TXT record longer than 255 bytes is stored as several character-strings
    # that must be concatenated without separators. Prefer the raw strings when
    # the rdata object exposes them.
    chunks = getattr(rdata, 'strings', None)
    if chunks:
        return ''.join(
            chunk.decode('utf-8', errors='replace') if isinstance(chunk, bytes) else str(chunk)
            for chunk in chunks
        )
    # Fallback: parse the presentation form, e.g. '"part one " "part two"'.
    presentation = str(rdata)
    quoted = re.findall(r'"((?:[^"\\]|\\.)*)"', presentation)
    if quoted:
        return ''.join(quoted)
    return presentation.strip('"')


def _parse_tag_list(record):
    """Parse a ``tag=value; tag=value`` record (DMARC/DKIM style) into a dict."""
    tags = {}
    for part in record.split(';'):
        name, sep, value = part.partition('=')
        name = name.strip()
        # First occurrence wins; fragments without '=' are ignored.
        if sep and name and name not in tags:
            tags[name] = value.strip()
    return tags


def _classify_spf_policy(record):
    """Return the policy label for the first ``all`` mechanism of an SPF record."""
    for term in record.split()[1:]:
        qualifier = '+'  # a mechanism without an explicit qualifier means "pass"
        if term[0] in _SPF_ALL_POLICIES:
            qualifier, term = term[0], term[1:]
        if term.lower() == 'all':
            return _SPF_ALL_POLICIES[qualifier]
    return 'unknown'


async def scan_email_security(hostname, dns_resolver, watch_uuid=None, update_signal=None):
    """
    Perform email security reconnaissance (SPF, DMARC, DKIM)
    Args:
        hostname: Target hostname to query
        dns_resolver: Object with a blocking ``resolve(name, rdtype)`` method
            (e.g. a configured dns.resolver.Resolver instance)
        watch_uuid: Optional watch UUID for status updates
        update_signal: Optional blinker signal for status updates
    Returns:
        dict: Email security results with the keys 'spf', 'dmarc', 'dkim',
        'spf_valid', 'dmarc_valid', 'spf_policy', 'dmarc_policy', 'dmarc_pct',
        'dmarc_rua' and 'dmarc_ruf'. Failed lookups leave the defaults in place;
        no exception is propagated from the DNS queries.
    """
    if update_signal and watch_uuid:
        update_signal.send(watch_uuid=watch_uuid, status="Email Security")

    def query_email_security():
        results = {
            'spf': None,
            'dmarc': None,
            'dkim': [],
            'spf_valid': False,
            'dmarc_valid': False,
            'spf_policy': None,
            'dmarc_policy': None,
            'dmarc_pct': None,
            'dmarc_rua': [],
            'dmarc_ruf': [],
        }

        # === SPF (Sender Policy Framework) ===
        # SPF records are TXT records on the domain itself
        try:
            for rdata in dns_resolver.resolve(hostname, 'TXT'):
                text = _txt_record_text(rdata)
                terms = text.split()
                # The version term must be exactly "v=spf1" (so "v=spf10" is rejected).
                if terms and terms[0].lower() == 'v=spf1':
                    results['spf'] = text
                    results['spf_valid'] = True
                    # Classify by the actual "all" mechanism instead of a substring
                    # search, which misfires on names such as "mail-all.example.net".
                    results['spf_policy'] = _classify_spf_policy(text)
                    break
        except Exception as e:
            # Best effort: a missing record or resolver failure is not fatal.
            logger.debug(f"SPF query failed: {e}")

        # === DMARC (Domain-based Message Authentication) ===
        # DMARC records are TXT records on _dmarc.domain.com
        try:
            for rdata in dns_resolver.resolve(f"_dmarc.{hostname}", 'TXT'):
                text = _txt_record_text(rdata)
                tags = _parse_tag_list(text)
                if tags.get('v') != 'DMARC1':
                    continue
                results['dmarc'] = text
                results['dmarc_valid'] = True
                # p=none/quarantine/reject; looked up by tag name so "sp=" cannot shadow it
                results['dmarc_policy'] = tags.get('p', '').lower() or None
                # pct=0-100, defaulting to 100 when absent or malformed
                pct = tags.get('pct', '')
                results['dmarc_pct'] = int(pct) if pct.isdecimal() else 100
                # Aggregate (rua) and forensic (ruf) reporting URIs are comma separated
                for tag in ('rua', 'ruf'):
                    results[f'dmarc_{tag}'] = [
                        uri.strip() for uri in tags.get(tag, '').split(',') if uri.strip()
                    ]
                break
        except Exception as e:
            logger.debug(f"DMARC query failed: {e}")

        # === DKIM (DomainKeys Identified Mail) ===
        # DKIM records are TXT records on <selector>._domainkey.domain.com
        for selector in _COMMON_DKIM_SELECTORS:
            try:
                answers = dns_resolver.resolve(f"{selector}._domainkey.{hostname}", 'TXT')
                for rdata in answers:
                    text = _txt_record_text(rdata)
                    tags = _parse_tag_list(text)
                    if tags.get('v') == 'DKIM1' or 'p' in tags:
                        results['dkim'].append({
                            'selector': selector,
                            # Public keys are long; keep the stored record short.
                            'record': text[:100] + '...' if len(text) > 100 else text,
                            # The key type defaults to RSA when no k= tag is present.
                            'key_type': (tags.get('k') or 'rsa').upper(),
                        })
                        break
            except Exception:
                # A selector that does not exist is the expected outcome for most probes.
                continue
        return results

    # The resolver API is blocking, so run the queries in a worker thread.
    return await asyncio.to_thread(query_email_security)
def format_email_security_results(email_results):
    """
    Format email security results for output
    Args:
        email_results: dict as produced by scan_email_security (keys such as
            'spf_valid', 'spf_policy', 'dmarc_policy', 'dmarc_pct', 'dkim');
            may be None or empty
    Returns:
        str: Multi-line, human-readable report ending with a newline
    """
    lines = ["=== Email Security (SPF/DMARC/DKIM) ==="]
    if not email_results:
        lines.extend(["No email security records found", ""])
        return '\n'.join(lines)

    # SPF Section
    lines.extend(["", "SPF (Sender Policy Framework):"])
    spf_ok = bool(email_results.get('spf_valid'))
    if spf_ok:
        # "or" also covers a key that is present but None
        spf_policy = email_results.get('spf_policy') or 'unknown'
        lines.append("  Status: ✓ SPF record found")
        lines.append(f"  Policy: {spf_policy}")
        lines.append(f"  Record: {email_results.get('spf')}")
        # Security assessment
        if 'strict' in spf_policy:
            lines.append("  Security: ✓ Strong (rejects unauthorized senders)")
        elif 'softfail' in spf_policy:
            lines.append("  Security: ⚠ Moderate (marks unauthorized senders as suspicious)")
        elif 'permissive' in spf_policy:
            lines.append("  Security: ✗ Weak (allows all senders)")
        elif 'neutral' in spf_policy:
            # ?all is a recognised policy, not an unknown one
            lines.append("  Security: ⚠ Weak (neutral: makes no assertion about unauthorized senders)")
        else:
            lines.append("  Security: ? Unknown policy")
    else:
        lines.append("  Status: ✗ No SPF record found")
        lines.append("  Security: ✗ Domain is vulnerable to email spoofing")

    # DMARC Section
    lines.extend(["", "DMARC (Domain-based Message Authentication):"])
    dmarc_ok = bool(email_results.get('dmarc_valid'))
    if dmarc_ok:
        dmarc_policy = email_results.get('dmarc_policy') or 'unknown'
        pct = email_results.get('dmarc_pct')
        if pct is None:
            pct = 100  # DMARC applies the policy to all messages by default
        lines.append("  Status: ✓ DMARC record found")
        lines.append(f"  Policy: {dmarc_policy}")
        lines.append(f"  Enforcement: {pct}% of messages")
        if email_results.get('dmarc_rua'):
            lines.append(f"  Aggregate Reports: {', '.join(email_results['dmarc_rua'])}")
        if email_results.get('dmarc_ruf'):
            lines.append(f"  Forensic Reports: {', '.join(email_results['dmarc_ruf'])}")
        lines.append(f"  Record: {email_results.get('dmarc')}")
        # Security assessment
        if dmarc_policy == 'reject' and pct == 100:
            lines.append("  Security: ✓ Strong (rejects failed authentication)")
        elif dmarc_policy == 'reject':
            # A partially enforced reject policy is known, just weaker.
            lines.append(f"  Security: ⚠ Moderate (rejects failed authentication for only {pct}% of messages)")
        elif dmarc_policy == 'quarantine':
            lines.append("  Security: ⚠ Moderate (quarantines failed authentication)")
        elif dmarc_policy == 'none':
            lines.append("  Security: ⚠ Monitor-only (no enforcement)")
        else:
            lines.append("  Security: ? Unknown policy")
    else:
        lines.append("  Status: ✗ No DMARC record found")
        lines.append("  Security: ⚠ No DMARC policy enforcement")

    # DKIM Section
    lines.extend(["", "DKIM (DomainKeys Identified Mail):"])
    dkim_entries = email_results.get('dkim') or []
    dkim_ok = len(dkim_entries) > 0
    if dkim_ok:
        lines.append(f"  Status: ✓ Found {len(dkim_entries)} DKIM selector(s)")
        for dkim_entry in dkim_entries:
            lines.append(f"  Selector: {dkim_entry['selector']}")
            lines.append(f"    Key Type: {dkim_entry['key_type']}")
            lines.append(f"    Record: {dkim_entry['record']}")
        lines.append("  Security: ✓ Email signing enabled")
    else:
        lines.append("  Status: ⚠ No DKIM records found (checked common selectors)")
        lines.append("  Note: DKIM may be present with a custom selector")

    # Overall Security Summary: one point per configured standard
    lines.extend(["", "Overall Email Security Posture:"])
    score = sum((spf_ok, dmarc_ok, dkim_ok))
    if score == 3:
        lines.append("  ✓ Excellent: SPF, DMARC, and DKIM all configured")
    elif score == 2:
        lines.append("  ⚠ Good: 2 out of 3 email security standards configured")
    elif score == 1:
        lines.append("  ⚠ Weak: Only 1 out of 3 email security standards configured")
    else:
        lines.append("  ✗ Poor: No email security standards configured")
        lines.append("  Recommendation: Configure SPF, DMARC, and DKIM to prevent email spoofing")
    lines.append("")
    return '\n'.join(lines)

changedetectionio_osint/steps/http_fingerprint.py ADDED Viewed

@@ -0,0 +1,359 @@
+"""
+HTTP Fingerprinting Step
+Captures server-side HTTP/HTTPS fingerprints including redirect chains and CDN detection
+"""
+import asyncio
+# SOCKS5 proxy support: HTTP requests support SOCKS5 via requests library
+supports_socks5 = True
+import socket
+import time
+import hashlib
+from urllib.parse import urlparse, urljoin, urlunparse
+from loguru import logger
# CDN/WAF/proxy detection markers. An entry written as "Header: fragment" matches
# when that specific header's value contains the fragment; every other entry is a
# substring match against all header names, header values and cookie names.
_CDN_WAF_INDICATORS = {
    'Cloudflare': ['CF-Ray', 'cf-request-id', '__cfduid', 'cloudflare'],
    'Akamai': ['X-Akamai-', 'akamai'],
    'AWS CloudFront': ['X-Amz-Cf-', 'cloudfront'],
    'Fastly': ['Fastly-', 'X-Fastly-'],
    'Incapsula': ['X-CDN: Incapsula', 'incap_ses', 'visid_incap'],
    'Sucuri': ['X-Sucuri-', 'sucuri'],
    'StackPath': ['X-Stackpath-'],
    'KeyCDN': ['X-Edge-', 'Server: keycdn'],
    'Imperva': ['X-Iinfo', 'imperva'],
    'F5 BIG-IP': ['BigIP', 'F5-', 'X-WA-Info'],
    'Nginx': ['Server: nginx', 'X-Nginx-'],
    'Varnish': ['Via: varnish', 'X-Varnish'],
    'Squid': ['Via: squid', 'X-Squid-'],
}

# Status codes that are followed as redirects when a Location header is present.
_REDIRECT_STATUS_CODES = (301, 302, 303, 307, 308)


def _detect_cdn_waf(headers, cookie_names):
    """Return the names of CDN/WAF/proxy products whose markers appear in the response."""
    headers_lower = {str(key).lower(): str(value).lower() for key, value in headers.items()}
    cookies_lower = ' '.join(name.lower() for name in cookie_names)

    def matches(indicator):
        needle = indicator.lower()
        header_name, sep, fragment = needle.partition(':')
        if sep:
            # "Header: fragment" form. Names and values are held separately, so
            # the combined string can never be found by a plain substring search.
            return fragment.strip() in headers_lower.get(header_name.strip(), '')
        return needle in cookies_lower or any(
            needle in key or needle in value for key, value in headers_lower.items()
        )

    return [
        cdn_name
        for cdn_name, indicators in _CDN_WAF_INDICATORS.items()
        if any(matches(indicator) for indicator in indicators)
    ]


def _capture_tls_details(response):
    """Read certificate, TLS version and cipher from the response's live socket."""
    details = {}
    try:
        connection = getattr(response.raw, 'connection', None)
        sock = getattr(connection, 'sock', None) if connection else None
        if not sock:
            return details
        # Server's certificate info
        if hasattr(sock, 'getpeercert'):
            cert = sock.getpeercert()
            if cert:
                details['server_cert_subject'] = dict(x[0] for x in cert.get('subject', []))
                details['server_cert_issuer'] = dict(x[0] for x in cert.get('issuer', []))
        # What the SERVER chose/negotiated with us
        if hasattr(sock, 'version'):
            details['server_tls_version'] = sock.version()
        if hasattr(sock, 'cipher'):
            cipher_info = sock.cipher()
            details['server_cipher'] = cipher_info
            if cipher_info:
                # JA3S-like: hash of (cipher name, protocol, bits) the server selected.
                # MD5 is used only as a compact identifier, not for security.
                server_fp_string = f"{cipher_info[0]}:{cipher_info[1]}:{cipher_info[2]}"
                details['server_cipher_fingerprint'] = hashlib.md5(server_fp_string.encode()).hexdigest()
    except Exception as e:
        details['ssl_error'] = str(e)
    return details


async def scan_http(url, dns_resolver, proxy_url=None, watch_uuid=None, update_signal=None):
    """
    Perform HTTP fingerprinting on target URL
    Args:
        url: Target URL
        dns_resolver: Configured dns.resolver.Resolver instance
        proxy_url: Optional proxy URL
        watch_uuid: Optional watch UUID for status updates
        update_signal: Optional blinker signal for status updates
    Returns:
        dict: HTTP fingerprint data. On failure the dict contains an 'error'
        key with the exception text (plus whatever was collected before it).
    """
    if update_signal and watch_uuid:
        update_signal.send(watch_uuid=watch_uuid, status="HTTP")

    def fetch_http_fingerprint():
        """Synchronous HTTP fingerprinting - captures server TLS configuration"""
        import requests

        # Normalise once so that a whitespace-only value consistently means "no proxy".
        proxy = (proxy_url or '').strip()
        proxies = {'http': proxy, 'https': proxy} if proxy else None
        original_getaddrinfo = socket.getaddrinfo

        def custom_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
            """Resolve via dns_resolver (A first, then AAAA), else use the system resolver."""
            for record_type, address_family in (('A', socket.AF_INET), ('AAAA', socket.AF_INET6)):
                try:
                    resolved_ip = str(dns_resolver.resolve(host, record_type)[0])
                except Exception:
                    continue
                if address_family == socket.AF_INET6:
                    sockaddr = (resolved_ip, port, 0, 0)
                else:
                    sockaddr = (resolved_ip, port)
                return [(address_family, socket.SOCK_STREAM, proto, '', sockaddr)]
            # If our DNS fails (or host is an IP literal), fall back to original
            return original_getaddrinfo(host, port, family, type, proto, flags)

        # Set realistic headers
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        }
        fingerprint = {}
        redirect_chain = []
        max_redirects = 5
        session = None
        response = None

        # CRITICAL: Skip DNS monkey-patching when using a proxy to prevent DNS leaks.
        # The proxy should handle DNS resolution (use socks5h:// for remote DNS).
        # NOTE(review): the patch replaces socket.getaddrinfo for the whole process,
        # so other threads resolve through dns_resolver while this scan runs, and
        # overlapping scans can restore each other's patch - confirm that scans
        # never run concurrently or replace this with a per-request mechanism.
        if proxy:
            logger.debug("SOCKS5 proxy configured - skipping DNS monkey-patch to prevent leaks")
        else:
            socket.getaddrinfo = custom_getaddrinfo

        start_time = time.time()
        try:
            session = requests.Session()
            # Follow redirects manually to capture chain
            current_url = url
            for _ in range(max_redirects):
                if response is not None:
                    # Release the previous hop's streamed connection.
                    response.close()
                # stream=True keeps the connection attached to the response so the
                # TLS socket can still be inspected; a non-streamed response has
                # already consumed its body and released the connection.
                response = session.get(
                    current_url,
                    headers=headers,
                    proxies=proxies,
                    timeout=10,
                    allow_redirects=False,
                    verify=True,
                    stream=True,
                )
                location = response.headers.get('Location')
                # Record this hop in redirect chain
                redirect_chain.append({
                    'url': current_url,
                    'status': response.status_code,
                    'location': location or '',
                })
                if response.status_code not in _REDIRECT_STATUS_CODES or not location:
                    # Final response
                    break
                # urljoin handles absolute, path-relative and scheme-relative
                # ("//host/path") Location values alike.
                current_url = urljoin(current_url, location)

            # SERVER TLS Configuration (what the server chose/prefers). Decided by
            # the URL actually fetched last, and read before the body is consumed.
            tls_details = {}
            if urlparse(redirect_chain[-1]['url']).scheme == 'https':
                tls_details = _capture_tls_details(response)

            body = response.content
            elapsed_time = time.time() - start_time

            # Basic response info
            fingerprint['status_code'] = response.status_code
            fingerprint['reason'] = response.reason
            fingerprint['elapsed_ms'] = int(elapsed_time * 1000)
            fingerprint['content_length'] = len(body)
            fingerprint['http_version'] = f"HTTP/{response.raw.version // 10}.{response.raw.version % 10}"
            # All response headers
            fingerprint['headers'] = dict(response.headers)
            # Server fingerprinting
            fingerprint['server'] = response.headers.get('Server', 'Not disclosed')
            fingerprint['powered_by'] = response.headers.get('X-Powered-By', 'Not disclosed')
            # Security headers (only those that are actually present)
            security_header_names = (
                'Strict-Transport-Security',
                'Content-Security-Policy',
                'X-Frame-Options',
                'X-Content-Type-Options',
                'X-XSS-Protection',
                'Referrer-Policy',
                'Permissions-Policy',
            )
            fingerprint['security_headers'] = {
                name: response.headers.get(name)
                for name in security_header_names
                if response.headers.get(name)
            }
            fingerprint.update(tls_details)
            # Cookies
            if response.cookies:
                fingerprint['cookies'] = [
                    f"{cookie.name}={'[HttpOnly]' if cookie.has_nonstandard_attr('HttpOnly') else ''}"
                    f"{'[Secure]' if cookie.secure else ''}"
                    for cookie in response.cookies
                ]
            # CDN/WAF Detection
            detected_cdns = _detect_cdn_waf(response.headers, [c.name for c in response.cookies])
            if detected_cdns:
                fingerprint['cdn_waf'] = detected_cdns
            # Store redirect chain in fingerprint
            fingerprint['redirect_chain'] = redirect_chain
        except Exception as e:
            fingerprint['error'] = str(e)
            logger.error(f"HTTP fingerprinting failed: {e}")
        finally:
            # Restore original getaddrinfo first so it happens even if closing fails
            socket.getaddrinfo = original_getaddrinfo
            if response is not None:
                response.close()
            if session is not None:
                session.close()
        return fingerprint

    # requests is blocking, so run the fetch in a worker thread.
    return await asyncio.to_thread(fetch_http_fingerprint)
def format_http_results(http_fingerprint, parsed_url):
    """
    Format HTTP results for output
    Args:
        http_fingerprint: dict as produced by scan_http; when it contains an
            'error' key only the error is reported
        parsed_url: Accepted for interface compatibility; not used here
    Returns:
        str: Multi-line, human-readable report ending with a newline
    """
    def shorten(value, limit=100):
        # Long values (typically CSP headers) are cut to keep the report readable.
        text = str(value)
        return text[:limit] + "..." if len(text) > limit else text

    lines = ["=== HTTP Response Fingerprint ==="]
    if 'error' in http_fingerprint:
        lines.append(f"Error: {http_fingerprint['error']}")
        lines.append("")
        return '\n'.join(lines)

    # Response basics
    lines.append(f"Status: {http_fingerprint.get('status_code')} {http_fingerprint.get('reason')}")
    lines.append(f"HTTP Version: {http_fingerprint.get('http_version')}")
    lines.append(f"Content Length: {http_fingerprint.get('content_length')} bytes")
    lines.append("")

    # Server identification
    lines.append("Server Identification:")
    lines.append(f"  Server: {http_fingerprint.get('server')}")
    lines.append(f"  X-Powered-By: {http_fingerprint.get('powered_by')}")
    lines.append("")

    # Security headers
    security_headers = http_fingerprint.get('security_headers')
    if security_headers:
        lines.append("Security Headers:")
        for header, value in security_headers.items():
            lines.append(f"  {header}: {shorten(value)}")
        lines.append("")

    # SERVER TLS Configuration (what the server chose)
    if http_fingerprint.get('server_tls_version'):
        lines.append("Server TLS Configuration:")
        lines.append(f"  TLS Version: {http_fingerprint.get('server_tls_version')}")
        cipher = http_fingerprint.get('server_cipher')
        if cipher:
            lines.append(f"  Server Chose Cipher: {cipher[0]}")
            lines.append(f"  Cipher Protocol: {cipher[1]}")
            lines.append(f"  Cipher Bits: {cipher[2]}")
        # Server cipher fingerprint (JA3S-like)
        if http_fingerprint.get('server_cipher_fingerprint'):
            lines.append(f"  Server Cipher Fingerprint: {http_fingerprint['server_cipher_fingerprint']}")
        lines.append("")
        lines.append("  Note: The cipher the server chose can indicate")
        lines.append("  server software (nginx, Apache, IIS, Cloudflare, etc.)")
        lines.append("  See 'SSL/TLS Analysis' section for full server capabilities.")
        lines.append("")

    # CDN/WAF Detection
    if http_fingerprint.get('cdn_waf'):
        lines.append("CDN/WAF/Proxy Detection:")
        for cdn in http_fingerprint['cdn_waf']:
            lines.append(f"  - {cdn}")
        lines.append("")

    # Redirect Chain (only shown when at least one redirect was followed)
    redirect_chain = http_fingerprint.get('redirect_chain', [])
    if redirect_chain and len(redirect_chain) > 1:
        lines.append("Redirect Chain:")
        for i, hop in enumerate(redirect_chain, 1):
            lines.append(f"  {i}. [{hop['status']}] {hop['url']}")
            if hop.get('location'):
                lines.append(f"     → {hop['location']}")
        lines.append("")

    # Interesting headers. Header names are case-insensitive, but the stored
    # headers are a plain dict, so look them up by lowercased name.
    headers = http_fingerprint.get('headers') or {}
    headers_by_lower = {str(name).lower(): value for name, value in headers.items()}
    interesting_headers = [
        'Content-Type', 'Content-Encoding', 'Transfer-Encoding',
        'Cache-Control', 'Pragma', 'Expires', 'ETag', 'Last-Modified',
        'Access-Control-Allow-Origin', 'Vary', 'X-Request-ID', 'X-Runtime'
    ]
    found_headers = {
        name: headers_by_lower[name.lower()]
        for name in interesting_headers
        if name.lower() in headers_by_lower
    }
    if found_headers:
        lines.append("Notable Headers:")
        for header, value in found_headers.items():
            lines.append(f"  {header}: {shorten(value)}")
        lines.append("")

    # Cookies
    if http_fingerprint.get('cookies'):
        lines.append("Cookies Set:")
        for cookie in http_fingerprint['cookies']:
            lines.append(f"  {cookie}")
    lines.append("")
    return '\n'.join(lines)

changedetectionio_osint/steps/http_scan.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+HTTP Fingerprinting Step
+"""
+from .base import ScanStep
+from .registry import register_step
+from . import http_fingerprint
@register_step
class HTTPScanStep(ScanStep):
    """HTTP response fingerprinting, CDN/WAF detection, redirect chains"""
    name = "HTTP Response Fingerprint"
    order = 30

    async def scan(self, context: dict):
        """Perform HTTP fingerprinting using the URL, resolver and proxy from context"""
        # format_results() only receives the results, so remember the parsed URL
        # from the scan context for it.
        self._parsed_url = context.get('parsed_url')
        return await http_fingerprint.scan_http(
            context['url'],
            context['dns_resolver'],
            context.get('proxy_url'),
            context.get('watch_uuid'),
            context.get('update_signal')
        )

    def format_results(self, results):
        """Format HTTP results, or a placeholder when the scan produced no data"""
        if results and not isinstance(results, Exception):
            # No `context` exists in this scope; use the value stored by scan()
            # (None if scan() was never called on this instance).
            return http_fingerprint.format_http_results(
                results, getattr(self, '_parsed_url', None)
            )
        return "=== HTTP Response Fingerprint ===\nNo data available\n"