PyPI - python-proxy-headers - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

python-proxy-headers: 0.1.0 (py3-none-any.whl) → 0.2.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

python_proxy_headers/autoscraper_proxy.py ADDED Viewed

@@ -0,0 +1,344 @@
+"""
+AutoScraper extension for sending and receiving proxy headers.
+This module provides an AutoScraper subclass that enables:
+1. Sending custom headers to proxy servers during CONNECT
+2. Using our ProxySession for all HTTP requests
+Example usage:
+    from python_proxy_headers.autoscraper_proxy import ProxyAutoScraper
+    scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
+    # Build with proxy
+    result = scraper.build(
+        url='https://example.com',
+        wanted_list=['Example Domain'],
+        request_args={'proxies': {'https': 'http://proxy:8080'}}
+    )
+    # Get results with proxy
+    result = scraper.get_result_similar(
+        url='https://other-example.com',
+        request_args={'proxies': {'https': 'http://proxy:8080'}}
+    )
+"""
+from typing import Dict, List, Optional, Any
+from urllib.parse import urlparse
+try:
+    from autoscraper import AutoScraper
+except ImportError:
+    raise ImportError(
+        "autoscraper is required for this module. "
+        "Install it with: pip install autoscraper"
+    )
+from .requests_adapter import ProxySession
+class ProxyAutoScraper(AutoScraper):
+    """
+    AutoScraper with proxy header support.
+    This class extends AutoScraper to use our ProxySession for HTTP requests,
+    enabling custom proxy headers to be sent during CONNECT tunneling.
+    The session is created lazily on the first request and reused until
+    close() or set_proxy_headers() discards it. The class is also a context
+    manager; leaving the ``with`` block closes the session.
+    Args:
+        proxy_headers: Dict of headers to send to proxy servers
+        stack_list: Initial stack list (rules) for the scraper
+    Example:
+        scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
+        result = scraper.build(
+            url='https://finance.yahoo.com/quote/AAPL/',
+            wanted_list=['Apple Inc.'],
+            request_args={'proxies': {'https': 'http://proxy:8080'}}
+        )
+        # Use the learned rules on another page
+        result = scraper.get_result_similar(
+            url='https://finance.yahoo.com/quote/GOOG/',
+            request_args={'proxies': {'https': 'http://proxy:8080'}}
+        )
+    """
+    def __init__(
+        self,
+        proxy_headers: Optional[Dict[str, str]] = None,
+        stack_list: Optional[List] = None
+    ):
+        super().__init__(stack_list=stack_list)
+        self._proxy_headers = proxy_headers or {}
+        # Created on demand by _get_session(); None means "no open session".
+        self._session: Optional[ProxySession] = None
+    def _get_session(self) -> ProxySession:
+        """Return the current ProxySession, creating it on first use."""
+        if self._session is None:
+            self._session = ProxySession(proxy_headers=self._proxy_headers)
+        return self._session
+    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
+        """
+        Update the proxy headers.
+        This will close the current session and create a new one with
+        the updated headers on the next request.
+        Args:
+            proxy_headers: New proxy headers to use (None is treated as an
+                empty dict, matching the constructor)
+        """
+        self._proxy_headers = proxy_headers or {}
+        # The headers are bound to the session when it is built, so the old
+        # session has to be discarded for the new headers to take effect.
+        self.close()
+    def close(self):
+        """Close the underlying session, if one has been created."""
+        if self._session is not None:
+            self._session.close()
+            self._session = None
+    def __enter__(self):
+        return self
+    def __exit__(self, *args):
+        self.close()
+    @classmethod
+    def _fetch_html(cls, url, request_args=None):
+        """
+        Fetch HTML from URL using the standard requests.
+        Note: This is the class method from parent. For proxy header support,
+        use instance methods which use the ProxySession.
+        """
+        # Fall back to parent implementation for class method calls
+        return super()._fetch_html(url, request_args)
+    def _fetch_html_with_proxy(self, url: str, request_args: Optional[Dict] = None) -> str:
+        """
+        Fetch HTML from URL using ProxySession with proxy header support.
+        The caller's ``request_args`` dict is never modified, so the same
+        dict can be reused for several calls.
+        Args:
+            url: URL to fetch
+            request_args: Additional request arguments (proxies, headers, etc.)
+        Returns:
+            HTML content as string
+        """
+        # Work on a shallow copy: the pops below must not strip 'headers' or
+        # 'proxies' out of a dict the caller may pass again later.
+        request_args = dict(request_args) if request_args else {}
+        # Start from the scraper's default headers, then let the caller override.
+        headers = dict(self.request_headers)
+        if url:
+            headers["Host"] = urlparse(url).netloc
+        headers.update(request_args.pop("headers", None) or {})
+        session = self._get_session()
+        # Proxies are merged into the session itself, so they stay in effect
+        # for later requests made through the same session.
+        proxies = request_args.pop("proxies", None)
+        if proxies:
+            session.proxies.update(proxies)
+        res = session.get(url, headers=headers, **request_args)
+        # ISO-8859-1 with no such charset in Content-Type is treated as an
+        # unreliable default; use the detected encoding instead.
+        if res.encoding == "ISO-8859-1" and "ISO-8859-1" not in res.headers.get(
+            "Content-Type", ""
+        ):
+            res.encoding = res.apparent_encoding
+        return res.text
+    def _get_soup_with_proxy(self, url=None, html=None, request_args=None):
+        """
+        Get BeautifulSoup object using ProxySession.
+        When ``html`` is given it is parsed directly and no request is made.
+        Args:
+            url: URL to fetch (optional if html is provided)
+            html: HTML string (optional if url is provided)
+            request_args: Additional request arguments
+        Returns:
+            BeautifulSoup object
+        """
+        from html import unescape
+        from bs4 import BeautifulSoup
+        from autoscraper.utils import normalize
+        if not html:
+            html = self._fetch_html_with_proxy(url, request_args)
+        return BeautifulSoup(normalize(unescape(html)), "lxml")
+    def build(
+        self,
+        url: Optional[str] = None,
+        wanted_list: Optional[List] = None,
+        wanted_dict: Optional[Dict] = None,
+        html: Optional[str] = None,
+        request_args: Optional[Dict] = None,
+        update: bool = False,
+        text_fuzz_ratio: float = 1.0,
+    ) -> List:
+        """
+        Build scraping rules with proxy header support.
+        Same as AutoScraper.build() but uses ProxySession for requests.
+        Parameters:
+            url: URL of the target web page
+            wanted_list: List of needed contents to be scraped
+            wanted_dict: Dict of needed contents (keys are aliases)
+            html: HTML string (alternative to URL)
+            request_args: Request arguments including proxies
+            update: If True, add to existing rules
+            text_fuzz_ratio: Fuzziness ratio for matching
+        Returns:
+            List of similar results
+        Raises:
+            ValueError: If neither wanted_list nor a non-empty wanted_dict
+                is supplied
+        """
+        from autoscraper.utils import normalize, unique_hashable, unique_stack_list
+        if not wanted_list and not (wanted_dict and any(wanted_dict.values())):
+            raise ValueError("No targets were supplied")
+        # Use our proxy-aware soup getter
+        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
+        result_list = []
+        if update is False:
+            self.stack_list = []
+        # A plain wanted_list takes priority and is stored under the empty alias.
+        if wanted_list:
+            wanted_dict = {"": wanted_list}
+        wanted_list = []
+        for alias, wanted_items in wanted_dict.items():
+            wanted_items = [normalize(w) for w in wanted_items]
+            wanted_list += wanted_items
+            for wanted in wanted_items:
+                children = self._get_children(soup, wanted, url, text_fuzz_ratio)
+                for child in children:
+                    result, stack = self._get_result_for_child(child, soup, url)
+                    stack["alias"] = alias
+                    result_list += result
+                    self.stack_list.append(stack)
+        result_list = [item.text for item in result_list]
+        result_list = unique_hashable(result_list)
+        self.stack_list = unique_stack_list(self.stack_list)
+        return result_list
+    def get_result_similar(
+        self,
+        url: Optional[str] = None,
+        html: Optional[str] = None,
+        soup=None,
+        request_args: Optional[Dict] = None,
+        grouped: bool = False,
+        group_by_alias: bool = False,
+        unique: Optional[bool] = None,
+        attr_fuzz_ratio: float = 1.0,
+        keep_blank: bool = False,
+        keep_order: bool = False,
+        contain_sibling_leaves: bool = False,
+    ):
+        """
+        Get similar results with proxy header support.
+        Same as AutoScraper.get_result_similar() but uses ProxySession.
+        The page is fetched here (when ``soup`` is not supplied and ``url``
+        is), and the resulting soup is handed to the parent implementation.
+        """
+        if soup is None and url is not None:
+            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
+        return super().get_result_similar(
+            url=url,
+            html=html,
+            soup=soup,
+            request_args=None,  # Already fetched
+            grouped=grouped,
+            group_by_alias=group_by_alias,
+            unique=unique,
+            attr_fuzz_ratio=attr_fuzz_ratio,
+            keep_blank=keep_blank,
+            keep_order=keep_order,
+            contain_sibling_leaves=contain_sibling_leaves,
+        )
+    def get_result_exact(
+        self,
+        url: Optional[str] = None,
+        html: Optional[str] = None,
+        soup=None,
+        request_args: Optional[Dict] = None,
+        grouped: bool = False,
+        group_by_alias: bool = False,
+        unique: Optional[bool] = None,
+        attr_fuzz_ratio: float = 1.0,
+        keep_blank: bool = False,
+    ):
+        """
+        Get exact results with proxy header support.
+        Same as AutoScraper.get_result_exact() but uses ProxySession.
+        The page is fetched here (when ``soup`` is not supplied and ``url``
+        is), and the resulting soup is handed to the parent implementation.
+        """
+        if soup is None and url is not None:
+            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
+        return super().get_result_exact(
+            url=url,
+            html=html,
+            soup=soup,
+            request_args=None,  # Already fetched
+            grouped=grouped,
+            group_by_alias=group_by_alias,
+            unique=unique,
+            attr_fuzz_ratio=attr_fuzz_ratio,
+            keep_blank=keep_blank,
+        )
+    def get_result(
+        self,
+        url: Optional[str] = None,
+        html: Optional[str] = None,
+        request_args: Optional[Dict] = None,
+        grouped: bool = False,
+        group_by_alias: bool = False,
+        unique: Optional[bool] = None,
+        attr_fuzz_ratio: float = 1.0,
+    ):
+        """
+        Get similar and exact results with proxy header support.
+        Same as AutoScraper.get_result() but uses ProxySession.
+        The page is fetched once and the same soup is used for both lookups.
+        Returns:
+            Tuple of (similar results, exact results)
+        """
+        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
+        args = dict(
+            url=url,
+            soup=soup,
+            grouped=grouped,
+            group_by_alias=group_by_alias,
+            unique=unique,
+            attr_fuzz_ratio=attr_fuzz_ratio,
+        )
+        similar = self.get_result_similar(**args)
+        exact = self.get_result_exact(**args)
+        return similar, exact

python_proxy_headers/cloudscraper_proxy.py ADDED Viewed

@@ -0,0 +1,213 @@
+"""
+CloudScraper extension for sending and receiving proxy headers.
+This module provides a CloudScraper subclass that enables:
+1. Sending custom headers to proxy servers during CONNECT
+2. Capturing response headers from proxy servers
+Example usage:
+    from python_proxy_headers.cloudscraper_proxy import create_scraper
+    scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
+    scraper.proxies = {'https': 'http://proxy:8080'}
+    response = scraper.get('https://example.com')
+    # Access proxy response headers (stored on the response object)
+    print(response.proxy_headers)
+"""
+from typing import Dict, Optional, Any
+try:
+    import cloudscraper
+    from cloudscraper import CipherSuiteAdapter
+except ImportError:
+    raise ImportError(
+        "cloudscraper is required for this module. "
+        "Install it with: pip install cloudscraper"
+    )
+from .urllib3_proxy_manager import proxy_from_url
+class CipherSuiteProxyHeaderAdapter(CipherSuiteAdapter):
+    """
+    CloudScraper's CipherSuiteAdapter extended with custom proxy headers.
+    On top of the parent adapter this class:
+    - remembers a dict of extra headers to send to the proxy
+    - builds its proxy managers through this package's proxy_from_url,
+      passing those headers along
+    SOCKS proxies are delegated unchanged to the parent adapter.
+    """
+    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
+        # Stored before the parent initialiser runs, as in the original order.
+        self._proxy_headers = proxy_headers if proxy_headers else {}
+        super().__init__(**kwargs)
+    def proxy_manager_for(self, proxy, **proxy_kwargs):
+        """
+        Return the proxy manager for ``proxy``, creating and caching it if needed.
+        Args:
+            proxy: Proxy URL string
+            **proxy_kwargs: Extra keyword arguments forwarded to the manager
+        Returns:
+            The cached manager, the parent's manager for SOCKS proxies, or a
+            newly created manager from proxy_from_url
+        """
+        # Reuse a manager that was already built for this proxy URL.
+        if proxy in self.proxy_manager:
+            return self.proxy_manager[proxy]
+        # SOCKS proxies don't support custom headers; let the parent handle them.
+        if proxy.lower().startswith("socks"):
+            return super().proxy_manager_for(proxy, **proxy_kwargs)
+        # Standard proxy headers (e.g., Proxy-Authorization) first, then ours on top.
+        connect_headers = self.proxy_headers(proxy)
+        if self._proxy_headers:
+            connect_headers.update(self._proxy_headers)
+        # Forward the adapter's SSL context / source address when they are set.
+        for option in ('ssl_context', 'source_address'):
+            configured = getattr(self, option, None)
+            if configured:
+                proxy_kwargs[option] = configured
+        created = proxy_from_url(
+            proxy,
+            proxy_headers=connect_headers,
+            num_pools=self._pool_connections,
+            maxsize=self._pool_maxsize,
+            block=self._pool_block,
+            **proxy_kwargs,
+        )
+        self.proxy_manager[proxy] = created
+        return created
+class ProxyCloudScraper(cloudscraper.CloudScraper):
+    """
+    CloudScraper with proxy header support.
+    This class extends CloudScraper to add the ability to:
+    - Send custom headers to proxy servers during CONNECT tunneling
+    - Receive and access headers from proxy server responses
+    It does so by mounting a CipherSuiteProxyHeaderAdapter for both the
+    'https://' and 'http://' prefixes.
+    Args:
+        proxy_headers: Dict of headers to send to proxy servers
+        **kwargs: All other arguments passed to CloudScraper
+    Example:
+        scraper = ProxyCloudScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
+        scraper.proxies = {'https': 'http://proxy.example.com:8080'}
+        response = scraper.get('https://httpbin.org/ip')
+        print(response.proxy_headers)  # Headers from proxy CONNECT response
+    """
+    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
+        self._proxy_headers = proxy_headers or {}
+        # Call parent init
+        super().__init__(**kwargs)
+        # Replace the adapters with our proxy-header-aware version
+        self._mount_proxy_adapters()
+    def _build_proxy_adapter(self) -> CipherSuiteProxyHeaderAdapter:
+        """Create an adapter carrying the current proxy headers and TLS settings."""
+        # The cipher suite settings are copied from this scraper so the new
+        # adapter keeps the parent's TLS customisation.
+        return CipherSuiteProxyHeaderAdapter(
+            proxy_headers=self._proxy_headers,
+            cipherSuite=self.cipherSuite,
+            ecdhCurve=getattr(self, 'ecdhCurve', 'prime256v1'),
+            server_hostname=getattr(self, 'server_hostname', None),
+            source_address=getattr(self, 'source_address', None),
+            ssl_context=getattr(self, 'ssl_context', None)
+        )
+    def _mount_proxy_adapters(self) -> None:
+        """Mount a fresh proxy-header adapter for HTTPS and HTTP."""
+        # HTTP is mounted too, though proxy headers are mainly for HTTPS CONNECT.
+        for prefix in ('https://', 'http://'):
+            previous = self.adapters.get(prefix)
+            self.mount(prefix, self._build_proxy_adapter())
+            # mount() only swaps the mapping entry; close the replaced adapter
+            # so its pooled connections are released instead of lingering.
+            if previous is not None:
+                previous.close()
+    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
+        """
+        Update the proxy headers and remount adapters.
+        The previously mounted adapters are closed, so the new headers
+        apply to connections opened from now on.
+        Args:
+            proxy_headers: New proxy headers to use (None is treated as an
+                empty dict, matching the constructor)
+        """
+        self._proxy_headers = proxy_headers or {}
+        # Remount adapters with new headers
+        self._mount_proxy_adapters()
+def create_scraper(
+    proxy_headers: Optional[Dict[str, str]] = None,
+    sess: Optional[Any] = None,
+    **kwargs
+) -> ProxyCloudScraper:
+    """
+    Build a ProxyCloudScraper, optionally seeded from an existing session.
+    Intended as a stand-in for cloudscraper.create_scraper() with an extra
+    ``proxy_headers`` argument.
+    Args:
+        proxy_headers: Dict of headers to send to proxy servers
+        sess: Existing session whose attributes should be copied over
+        **kwargs: All other arguments passed to ProxyCloudScraper
+    Returns:
+        ProxyCloudScraper instance
+    Example:
+        from python_proxy_headers.cloudscraper_proxy import create_scraper
+        scraper = create_scraper(
+            proxy_headers={'X-ProxyMesh-Country': 'US'},
+            browser='chrome'
+        )
+        scraper.proxies = {'https': 'http://proxy:8080'}
+        response = scraper.get('https://example.com')
+    """
+    scraper = ProxyCloudScraper(proxy_headers=proxy_headers, **kwargs)
+    if not sess:
+        return scraper
+    # Only attributes that are present and not None on ``sess`` are copied.
+    copied_attrs = ('auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data')
+    for name in copied_attrs:
+        inherited = getattr(sess, name, None)
+        if inherited is None:
+            continue
+        setattr(scraper, name, inherited)
+    return scraper
+# Convenience alias
+session = create_scraper

python_proxy_headers/pycurl_proxy.py ADDED Viewed

@@ -0,0 +1,379 @@
+"""
+PycURL extension for sending and receiving proxy headers.
+This module provides helper functions and classes for working with proxy headers
+in pycurl. It can be used in two ways:
+1. Low-level helpers for existing pycurl code:
+    import pycurl
+    from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
+    c = pycurl.Curl()
+    c.setopt(pycurl.URL, 'https://example.com')
+    c.setopt(pycurl.PROXY, 'http://proxy:8080')
+    # Add proxy headers
+    set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
+    # Capture response headers (including proxy CONNECT headers)
+    capture = HeaderCapture(c)
+    c.perform()
+    print(capture.proxy_headers)   # Headers from proxy CONNECT response
+    print(capture.origin_headers)  # Headers from origin server
+2. High-level convenience functions:
+    from python_proxy_headers.pycurl_proxy import get
+    response = get('https://example.com',
+                   proxy='http://proxy:8080',
+                   proxy_headers={'X-ProxyMesh-Country': 'US'})
+    print(response.proxy_headers)
+"""
+from io import BytesIO
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple
+try:
+    import pycurl
+except ImportError:
+    raise ImportError(
+        "pycurl is required for this module. "
+        "Install it with: pip install pycurl"
+    )
+# =============================================================================
+# Low-level helper functions
+# =============================================================================
+def set_proxy_headers(curl, headers: Dict[str, str]) -> None:
+    """
+    Configure headers that are sent to the proxy server during CONNECT.
+    Does nothing when ``headers`` is empty or None.
+    Args:
+        curl: A pycurl.Curl instance
+        headers: Dict of headers to send to the proxy
+    Example:
+        c = pycurl.Curl()
+        c.setopt(pycurl.PROXY, 'http://proxy:8080')
+        set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
+        c.perform()
+    """
+    if not headers:
+        return
+    formatted = [f"{name}: {value}" for name, value in headers.items()]
+    # CURLOPT_PROXYHEADER: use the named constant, or its numeric value
+    # (10228) when this pycurl build does not expose the name.
+    try:
+        curl.setopt(pycurl.PROXYHEADER, formatted)
+    except AttributeError:
+        curl.setopt(10228, formatted)
+    # CURLOPT_HEADEROPT = CURLHEADER_SEPARATE keeps the proxy headers away
+    # from the origin server.
+    try:
+        curl.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE)
+    except AttributeError:
+        # Numeric fallback: CURLOPT_HEADEROPT = 229, CURLHEADER_SEPARATE = 1
+        try:
+            curl.setopt(229, 1)
+        except pycurl.error:
+            # Best effort: the option may not exist in older libcurl versions.
+            pass
+class HeaderCapture:
+    """
+    Captures and parses HTTP response headers from pycurl requests.
+    Header lines are grouped into sections, one per ``HTTP/...`` status
+    line. When at least two sections were captured, the first is reported
+    as the proxy's CONNECT response and the last as the origin response:
+    - proxy_headers: Headers from the proxy's CONNECT response
+    - origin_headers: Headers from the origin server's response
+    NOTE(review): any multi-response exchange (for example an interim
+    response before the final one) also yields two or more sections, so
+    the first section is then reported as the proxy response - confirm
+    this is acceptable for callers.
+    Example:
+        c = pycurl.Curl()
+        c.setopt(pycurl.URL, 'https://example.com')
+        c.setopt(pycurl.PROXY, 'http://proxy:8080')
+        capture = HeaderCapture(c)  # Installs HEADERFUNCTION callback
+        c.perform()
+        print(capture.proxy_headers)   # {'X-ProxyMesh-IP': '1.2.3.4', ...}
+        print(capture.origin_headers)  # {'Content-Type': 'text/html', ...}
+        print(capture.proxy_status)    # 200
+    """
+    def __init__(self, curl=None):
+        """
+        Initialize header capture.
+        Args:
+            curl: Optional pycurl.Curl instance. If provided, automatically
+                  installs the HEADERFUNCTION callback.
+        """
+        # Raw header lines in arrival order, exactly as handed to the callback.
+        self._header_lines: List[bytes] = []
+        # True while _sections reflects the current _header_lines.
+        self._parsed = False
+        # One (status code, headers) pair per response seen.
+        self._sections: List[Tuple[Optional[int], Dict[str, str]]] = []
+        if curl is not None:
+            self.install(curl)
+    def install(self, curl) -> 'HeaderCapture':
+        """
+        Install the header callback on a pycurl.Curl instance.
+        Args:
+            curl: A pycurl.Curl instance
+        Returns:
+            self, for chaining
+        """
+        curl.setopt(pycurl.HEADERFUNCTION, self._header_callback)
+        return self
+    def _header_callback(self, header_line: bytes) -> int:
+        """Callback for pycurl HEADERFUNCTION; returns the number of bytes consumed."""
+        self._header_lines.append(header_line)
+        self._parsed = False  # Invalidate cache
+        return len(header_line)
+    def _parse(self) -> None:
+        """Parse collected header lines into sections (no-op if already parsed)."""
+        if self._parsed:
+            return
+        self._sections = []
+        current_headers: Dict[str, str] = {}
+        current_status: Optional[int] = None
+        for line in self._header_lines:
+            line_str = line.decode('utf-8', errors='replace').strip()
+            if line_str.startswith('HTTP/'):
+                # New response section - save previous if exists
+                if current_headers or current_status is not None:
+                    self._sections.append((current_status, current_headers))
+                current_headers = {}
+                # Parse status line: HTTP/1.1 200 OK
+                parts = line_str.split(' ', 2)
+                if len(parts) >= 2:
+                    try:
+                        current_status = int(parts[1])
+                    except ValueError:
+                        current_status = None
+                else:
+                    current_status = None
+            elif ':' in line_str:
+                # A repeated header name overwrites the earlier value.
+                key, value = line_str.split(':', 1)
+                current_headers[key.strip()] = value.strip()
+        # Don't forget the last section
+        if current_headers or current_status is not None:
+            self._sections.append((current_status, current_headers))
+        self._parsed = True
+    def reset(self) -> None:
+        """Clear captured headers for reuse."""
+        self._header_lines.clear()
+        self._sections.clear()
+        self._parsed = False
+    @property
+    def proxy_headers(self) -> Dict[str, str]:
+        """
+        Headers from the proxy's CONNECT response.
+        Returns empty dict if fewer than two responses were captured
+        (e.g. not an HTTPS-via-proxy request).
+        """
+        self._parse()
+        if len(self._sections) >= 2:
+            return self._sections[0][1]
+        return {}
+    @property
+    def proxy_status(self) -> Optional[int]:
+        """
+        Status code from the proxy's CONNECT response.
+        Returns None if fewer than two responses were captured
+        (e.g. not an HTTPS-via-proxy request).
+        """
+        self._parse()
+        if len(self._sections) >= 2:
+            return self._sections[0][0]
+        return None
+    @property
+    def origin_headers(self) -> Dict[str, str]:
+        """Headers from the origin server's response (the last section captured)."""
+        self._parse()
+        if self._sections:
+            return self._sections[-1][1]
+        return {}
+    @property
+    def origin_status(self) -> Optional[int]:
+        """Status code from the origin server's response (the last section captured)."""
+        self._parse()
+        if self._sections:
+            return self._sections[-1][0]
+        return None
+    @property
+    def all_headers(self) -> Dict[str, str]:
+        """All headers merged (proxy headers take precedence for conflicts)."""
+        self._parse()
+        merged: Dict[str, str] = {}
+        # Merge from the last response back to the first, so the earliest
+        # section (the proxy's) is applied last and wins on duplicate names.
+        for _, headers in reversed(self._sections):
+            merged.update(headers)
+        return merged
+# =============================================================================
+# High-level convenience API
+# =============================================================================
+@dataclass
+class Response:
+    """Result of a request made through the high-level API."""
+    # Status code of the response
+    status_code: int
+    # Response headers
+    headers: Dict[str, str]
+    # Raw response body
+    content: bytes
+    # Headers from the proxy's response; empty by default
+    proxy_headers: Dict[str, str] = field(default_factory=dict)
+    # Status code from the proxy's response; None by default
+    proxy_status: Optional[int] = None
+    @property
+    def text(self) -> str:
+        """Body decoded as UTF-8, with undecodable bytes replaced."""
+        return str(self.content, 'utf-8', errors='replace')
+    def raise_for_status(self) -> None:
+        """Raise Exception when the status code is 400 or above."""
+        if self.status_code < 400:
+            return
+        raise Exception(f"HTTP Error {self.status_code}")
+def request(
+    method: str,
+    url: str,
+    proxy: Optional[str] = None,
+    proxy_headers: Optional[Dict[str, str]] = None,
+    headers: Optional[Dict[str, str]] = None,
+    data: Optional[bytes] = None,
+    timeout: Optional[int] = None,
+    verify: bool = True,
+) -> Response:
+    """
+    Make an HTTP request with proxy header support.
+    A fresh pycurl.Curl handle is created for the call and always closed
+    before returning or raising.
+    Args:
+        method: HTTP method (GET, POST, etc.); case-insensitive
+        url: Target URL
+        proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080')
+        proxy_headers: Headers to send to the proxy (ignored without proxy)
+        headers: Headers to send to the origin server
+        data: Request body for POST/PUT/PATCH (ignored for other methods)
+        timeout: Request timeout in seconds
+        verify: Whether to verify SSL certificates
+    Returns:
+        Response object with body, headers, and proxy_headers
+    Raises:
+        pycurl.error: Propagated unchanged when the transfer fails
+    """
+    c = pycurl.Curl()
+    body = BytesIO()
+    try:
+        # Installed inside the try so the handle is closed even if this fails.
+        capture = HeaderCapture(c)
+        c.setopt(pycurl.URL, url)
+        c.setopt(pycurl.WRITEFUNCTION, body.write)
+        # HTTP method
+        method = method.upper()
+        if method == 'GET':
+            c.setopt(pycurl.HTTPGET, 1)
+        elif method == 'POST':
+            c.setopt(pycurl.POST, 1)
+            # Always set POSTFIELDS: with POST enabled and no fields, libcurl
+            # takes the body from its read callback, so a body-less POST
+            # would wait on input. An empty body gives a zero-length POST.
+            c.setopt(pycurl.POSTFIELDS, data if data else b'')
+        elif method == 'HEAD':
+            c.setopt(pycurl.NOBODY, 1)
+        else:
+            # PUT, PATCH, DELETE and any other verb go out as a custom request.
+            c.setopt(pycurl.CUSTOMREQUEST, method)
+            if method in ('PUT', 'PATCH') and data:
+                c.setopt(pycurl.POSTFIELDS, data)
+        # Request headers
+        if headers:
+            c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()])
+        # Proxy
+        if proxy:
+            c.setopt(pycurl.PROXY, proxy)
+            if proxy_headers:
+                set_proxy_headers(c, proxy_headers)
+        # Timeout
+        if timeout:
+            c.setopt(pycurl.TIMEOUT, timeout)
+        # SSL
+        if not verify:
+            c.setopt(pycurl.SSL_VERIFYPEER, 0)
+            c.setopt(pycurl.SSL_VERIFYHOST, 0)
+        c.perform()
+        return Response(
+            status_code=c.getinfo(pycurl.RESPONSE_CODE),
+            headers=capture.origin_headers,
+            content=body.getvalue(),
+            proxy_headers=capture.proxy_headers,
+            proxy_status=capture.proxy_status,
+        )
+    finally:
+        c.close()
+def get(url: str, **kwargs) -> Response:
+    """Send a GET request to ``url``; keyword arguments are passed to request()."""
+    response = request('GET', url, **kwargs)
+    return response
+def post(url: str, **kwargs) -> Response:
+    """Send a POST request to ``url``; keyword arguments are passed to request()."""
+    response = request('POST', url, **kwargs)
+    return response
+def put(url: str, **kwargs) -> Response:
+    """Send a PUT request to ``url``; keyword arguments are passed to request()."""
+    response = request('PUT', url, **kwargs)
+    return response
+def delete(url: str, **kwargs) -> Response:
+    """Send a DELETE request to ``url``; keyword arguments are passed to request()."""
+    response = request('DELETE', url, **kwargs)
+    return response
+def head(url: str, **kwargs) -> Response:
+    """Send a HEAD request to ``url``; keyword arguments are passed to request()."""
+    response = request('HEAD', url, **kwargs)
+    return response
+def patch(url: str, **kwargs) -> Response:
+    """Send a PATCH request to ``url``; keyword arguments are passed to request()."""
+    response = request('PATCH', url, **kwargs)
+    return response

python_proxy_headers-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,176 @@
+Metadata-Version: 2.4
+Name: python-proxy-headers
+Version: 0.2.0
+Summary: Handle custom proxy headers for http & https requests in various python libraries
+Author-email: ProxyMesh <support@proxymesh.com>
+Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers
+Project-URL: Changelog, https://github.com/proxymesh/python-proxy-headers/commits/main/
+Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
+Project-URL: Documentation, https://python-proxy-headers.readthedocs.io/en/latest/
+Project-URL: ProxyMesh, https://proxymesh.com
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# Python Proxy Headers
+[![Documentation Status](https://readthedocs.org/projects/python-proxy-headers/badge/?version=latest)](https://python-proxy-headers.readthedocs.io/en/latest/?badge=latest)
+[![PyPI version](https://badge.fury.io/py/python-proxy-headers.svg)](https://badge.fury.io/py/python-proxy-headers)
+Extensions for Python HTTP libraries to support **sending and receiving custom proxy headers** during HTTPS CONNECT tunneling.
+## The Problem
+When making HTTPS requests through a proxy, the connection is established via a CONNECT tunnel. During this process:
+1. **Sending headers to the proxy** - Most Python HTTP libraries don't provide an easy way to send custom headers (like `X-ProxyMesh-Country`) to the proxy server during the CONNECT handshake.
+2. **Receiving headers from the proxy** - The proxy's response headers from the CONNECT request are typically discarded, making it impossible to read custom headers (like `X-ProxyMesh-IP`) that the proxy sends back.
+This library solves both problems for popular Python HTTP libraries.
+## Supported Libraries
+| Library | Module | Use Case |
+|---------|--------|----------|
+| [urllib3](https://python-proxy-headers.readthedocs.io/en/latest/urllib3.html) | `urllib3_proxy_manager` | Low-level HTTP client |
+| [requests](https://python-proxy-headers.readthedocs.io/en/latest/requests.html) | `requests_adapter` | Simple HTTP requests |
+| [aiohttp](https://python-proxy-headers.readthedocs.io/en/latest/aiohttp.html) | `aiohttp_proxy` | Async HTTP client |
+| [httpx](https://python-proxy-headers.readthedocs.io/en/latest/httpx.html) | `httpx_proxy` | Modern HTTP client |
+| [pycurl](https://python-proxy-headers.readthedocs.io/en/latest/pycurl.html) | `pycurl_proxy` | libcurl bindings |
+| [cloudscraper](https://python-proxy-headers.readthedocs.io/en/latest/cloudscraper.html) | `cloudscraper_proxy` | Cloudflare bypass |
+| [autoscraper](https://python-proxy-headers.readthedocs.io/en/latest/autoscraper.html) | `autoscraper_proxy` | Automatic web scraping |
+## Installation
+```bash
+pip install python-proxy-headers
+```
+Then install the HTTP library you want to use (e.g., `pip install requests`).
+> **Note:** This package has no dependencies by default - install only what you need.
+## Quick Start
+### requests
+```python
+from python_proxy_headers.requests_adapter import ProxySession
+with ProxySession(proxy_headers={'X-ProxyMesh-Country': 'US'}) as session:
+    session.proxies = {'https': 'http://user:pass@proxy.example.com:8080'}
+    response = session.get('https://httpbin.org/ip')
+    # Proxy headers are merged into response.headers
+    print(response.headers.get('X-ProxyMesh-IP'))
+```
+### httpx
+```python
+from python_proxy_headers.httpx_proxy import get
+response = get(
+    'https://httpbin.org/ip',
+    proxy='http://user:pass@proxy.example.com:8080'
+)
+# Proxy CONNECT response headers are merged into response.headers
+print(response.headers.get('X-ProxyMesh-IP'))
+```
+### aiohttp
+```python
+import asyncio
+from python_proxy_headers.aiohttp_proxy import ProxyClientSession
+async def main():
+    async with ProxyClientSession() as session:
+        async with session.get(
+            'https://httpbin.org/ip',
+            proxy='http://user:pass@proxy.example.com:8080'
+        ) as response:
+            # Proxy headers merged into response.headers
+            print(response.headers.get('X-ProxyMesh-IP'))
+asyncio.run(main())
+```
+### pycurl (low-level)
+```python
+import pycurl
+from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
+c = pycurl.Curl()
+c.setopt(pycurl.URL, 'https://httpbin.org/ip')
+c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080')
+# Add these two lines to any existing pycurl code
+set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
+capture = HeaderCapture(c)
+c.perform()
+print(capture.proxy_headers)  # Headers from proxy CONNECT response
+c.close()
+```
+### cloudscraper
+```python
+from python_proxy_headers.cloudscraper_proxy import create_scraper
+# Drop-in replacement for cloudscraper.create_scraper()
+scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
+scraper.proxies = {'https': 'http://proxy.example.com:8080'}
+response = scraper.get('https://example.com')
+# All CloudScraper features (Cloudflare bypass) preserved
+```
+## Testing
+A test harness is included to verify proxy header functionality:
+```bash
+# Set your proxy
+export PROXY_URL='http://user:pass@proxy.example.com:8080'
+# Test all modules
+python test_proxy_headers.py
+# Test specific modules
+python test_proxy_headers.py requests httpx
+# Verbose output (show header values)
+python test_proxy_headers.py -v
+```
+## Documentation
+For detailed documentation, API reference, and more examples:
+- **Full Documentation:** [python-proxy-headers.readthedocs.io](https://python-proxy-headers.readthedocs.io/en/latest/)
+- **Example Code:** [proxy-examples for Python](https://github.com/proxymesh/proxy-examples/tree/main/python)
+## Related Projects
+- **[scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers)** - Proxy header support for Scrapy
+## About
+Created by [ProxyMesh](https://proxymesh.com) to help our customers use custom headers to control proxy behavior. Works with any proxy that supports custom headers.
+## License
+MIT License

python_proxy_headers-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+python_proxy_headers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+python_proxy_headers/aiohttp_proxy.py,sha256=OyEruj3CCrFmlEikmmOXGQsVJnrAfK-TMzycP-sLH_Y,4920
+python_proxy_headers/autoscraper_proxy.py,sha256=g51K71xOah_uNtMIXEXggtd6T8ol7vAukg7oaWVvhEA,11342
+python_proxy_headers/cloudscraper_proxy.py,sha256=6xF58QywHV4IYlC_KtXIWQTXZjE8CeEa5-GfqcSGZ3U,7780
+python_proxy_headers/httpx_proxy.py,sha256=Q8tDlfl4u3fjsZZDA1zj1pbN7fiAwJ3ctFGojbqtaEo,11829
+python_proxy_headers/pycurl_proxy.py,sha256=_JAt_o5gX7o0sRLOQXG3421mEehzBm-rhUl4c9IG5HQ,11738
+python_proxy_headers/requests_adapter.py,sha256=CfGEEYc0eaKKNA11VbhZzv9qFVEL1uMSzAUNvidIyEU,2313
+python_proxy_headers/urllib3_proxy_manager.py,sha256=FG6keO1ENBhVQLmnUkwVKpdC3U9xEXNxKGy9_7xjd_s,4399
+python_proxy_headers-0.2.0.dist-info/licenses/LICENSE,sha256=i_H6fvudjqZOKkZFCIHGmbPHIxszjCAT90UJSY2OM0U,1066
+python_proxy_headers-0.2.0.dist-info/METADATA,sha256=21O32NtwCvq62LY8iLr7mOi01OK-9mDWqdkYR1_Pscs,6288
+python_proxy_headers-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
+python_proxy_headers-0.2.0.dist-info/top_level.txt,sha256=Bhm-Qc6vn0DAd2Li8ZBn6KLkls2zJnM4-CtXHvIiRh8,21
+python_proxy_headers-0.2.0.dist-info/RECORD,,

{python_proxy_headers-0.1.0.dist-info → python_proxy_headers-0.2.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
+Generator: setuptools (82.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

python_proxy_headers-0.1.0.dist-info/METADATA DELETED Viewed

@@ -1,151 +0,0 @@
-Metadata-Version: 2.2
-Name: python-proxy-headers
-Version: 0.1.0
-Summary: Handle custom proxy headers for http requests in various python libraries
-Author-email: ProxyMesh <support@proxymesh.com>
-Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers
-Project-URL: Changelog, https://github.com/proxymesh/python-proxy-headers/commits/main/
-Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
-Classifier: Programming Language :: Python :: 3
-Classifier: Operating System :: OS Independent
-Classifier: License :: OSI Approved :: BSD License
-Classifier: Intended Audience :: Developers
-Classifier: Topic :: Internet :: WWW/HTTP
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-License-File: LICENSE
-# Python Proxy Headers
-The `python-proxy-headers` package provides support for handling custom proxy headers when making HTTPS requests in various python modules.
-We currently provide extensions to the following packages:
-* [urllib3](https://urllib3.readthedocs.io/en/stable/)
-* [requests](https://docs.python-requests.org/en/latest/index.html)
-* [aiohttp](https://docs.aiohttp.org/en/stable/index.html)
-* [httpx](https://www.python-httpx.org/)
-None of these modules provide good support for parsing custom response headers from proxy servers. And some of them make it hard to send custom headers to proxy servers. So we at [ProxyMesh](https://proxymesh.com) made these extension modules to support our customers that use Python and want to use custom headers to control our proxy behavior. But these modules can work for handling custom headers with any proxy.
-*If you are looking for [Scrapy](https://scrapy.org/) support, please see our [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers) project.*
-## Installation
-Examples for how to use these extension modules are described below. You must first do the following:
-1. `pip install python-proxy-headers`
-2. Install the appropriate package based on the python module you want to use.
-This package does not have any dependencies because we don't know which module you want to use.
-You can also find more example code in our [proxy-examples for python](https://github.com/proxymesh/proxy-examples/tree/main/python).
-## urllib3
-If you just want to send custom proxy headers, but don't need to receive proxy response headers, then you can use [urllib3.ProxyManager](https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html#urllib3.ProxyManager), like so:
-``` python
-import urllib3
-proxy = urllib3.ProxyManager('http://PROXYHOST:PORT', proxy_headers={'X-ProxyMesh-Country': 'US'})
-r = proxy.request('GET', 'https://api.ipify.org?format=json')
-```
-Note that when using this method, if you keep reusing the same `ProxyManager` instance, you may be re-using the proxy connection, which may have different behavior than if you create a new proxy connection for each request. For example, with ProxyMesh you may keep getting the same IP address if you reuse the proxy connection.
-To get proxy response headers, use our extension module like this:
-``` python
-from python_proxy_headers import urllib3_proxy_manager
-proxy = urllib3_proxy_manager.ProxyHeaderManager('http://PROXYHOST:PORT')
-r = proxy.request('GET', 'https://api.ipify.org?format=json')
-r.headers['X-ProxyMesh-IP']
-```
-You can also pass `proxy_headers` into our `ProxyHeaderManager` as well. For example, you can pass back the same `X-ProxyMesh-IP` header to ensure you get the same IP address on subsequent requests.
-## requests
-The requests adapter builds on our `urllib3_proxy_manager` module to make it easy to pass in proxy headers and receive proxy response headers.
-``` python
-from python_proxy_headers import requests_adapter
-r = requests_adapter.get('https://api.ipify.org?format=json', proxies={'http': 'http://PROXYHOST:PORT', 'https': 'http://PROXYHOST:PORT'}, proxy_headers={'X-ProxyMesh-Country': 'US'})
-r.headers['X-ProxyMesh-IP']
-```
-The `requests_adapter` module supports all the standard requests methods: `get`, `post`, `put`, `delete`, etc.
-## aiohttp
-While it's not documented, aiohttp does support passing in custom proxy headers by default.
-``` python
-import aiohttp
-async with aiohttp.ClientSession() as session:
-	async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
-		await r.text()
-```
-However, if you want to get proxy response, you should use our extension module:
-``` python
-from python_proxy_headers import aiohttp_proxy
-async with aiohttp_proxy.ProxyClientSession() as session:
-	async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
-		await r.text()
-r.headers['X-ProxyMesh-IP']
-```
-## httpx
-httpx also supports proxy headers by default, though it's not documented:
-``` python
-import httpx
-proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
-transport = HTTPProxyTransport(proxy=proxy)
-with httpx.Client(mounts={'http://': transort, 'https://': transport}) as client:
-	r = client.get('https://api.ipify.org?format=json')
-```
-But to get the response headers, you need to use our extension module:
-``` python
-import httpx
-from python_proxy_headers.httpx_proxy import HTTPProxyTransport
-proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
-transport = HTTPProxyTransport(proxy=proxy)
-with httpx.Client(mounts={'http://': transort, 'https://': transport}) as client:
-	r = client.get('https://api.ipify.org?format=json')
-r.headers['X-ProxyMesh-IP']
-```
-This module also provides helper methods similar to requests:
-``` python
-import httpx
-from python_proxy_headers import httpx_proxy
-proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
-r = httpx_proxy.get('https://api.ipify.org?format=json', proxy=proxy)
-r.headers['X-ProxyMesh-IP']
-```
-And finally, httpx supports async requests, so we provide an async extension too:
-``` python
-import httpx
-from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport
-proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
-transport = AsyncHTTPProxyTransport(proxy=proxy)
-async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client:
-	r = await client.get('https://api.ipify.org?format=json')
-r.headers['X-ProxyMesh-IP']
-```
-Our httpx helper module internally provides extension classes for [httpcore](https://www.encode.io/httpcore/), for handling proxy headers over tunnel connections.
-You can use those classes if you're building on top of httpcore.

python_proxy_headers-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-python_proxy_headers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-python_proxy_headers/aiohttp_proxy.py,sha256=OyEruj3CCrFmlEikmmOXGQsVJnrAfK-TMzycP-sLH_Y,4920
-python_proxy_headers/httpx_proxy.py,sha256=Q8tDlfl4u3fjsZZDA1zj1pbN7fiAwJ3ctFGojbqtaEo,11829
-python_proxy_headers/requests_adapter.py,sha256=CfGEEYc0eaKKNA11VbhZzv9qFVEL1uMSzAUNvidIyEU,2313
-python_proxy_headers/urllib3_proxy_manager.py,sha256=FG6keO1ENBhVQLmnUkwVKpdC3U9xEXNxKGy9_7xjd_s,4399
-python_proxy_headers-0.1.0.dist-info/LICENSE,sha256=i_H6fvudjqZOKkZFCIHGmbPHIxszjCAT90UJSY2OM0U,1066
-python_proxy_headers-0.1.0.dist-info/METADATA,sha256=gXUKShZK4ExJ7mfOh3Pi_InDIyLXyIrtWGFEqLKr9d0,6761
-python_proxy_headers-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-python_proxy_headers-0.1.0.dist-info/top_level.txt,sha256=Bhm-Qc6vn0DAd2Li8ZBn6KLkls2zJnM4-CtXHvIiRh8,21
-python_proxy_headers-0.1.0.dist-info/RECORD,,

{python_proxy_headers-0.1.0.dist-info → python_proxy_headers-0.2.0.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{python_proxy_headers-0.1.0.dist-info → python_proxy_headers-0.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

python-proxy-headers 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

python-proxy-headers 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl