python-proxy-headers 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,344 @@
1
+ """
2
+ AutoScraper extension for sending and receiving proxy headers.
3
+
4
+ This module provides an AutoScraper subclass that enables:
5
+ 1. Sending custom headers to proxy servers during CONNECT
6
+ 2. Using our ProxySession for all HTTP requests
7
+
8
+ Example usage:
9
+ from python_proxy_headers.autoscraper_proxy import ProxyAutoScraper
10
+
11
+ scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
12
+
13
+ # Build with proxy
14
+ result = scraper.build(
15
+ url='https://example.com',
16
+ wanted_list=['Example Domain'],
17
+ request_args={'proxies': {'https': 'http://proxy:8080'}}
18
+ )
19
+
20
+ # Get results with proxy
21
+ result = scraper.get_result_similar(
22
+ url='https://other-example.com',
23
+ request_args={'proxies': {'https': 'http://proxy:8080'}}
24
+ )
25
+ """
26
+
27
+ from typing import Dict, List, Optional, Any
28
+ from urllib.parse import urlparse
29
+
30
+ try:
31
+ from autoscraper import AutoScraper
32
+ except ImportError:
33
+ raise ImportError(
34
+ "autoscraper is required for this module. "
35
+ "Install it with: pip install autoscraper"
36
+ )
37
+
38
+ from .requests_adapter import ProxySession
39
+
40
+
41
class ProxyAutoScraper(AutoScraper):
    """
    AutoScraper with proxy header support.

    This class extends AutoScraper to use our ProxySession for HTTP requests,
    enabling custom proxy headers to be sent during CONNECT tunneling.

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        stack_list: Initial stack list (rules) for the scraper

    Example:
        scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})

        result = scraper.build(
            url='https://finance.yahoo.com/quote/AAPL/',
            wanted_list=['Apple Inc.'],
            request_args={'proxies': {'https': 'http://proxy:8080'}}
        )

        # Use the learned rules on another page
        result = scraper.get_result_similar(
            url='https://finance.yahoo.com/quote/GOOG/',
            request_args={'proxies': {'https': 'http://proxy:8080'}}
        )
    """

    def __init__(
        self,
        proxy_headers: Optional[Dict[str, str]] = None,
        stack_list: Optional[List] = None
    ):
        super().__init__(stack_list=stack_list)
        self._proxy_headers = proxy_headers or {}
        # Created lazily by _get_session() so that constructing a scraper
        # does not build a session until a request is actually made.
        self._session: Optional[ProxySession] = None

    def _get_session(self) -> ProxySession:
        """Return the cached ProxySession, creating it on first use."""
        if self._session is None:
            self._session = ProxySession(proxy_headers=self._proxy_headers)
        return self._session

    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
        """
        Update the proxy headers.

        The current session (if any) is closed; a new one carrying the
        updated headers is created on the next request.

        Args:
            proxy_headers: New proxy headers to use. A falsy value is
                stored as an empty dict, matching ``__init__``.
        """
        self._proxy_headers = proxy_headers or {}
        self.close()

    def close(self):
        """Close and discard the underlying session, if one was created."""
        if self._session is not None:
            self._session.close()
            self._session = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @classmethod
    def _fetch_html(cls, url, request_args=None):
        """
        Fetch HTML from URL using the parent class implementation.

        Note: This is the class method from parent. For proxy header support,
        use instance methods which use the ProxySession.
        """
        # Fall back to parent implementation for class method calls
        return super()._fetch_html(url, request_args)

    def _fetch_html_with_proxy(self, url: str, request_args: Optional[Dict] = None) -> str:
        """
        Fetch HTML from URL using ProxySession with proxy header support.

        The caller's ``request_args`` dict is never modified.

        Args:
            url: URL to fetch
            request_args: Additional request arguments (proxies, headers, etc.)

        Returns:
            HTML content as string
        """
        # Work on a shallow copy: the pops below must not strip 'headers' or
        # 'proxies' from a dict the caller may reuse for later requests.
        request_args = dict(request_args) if request_args else {}

        # Start from the scraper's default headers, then layer the Host
        # header and any user-supplied headers on top.
        headers = dict(self.request_headers)
        if url:
            headers["Host"] = urlparse(url).netloc
        headers.update(request_args.pop("headers", None) or {})

        session = self._get_session()

        # Proxies are merged into the session itself, so they stay in
        # effect for later requests made through the same session.
        proxies = request_args.pop('proxies', None)
        if proxies:
            session.proxies.update(proxies)

        res = session.get(url, headers=headers, **request_args)

        # When the reported encoding is ISO-8859-1 but the Content-Type
        # header does not name it, use the detected encoding instead.
        if res.encoding == "ISO-8859-1" and "ISO-8859-1" not in res.headers.get(
            "Content-Type", ""
        ):
            res.encoding = res.apparent_encoding

        return res.text

    def _get_soup_with_proxy(self, url=None, html=None, request_args=None):
        """
        Get BeautifulSoup object using ProxySession.

        Args:
            url: URL to fetch (optional if html is provided)
            html: HTML string (optional if url is provided); when truthy it
                is parsed directly and no request is made
            request_args: Additional request arguments

        Returns:
            BeautifulSoup object
        """
        from html import unescape
        from bs4 import BeautifulSoup
        from autoscraper.utils import normalize

        if not html:
            html = self._fetch_html_with_proxy(url, request_args)

        return BeautifulSoup(normalize(unescape(html)), "lxml")

    def build(
        self,
        url: Optional[str] = None,
        wanted_list: Optional[List] = None,
        wanted_dict: Optional[Dict] = None,
        html: Optional[str] = None,
        request_args: Optional[Dict] = None,
        update: bool = False,
        text_fuzz_ratio: float = 1.0,
    ) -> List:
        """
        Build scraping rules with proxy header support.

        Same as AutoScraper.build() but uses ProxySession for requests.

        Parameters:
            url: URL of the target web page
            wanted_list: List of needed contents to be scraped
            wanted_dict: Dict of needed contents (keys are aliases)
            html: HTML string (alternative to URL)
            request_args: Request arguments including proxies
            update: If True, add to existing rules
            text_fuzz_ratio: Fuzziness ratio for matching

        Returns:
            List of similar results

        Raises:
            ValueError: If neither wanted_list nor a non-empty wanted_dict
                is supplied
        """
        from autoscraper.utils import normalize, unique_hashable, unique_stack_list

        if not wanted_list and not (wanted_dict and any(wanted_dict.values())):
            raise ValueError("No targets were supplied")

        # Use our proxy-aware soup getter
        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)

        result_list = []

        if update is False:
            self.stack_list = []

        # A plain wanted_list takes priority and is stored under the empty alias.
        if wanted_list:
            wanted_dict = {"": wanted_list}

        wanted_list = []

        for alias, wanted_items in wanted_dict.items():
            wanted_items = [normalize(w) for w in wanted_items]
            wanted_list += wanted_items

            for wanted in wanted_items:
                children = self._get_children(soup, wanted, url, text_fuzz_ratio)

                for child in children:
                    result, stack = self._get_result_for_child(child, soup, url)
                    stack["alias"] = alias
                    result_list += result
                    self.stack_list.append(stack)

        result_list = [item.text for item in result_list]
        result_list = unique_hashable(result_list)

        self.stack_list = unique_stack_list(self.stack_list)
        return result_list

    def get_result_similar(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        soup=None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
        keep_blank: bool = False,
        keep_order: bool = False,
        contain_sibling_leaves: bool = False,
    ):
        """
        Get similar results with proxy header support.

        Same as AutoScraper.get_result_similar() but uses ProxySession:
        when no soup is supplied and a url is given, the page is obtained
        through the proxy-aware getter before delegating to the parent.
        """
        if soup is None and url is not None:
            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)

        return super().get_result_similar(
            url=url,
            html=html,
            soup=soup,
            request_args=None,  # Already fetched
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
            keep_blank=keep_blank,
            keep_order=keep_order,
            contain_sibling_leaves=contain_sibling_leaves,
        )

    def get_result_exact(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        soup=None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
        keep_blank: bool = False,
    ):
        """
        Get exact results with proxy header support.

        Same as AutoScraper.get_result_exact() but uses ProxySession:
        when no soup is supplied and a url is given, the page is obtained
        through the proxy-aware getter before delegating to the parent.
        """
        if soup is None and url is not None:
            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)

        return super().get_result_exact(
            url=url,
            html=html,
            soup=soup,
            request_args=None,  # Already fetched
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
            keep_blank=keep_blank,
        )

    def get_result(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
    ):
        """
        Get similar and exact results with proxy header support.

        Same as AutoScraper.get_result() but uses ProxySession. The page is
        obtained once and the resulting soup is shared by both lookups.

        Returns:
            Tuple of (similar results, exact results)
        """
        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)

        args = dict(
            url=url,
            soup=soup,
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
        )
        similar = self.get_result_similar(**args)
        exact = self.get_result_exact(**args)
        return similar, exact
@@ -0,0 +1,213 @@
1
+ """
2
+ CloudScraper extension for sending and receiving proxy headers.
3
+
4
+ This module provides a CloudScraper subclass that enables:
5
+ 1. Sending custom headers to proxy servers during CONNECT
6
+ 2. Capturing response headers from proxy servers
7
+
8
+ Example usage:
9
+ from python_proxy_headers.cloudscraper_proxy import create_scraper
10
+
11
+ scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
12
+ scraper.proxies = {'https': 'http://proxy:8080'}
13
+ response = scraper.get('https://example.com')
14
+
15
+ # Access proxy response headers (stored on the response object)
16
+ print(response.proxy_headers)
17
+ """
18
+
19
+ from typing import Dict, Optional, Any
20
+
21
+ try:
22
+ import cloudscraper
23
+ from cloudscraper import CipherSuiteAdapter
24
+ except ImportError:
25
+ raise ImportError(
26
+ "cloudscraper is required for this module. "
27
+ "Install it with: pip install cloudscraper"
28
+ )
29
+
30
+ from .urllib3_proxy_manager import proxy_from_url
31
+
32
+
33
class CipherSuiteProxyHeaderAdapter(CipherSuiteAdapter):
    """
    CipherSuiteAdapter variant that can send custom headers to a proxy.

    Everything CloudScraper's adapter does is inherited unchanged; only the
    creation of proxy managers is overridden so that non-SOCKS proxies are
    built through this package's ``proxy_from_url`` with the extra headers.
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        """
        Args:
            proxy_headers: Headers to add to requests made to the proxy
            **kwargs: Forwarded untouched to CipherSuiteAdapter
        """
        # Stored before the parent initialiser runs, as in the original
        # ordering of these two statements.
        self._proxy_headers = proxy_headers or {}
        super().__init__(**kwargs)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """
        Return the proxy manager to use for ``proxy``.

        A manager already cached for this proxy URL is reused. SOCKS proxies
        are delegated to the parent implementation. Any other proxy gets a
        manager from ``proxy_from_url`` which is then cached.
        """
        if proxy in self.proxy_manager:
            return self.proxy_manager[proxy]

        if proxy.lower().startswith("socks"):
            # SOCKS proxies don't support custom headers
            return super().proxy_manager_for(proxy, **proxy_kwargs)

        # Standard proxy headers first (e.g., Proxy-Authorization), then the
        # custom ones on top so they override on conflict.
        connect_headers = self.proxy_headers(proxy)
        if self._proxy_headers:
            connect_headers.update(self._proxy_headers)

        # Forward TLS context and source address only when the adapter
        # actually has a truthy value for them.
        ssl_context = getattr(self, 'ssl_context', None)
        if ssl_context:
            proxy_kwargs['ssl_context'] = ssl_context

        source_address = getattr(self, 'source_address', None)
        if source_address:
            proxy_kwargs['source_address'] = source_address

        new_manager = proxy_from_url(
            proxy,
            proxy_headers=connect_headers,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs,
        )
        self.proxy_manager[proxy] = new_manager
        return new_manager
84
+
85
+
86
class ProxyCloudScraper(cloudscraper.CloudScraper):
    """
    CloudScraper with proxy header support.

    This class extends CloudScraper to add the ability to:
    - Send custom headers to proxy servers during CONNECT tunneling
    - Receive and access headers from proxy server responses

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        **kwargs: All other arguments passed to CloudScraper

    Example:
        scraper = ProxyCloudScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
        scraper.proxies = {'https': 'http://proxy.example.com:8080'}
        response = scraper.get('https://httpbin.org/ip')
        print(response.proxy_headers)  # Headers from proxy CONNECT response
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        self._proxy_headers = proxy_headers or {}

        # Call parent init
        super().__init__(**kwargs)

        # Replace the parent's adapters with proxy-header-aware versions.
        self._mount_proxy_adapters()

    def _build_proxy_adapter(self) -> CipherSuiteProxyHeaderAdapter:
        """Create one adapter carrying this scraper's cipher settings and proxy headers."""
        return CipherSuiteProxyHeaderAdapter(
            proxy_headers=self._proxy_headers,
            cipherSuite=self.cipherSuite,
            # These attributes may be absent on the scraper, so fall back
            # to explicit defaults.
            ecdhCurve=getattr(self, 'ecdhCurve', 'prime256v1'),
            server_hostname=getattr(self, 'server_hostname', None),
            source_address=getattr(self, 'source_address', None),
            ssl_context=getattr(self, 'ssl_context', None)
        )

    def _mount_proxy_adapters(self) -> None:
        """Mount a fresh proxy-header adapter for both the https and http schemes."""
        # HTTP is mounted too, though proxy headers are mainly for HTTPS CONNECT.
        # Each scheme gets its own adapter instance.
        for scheme in ('https://', 'http://'):
            self.mount(scheme, self._build_proxy_adapter())

    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
        """
        Update the proxy headers and remount adapters.

        Args:
            proxy_headers: New proxy headers to use. A falsy value is
                stored as an empty dict, matching ``__init__``.
        """
        self._proxy_headers = proxy_headers or {}

        # Remount adapters with new headers
        self._mount_proxy_adapters()
170
+
171
+
172
def create_scraper(
    proxy_headers: Optional[Dict[str, str]] = None,
    sess: Optional[Any] = None,
    **kwargs
) -> ProxyCloudScraper:
    """
    Build a ProxyCloudScraper, optionally seeded from an existing session.

    Intended as a drop-in replacement for cloudscraper.create_scraper()
    that adds proxy header capabilities.

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        sess: Existing session whose attributes are copied onto the new
            scraper; attributes that are missing or None are skipped
        **kwargs: All other arguments passed to ProxyCloudScraper

    Returns:
        ProxyCloudScraper instance

    Example:
        from python_proxy_headers.cloudscraper_proxy import create_scraper

        scraper = create_scraper(
            proxy_headers={'X-ProxyMesh-Country': 'US'},
            browser='chrome'
        )
        scraper.proxies = {'https': 'http://proxy:8080'}
        response = scraper.get('https://example.com')
    """
    scraper = ProxyCloudScraper(proxy_headers=proxy_headers, **kwargs)

    if not sess:
        return scraper

    copied_attributes = (
        'auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data',
    )
    for attribute in copied_attributes:
        value = getattr(sess, attribute, None)
        if value is None:
            continue
        setattr(scraper, attribute, value)

    return scraper


# Convenience alias
session = create_scraper
@@ -0,0 +1,379 @@
1
+ """
2
+ PycURL extension for sending and receiving proxy headers.
3
+
4
+ This module provides helper functions and classes for working with proxy headers
5
+ in pycurl. It can be used in two ways:
6
+
7
+ 1. Low-level helpers for existing pycurl code:
8
+
9
+ import pycurl
10
+ from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
11
+
12
+ c = pycurl.Curl()
13
+ c.setopt(pycurl.URL, 'https://example.com')
14
+ c.setopt(pycurl.PROXY, 'http://proxy:8080')
15
+
16
+ # Add proxy headers
17
+ set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
18
+
19
+ # Capture response headers (including proxy CONNECT headers)
20
+ capture = HeaderCapture(c)
21
+
22
+ c.perform()
23
+
24
+ print(capture.proxy_headers) # Headers from proxy CONNECT response
25
+ print(capture.origin_headers) # Headers from origin server
26
+
27
+ 2. High-level convenience functions:
28
+
29
+ from python_proxy_headers.pycurl_proxy import get
30
+
31
+ response = get('https://example.com',
32
+ proxy='http://proxy:8080',
33
+ proxy_headers={'X-ProxyMesh-Country': 'US'})
34
+ print(response.proxy_headers)
35
+ """
36
+
37
+ from io import BytesIO
38
+ from dataclasses import dataclass, field
39
+ from typing import Dict, List, Optional, Tuple
40
+
41
+ try:
42
+ import pycurl
43
+ except ImportError:
44
+ raise ImportError(
45
+ "pycurl is required for this module. "
46
+ "Install it with: pip install pycurl"
47
+ )
48
+
49
+
50
+ # =============================================================================
51
+ # Low-level helper functions
52
+ # =============================================================================
53
+
54
def set_proxy_headers(curl, headers: Dict[str, str]) -> None:
    """
    Configure headers that should be sent to the proxy server.

    Does nothing when ``headers`` is empty or None.

    Args:
        curl: A pycurl.Curl instance
        headers: Dict of headers to send to the proxy

    Example:
        c = pycurl.Curl()
        c.setopt(pycurl.PROXY, 'http://proxy:8080')
        set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
        c.perform()
    """
    if not headers:
        return

    # libcurl expects a list of "Name: value" strings.
    formatted = []
    for name, value in headers.items():
        formatted.append(f"{name}: {value}")

    # CURLOPT_PROXYHEADER, by name when pycurl exposes it, otherwise by
    # its numeric option id (10228).
    try:
        curl.setopt(pycurl.PROXYHEADER, formatted)
    except AttributeError:
        curl.setopt(10228, formatted)

    # CURLOPT_HEADEROPT = CURLHEADER_SEPARATE keeps the proxy headers out
    # of the request sent to the origin.
    try:
        curl.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE)
    except AttributeError:
        try:
            curl.setopt(229, 1)  # CURLOPT_HEADEROPT = 229, CURLHEADER_SEPARATE = 1
        except pycurl.error:
            # Best effort: the option may not be available in older
            # libcurl versions.
            pass
89
+
90
+
91
class HeaderCapture:
    """
    Captures and parses HTTP response headers from pycurl requests.

    For HTTPS requests through a proxy, this separates:
    - proxy_headers: Headers from the proxy's CONNECT response
    - origin_headers: Headers from the origin server's response

    NOTE: the split is positional. Whenever two or more response header
    sections were captured, the first one is treated as the proxy's
    response and the last one as the origin's.

    Example:
        c = pycurl.Curl()
        c.setopt(pycurl.URL, 'https://example.com')
        c.setopt(pycurl.PROXY, 'http://proxy:8080')

        capture = HeaderCapture(c)  # Installs HEADERFUNCTION callback

        c.perform()

        print(capture.proxy_headers)   # {'X-ProxyMesh-IP': '1.2.3.4', ...}
        print(capture.origin_headers)  # {'Content-Type': 'text/html', ...}
        print(capture.proxy_status)    # 200
    """

    def __init__(self, curl=None):
        """
        Initialize header capture.

        Args:
            curl: Optional pycurl.Curl instance. If provided, automatically
                  installs the HEADERFUNCTION callback.
        """
        # Raw header lines exactly as delivered to the callback.
        self._header_lines: List[bytes] = []
        # True while _sections reflects the current _header_lines.
        self._parsed = False
        # One (status code, headers) pair per response seen, in order.
        self._sections: List[Tuple[Optional[int], Dict[str, str]]] = []

        if curl is not None:
            self.install(curl)

    def install(self, curl) -> 'HeaderCapture':
        """
        Install the header callback on a pycurl.Curl instance.

        Args:
            curl: A pycurl.Curl instance

        Returns:
            self, for chaining
        """
        curl.setopt(pycurl.HEADERFUNCTION, self._header_callback)
        return self

    def _header_callback(self, header_line: bytes) -> int:
        """Callback for pycurl HEADERFUNCTION; stores the line and returns its length."""
        self._header_lines.append(header_line)
        self._parsed = False  # Invalidate cache
        return len(header_line)

    def _parse(self) -> None:
        """Parse collected header lines into (status, headers) sections."""
        if self._parsed:
            return

        self._sections = []
        current_headers: Dict[str, str] = {}
        current_status: Optional[int] = None

        for line in self._header_lines:
            line_str = line.decode('utf-8', errors='replace').strip()

            if line_str.startswith('HTTP/'):
                # A status line starts a new section - save the previous one
                if current_headers or current_status is not None:
                    self._sections.append((current_status, current_headers))
                    current_headers = {}
                # Parse status line: HTTP/1.1 200 OK
                parts = line_str.split(' ', 2)
                if len(parts) >= 2:
                    try:
                        current_status = int(parts[1])
                    except ValueError:
                        current_status = None
                else:
                    current_status = None
            elif ':' in line_str:
                # A repeated header name keeps only the last value seen.
                key, value = line_str.split(':', 1)
                current_headers[key.strip()] = value.strip()

        # Don't forget the last section
        if current_headers or current_status is not None:
            self._sections.append((current_status, current_headers))

        self._parsed = True

    def reset(self) -> None:
        """Clear captured headers for reuse."""
        self._header_lines.clear()
        self._sections.clear()
        self._parsed = False

    @property
    def proxy_headers(self) -> Dict[str, str]:
        """
        Headers from the proxy's CONNECT response.

        Returns empty dict if fewer than two response sections were captured.
        """
        self._parse()
        if len(self._sections) >= 2:
            return self._sections[0][1]
        return {}

    @property
    def proxy_status(self) -> Optional[int]:
        """
        Status code from the proxy's CONNECT response.

        Returns None if fewer than two response sections were captured.
        """
        self._parse()
        if len(self._sections) >= 2:
            return self._sections[0][0]
        return None

    @property
    def origin_headers(self) -> Dict[str, str]:
        """Headers from the last captured response (empty dict if none)."""
        self._parse()
        if self._sections:
            return self._sections[-1][1]
        return {}

    @property
    def origin_status(self) -> Optional[int]:
        """Status code from the last captured response (None if none)."""
        self._parse()
        if self._sections:
            return self._sections[-1][0]
        return None

    @property
    def all_headers(self) -> Dict[str, str]:
        """All headers merged (proxy headers take precedence for conflicts)."""
        self._parse()
        merged: Dict[str, str] = {}
        for _, headers in self._sections:
            merged.update(headers)
        # Merging in arrival order alone would let later responses overwrite
        # the proxy's values; re-apply the proxy section last so it wins,
        # as documented.
        merged.update(self.proxy_headers)
        return merged
237
+
238
+
239
+ # =============================================================================
240
+ # High-level convenience API
241
+ # =============================================================================
242
+
243
@dataclass
class Response:
    """Result container returned by the high-level request helpers."""
    # Status code of the response.
    status_code: int
    # Response headers.
    headers: Dict[str, str]
    # Raw response body.
    content: bytes
    # Headers attributed to the proxy; empty dict by default.
    proxy_headers: Dict[str, str] = field(default_factory=dict)
    # Status code attributed to the proxy; None by default.
    proxy_status: Optional[int] = None

    @property
    def text(self) -> str:
        """Body decoded as UTF-8, with undecodable bytes replaced."""
        decoded = self.content.decode('utf-8', errors='replace')
        return decoded

    def raise_for_status(self) -> None:
        """Raise Exception when the status code is 400 or above."""
        if self.status_code < 400:
            return
        raise Exception(f"HTTP Error {self.status_code}")
261
+
262
+
263
def request(
    method: str,
    url: str,
    proxy: Optional[str] = None,
    proxy_headers: Optional[Dict[str, str]] = None,
    headers: Optional[Dict[str, str]] = None,
    data: Optional[bytes] = None,
    timeout: Optional[int] = None,
    verify: bool = True,
) -> Response:
    """
    Make an HTTP request with proxy header support.

    Args:
        method: HTTP method (GET, POST, etc.); case-insensitive
        url: Target URL
        proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080')
        proxy_headers: Headers to send to the proxy (only applied when
            ``proxy`` is given)
        headers: Headers to send to the origin server
        data: Request body. Sent for every method except GET and HEAD;
            a POST without data sends an empty body.
        timeout: Request timeout in seconds
        verify: Whether to verify SSL certificates

    Returns:
        Response object with body, headers, and proxy_headers

    Raises:
        pycurl.error: Propagated from pycurl when the transfer fails
    """
    c = pycurl.Curl()
    try:
        # Everything after handle creation lives inside the try so the
        # handle is closed even if configuring it fails.
        body = BytesIO()
        capture = HeaderCapture(c)

        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, body.write)

        # HTTP method
        method = method.upper()
        if method == 'GET':
            c.setopt(pycurl.HTTPGET, 1)
        elif method == 'HEAD':
            c.setopt(pycurl.NOBODY, 1)
        elif method == 'POST':
            c.setopt(pycurl.POST, 1)
            # Always supply POSTFIELDS: with POST enabled and no body set,
            # libcurl would pull the body from its read callback instead
            # of sending a zero-length POST.
            c.setopt(pycurl.POSTFIELDS, data if data else b'')
        else:
            # PUT, PATCH, DELETE and any other verb: send it verbatim and
            # attach the body when the caller provided one.
            c.setopt(pycurl.CUSTOMREQUEST, method)
            if data:
                c.setopt(pycurl.POSTFIELDS, data)

        # Request headers
        if headers:
            c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()])

        # Proxy
        if proxy:
            c.setopt(pycurl.PROXY, proxy)
            if proxy_headers:
                set_proxy_headers(c, proxy_headers)

        # Timeout
        if timeout:
            c.setopt(pycurl.TIMEOUT, timeout)

        # SSL
        if not verify:
            c.setopt(pycurl.SSL_VERIFYPEER, 0)
            c.setopt(pycurl.SSL_VERIFYHOST, 0)

        c.perform()

        return Response(
            status_code=c.getinfo(pycurl.RESPONSE_CODE),
            headers=capture.origin_headers,
            content=body.getvalue(),
            proxy_headers=capture.proxy_headers,
            proxy_status=capture.proxy_status,
        )
    finally:
        c.close()
350
+
351
+
352
def get(url: str, **kwargs) -> Response:
    """
    Send a GET request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'GET'`` and returns its Response.
    """
    response = request('GET', url, **kwargs)
    return response
355
+
356
+
357
def post(url: str, **kwargs) -> Response:
    """
    Send a POST request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'POST'`` and returns its Response.
    """
    response = request('POST', url, **kwargs)
    return response
360
+
361
+
362
def put(url: str, **kwargs) -> Response:
    """
    Send a PUT request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'PUT'`` and returns its Response.
    """
    response = request('PUT', url, **kwargs)
    return response
365
+
366
+
367
def delete(url: str, **kwargs) -> Response:
    """
    Send a DELETE request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'DELETE'`` and returns its Response.
    """
    response = request('DELETE', url, **kwargs)
    return response
370
+
371
+
372
def head(url: str, **kwargs) -> Response:
    """
    Send a HEAD request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'HEAD'`` and returns its Response.
    """
    response = request('HEAD', url, **kwargs)
    return response
375
+
376
+
377
def patch(url: str, **kwargs) -> Response:
    """
    Send a PATCH request.

    Forwards ``url`` and all keyword arguments to ``request`` with the
    method fixed to ``'PATCH'`` and returns its Response.
    """
    response = request('PATCH', url, **kwargs)
    return response
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: python-proxy-headers
3
+ Version: 0.2.0
4
+ Summary: Handle custom proxy headers for http & https requests in various python libraries
5
+ Author-email: ProxyMesh <support@proxymesh.com>
6
+ Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers
7
+ Project-URL: Changelog, https://github.com/proxymesh/python-proxy-headers/commits/main/
8
+ Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
9
+ Project-URL: Documentation, https://python-proxy-headers.readthedocs.io/en/latest/
10
+ Project-URL: ProxyMesh, https://proxymesh.com
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Internet :: WWW/HTTP
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Dynamic: license-file
21
+
22
+ # Python Proxy Headers
23
+
24
+ [![Documentation Status](https://readthedocs.org/projects/python-proxy-headers/badge/?version=latest)](https://python-proxy-headers.readthedocs.io/en/latest/?badge=latest)
25
+ [![PyPI version](https://badge.fury.io/py/python-proxy-headers.svg)](https://badge.fury.io/py/python-proxy-headers)
26
+
27
+ Extensions for Python HTTP libraries to support **sending and receiving custom proxy headers** during HTTPS CONNECT tunneling.
28
+
29
+ ## The Problem
30
+
31
+ When making HTTPS requests through a proxy, the connection is established via a CONNECT tunnel. During this process:
32
+
33
+ 1. **Sending headers to the proxy** - Most Python HTTP libraries don't provide an easy way to send custom headers (like `X-ProxyMesh-Country`) to the proxy server during the CONNECT handshake.
34
+
35
+ 2. **Receiving headers from the proxy** - The proxy's response headers from the CONNECT request are typically discarded, making it impossible to read custom headers (like `X-ProxyMesh-IP`) that the proxy sends back.
36
+
37
+ This library solves both problems for popular Python HTTP libraries.
38
+
39
+ ## Supported Libraries
40
+
41
+ | Library | Module | Use Case |
42
+ |---------|--------|----------|
43
+ | [urllib3](https://python-proxy-headers.readthedocs.io/en/latest/urllib3.html) | `urllib3_proxy_manager` | Low-level HTTP client |
44
+ | [requests](https://python-proxy-headers.readthedocs.io/en/latest/requests.html) | `requests_adapter` | Simple HTTP requests |
45
+ | [aiohttp](https://python-proxy-headers.readthedocs.io/en/latest/aiohttp.html) | `aiohttp_proxy` | Async HTTP client |
46
+ | [httpx](https://python-proxy-headers.readthedocs.io/en/latest/httpx.html) | `httpx_proxy` | Modern HTTP client |
47
+ | [pycurl](https://python-proxy-headers.readthedocs.io/en/latest/pycurl.html) | `pycurl_proxy` | libcurl bindings |
48
+ | [cloudscraper](https://python-proxy-headers.readthedocs.io/en/latest/cloudscraper.html) | `cloudscraper_proxy` | Cloudflare bypass |
49
+ | [autoscraper](https://python-proxy-headers.readthedocs.io/en/latest/autoscraper.html) | `autoscraper_proxy` | Automatic web scraping |
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install python-proxy-headers
55
+ ```
56
+
57
+ Then install the HTTP library you want to use (e.g., `pip install requests`).
58
+
59
+ > **Note:** This package has no dependencies by default - install only what you need.
60
+
61
+ ## Quick Start
62
+
63
+ ### requests
64
+
65
+ ```python
66
+ from python_proxy_headers.requests_adapter import ProxySession
67
+
68
+ with ProxySession(proxy_headers={'X-ProxyMesh-Country': 'US'}) as session:
69
+ session.proxies = {'https': 'http://user:pass@proxy.example.com:8080'}
70
+ response = session.get('https://httpbin.org/ip')
71
+
72
+ # Proxy headers are merged into response.headers
73
+ print(response.headers.get('X-ProxyMesh-IP'))
74
+ ```
75
+
76
+ ### httpx
77
+
78
+ ```python
79
+ from python_proxy_headers.httpx_proxy import get
80
+
81
+ response = get(
82
+ 'https://httpbin.org/ip',
83
+ proxy='http://user:pass@proxy.example.com:8080'
84
+ )
85
+
86
+ # Proxy CONNECT response headers are merged into response.headers
87
+ print(response.headers.get('X-ProxyMesh-IP'))
88
+ ```
89
+
90
+ ### aiohttp
91
+
92
+ ```python
93
+ import asyncio
94
+ from python_proxy_headers.aiohttp_proxy import ProxyClientSession
95
+
96
+ async def main():
97
+ async with ProxyClientSession() as session:
98
+ async with session.get(
99
+ 'https://httpbin.org/ip',
100
+ proxy='http://user:pass@proxy.example.com:8080'
101
+ ) as response:
102
+ # Proxy headers merged into response.headers
103
+ print(response.headers.get('X-ProxyMesh-IP'))
104
+
105
+ asyncio.run(main())
106
+ ```
107
+
108
+ ### pycurl (low-level)
109
+
110
+ ```python
111
+ import pycurl
112
+ from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
113
+
114
+ c = pycurl.Curl()
115
+ c.setopt(pycurl.URL, 'https://httpbin.org/ip')
116
+ c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080')
117
+
118
+ # Add these two lines to any existing pycurl code
119
+ set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
120
+ capture = HeaderCapture(c)
121
+
122
+ c.perform()
123
+
124
+ print(capture.proxy_headers) # Headers from proxy CONNECT response
125
+ c.close()
126
+ ```
127
+
128
+ ### cloudscraper
129
+
130
+ ```python
131
+ from python_proxy_headers.cloudscraper_proxy import create_scraper
132
+
133
+ # Drop-in replacement for cloudscraper.create_scraper()
134
+ scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
135
+ scraper.proxies = {'https': 'http://proxy.example.com:8080'}
136
+
137
+ response = scraper.get('https://example.com')
138
+ # All CloudScraper features (Cloudflare bypass) preserved
139
+ ```
140
+
141
+ ## Testing
142
+
143
+ A test harness is included to verify proxy header functionality:
144
+
145
+ ```bash
146
+ # Set your proxy
147
+ export PROXY_URL='http://user:pass@proxy.example.com:8080'
148
+
149
+ # Test all modules
150
+ python test_proxy_headers.py
151
+
152
+ # Test specific modules
153
+ python test_proxy_headers.py requests httpx
154
+
155
+ # Verbose output (show header values)
156
+ python test_proxy_headers.py -v
157
+ ```
158
+
159
+ ## Documentation
160
+
161
+ For detailed documentation, API reference, and more examples:
162
+
163
+ - **Full Documentation:** [python-proxy-headers.readthedocs.io](https://python-proxy-headers.readthedocs.io/en/latest/)
164
+ - **Example Code:** [proxy-examples for Python](https://github.com/proxymesh/proxy-examples/tree/main/python)
165
+
166
+ ## Related Projects
167
+
168
+ - **[scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers)** - Proxy header support for Scrapy
169
+
170
+ ## About
171
+
172
+ Created by [ProxyMesh](https://proxymesh.com) to help our customers use custom headers to control proxy behavior. Works with any proxy that supports custom headers.
173
+
174
+ ## License
175
+
176
+ MIT License
@@ -0,0 +1,13 @@
1
+ python_proxy_headers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ python_proxy_headers/aiohttp_proxy.py,sha256=OyEruj3CCrFmlEikmmOXGQsVJnrAfK-TMzycP-sLH_Y,4920
3
+ python_proxy_headers/autoscraper_proxy.py,sha256=g51K71xOah_uNtMIXEXggtd6T8ol7vAukg7oaWVvhEA,11342
4
+ python_proxy_headers/cloudscraper_proxy.py,sha256=6xF58QywHV4IYlC_KtXIWQTXZjE8CeEa5-GfqcSGZ3U,7780
5
+ python_proxy_headers/httpx_proxy.py,sha256=Q8tDlfl4u3fjsZZDA1zj1pbN7fiAwJ3ctFGojbqtaEo,11829
6
+ python_proxy_headers/pycurl_proxy.py,sha256=_JAt_o5gX7o0sRLOQXG3421mEehzBm-rhUl4c9IG5HQ,11738
7
+ python_proxy_headers/requests_adapter.py,sha256=CfGEEYc0eaKKNA11VbhZzv9qFVEL1uMSzAUNvidIyEU,2313
8
+ python_proxy_headers/urllib3_proxy_manager.py,sha256=FG6keO1ENBhVQLmnUkwVKpdC3U9xEXNxKGy9_7xjd_s,4399
9
+ python_proxy_headers-0.2.0.dist-info/licenses/LICENSE,sha256=i_H6fvudjqZOKkZFCIHGmbPHIxszjCAT90UJSY2OM0U,1066
10
+ python_proxy_headers-0.2.0.dist-info/METADATA,sha256=21O32NtwCvq62LY8iLr7mOi01OK-9mDWqdkYR1_Pscs,6288
11
+ python_proxy_headers-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
12
+ python_proxy_headers-0.2.0.dist-info/top_level.txt,sha256=Bhm-Qc6vn0DAd2Li8ZBn6KLkls2zJnM4-CtXHvIiRh8,21
13
+ python_proxy_headers-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (82.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,151 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: python-proxy-headers
3
- Version: 0.1.0
4
- Summary: Handle custom proxy headers for http requests in various python libraries
5
- Author-email: ProxyMesh <support@proxymesh.com>
6
- Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers
7
- Project-URL: Changelog, https://github.com/proxymesh/python-proxy-headers/commits/main/
8
- Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Operating System :: OS Independent
11
- Classifier: License :: OSI Approved :: BSD License
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Topic :: Internet :: WWW/HTTP
14
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
- Requires-Python: >=3.8
16
- Description-Content-Type: text/markdown
17
- License-File: LICENSE
18
-
19
- # Python Proxy Headers
20
-
21
- The `python-proxy-headers` package provides support for handling custom proxy headers when making HTTPS requests in various python modules.
22
-
23
- We currently provide extensions to the following packages:
24
-
25
- * [urllib3](https://urllib3.readthedocs.io/en/stable/)
26
- * [requests](https://docs.python-requests.org/en/latest/index.html)
27
- * [aiohttp](https://docs.aiohttp.org/en/stable/index.html)
28
- * [httpx](https://www.python-httpx.org/)
29
-
30
- None of these modules provide good support for parsing custom response headers from proxy servers. And some of them make it hard to send custom headers to proxy servers. So we at [ProxyMesh](https://proxymesh.com) made these extension modules to support our customers that use Python and want to use custom headers to control our proxy behavior. But these modules can work for handling custom headers with any proxy.
31
-
32
- *If you are looking for [Scrapy](https://scrapy.org/) support, please see our [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers) project.*
33
-
34
- ## Installation
35
-
36
- Examples for how to use these extension modules are described below. You must first do the following:
37
-
38
- 1. `pip install python-proxy-headers`
39
- 2. Install the appropriate package based on the python module you want to use.
40
-
41
- This package does not have any dependencies because we don't know which module you want to use.
42
-
43
- You can also find more example code in our [proxy-examples for python](https://github.com/proxymesh/proxy-examples/tree/main/python).
44
-
45
- ## urllib3
46
-
47
- If you just want to send custom proxy headers, but don't need to receive proxy response headers, then you can use [urllib3.ProxyManager](https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html#urllib3.ProxyManager), like so:
48
-
49
- ``` python
50
- import urllib3
51
- proxy = urllib3.ProxyManager('http://PROXYHOST:PORT', proxy_headers={'X-ProxyMesh-Country': 'US'})
52
- r = proxy.request('GET', 'https://api.ipify.org?format=json')
53
- ```
54
-
55
- Note that when using this method, if you keep reusing the same `ProxyManager` instance, you may be re-using the proxy connection, which may have different behavior than if you create a new proxy connection for each request. For example, with ProxyMesh you may keep getting the same IP address if you reuse the proxy connection.
56
-
57
- To get proxy response headers, use our extension module like this:
58
-
59
- ``` python
60
- from python_proxy_headers import urllib3_proxy_manager
61
- proxy = urllib3_proxy_manager.ProxyHeaderManager('http://PROXYHOST:PORT')
62
- r = proxy.request('GET', 'https://api.ipify.org?format=json')
63
- r.headers['X-ProxyMesh-IP']
64
- ```
65
-
66
- You can also pass `proxy_headers` into our `ProxyHeaderManager` as well. For example, you can pass back the same `X-ProxyMesh-IP` header to ensure you get the same IP address on subsequent requests.
67
-
68
- ## requests
69
-
70
- The requests adapter builds on our `urllib3_proxy_manager` module to make it easy to pass in proxy headers and receive proxy response headers.
71
-
72
- ``` python
73
- from python_proxy_headers import requests_adapter
74
- r = requests_adapter.get('https://api.ipify.org?format=json', proxies={'http': 'http://PROXYHOST:PORT', 'https': 'http://PROXYHOST:PORT'}, proxy_headers={'X-ProxyMesh-Country': 'US'})
75
- r.headers['X-ProxyMesh-IP']
76
- ```
77
-
78
- The `requests_adapter` module supports all the standard requests methods: `get`, `post`, `put`, `delete`, etc.
79
-
80
- ## aiohttp
81
-
82
- While it's not documented, aiohttp does support passing in custom proxy headers by default.
83
-
84
- ``` python
85
- import aiohttp
86
- async with aiohttp.ClientSession() as session:
87
- async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
88
- await r.text()
89
- ```
90
-
91
- However, if you want to get proxy response headers, you should use our extension module:
92
-
93
- ``` python
94
- from python_proxy_headers import aiohttp_proxy
95
- async with aiohttp_proxy.ProxyClientSession() as session:
96
- async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
97
- await r.text()
98
-
99
- r.headers['X-ProxyMesh-IP']
100
- ```
101
-
102
- ## httpx
103
-
104
- httpx also supports proxy headers by default, though it's not documented:
105
-
106
- ``` python
107
- import httpx
108
- proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
109
- transport = httpx.HTTPTransport(proxy=proxy)
110
- with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
111
- r = client.get('https://api.ipify.org?format=json')
112
- ```
113
-
114
- But to get the response headers, you need to use our extension module:
115
-
116
- ``` python
117
- import httpx
118
- from python_proxy_headers.httpx_proxy import HTTPProxyTransport
119
- proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
120
- transport = HTTPProxyTransport(proxy=proxy)
121
- with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
122
- r = client.get('https://api.ipify.org?format=json')
123
-
124
- r.headers['X-ProxyMesh-IP']
125
- ```
126
-
127
- This module also provides helper methods similar to requests:
128
-
129
- ``` python
130
- import httpx
131
- from python_proxy_headers import httpx_proxy
132
- proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
133
- r = httpx_proxy.get('https://api.ipify.org?format=json', proxy=proxy)
134
- r.headers['X-ProxyMesh-IP']
135
- ```
136
-
137
- And finally, httpx supports async requests, so we provide an async extension too:
138
-
139
- ``` python
140
- import httpx
141
- from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport
142
- proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
143
- transport = AsyncHTTPProxyTransport(proxy=proxy)
144
- async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client:
145
- r = await client.get('https://api.ipify.org?format=json')
146
-
147
- r.headers['X-ProxyMesh-IP']
148
- ```
149
-
150
- Our httpx helper module internally provides extension classes for [httpcore](https://www.encode.io/httpcore/), for handling proxy headers over tunnel connections.
151
- You can use those classes if you're building on top of httpcore.
@@ -1,10 +0,0 @@
1
- python_proxy_headers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- python_proxy_headers/aiohttp_proxy.py,sha256=OyEruj3CCrFmlEikmmOXGQsVJnrAfK-TMzycP-sLH_Y,4920
3
- python_proxy_headers/httpx_proxy.py,sha256=Q8tDlfl4u3fjsZZDA1zj1pbN7fiAwJ3ctFGojbqtaEo,11829
4
- python_proxy_headers/requests_adapter.py,sha256=CfGEEYc0eaKKNA11VbhZzv9qFVEL1uMSzAUNvidIyEU,2313
5
- python_proxy_headers/urllib3_proxy_manager.py,sha256=FG6keO1ENBhVQLmnUkwVKpdC3U9xEXNxKGy9_7xjd_s,4399
6
- python_proxy_headers-0.1.0.dist-info/LICENSE,sha256=i_H6fvudjqZOKkZFCIHGmbPHIxszjCAT90UJSY2OM0U,1066
7
- python_proxy_headers-0.1.0.dist-info/METADATA,sha256=gXUKShZK4ExJ7mfOh3Pi_InDIyLXyIrtWGFEqLKr9d0,6761
8
- python_proxy_headers-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
9
- python_proxy_headers-0.1.0.dist-info/top_level.txt,sha256=Bhm-Qc6vn0DAd2Li8ZBn6KLkls2zJnM4-CtXHvIiRh8,21
10
- python_proxy_headers-0.1.0.dist-info/RECORD,,