askpablos-scrapy-api 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: askpablos-scrapy-api
3
+ Version: 0.1.0
4
+ Summary: Scrapy middleware to route requests through AskPablos Proxy API
5
+ Author-email: Fawad Ali <fawadstar6@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/fawadss1/askpablos-scrapy-api
8
+ Requires-Python: >=3.7
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: scrapy>=2.6.0
11
+ Requires-Dist: requests>=2.25.0
12
+
13
+ # AskPablos Scrapy API Middleware
14
+
15
+ **AskPablosScrapyAPI** is a Scrapy downloader middleware that allows your spiders to transparently route specific requests through an external proxy API like AskPablos. It supports headless browser mode and rotating proxies on a per-request basis, while integrating cleanly with Scrapy's native settings system.
16
+
17
+ ---
18
+
19
+ ## 🚀 Features
20
+
21
+ - ✅ Only activates for requests that explicitly ask for proxy routing
22
+ - ✅ Supports rotating proxies and headless browser rendering
23
+ - ✅ Compatible with per-spider `custom_settings`
24
+ - ✅ Automatically signs requests using HMAC
25
+ - ✅ Clean plug-and-play design for reuse across Scrapy projects
26
+
27
+ ---
28
+
29
+ ## 📦 Installation
30
+
31
+ Install via pip:
32
+
33
+ ```bash
34
+ pip install askpablos-scrapy-api
35
+ ```
36
+
37
+ Or directly from the repository:
38
+
39
+ ```bash
40
+ pip install git+https://github.com/fawadss1/askpablos-scrapy-api.git
41
+ ```
42
+
43
+ ---
44
+
45
+ ## 🔧 Quick Setup
46
+
47
+ 1. Add the middleware to your Scrapy project settings:
48
+
49
+ ```python
50
+ # settings.py
51
+ DOWNLOADER_MIDDLEWARES = {
52
+ "askpablos_scrapy_api.AskPablosScrapyAPI": 543,
53
+ }
54
+
55
+ # API credentials
56
+ API_KEY = "your-api-key"
57
+ SECRET_KEY = "your-secret-key"
58
+ ```
59
+
60
+ 2. Use in your spider by adding `askpablos_api_map` to the request meta:
61
+
62
+ ```python
63
+ def start_requests(self):
64
+ yield scrapy.Request(
65
+ url="https://example.com",
66
+ callback=self.parse,
67
+ meta={
68
+ "askpablos_api_map": {
69
+ "browser": True, # Use headless browser
70
+ "rotate_proxy": True # Use rotating proxy IP
71
+ }
72
+ }
73
+ )
74
+ ```
75
+
76
+ ---
77
+
78
+ ## 📚 Documentation
79
+
80
+ For detailed usage instructions and advanced configurations:
81
+
82
+ - [Usage Guide](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/usage.md)
83
+ - [FAQ](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/faq.md)
84
+
85
+ ---
86
+
87
+ ## 📋 Requirements
88
+
89
+ - Python 3.7+
90
+ - Scrapy 2.6+
91
+ - Valid AskPablos Proxy API credentials
92
+
93
+ ---
94
+
95
+ ## 📝 License
96
+
97
+ This project is licensed under the MIT License - see the LICENSE file for details.
98
+
99
+ ---
100
+
101
+ ## 👤 Author
102
+
103
+ Fawad Ali ([@fawadss1](https://github.com/fawadss1))
@@ -0,0 +1,91 @@
1
+ # AskPablos Scrapy API Middleware
2
+
3
+ **AskPablosScrapyAPI** is a Scrapy downloader middleware that allows your spiders to transparently route specific requests through an external proxy API like AskPablos. It supports headless browser mode and rotating proxies on a per-request basis, while integrating cleanly with Scrapy's native settings system.
4
+
5
+ ---
6
+
7
+ ## 🚀 Features
8
+
9
+ - ✅ Only activates for requests that explicitly ask for proxy routing
10
+ - ✅ Supports rotating proxies and headless browser rendering
11
+ - ✅ Compatible with per-spider `custom_settings`
12
+ - ✅ Automatically signs requests using HMAC
13
+ - ✅ Clean plug-and-play design for reuse across Scrapy projects
14
+
15
+ ---
16
+
17
+ ## 📦 Installation
18
+
19
+ Install via pip:
20
+
21
+ ```bash
22
+ pip install askpablos-scrapy-api
23
+ ```
24
+
25
+ Or directly from the repository:
26
+
27
+ ```bash
28
+ pip install git+https://github.com/fawadss1/askpablos-scrapy-api.git
29
+ ```
30
+
31
+ ---
32
+
33
+ ## 🔧 Quick Setup
34
+
35
+ 1. Add the middleware to your Scrapy project settings:
36
+
37
+ ```python
38
+ # settings.py
39
+ DOWNLOADER_MIDDLEWARES = {
40
+ "askpablos_scrapy_api.AskPablosScrapyAPI": 543,
41
+ }
42
+
43
+ # API credentials
44
+ API_KEY = "your-api-key"
45
+ SECRET_KEY = "your-secret-key"
46
+ ```
47
+
48
+ 2. Use in your spider by adding `askpablos_api_map` to the request meta:
49
+
50
+ ```python
51
+ def start_requests(self):
52
+ yield scrapy.Request(
53
+ url="https://example.com",
54
+ callback=self.parse,
55
+ meta={
56
+ "askpablos_api_map": {
57
+ "browser": True, # Use headless browser
58
+ "rotate_proxy": True # Use rotating proxy IP
59
+ }
60
+ }
61
+ )
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 📚 Documentation
67
+
68
+ For detailed usage instructions and advanced configurations:
69
+
70
+ - [Usage Guide](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/usage.md)
71
+ - [FAQ](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/faq.md)
72
+
73
+ ---
74
+
75
+ ## 📋 Requirements
76
+
77
+ - Python 3.7+
78
+ - Scrapy 2.6+
79
+ - Valid AskPablos Proxy API credentials
80
+
81
+ ---
82
+
83
+ ## 📝 License
84
+
85
+ This project is licensed under the MIT License - see the LICENSE file for details.
86
+
87
+ ---
88
+
89
+ ## 👤 Author
90
+
91
+ Fawad Ali ([@fawadss1](https://github.com/fawadss1))
@@ -0,0 +1 @@
1
+ from .middleware import AskPablosScrapyAPI
@@ -0,0 +1,98 @@
1
+ import json
2
+ import base64
3
+ import hmac
4
+ import hashlib
5
+ import requests
6
+ from scrapy.http import HtmlResponse
7
+
8
+
9
class AskPablosScrapyAPI:
    """
    Scrapy downloader middleware that routes selected requests through the
    AskPablos proxy API.

    The middleware activates **only** for requests whose meta contains a
    non-empty dict under ``askpablos_api_map``:
        meta = {
            "askpablos_api_map": {
                "browser": True,       # Optional: Use headless browser
                "rotate_proxy": True   # Optional: Use rotating proxy IP
            }
        }

    Requests without the ``askpablos_api_map`` key, or whose value is empty
    or not a dict, are passed through untouched.

    Configuration (via settings.py or a spider's ``custom_settings``):
        API_KEY = "<your API key>"
        SECRET_KEY = "<your secret key>"
        ASKPABLOS_API_URL = "<proxy endpoint>"   # Optional, see API_URL
    """

    # Default proxy endpoint, used when no override is supplied.
    # NOTE(review): this is a private-network (10.x.x.x) address, so it is
    # only reachable from inside that network; set ASKPABLOS_API_URL (or pass
    # ``api_url``) to point at a reachable deployment.
    API_URL = "http://10.10.10.178:7500/api/proxy/"

    def __init__(self, api_key, secret_key, api_url=None):
        """
        Store the credentials and, optionally, a custom endpoint.

        Args:
            api_key: Value sent in the ``X-API-Key`` header.
            secret_key: Key used to HMAC-sign each request payload.
            api_url: Optional endpoint overriding the class-level ``API_URL``
                for this instance only.
        """
        self.api_key = api_key
        self.secret_key = secret_key
        if api_url:
            # Shadow the class attribute on the instance so every existing
            # reference to ``self.API_URL`` picks up the override.
            self.API_URL = api_url

    @classmethod
    def from_crawler(cls, crawler):
        """
        Build the middleware from the crawler's settings.

        Reads ``API_KEY`` and ``SECRET_KEY`` (required) and
        ``ASKPABLOS_API_URL`` (optional).

        Raises:
            ValueError: If ``API_KEY`` or ``SECRET_KEY`` is missing or empty.
        """
        api_key = crawler.settings.get("API_KEY")
        secret_key = crawler.settings.get("SECRET_KEY")

        if not api_key or not secret_key:
            raise ValueError("API_KEY and SECRET_KEY must be defined in settings.")

        kwargs = {"api_key": api_key, "secret_key": secret_key}
        api_url = crawler.settings.get("ASKPABLOS_API_URL")
        if api_url:
            # Only forwarded when set, so subclasses that still define the
            # two-argument constructor keep working.
            kwargs["api_url"] = api_url

        return cls(**kwargs)

    def _sign(self, body):
        """Return the base64-encoded HMAC-SHA256 of *body* keyed by the secret key."""
        digest = hmac.new(
            self.secret_key.encode(),
            body.encode(),
            hashlib.sha256,
        ).digest()
        return base64.b64encode(digest).decode()

    def process_request(self, request, spider):
        """
        Fetch the request through the proxy API when it opts in.

        Returns:
            An ``HtmlResponse`` built from the ``body`` field of the API's
            JSON reply, or ``None`` when the request did not opt in or the
            proxy call failed (the failure is logged on ``spider.logger``).
        """
        proxy_cfg = request.meta.get("askpablos_api_map")

        # Missing, empty, or non-dict config means "do not proxy".
        if not proxy_cfg or not isinstance(proxy_cfg, dict):
            return None

        payload = {
            "url": request.url,
            # NOTE(review): always "GET", regardless of request.method;
            # confirm whether the API accepts other methods.
            "method": "GET",
            "browser": proxy_cfg.get("browser", False),
            "rotateProxy": proxy_cfg.get("rotate_proxy", False),
        }

        try:
            # Compact separators: the signature is computed over these exact
            # bytes, which are also the bytes sent as the request body.
            request_json = json.dumps(payload, separators=(',', ':'))

            headers = {
                "Content-Type": "application/json",
                "X-API-Key": self.api_key,
                "X-Signature": self._sign(request_json),
            }

            # NOTE(review): synchronous HTTP call; the crawler waits here
            # until the API answers or the 30 second timeout expires.
            response = requests.post(self.API_URL, data=request_json, headers=headers, timeout=30)
            response.raise_for_status()

            try:
                proxy_response = response.json()
            except ValueError:
                spider.logger.error(f"[AskPablos API] Invalid JSON response from {self.API_URL}")
                return None

            # A JSON reply that is not an object cannot carry a "body" field.
            html_body = proxy_response.get("body") if isinstance(proxy_response, dict) else None
            if not html_body:
                spider.logger.error(f"[AskPablos API] No 'body' in response for {request.url}")
                return None

            return HtmlResponse(
                url=request.url,
                body=html_body,
                encoding="utf-8",
                request=request,
            )

        except Exception as e:
            # Deliberate best-effort boundary: any failure while talking to
            # the proxy is logged and reported as "no response" (None).
            spider.logger.error(f"[AskPablos API] Error processing {request.url}: {e}")
            return None
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: askpablos-scrapy-api
3
+ Version: 0.1.0
4
+ Summary: Scrapy middleware to route requests through AskPablos Proxy API
5
+ Author-email: Fawad Ali <fawadstar6@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/fawadss1/askpablos-scrapy-api
8
+ Requires-Python: >=3.7
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: scrapy>=2.6.0
11
+ Requires-Dist: requests>=2.25.0
12
+
13
+ # AskPablos Scrapy API Middleware
14
+
15
+ **AskPablosScrapyAPI** is a Scrapy downloader middleware that allows your spiders to transparently route specific requests through an external proxy API like AskPablos. It supports headless browser mode and rotating proxies on a per-request basis, while integrating cleanly with Scrapy's native settings system.
16
+
17
+ ---
18
+
19
+ ## 🚀 Features
20
+
21
+ - ✅ Only activates for requests that explicitly ask for proxy routing
22
+ - ✅ Supports rotating proxies and headless browser rendering
23
+ - ✅ Compatible with per-spider `custom_settings`
24
+ - ✅ Automatically signs requests using HMAC
25
+ - ✅ Clean plug-and-play design for reuse across Scrapy projects
26
+
27
+ ---
28
+
29
+ ## 📦 Installation
30
+
31
+ Install via pip:
32
+
33
+ ```bash
34
+ pip install askpablos-scrapy-api
35
+ ```
36
+
37
+ Or directly from the repository:
38
+
39
+ ```bash
40
+ pip install git+https://github.com/fawadss1/askpablos-scrapy-api.git
41
+ ```
42
+
43
+ ---
44
+
45
+ ## 🔧 Quick Setup
46
+
47
+ 1. Add the middleware to your Scrapy project settings:
48
+
49
+ ```python
50
+ # settings.py
51
+ DOWNLOADER_MIDDLEWARES = {
52
+ "askpablos_scrapy_api.AskPablosScrapyAPI": 543,
53
+ }
54
+
55
+ # API credentials
56
+ API_KEY = "your-api-key"
57
+ SECRET_KEY = "your-secret-key"
58
+ ```
59
+
60
+ 2. Use in your spider by adding `askpablos_api_map` to the request meta:
61
+
62
+ ```python
63
+ def start_requests(self):
64
+ yield scrapy.Request(
65
+ url="https://example.com",
66
+ callback=self.parse,
67
+ meta={
68
+ "askpablos_api_map": {
69
+ "browser": True, # Use headless browser
70
+ "rotate_proxy": True # Use rotating proxy IP
71
+ }
72
+ }
73
+ )
74
+ ```
75
+
76
+ ---
77
+
78
+ ## 📚 Documentation
79
+
80
+ For detailed usage instructions and advanced configurations:
81
+
82
+ - [Usage Guide](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/usage.md)
83
+ - [FAQ](https://github.com/fawadss1/askpablos-scrapy-api/blob/main/docs/faq.md)
84
+
85
+ ---
86
+
87
+ ## 📋 Requirements
88
+
89
+ - Python 3.7+
90
+ - Scrapy 2.6+
91
+ - Valid AskPablos Proxy API credentials
92
+
93
+ ---
94
+
95
+ ## 📝 License
96
+
97
+ This project is licensed under the MIT License - see the LICENSE file for details.
98
+
99
+ ---
100
+
101
+ ## 👤 Author
102
+
103
+ Fawad Ali ([@fawadss1](https://github.com/fawadss1))
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ askpablos_scrapy_api/__init__.py
4
+ askpablos_scrapy_api/middleware.py
5
+ askpablos_scrapy_api.egg-info/PKG-INFO
6
+ askpablos_scrapy_api.egg-info/SOURCES.txt
7
+ askpablos_scrapy_api.egg-info/dependency_links.txt
8
+ askpablos_scrapy_api.egg-info/requires.txt
9
+ askpablos_scrapy_api.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ scrapy>=2.6.0
2
+ requests>=2.25.0
@@ -0,0 +1 @@
1
+ askpablos_scrapy_api
@@ -0,0 +1,21 @@
1
+ [project]
2
+ name = "askpablos-scrapy-api"
3
+ version = "0.1.0"
4
+ description = "Scrapy middleware to route requests through AskPablos Proxy API"
5
+ authors = [
6
+ { name="Fawad Ali", email="fawadstar6@gmail.com" }
7
+ ]
8
+ license = {text = "MIT"}
9
+ readme = "README.md"
10
+ requires-python = ">=3.7"
11
+ dependencies = [
12
+ "scrapy>=2.6.0",
13
+ "requests>=2.25.0"
14
+ ]
15
+
16
+ [project.urls]
17
+ Homepage = "https://github.com/fawadss1/askpablos-scrapy-api"
18
+
19
+ [build-system]
20
+ requires = ["setuptools>=61.0"]
21
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+