human-requests 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- human_requests-0.1.0/LICENSE +21 -0
- human_requests-0.1.0/PKG-INFO +249 -0
- human_requests-0.1.0/README.md +187 -0
- human_requests-0.1.0/human_requests/__init__.py +7 -0
- human_requests-0.1.0/human_requests/abstraction/cookies.py +160 -0
- human_requests-0.1.0/human_requests/abstraction/http.py +70 -0
- human_requests-0.1.0/human_requests/abstraction/proxy_manager.py +84 -0
- human_requests-0.1.0/human_requests/abstraction/request.py +25 -0
- human_requests-0.1.0/human_requests/abstraction/response.py +49 -0
- human_requests-0.1.0/human_requests/browsers/__init__.py +3 -0
- human_requests-0.1.0/human_requests/browsers/browser_master.py +121 -0
- human_requests-0.1.0/human_requests/browsers/families/__init__.py +5 -0
- human_requests-0.1.0/human_requests/browsers/families/base.py +69 -0
- human_requests-0.1.0/human_requests/browsers/families/camoufox_family.py +68 -0
- human_requests-0.1.0/human_requests/browsers/families/patchright_family.py +66 -0
- human_requests-0.1.0/human_requests/browsers/families/playwright_family.py +102 -0
- human_requests-0.1.0/human_requests/impersonation.py +171 -0
- human_requests-0.1.0/human_requests/py.typed +0 -0
- human_requests-0.1.0/human_requests/session.py +416 -0
- human_requests-0.1.0/human_requests/tools/helper_tools.py +101 -0
- human_requests-0.1.0/human_requests/tools/http_utils.py +103 -0
- human_requests-0.1.0/human_requests.egg-info/PKG-INFO +249 -0
- human_requests-0.1.0/human_requests.egg-info/SOURCES.txt +27 -0
- human_requests-0.1.0/human_requests.egg-info/dependency_links.txt +1 -0
- human_requests-0.1.0/human_requests.egg-info/requires.txt +21 -0
- human_requests-0.1.0/human_requests.egg-info/top_level.txt +1 -0
- human_requests-0.1.0/pyproject.toml +91 -0
- human_requests-0.1.0/setup.cfg +4 -0
- human_requests-0.1.0/tests/test_base.py +359 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Miskler
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: human-requests
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Playwright+curl_cffi traffic orchestrator
|
|
5
|
+
Author-email: Miskler <mail@miskler.ru>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Miskler
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/Miskler/human-requests
|
|
29
|
+
Project-URL: Repository, https://github.com/Miskler/human-requests
|
|
30
|
+
Project-URL: Documentation, https://miskler.github.io/human-requests/basic/quick_start/index.html
|
|
31
|
+
Project-URL: Issues, https://github.com/Miskler/human-requests/issues
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Operating System :: OS Independent
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
40
|
+
Classifier: Topic :: Internet
|
|
41
|
+
Classifier: Topic :: Utilities
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: curl-cffi
|
|
46
|
+
Requires-Dist: browserforge
|
|
47
|
+
Provides-Extra: playwright
|
|
48
|
+
Requires-Dist: playwright; extra == "playwright"
|
|
49
|
+
Provides-Extra: stealth-playwright
|
|
50
|
+
Requires-Dist: playwright; extra == "stealth-playwright"
|
|
51
|
+
Requires-Dist: playwright-stealth; extra == "stealth-playwright"
|
|
52
|
+
Provides-Extra: camoufox
|
|
53
|
+
Requires-Dist: camoufox[geoip]; extra == "camoufox"
|
|
54
|
+
Provides-Extra: patchright
|
|
55
|
+
Requires-Dist: patchright; extra == "patchright"
|
|
56
|
+
Provides-Extra: all
|
|
57
|
+
Requires-Dist: playwright; extra == "all"
|
|
58
|
+
Requires-Dist: playwright-stealth; extra == "all"
|
|
59
|
+
Requires-Dist: camoufox[geoip]; extra == "all"
|
|
60
|
+
Requires-Dist: patchright; extra == "all"
|
|
61
|
+
Dynamic: license-file
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
<div align="center">
|
|
65
|
+
|
|
66
|
+
# 🧰 Human Requests
|
|
67
|
+
|
|
68
|
+
<img src="https://raw.githubusercontent.com/Miskler/human-requests/refs/heads/main/assets/logo.png" width="70%" alt="logo.webp" />
|
|
69
|
+
|
|
70
|
+
*Asynchronous library for browser‑like HTTP scenarios with controlled offline rendering and two‑way state transfer.*
|
|
71
|
+
|
|
72
|
+
[](https://miskler.github.io/human-requests/tests/tests-report.html)
|
|
73
|
+
[](https://miskler.github.io/human-requests/coverage/)
|
|
74
|
+
[](https://python.org)
|
|
75
|
+
[](https://pypi.org/project/human-requests/)
|
|
76
|
+
[](LICENSE)
|
|
77
|
+
[](https://github.com/psf/black)
|
|
78
|
+
[](https://mypy.readthedocs.io/en/stable/index.html)
|
|
79
|
+
[](https://discord.gg/UnJnGHNbBp)
|
|
80
|
+
[](https://t.me/miskler_dev)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
**[⭐ Star us on GitHub](https://github.com/Miskler/human-requests)** | **[📚 Read the Docs](https://miskler.github.io/human-requests/quick_start)** | **[🐛 Report a Bug](https://github.com/Miskler/human-requests/issues)**
|
|
84
|
+
|
|
85
|
+
## ✨ Features
|
|
86
|
+
|
|
87
|
+
</div>
|
|
88
|
+
|
|
89
|
+
- **HTTP by default.** Direct requests via `curl_cffi` in *impersonate* mode + real browser headers generation.
|
|
90
|
+
- **Browser on demand.** Offline render of an already received response (no repeated HTTP) and JS execution.
|
|
91
|
+
- **Unified state.** Two‑way transfer of **cookies** and **`localStorage`** between HTTP and the browser (storage_state ⇄ session).
|
|
92
|
+
- **Async by design.** Native `asyncio` for predictable concurrency.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
<div align="center">
|
|
96
|
+
|
|
97
|
+
## 🚀 Quick Start
|
|
98
|
+
|
|
99
|
+
### Installation
|
|
100
|
+
|
|
101
|
+
</div>
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install "human-requests[stealth-playwright]"
|
|
105
|
+
playwright install
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
<div align="center">
|
|
109
|
+
|
|
110
|
+
### Direct request *(pretend to be a browser)*
|
|
111
|
+
|
|
112
|
+
</div>
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
import asyncio
|
|
116
|
+
from human_requests import Session, HttpMethod
|
|
117
|
+
|
|
118
|
+
async def main():
|
|
119
|
+
async with Session(headless=True, browser="camoufox") as s:
|
|
120
|
+
resp = await s.request(HttpMethod.GET, "https://target.example/")
|
|
121
|
+
print(resp.status_code, len(resp.text))
|
|
122
|
+
|
|
123
|
+
asyncio.run(main())
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
<div align="center">
|
|
127
|
+
|
|
128
|
+
### Render an already received response *(without another request)*
|
|
129
|
+
|
|
130
|
+
</div>
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
# resp — the result of an HTTP request
|
|
134
|
+
async with resp.render(wait_until="networkidle") as page:
|
|
135
|
+
await page.wait_for_selector("#content")
|
|
136
|
+
|
|
137
|
+
# after exiting:
|
|
138
|
+
# - cookies and localStorage are synced back into the session
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
<div align="center">
|
|
142
|
+
|
|
143
|
+
### Warm‑up: inject `localStorage` BEFORE page start
|
|
144
|
+
|
|
145
|
+
</div>
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
origin = "https://target.example"
|
|
149
|
+
|
|
150
|
+
async with Session(headless=True, browser="camoufox") as s:
|
|
151
|
+
# prepare storage_state in advance
|
|
152
|
+
s.local_storage.setdefault(origin, {})
|
|
153
|
+
s.local_storage[origin]["seen"] = "1"
|
|
154
|
+
s.local_storage[origin]["ab_variant"] = "B"
|
|
155
|
+
|
|
156
|
+
# the browser starts with the required values already in place
|
|
157
|
+
async with s.goto_page(f"{origin}/", wait_until="networkidle"):
|
|
158
|
+
pass
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
<div align="center">
|
|
162
|
+
|
|
163
|
+
### Accessing state
|
|
164
|
+
|
|
165
|
+
</div>
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
# Cookies:
|
|
169
|
+
print(s.cookies.storage)
|
|
170
|
+
|
|
171
|
+
# LocalStorage:
|
|
172
|
+
print(s.local_storage.get("https://target.example", {}))
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
<div align="center">
|
|
176
|
+
|
|
177
|
+
## Key Characteristics
|
|
178
|
+
|
|
179
|
+
</div>
|
|
180
|
+
|
|
181
|
+
- HTTP impersonation: `curl_cffi` + browser‑grade headers on every request.
|
|
182
|
+
- Offline render: first response interception (fulfill) and soft reloads without recreating contexts.
|
|
183
|
+
- State as a first‑class citizen: cookies and `localStorage` sync both ways.
|
|
184
|
+
- Unified proxy layer: a single proxy format shared by `curl_cffi` and Playwright.
|
|
185
|
+
- Clean stack: no external Go binaries.
|
|
186
|
+
|
|
187
|
+
<div align="center">
|
|
188
|
+
|
|
189
|
+
## Comparison: human-requests vs hrequests
|
|
190
|
+
|
|
191
|
+
</div>
|
|
192
|
+
|
|
193
|
+
| Aspect | human-requests | hrequests |
|
|
194
|
+
|---|---|---|
|
|
195
|
+
| Execution model | `asyncio` (native) | sync + threads/gevent |
|
|
196
|
+
| HTTP impersonation | `curl_cffi` impersonate + per‑request browser headers | `tls-client` (Go backend) |
|
|
197
|
+
| Offline `Response` render | Yes (fulfill + soft‑reload; no repeated HTTP) | Yes (post‑render with cookies/content update) |
|
|
198
|
+
| Cookies ↔ HTTP/Browser | Two‑way transfer | Two‑way transfer |
|
|
199
|
+
| `localStorage` ↔ HTTP/Browser | First‑class (storage_state ⇄ session) | Via `page.evaluate(...)` |
|
|
200
|
+
| Typing | mypy‑friendly | — |
|
|
201
|
+
| Dependencies | No Go binaries | Go backend (`tls-client`) |
|
|
202
|
+
| Built‑in HTML parser | — | `selectolax` |
|
|
203
|
+
|
|
204
|
+
> The focus of human-requests is a **controlled** anti‑bot pipeline in `asyncio`: HTTP by default, a browser only where needed, with state hand‑off.
|
|
205
|
+
|
|
206
|
+
<div align="center">
|
|
207
|
+
|
|
208
|
+
## 🛠️ Development
|
|
209
|
+
|
|
210
|
+
### Setup
|
|
211
|
+
|
|
212
|
+
</div>
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
git clone https://github.com/Miskler/human-requests.git
|
|
216
|
+
cd human-requests
|
|
217
|
+
python -m venv .venv
|
|
218
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
219
|
+
make build
|
|
220
|
+
make install-dev
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
<div align="center">
|
|
224
|
+
|
|
225
|
+
### Commands
|
|
226
|
+
|
|
227
|
+
</div>
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Checks
|
|
231
|
+
pytest # tests + coverage
|
|
232
|
+
make lint # ruff/flake8/isort/black (if enabled)
|
|
233
|
+
make type-check # mypy/pyright
|
|
234
|
+
# Actions
|
|
235
|
+
make format # formatting
|
|
236
|
+
make docs # build documentation
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
<div align="center">
|
|
240
|
+
|
|
241
|
+
### Dev: local test server
|
|
242
|
+
|
|
243
|
+
</div>
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# from the test_server/ folder
|
|
247
|
+
make serve # foreground (Ctrl+C to stop)
|
|
248
|
+
make stop # stop background process
|
|
249
|
+
```
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
|
|
2
|
+
<div align="center">
|
|
3
|
+
|
|
4
|
+
# 🧰 Human Requests
|
|
5
|
+
|
|
6
|
+
<img src="https://raw.githubusercontent.com/Miskler/human-requests/refs/heads/main/assets/logo.png" width="70%" alt="logo.webp" />
|
|
7
|
+
|
|
8
|
+
*Asynchronous library for browser‑like HTTP scenarios with controlled offline rendering and two‑way state transfer.*
|
|
9
|
+
|
|
10
|
+
[](https://miskler.github.io/human-requests/tests/tests-report.html)
|
|
11
|
+
[](https://miskler.github.io/human-requests/coverage/)
|
|
12
|
+
[](https://python.org)
|
|
13
|
+
[](https://pypi.org/project/human-requests/)
|
|
14
|
+
[](LICENSE)
|
|
15
|
+
[](https://github.com/psf/black)
|
|
16
|
+
[](https://mypy.readthedocs.io/en/stable/index.html)
|
|
17
|
+
[](https://discord.gg/UnJnGHNbBp)
|
|
18
|
+
[](https://t.me/miskler_dev)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
**[⭐ Star us on GitHub](https://github.com/Miskler/human-requests)** | **[📚 Read the Docs](https://miskler.github.io/human-requests/quick_start)** | **[🐛 Report a Bug](https://github.com/Miskler/human-requests/issues)**
|
|
22
|
+
|
|
23
|
+
## ✨ Features
|
|
24
|
+
|
|
25
|
+
</div>
|
|
26
|
+
|
|
27
|
+
- **HTTP by default.** Direct requests via `curl_cffi` in *impersonate* mode + real browser headers generation.
|
|
28
|
+
- **Browser on demand.** Offline render of an already received response (no repeated HTTP) and JS execution.
|
|
29
|
+
- **Unified state.** Two‑way transfer of **cookies** and **`localStorage`** between HTTP and the browser (storage_state ⇄ session).
|
|
30
|
+
- **Async by design.** Native `asyncio` for predictable concurrency.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
<div align="center">
|
|
34
|
+
|
|
35
|
+
## 🚀 Quick Start
|
|
36
|
+
|
|
37
|
+
### Installation
|
|
38
|
+
|
|
39
|
+
</div>
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install "human-requests[stealth-playwright]"
|
|
43
|
+
playwright install
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
<div align="center">
|
|
47
|
+
|
|
48
|
+
### Direct request *(pretend to be a browser)*
|
|
49
|
+
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import asyncio
|
|
54
|
+
from human_requests import Session, HttpMethod
|
|
55
|
+
|
|
56
|
+
async def main():
|
|
57
|
+
async with Session(headless=True, browser="camoufox") as s:
|
|
58
|
+
resp = await s.request(HttpMethod.GET, "https://target.example/")
|
|
59
|
+
print(resp.status_code, len(resp.text))
|
|
60
|
+
|
|
61
|
+
asyncio.run(main())
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
<div align="center">
|
|
65
|
+
|
|
66
|
+
### Render an already received response *(without another request)*
|
|
67
|
+
|
|
68
|
+
</div>
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
# resp — the result of an HTTP request
|
|
72
|
+
async with resp.render(wait_until="networkidle") as page:
|
|
73
|
+
await page.wait_for_selector("#content")
|
|
74
|
+
|
|
75
|
+
# after exiting:
|
|
76
|
+
# - cookies and localStorage are synced back into the session
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
<div align="center">
|
|
80
|
+
|
|
81
|
+
### Warm‑up: inject `localStorage` BEFORE page start
|
|
82
|
+
|
|
83
|
+
</div>
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
origin = "https://target.example"
|
|
87
|
+
|
|
88
|
+
async with Session(headless=True, browser="camoufox") as s:
|
|
89
|
+
# prepare storage_state in advance
|
|
90
|
+
s.local_storage.setdefault(origin, {})
|
|
91
|
+
s.local_storage[origin]["seen"] = "1"
|
|
92
|
+
s.local_storage[origin]["ab_variant"] = "B"
|
|
93
|
+
|
|
94
|
+
# the browser starts with the required values already in place
|
|
95
|
+
async with s.goto_page(f"{origin}/", wait_until="networkidle"):
|
|
96
|
+
pass
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
<div align="center">
|
|
100
|
+
|
|
101
|
+
### Accessing state
|
|
102
|
+
|
|
103
|
+
</div>
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
# Cookies:
|
|
107
|
+
print(s.cookies.storage)
|
|
108
|
+
|
|
109
|
+
# LocalStorage:
|
|
110
|
+
print(s.local_storage.get("https://target.example", {}))
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
<div align="center">
|
|
114
|
+
|
|
115
|
+
## Key Characteristics
|
|
116
|
+
|
|
117
|
+
</div>
|
|
118
|
+
|
|
119
|
+
- HTTP impersonation: `curl_cffi` + browser‑grade headers on every request.
|
|
120
|
+
- Offline render: first response interception (fulfill) and soft reloads without recreating contexts.
|
|
121
|
+
- State as a first‑class citizen: cookies and `localStorage` sync both ways.
|
|
122
|
+
- Unified proxy layer: a single proxy format shared by `curl_cffi` and Playwright.
|
|
123
|
+
- Clean stack: no external Go binaries.
|
|
124
|
+
|
|
125
|
+
<div align="center">
|
|
126
|
+
|
|
127
|
+
## Comparison: human-requests vs hrequests
|
|
128
|
+
|
|
129
|
+
</div>
|
|
130
|
+
|
|
131
|
+
| Aspect | human-requests | hrequests |
|
|
132
|
+
|---|---|---|
|
|
133
|
+
| Execution model | `asyncio` (native) | sync + threads/gevent |
|
|
134
|
+
| HTTP impersonation | `curl_cffi` impersonate + per‑request browser headers | `tls-client` (Go backend) |
|
|
135
|
+
| Offline `Response` render | Yes (fulfill + soft‑reload; no repeated HTTP) | Yes (post‑render with cookies/content update) |
|
|
136
|
+
| Cookies ↔ HTTP/Browser | Two‑way transfer | Two‑way transfer |
|
|
137
|
+
| `localStorage` ↔ HTTP/Browser | First‑class (storage_state ⇄ session) | Via `page.evaluate(...)` |
|
|
138
|
+
| Typing | mypy‑friendly | — |
|
|
139
|
+
| Dependencies | No Go binaries | Go backend (`tls-client`) |
|
|
140
|
+
| Built‑in HTML parser | — | `selectolax` |
|
|
141
|
+
|
|
142
|
+
> The focus of human-requests is a **controlled** anti‑bot pipeline in `asyncio`: HTTP by default, a browser only where needed, with state hand‑off.
|
|
143
|
+
|
|
144
|
+
<div align="center">
|
|
145
|
+
|
|
146
|
+
## 🛠️ Development
|
|
147
|
+
|
|
148
|
+
### Setup
|
|
149
|
+
|
|
150
|
+
</div>
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
git clone https://github.com/Miskler/human-requests.git
|
|
154
|
+
cd human-requests
|
|
155
|
+
python -m venv .venv
|
|
156
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
157
|
+
make build
|
|
158
|
+
make install-dev
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
<div align="center">
|
|
162
|
+
|
|
163
|
+
### Commands
|
|
164
|
+
|
|
165
|
+
</div>
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Checks
|
|
169
|
+
pytest # tests + coverage
|
|
170
|
+
make lint # ruff/flake8/isort/black (if enabled)
|
|
171
|
+
make type-check # mypy/pyright
|
|
172
|
+
# Actions
|
|
173
|
+
make format # formatting
|
|
174
|
+
make docs # build documentation
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
<div align="center">
|
|
178
|
+
|
|
179
|
+
### Dev: local test server
|
|
180
|
+
|
|
181
|
+
</div>
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
# from the test_server/ folder
|
|
185
|
+
make serve # foreground (Ctrl+C to stop)
|
|
186
|
+
make stop # stop background process
|
|
187
|
+
```
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, Iterable, Iterator, Literal, Mapping
|
|
4
|
+
from urllib.parse import urlsplit
|
|
5
|
+
|
|
6
|
+
from playwright.async_api import StorageStateCookie
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Cookie:
    """
    A single HTTP cookie and its attributes.

    The fields mirror the attributes of the ``Set-Cookie`` header; see the
    MDN Web Docs for their full semantics:
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie
    """

    name: str
    """Name that identifies the cookie in the ``Cookie`` header."""

    value: str
    """Value sent with the ``Cookie`` header."""

    path: str = "/"
    """URL path the cookie is scoped to."""

    domain: str = ""
    """Domain the cookie is scoped to."""

    expires: int = 0
    """Expiration moment as a Unix timestamp (seconds)."""

    max_age: int = 0
    """Maximum age of the cookie in seconds."""

    same_site: Literal["Lax", "Strict", "None"] = "Lax"
    """``SameSite`` policy controlling when the cookie is sent with requests."""

    secure: bool = False
    """Whether the cookie is only sent over secure connections."""

    http_only: bool = False
    """Whether the cookie is ``HttpOnly``, i.e. hidden from JavaScript."""

    def expires_as_datetime(self) -> datetime:
        """Return ``expires`` converted with ``datetime.fromtimestamp``."""
        return datetime.fromtimestamp(self.expires)

    def max_age_as_datetime(self) -> datetime:
        """Return ``max_age`` converted with ``datetime.fromtimestamp``.

        NOTE(review): ``max_age`` is a duration in seconds, so the result is
        that many seconds after the Unix epoch, not an absolute expiry moment.
        Behavior is kept as-is for backward compatibility.
        """
        return datetime.fromtimestamp(self.max_age)

    def to_playwright_like_dict(self) -> StorageStateCookie:
        """Return a dictionary compatible with Playwright StorageState cookies."""
        return {
            "name": self.name,
            "value": self.value,
            "domain": self.domain or "",
            "path": self.path or "/",
            "expires": float(self.expires or 0),
            "httpOnly": bool(self.http_only or False),
            "secure": bool(self.secure or False),
            "sameSite": self.same_site,
        }

    @staticmethod
    def from_playwright_like_dict(data: Mapping[str, Any]) -> "Cookie":
        """Build a cookie from any mapping (dict or Playwright's StorageStateCookie).

        ``name`` and ``value`` are required keys; every other key is optional
        and falls back to the dataclass default.  ``sameSite`` is carried over
        when it is one of ``"Lax"``, ``"Strict"`` or ``"None"``; any other or
        missing value falls back to ``"Lax"``.

        Raises:
            KeyError: if ``name`` or ``value`` is missing from ``data``.
        """
        # Previously ``sameSite`` was dropped, so a round trip through
        # Playwright silently downgraded every cookie to "Lax".
        raw_same_site = data.get("sameSite")
        same_site = raw_same_site if raw_same_site in ("Lax", "Strict", "None") else "Lax"

        return Cookie(
            name=str(data["name"]),
            value=str(data["value"]),
            domain=str(data.get("domain") or ""),
            path=str(data.get("path") or "/"),
            expires=int(data.get("expires") or 0),
            same_site=same_site,
            secure=bool(data.get("secure")),
            http_only=bool(data.get("httpOnly")),
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class CookieManager:
    """Jar-style container of cookies with Playwright conversion helpers.

    Cookies are kept in ``storage`` in insertion order; a cookie is identified
    by its ``(domain, path, name)`` triple.
    """

    storage: list[Cookie] = field(default_factory=list)

    # ────── dunder helpers ──────
    def __iter__(self) -> Iterator[Cookie]:
        """Iterate over the stored cookies in insertion order."""
        return iter(self.storage)

    def __len__(self) -> int:
        """Return the number of stored cookies."""
        return len(self.storage)

    def __bool__(self) -> bool:
        """Return ``True`` when the jar holds at least one cookie."""
        return bool(self.storage)

    # ────── CRUD ──────
    def get(self, name: str, domain: str | None = None, path: str | None = None) -> Cookie | None:
        """Return the first cookie matching ``name`` (and ``domain``/``path`` if given).

        A ``None`` filter matches any value.  Returns ``None`` when nothing matches.
        """
        return next(
            (
                c
                for c in self.storage
                if c.name == name
                and (domain is None or c.domain == domain)
                and (path is None or c.path == path)
            ),
            None,
        )

    def get_for_domain(self, url_or_domain: str) -> list[Cookie]:
        """Return all cookies whose domain covers the given URL or bare host.

        ``url_or_domain`` may be a full URL or a ``host[:port]`` string.  A
        cookie matches when the host equals the cookie domain or is a subdomain
        of it.  The comparison is case-insensitive and ignores a leading dot
        on the cookie domain (``.example.com``).
        """
        host = urlsplit(url_or_domain).hostname or url_or_domain.split(":")[0]
        if not host:
            return []
        host = host.lower()

        def _match(cookie_domain: str, h: str) -> bool:
            # Domain cookies are commonly stored with a leading dot; without
            # stripping it, "h.endswith('..example.com')" could never be true.
            bare = cookie_domain.lstrip(".").lower()
            if not bare:
                # A cookie without a domain cannot be attributed to any host.
                return False
            return h == bare or h.endswith("." + bare)

        return [c for c in self.storage if _match(c.domain, host)]

    def add(self, cookie: Cookie | Iterable[Cookie]) -> None:
        """Add one cookie or an iterable of cookies.

        A cookie with the same ``(domain, path, name)`` as a stored one
        replaces it in place; otherwise it is appended.
        """

        def _add_one(c: Cookie) -> None:
            key = (c.domain, c.path, c.name)
            for i, old in enumerate(self.storage):
                if (old.domain, old.path, old.name) == key:
                    self.storage[i] = c
                    break
            else:
                # No existing cookie shares the identity triple.
                self.storage.append(c)

        if isinstance(cookie, Iterable) and not isinstance(cookie, Cookie):
            for c in cookie:
                _add_one(c)
        else:
            _add_one(cookie)

    def delete(
        self, name: str, domain: str | None = None, path: str | None = None
    ) -> Cookie | None:
        """Remove and return the first cookie matching ``name`` (and ``domain``/``path``).

        A ``None`` filter matches any value.  Returns ``None`` when nothing matches.
        """
        for i, c in enumerate(self.storage):
            if (
                c.name == name
                and (domain is None or c.domain == domain)
                and (path is None or c.path == path)
            ):
                return self.storage.pop(i)
        return None

    # ────── Playwright helpers ──────
    def to_playwright(self) -> list[StorageStateCookie]:
        """Serialize all cookies into a format understood by Playwright."""
        return [c.to_playwright_like_dict() for c in self.storage]

    def add_from_playwright(self, raw_cookies: Iterable[Mapping[str, Any]]) -> None:
        """Inverse operation — add a list of Playwright cookies/mappings to the jar."""
        self.add(Cookie.from_playwright_like_dict(rc) for rc in raw_cookies)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from urllib.parse import parse_qs, urlparse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class HttpMethod(Enum):
    """HTTP request methods; each member's value is the upper-case verb string."""

    GET = "GET"
    """Fetch data from a server.
    Read-only: the request does not modify the resource."""
    POST = "POST"
    """Send data to a server, typically to create a new resource.
    May also be used to update an existing one."""
    PUT = "PUT"
    """Update an existing resource on a server.
    May also be used to create a new resource."""
    PATCH = "PATCH"
    """Partially update an existing resource on a server.
    Only the fields present in the request body are changed."""
    DELETE = "DELETE"
    """Remove a resource from a server."""
    HEAD = "HEAD"
    """Fetch metadata from a server.
    Only the headers are returned, never the response body."""
    OPTIONS = "OPTIONS"
    """Describe the HTTP methods a server supports.
    Used, among other things, for Cross-Origin Resource Sharing (CORS) requests."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class URL:
    """Immutable view of a URL split into its components.

    Only ``full_url`` is meant to be supplied; the remaining fields are
    derived from it in ``__post_init__`` and overwrite any value passed in
    (``port`` is only overwritten when the URL actually contains a port).

    Raises:
        ValueError: if the port part of the URL is not a valid integer.
    """

    full_url: str
    """The full URL."""
    base_url: str = ""
    """The base URL, without query parameters."""
    secure: bool = False
    """Whether the URL is secure (https/wss)."""
    protocol: str = ""
    """The protocol (scheme) of the URL."""
    path: str = ""
    """The path of the URL."""
    domain_with_port: str = ""
    """The network location of the URL exactly as written (host plus optional port)."""
    domain: str = ""
    """The host of the URL, without userinfo, port, or IPv6 brackets."""
    port: Optional[int] = None
    """The port of the URL, or ``None`` when the URL does not specify one."""
    params: dict[str, list[str]] = field(default_factory=dict)
    """A dictionary of query parameters."""

    def __post_init__(self) -> None:
        parsed_url = urlparse(self.full_url)

        # The dataclass is frozen, so derived fields must be written through
        # object.__setattr__ rather than normal attribute assignment.
        object.__setattr__(self, "base_url", parsed_url._replace(query="").geturl())
        object.__setattr__(self, "secure", parsed_url.scheme in ["https", "wss"])
        object.__setattr__(self, "protocol", parsed_url.scheme)

        object.__setattr__(self, "path", parsed_url.path)

        # Drop any "user:password@" prefix so its colon is not mistaken for
        # the host/port separator.
        authority = parsed_url.netloc.rpartition("@")[2]
        if authority.startswith("["):
            # Bracketed IPv6 literal: the colons inside the brackets belong to
            # the address, only a colon after "]" introduces the port.
            host, _, tail = authority[1:].partition("]")
            port_text = tail[1:] if tail.startswith(":") else ""
        else:
            host, _, port_text = authority.partition(":")

        object.__setattr__(self, "domain_with_port", parsed_url.netloc)
        object.__setattr__(self, "domain", host)
        if port_text:
            # A trailing ":" with no digits means "no explicit port".
            object.__setattr__(self, "port", int(port_text))

        object.__setattr__(self, "params", parse_qs(parsed_url.query))
|