never-primp 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of never-primp might be problematic. Click here for more details.
- {never_primp-1.0.0 → never_primp-1.0.2}/.gitignore +1 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/Cargo.lock +1 -1
- {never_primp-1.0.0 → never_primp-1.0.2}/Cargo.toml +1 -1
- never_primp-1.0.2/ORDERED_HEADERS.md +167 -0
- never_primp-1.0.2/PKG-INFO +803 -0
- never_primp-1.0.2/README.md +775 -0
- never_primp-1.0.2/README_CN.md +768 -0
- never_primp-1.0.2/SPLIT_COOKIES.md +276 -0
- never_primp-1.0.2/example_ordered_headers.py +58 -0
- never_primp-1.0.2/example_split_cookies.py +36 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/never_primp/__init__.py +151 -2
- {never_primp-1.0.0 → never_primp-1.0.2}/never_primp/never_primp.pyi +50 -2
- {never_primp-1.0.0 → never_primp-1.0.2}/pyproject.toml +1 -1
- {never_primp-1.0.0 → never_primp-1.0.2}/src/lib.rs +229 -32
- {never_primp-1.0.0 → never_primp-1.0.2}/src/traits.rs +18 -1
- never_primp-1.0.2/test.py +53 -0
- never_primp-1.0.0/PKG-INFO +0 -358
- never_primp-1.0.0/README.md +0 -330
- never_primp-1.0.0/benchmark.jpg +0 -0
- never_primp-1.0.0/test.py +0 -20
- never_primp-1.0.0/test_cookie_management.py +0 -283
- {never_primp-1.0.0 → never_primp-1.0.2}/.claude/settings.local.json +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/.github/workflows/build.yml +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/LICENSE +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/benchmark/README.md +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/benchmark/benchmark.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/benchmark/generate_image.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/benchmark/requirements.txt +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/benchmark/server.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/never_primp/py.typed +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/src/impersonate.rs +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/src/response.rs +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/src/utils.rs +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/test_features.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/test_performance.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/tests/test_asyncclient.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/tests/test_client.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/tests/test_defs.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/tests/test_main.py +0 -0
- {never_primp-1.0.0 → never_primp-1.0.2}/tests/test_response.py +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# Ordered Headers - 有序请求头
|
|
2
|
+
|
|
3
|
+
## 为什么需要?
|
|
4
|
+
|
|
5
|
+
部分网站的反爬虫系统会检测 HTTP 请求头的**顺序**,普通 HTTP 客户端无法保证顺序,导致请求被识别为机器人。
|
|
6
|
+
|
|
7
|
+
## 快速使用
|
|
8
|
+
|
|
9
|
+
### 客户端级别
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from never_primp import Client
|
|
13
|
+
|
|
14
|
+
client = Client(
|
|
15
|
+
ordered_headers={
|
|
16
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
17
|
+
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
18
|
+
"accept-language": "en-US,en;q=0.9",
|
|
19
|
+
"accept-encoding": "gzip, deflate, br",
|
|
20
|
+
"sec-fetch-dest": "document",
|
|
21
|
+
"sec-fetch-mode": "navigate",
|
|
22
|
+
}
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
response = client.get("https://example.com")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 单次请求
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
client = Client()
|
|
32
|
+
|
|
33
|
+
response = client.get(
|
|
34
|
+
"https://example.com",
|
|
35
|
+
ordered_headers={
|
|
36
|
+
"authorization": "Bearer token",
|
|
37
|
+
"accept": "application/json",
|
|
38
|
+
}
|
|
39
|
+
)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### 动态修改
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
# 完全替换
|
|
46
|
+
client.ordered_headers = {...}
|
|
47
|
+
|
|
48
|
+
# 增量更新(保持原有顺序,更新值)
|
|
49
|
+
client.ordered_headers_update({"referer": "https://google.com"})
|
|
50
|
+
|
|
51
|
+
# 获取当前设置
|
|
52
|
+
current = client.ordered_headers
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 实战示例
|
|
56
|
+
|
|
57
|
+
### Chrome 浏览器完整模拟
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
client = Client(
|
|
61
|
+
impersonate="chrome_141",
|
|
62
|
+
ordered_headers={
|
|
63
|
+
"sec-ch-ua": '"Chromium";v="141", "Not?A_Brand";v="8"',
|
|
64
|
+
"sec-ch-ua-mobile": "?0",
|
|
65
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
66
|
+
"upgrade-insecure-requests": "1",
|
|
67
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
68
|
+
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
69
|
+
"sec-fetch-site": "none",
|
|
70
|
+
"sec-fetch-mode": "navigate",
|
|
71
|
+
"sec-fetch-user": "?1",
|
|
72
|
+
"sec-fetch-dest": "document",
|
|
73
|
+
"accept-encoding": "gzip, deflate, br, zstd",
|
|
74
|
+
"accept-language": "en-US,en;q=0.9",
|
|
75
|
+
},
|
|
76
|
+
http2_only=True,
|
|
77
|
+
)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 获取真实浏览器的请求头顺序
|
|
81
|
+
|
|
82
|
+
**方法 1**: 使用 Chrome DevTools
|
|
83
|
+
1. 打开 DevTools (F12)
|
|
84
|
+
2. Network 标签
|
|
85
|
+
3. 访问目标网站
|
|
86
|
+
4. 查看 Request Headers,按显示顺序记录
|
|
87
|
+
|
|
88
|
+
**方法 2**: 使用抓包工具(推荐)
|
|
89
|
+
- Reqable
|
|
90
|
+
- Charles Proxy
|
|
91
|
+
- mitmproxy
|
|
92
|
+
|
|
93
|
+
查看 **Raw Request**,复制请求头顺序。
|
|
94
|
+
|
|
95
|
+
## 技术细节
|
|
96
|
+
|
|
97
|
+
### 与 headers 的区别
|
|
98
|
+
|
|
99
|
+
| 特性 | `headers` | `ordered_headers` |
|
|
100
|
+
|------|-----------|------------------|
|
|
101
|
+
| 底层实现 | `HeaderMap` | `OrigHeaderMap` |
|
|
102
|
+
| 顺序保证 | ❌ 大致保序 | ✅ 严格保序 |
|
|
103
|
+
| 大小写保持 | ❌ 标准化 | ✅ 原始形式 |
|
|
104
|
+
| 性能 | 快 | 稍慢(<5%)|
|
|
105
|
+
| 适用场景 | 普通请求 | 反爬虫绕过 |
|
|
106
|
+
|
|
107
|
+
### 优先级
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
client = Client(
|
|
111
|
+
headers={"user-agent": "ignored"},
|
|
112
|
+
ordered_headers={"user-agent": "used"} # 优先使用
|
|
113
|
+
)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
`ordered_headers` > `headers`
|
|
117
|
+
|
|
118
|
+
## 常见问题
|
|
119
|
+
|
|
120
|
+
**Q: 如何知道网站是否检测请求头顺序?**
|
|
121
|
+
|
|
122
|
+
A: 对比测试法
|
|
123
|
+
1. 使用 `headers` → 被拒绝
|
|
124
|
+
2. 使用 `ordered_headers`(模拟浏览器顺序)→ 成功
|
|
125
|
+
3. 说明网站检测了顺序
|
|
126
|
+
|
|
127
|
+
**Q: Python dict 保持顺序吗?**
|
|
128
|
+
|
|
129
|
+
A: Python 3.7+ 的 dict **保持插入顺序**,可以直接使用。
|
|
130
|
+
|
|
131
|
+
**Q: 与 impersonate 配合使用?**
|
|
132
|
+
|
|
133
|
+
A: `impersonate` 会覆盖自定义头部。如需精确控制,不要使用 `impersonate`,手动配置 `ordered_headers`;若两者同时使用(如上文的 Chrome 完整模拟示例),请通过抓包确认实际发送的请求头及其顺序。
|
|
134
|
+
|
|
135
|
+
**Q: 性能影响?**
|
|
136
|
+
|
|
137
|
+
A: 相比普通 `headers`,额外开销小于 5%,实际应用中可忽略。
|
|
138
|
+
|
|
139
|
+
## 最佳实践
|
|
140
|
+
|
|
141
|
+
1. **默认不使用**:仅在需要时启用(被检测时)
|
|
142
|
+
2. **复制真实浏览器**:使用抓包工具获取真实顺序
|
|
143
|
+
3. **配合其他功能**:`ordered_headers` + `split_cookies` + `http2_only` = 完美模拟
|
|
144
|
+
4. **注意大小写**:保持与浏览器一致(HTTP/2 要求头部名称全小写;HTTP/1.1 下浏览器通常使用首字母大写,如 `User-Agent`)
|
|
145
|
+
|
|
146
|
+
## 调试技巧
|
|
147
|
+
|
|
148
|
+
### 验证顺序是否正确
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# 使用 httpbin.org
|
|
152
|
+
response = client.get("https://httpbin.org/headers")
|
|
153
|
+
print(response.json()["headers"])
|
|
154
|
+
|
|
155
|
+
# 使用代理抓包
|
|
156
|
+
client = Client(
|
|
157
|
+
proxy="http://127.0.0.1:8888", # Reqable/Charles
|
|
158
|
+
verify=False,
|
|
159
|
+
ordered_headers={...}
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
在抓包工具中查看 **Request Headers**,确认顺序。
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
**总结**:`ordered_headers` 是应对反爬虫检测的利器,在需要精确模拟浏览器请求头顺序时使用。
|