cobweb-launcher 3.1.13__tar.gz → 3.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/PKG-INFO +15 -30
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/README.md +14 -29
- cobweb-launcher-3.1.14/cobweb/base/common_queue.py +51 -0
- cobweb-launcher-3.1.14/cobweb/base/item.py +65 -0
- cobweb-launcher-3.1.14/cobweb/base/request.py +88 -0
- cobweb-launcher-3.1.14/cobweb/base/seed.py +162 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/db/api_db.py +2 -1
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/db/redis_db.py +2 -1
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/launchers/launcher.py +27 -24
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/schedulers/scheduler.py +28 -11
- cobweb-launcher-3.1.14/cobweb/schedulers/scheduler_with_redis.py +176 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/setting.py +6 -6
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/decorators.py +1 -2
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb_launcher.egg-info/PKG-INFO +15 -30
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/setup.py +1 -1
- cobweb-launcher-3.1.13/cobweb/base/common_queue.py +0 -30
- cobweb-launcher-3.1.13/cobweb/base/item.py +0 -60
- cobweb-launcher-3.1.13/cobweb/base/request.py +0 -82
- cobweb-launcher-3.1.13/cobweb/base/seed.py +0 -122
- cobweb-launcher-3.1.13/cobweb/schedulers/scheduler_with_redis.py +0 -172
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/LICENSE +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/base/logger.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/base/response.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/constant.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/launchers/distributor.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/launchers/uploader.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/pipelines/pipeline_csv.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/schedulers/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-3.1.13 → cobweb-launcher-3.1.14}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cobweb-launcher
|
3
|
-
Version: 3.1.13
|
3
|
+
Version: 3.1.14
|
4
4
|
Summary: spider_hole
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
6
6
|
Author: Juannie-PP
|
@@ -29,12 +29,12 @@ pip3 install --upgrade cobweb-launcher
|
|
29
29
|
```
|
30
30
|
## 使用方法介绍
|
31
31
|
### 1. 任务创建
|
32
|
-
-
|
32
|
+
- Launcher任务创建
|
33
33
|
```python
|
34
|
-
from cobweb import
|
34
|
+
from cobweb import Launcher
|
35
35
|
|
36
36
|
# 创建启动器
|
37
|
-
app =
|
37
|
+
app = Launcher(task="test", project="test")
|
38
38
|
|
39
39
|
# 设置采集种子
|
40
40
|
app.SEEDS = [{
|
@@ -44,29 +44,15 @@ app.SEEDS = [{
|
|
44
44
|
# 启动任务
|
45
45
|
app.start()
|
46
46
|
```
|
47
|
-
- LauncherPro任务创建
|
48
|
-
LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
|
49
|
-
```python
|
50
|
-
from cobweb import LauncherPro
|
51
|
-
|
52
|
-
# 创建启动器
|
53
|
-
app = LauncherPro(
|
54
|
-
task="test",
|
55
|
-
project="test"
|
56
|
-
)
|
57
|
-
...
|
58
|
-
# 启动任务
|
59
|
-
app.start()
|
60
|
-
```
|
61
47
|
### 2. 自定义配置文件参数
|
62
48
|
- 通过自定义setting文件,配置文件导入字符串方式
|
63
49
|
> 默认配置文件:import cobweb.setting
|
64
50
|
> 不推荐!!!目前有bug,随缘使用...
|
65
51
|
例如:同级目录下自定义创建了setting.py文件。
|
66
52
|
```python
|
67
|
-
from cobweb import
|
53
|
+
from cobweb import Launcher
|
68
54
|
|
69
|
-
app =
|
55
|
+
app = Launcher(
|
70
56
|
task="test",
|
71
57
|
project="test",
|
72
58
|
setting="import setting"
|
@@ -78,10 +64,10 @@ app.start()
|
|
78
64
|
```
|
79
65
|
- 自定义修改setting中对象值
|
80
66
|
```python
|
81
|
-
from cobweb import
|
67
|
+
from cobweb import Launcher
|
82
68
|
|
83
69
|
# 创建启动器
|
84
|
-
app =
|
70
|
+
app = Launcher(
|
85
71
|
task="test",
|
86
72
|
project="test",
|
87
73
|
REDIS_CONFIG = {
|
@@ -99,10 +85,10 @@ app.start()
|
|
99
85
|
`@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
|
100
86
|
```python
|
101
87
|
from typing import Union
|
102
|
-
from cobweb import
|
88
|
+
from cobweb import Launcher
|
103
89
|
from cobweb.base import Seed, Request, BaseItem
|
104
90
|
|
105
|
-
app =
|
91
|
+
app = Launcher(
|
106
92
|
task="test",
|
107
93
|
project="test"
|
108
94
|
)
|
@@ -127,10 +113,10 @@ app.start()
|
|
127
113
|
`@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制请求参数。
|
128
114
|
```python
|
129
115
|
from typing import Union
|
130
|
-
from cobweb import
|
116
|
+
from cobweb import Launcher
|
131
117
|
from cobweb.base import Request, Response, BaseItem
|
132
118
|
|
133
|
-
app =
|
119
|
+
app = Launcher(
|
134
120
|
task="test",
|
135
121
|
project="test"
|
136
122
|
)
|
@@ -158,14 +144,14 @@ app.start()
|
|
158
144
|
解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
|
159
145
|
```python
|
160
146
|
from typing import Union
|
161
|
-
from cobweb import
|
147
|
+
from cobweb import Launcher
|
162
148
|
from cobweb.base import Seed, Response, BaseItem
|
163
149
|
|
164
150
|
class TestItem(BaseItem):
|
165
151
|
__TABLE__ = "test_data" # 表名
|
166
152
|
__FIELDS__ = "field1, field2, field3" # 字段名
|
167
153
|
|
168
|
-
app =
|
154
|
+
app = Launcher(
|
169
155
|
task="test",
|
170
156
|
project="test"
|
171
157
|
)
|
@@ -187,12 +173,11 @@ app.start()
|
|
187
173
|
> upload_item = item.to_dict
|
188
174
|
> upload_item["text"] = item.response.text
|
189
175
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
190
|
-
##
|
176
|
+
## todo
|
191
177
|
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
192
178
|
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
193
179
|
- 去重过滤(布隆过滤器等)
|
194
180
|
- 单机防丢失
|
195
|
-
- excel、mysql、redis数据完善
|
196
181
|
|
197
182
|
> 未更新流程图!!!
|
198
183
|

|
@@ -14,12 +14,12 @@ pip3 install --upgrade cobweb-launcher
|
|
14
14
|
```
|
15
15
|
## 使用方法介绍
|
16
16
|
### 1. 任务创建
|
17
|
-
-
|
17
|
+
- Launcher任务创建
|
18
18
|
```python
|
19
|
-
from cobweb import
|
19
|
+
from cobweb import Launcher
|
20
20
|
|
21
21
|
# 创建启动器
|
22
|
-
app =
|
22
|
+
app = Launcher(task="test", project="test")
|
23
23
|
|
24
24
|
# 设置采集种子
|
25
25
|
app.SEEDS = [{
|
@@ -29,29 +29,15 @@ app.SEEDS = [{
|
|
29
29
|
# 启动任务
|
30
30
|
app.start()
|
31
31
|
```
|
32
|
-
- LauncherPro任务创建
|
33
|
-
LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
|
34
|
-
```python
|
35
|
-
from cobweb import LauncherPro
|
36
|
-
|
37
|
-
# 创建启动器
|
38
|
-
app = LauncherPro(
|
39
|
-
task="test",
|
40
|
-
project="test"
|
41
|
-
)
|
42
|
-
...
|
43
|
-
# 启动任务
|
44
|
-
app.start()
|
45
|
-
```
|
46
32
|
### 2. 自定义配置文件参数
|
47
33
|
- 通过自定义setting文件,配置文件导入字符串方式
|
48
34
|
> 默认配置文件:import cobweb.setting
|
49
35
|
> 不推荐!!!目前有bug,随缘使用...
|
50
36
|
例如:同级目录下自定义创建了setting.py文件。
|
51
37
|
```python
|
52
|
-
from cobweb import
|
38
|
+
from cobweb import Launcher
|
53
39
|
|
54
|
-
app =
|
40
|
+
app = Launcher(
|
55
41
|
task="test",
|
56
42
|
project="test",
|
57
43
|
setting="import setting"
|
@@ -63,10 +49,10 @@ app.start()
|
|
63
49
|
```
|
64
50
|
- 自定义修改setting中对象值
|
65
51
|
```python
|
66
|
-
from cobweb import
|
52
|
+
from cobweb import Launcher
|
67
53
|
|
68
54
|
# 创建启动器
|
69
|
-
app =
|
55
|
+
app = Launcher(
|
70
56
|
task="test",
|
71
57
|
project="test",
|
72
58
|
REDIS_CONFIG = {
|
@@ -84,10 +70,10 @@ app.start()
|
|
84
70
|
`@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
|
85
71
|
```python
|
86
72
|
from typing import Union
|
87
|
-
from cobweb import
|
73
|
+
from cobweb import Launcher
|
88
74
|
from cobweb.base import Seed, Request, BaseItem
|
89
75
|
|
90
|
-
app =
|
76
|
+
app = Launcher(
|
91
77
|
task="test",
|
92
78
|
project="test"
|
93
79
|
)
|
@@ -112,10 +98,10 @@ app.start()
|
|
112
98
|
`@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制请求参数。
|
113
99
|
```python
|
114
100
|
from typing import Union
|
115
|
-
from cobweb import
|
101
|
+
from cobweb import Launcher
|
116
102
|
from cobweb.base import Request, Response, BaseItem
|
117
103
|
|
118
|
-
app =
|
104
|
+
app = Launcher(
|
119
105
|
task="test",
|
120
106
|
project="test"
|
121
107
|
)
|
@@ -143,14 +129,14 @@ app.start()
|
|
143
129
|
解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
|
144
130
|
```python
|
145
131
|
from typing import Union
|
146
|
-
from cobweb import
|
132
|
+
from cobweb import Launcher
|
147
133
|
from cobweb.base import Seed, Response, BaseItem
|
148
134
|
|
149
135
|
class TestItem(BaseItem):
|
150
136
|
__TABLE__ = "test_data" # 表名
|
151
137
|
__FIELDS__ = "field1, field2, field3" # 字段名
|
152
138
|
|
153
|
-
app =
|
139
|
+
app = Launcher(
|
154
140
|
task="test",
|
155
141
|
project="test"
|
156
142
|
)
|
@@ -172,12 +158,11 @@ app.start()
|
|
172
158
|
> upload_item = item.to_dict
|
173
159
|
> upload_item["text"] = item.response.text
|
174
160
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
175
|
-
##
|
161
|
+
## todo
|
176
162
|
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
177
163
|
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
178
164
|
- 去重过滤(布隆过滤器等)
|
179
165
|
- 单机防丢失
|
180
|
-
- excel、mysql、redis数据完善
|
181
166
|
|
182
167
|
> 未更新流程图!!!
|
183
168
|

|
@@ -0,0 +1,51 @@
|
|
1
|
+
from collections import deque
|
2
|
+
from typing import Any, Iterable, Union
|
3
|
+
|
4
|
+
|
5
|
+
class Queue:
    """Minimal FIFO queue built on top of ``collections.deque``."""

    def __init__(self):
        """Create an empty queue."""
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of elements currently stored."""
        return len(self._queue)

    def empty(self) -> bool:
        """Return True when nothing is queued."""
        return not self._queue

    def push(self, data: Union[Any, Iterable], direct_insertion: bool = False):
        """
        Append data to the right end of the queue.

        A list, tuple or set is unpacked element by element unless
        ``direct_insertion`` is true, in which case the container itself is
        stored as a single entry. Falsy data (None, 0, empty containers, "")
        is ignored.
        """
        if not data:
            return

        unpack = isinstance(data, (list, tuple, set)) and not direct_insertion
        add = self._queue.extend if unpack else self._queue.append
        add(data)

    def pop(self) -> Any:
        """
        Remove and return the leftmost element.

        Returns None when the queue is empty.
        """
        try:
            head = self._queue.popleft()
        except IndexError:
            head = None
        return head

    def iter_items(self, limit: int = 1) -> Iterable:
        """
        Pop and yield up to ``limit`` elements.

        Stops early as soon as the queue runs dry.
        """
        for _ in range(limit):
            entry = self.pop()
            if entry is not None:
                yield entry
            else:
                return
|
@@ -0,0 +1,65 @@
|
|
1
|
+
from .seed import Seed
|
2
|
+
from typing import Dict, Any
|
3
|
+
from collections import namedtuple
|
4
|
+
|
5
|
+
|
6
|
+
class ItemMeta(type):
    """Metaclass that attaches a ``Data`` namedtuple to every concrete item class."""

    def __new__(cls, name: str, bases: tuple, dct: dict) -> type:
        """
        Create the class and, unless it is the root ``BaseItem``, build its
        ``Data`` namedtuple from ``__TABLE__`` (type name) and ``__FIELDS__``
        (field names).

        :raises ValueError: if ``__TABLE__`` or ``__FIELDS__`` is empty.
        """
        new_class = super().__new__(cls, name, bases, dct)
        if name != "BaseItem":
            table = getattr(new_class, "__TABLE__")
            fields = getattr(new_class, "__FIELDS__")
            if not table or not fields:
                raise ValueError(f"Missing required attributes '__TABLE__' or '__FIELDS__' in class {name}")
            new_class.Data = namedtuple(table, fields)
        return new_class


class BaseItem(metaclass=ItemMeta):
    """
    Base class for parsed items.

    Subclasses declare ``__TABLE__`` and ``__FIELDS__``; keyword arguments
    matching a declared field are stored in ``self.data`` (a namedtuple), all
    other keyword arguments become plain instance attributes.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: "Seed", **kwargs):
        """
        :param seed: the seed this item was produced from.
        :param kwargs: field values plus arbitrary extra attributes.
        :raises ValueError: if the field values do not match ``Data``
            (missing fields, or the class has no ``Data`` at all).
        """
        self.seed = seed

        # Compare against the parsed field names, not the raw ``__FIELDS__``
        # string: ``key in "field1, field2"`` is a substring test and would
        # wrongly treat e.g. "field" or "d1" as a declared field.
        data_cls = getattr(type(self), "Data", None)
        field_names = data_cls._fields if data_cls is not None else ()

        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        try:
            self.data = self.Data(**data)
        except TypeError as e:
            raise ValueError(f"Invalid field values for Data: {e}") from e

    @property
    def to_dict(self) -> Dict[str, Any]:
        """Field values as a dict keyed by field name."""
        return self.data._asdict()

    @property
    def fields(self) -> "tuple[str, ...]":
        """Declared field names, in declaration order."""
        # Annotation is a string so the class still imports on Python < 3.9.
        return self.data._fields

    @property
    def table(self) -> str:
        """Table name (the name of the ``Data`` namedtuple)."""
        return self.Data.__name__

    def __setitem__(self, key: str, value: Any):
        """Dict-style attribute assignment."""
        setattr(self, key, value)

    def __getitem__(self, key: str) -> Any:
        """Dict-style attribute access; missing attributes yield None."""
        return getattr(self, key, None)

    def __getattr__(self, name: str) -> Any:
        """Return None for unknown attributes instead of raising."""
        # Special-method probes (``__deepcopy__``, ``__setstate__``, ...) must
        # fail normally, otherwise copy/pickle try to call the returned None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None


class CSVItem(BaseItem):
    """Item with a single ``data`` field, stored under the table name ``cobweb``."""

    __TABLE__ = "cobweb"
    __FIELDS__ = "data"
|
65
|
+
|
@@ -0,0 +1,88 @@
|
|
1
|
+
import random
|
2
|
+
import requests
|
3
|
+
from typing import Any, Dict
|
4
|
+
|
5
|
+
|
6
|
+
class Request:
    """
    Wraps the parameters of one HTTP request and performs it with ``requests``.
    """

    # Keyword arguments forwarded verbatim to ``requests.request``.
    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
            self,
            url: str,
            seed: Any,
            random_ua: bool = True,
            check_status_code: bool = True,
            **kwargs,
    ):
        """
        Initialise the request.

        :param url: target URL.
        :param seed: seed object or identifier this request belongs to.
        :param random_ua: add a random User-Agent when none is supplied (default True).
        :param check_status_code: raise on HTTP error status in ``download`` (default True).
        :param kwargs: names listed in ``__REQUEST_ATTRS__`` go into
            ``request_setting``; everything else becomes an instance attribute.
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting: Dict[str, Any] = {}

        for key, value in kwargs.items():
            if key in self.__class__.__REQUEST_ATTRS__:
                self.request_setting[key] = value
            else:
                setattr(self, key, value)

        # An explicit ``method`` kwarg wins; otherwise a body implies POST.
        self.method = getattr(self, "method", None) or (
            "POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"
        )

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """Build a randomised desktop User-Agent string."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        return (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) "
                f"AppleWebKit/{v4}.{v3} (KHTML, like Gecko) "
                f"Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")

    def _build_header(self):
        """Ensure the request headers carry a User-Agent."""
        # Work on a copy: the caller may share one headers dict across many
        # requests, and writing into it would leak the first random UA into
        # all of them. ``or {}`` also tolerates an explicit ``headers=None``.
        headers = dict(self.request_setting.get("headers") or {})

        # Header names are case-insensitive, so "User-Agent" must count too;
        # otherwise a second "user-agent" key would be added next to it.
        has_user_agent = any(
            str(name).lower() == "user-agent" and value
            for name, value in headers.items()
        )
        if not has_user_agent:
            headers["user-agent"] = self._random_ua

        self.request_setting["headers"] = headers

    def download(self) -> "requests.Response":
        """
        Perform the request.

        :return: the ``requests`` response.
        :raises requests.HTTPError: on an error status when
            ``check_status_code`` is true.
        """
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self) -> Dict[str, Any]:
        """Instance attributes except url, seed, check_status_code and request_setting."""
        excluded_keys = {"url", "seed", "check_status_code", "request_setting"}
        return {k: v for k, v in self.__dict__.items() if k not in excluded_keys}
|
@@ -0,0 +1,162 @@
|
|
1
|
+
import json
|
2
|
+
import time
|
3
|
+
import hashlib
|
4
|
+
from typing import Any, Dict, Optional, Union
|
5
|
+
|
6
|
+
|
7
|
+
class SeedParams:
    """
    Scheduling metadata attached to a seed (retry count, priority, version, ...).
    """

    def __init__(
            self,
            retry: Optional[int] = None,
            priority: Optional[int] = None,
            seed_version: Optional[int] = None,
            seed_status: Optional[str] = None,
            proxy_type: Optional[str] = None,
            proxy: Optional[str] = None,
    ):
        self.retry = retry or 0
        # NOTE(review): a priority of 0 falls back to 300 because of ``or`` —
        # confirm whether 0 is meant to be a valid priority.
        self.priority = priority or 300
        self.seed_version = seed_version or int(time.time())
        self.seed_status = seed_status
        self.proxy_type = proxy_type
        self.proxy = proxy

    def __getattr__(self, name: str) -> Any:
        """Return None for unknown attributes instead of raising."""
        # Special-method probes (``__deepcopy__``, ``__setstate__``, ...) must
        # fail normally, otherwise copy/pickle try to call the returned None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None


class Seed:
    """
    A crawl seed: arbitrary payload attributes plus a ``sid`` identifier and a
    ``params`` object holding the scheduling metadata.
    """

    # Names that belong to ``params`` and are never stored as seed attributes.
    __SEED_PARAMS__ = [
        "retry",
        "priority",
        "seed_version",
        "seed_status",
        "proxy_type",
        "proxy",
    ]

    def __init__(
            self,
            seed: Union[str, bytes, Dict[str, Any]] = None,
            sid: Optional[str] = None,
            retry: Optional[int] = None,
            priority: Optional[int] = None,
            seed_version: Optional[int] = None,
            seed_status: Optional[str] = None,
            proxy_type: Optional[str] = None,
            proxy: Optional[str] = None,
            **kwargs,
    ):
        """
        Initialise the seed.

        :param seed: seed payload as a JSON string/bytes or a dict.
        :param sid: unique identifier; derived from the payload when omitted.
        :param retry: retry count.
        :param priority: priority.
        :param seed_version: seed version.
        :param seed_status: seed status.
        :param proxy_type: proxy type.
        :param proxy: proxy address.
        :param kwargs: extra payload attributes.
        :raises ValueError: if ``seed`` is not valid JSON or does not decode to
            a JSON object.
        :raises TypeError: if ``seed`` is neither str, bytes nor dict.
        """
        # Load the payload.
        if seed:
            if isinstance(seed, (str, bytes)):
                try:
                    item = json.loads(seed)
                except json.JSONDecodeError as e:
                    raise ValueError(f"Invalid JSON format for seed: {seed}") from e
                # Valid JSON may still be a list/number/string, which has no
                # key/value pairs to turn into attributes.
                if not isinstance(item, dict):
                    raise ValueError(f"Invalid JSON format for seed: {seed}")
                self._init_seed(item)
            elif isinstance(seed, dict):
                self._init_seed(seed)
            else:
                raise TypeError(f"Seed type error, must be str, bytes, or dict! Seed: {seed}")

        # Collect the scheduling parameters.
        seed_params = {
            "retry": retry,
            "priority": priority,
            "seed_version": seed_version,
            "seed_status": seed_status,
            "proxy_type": proxy_type,
            "proxy": proxy,
        }

        # Extra keyword arguments become payload attributes. They can never
        # carry a parameter name: those are bound to the explicit arguments.
        if kwargs:
            self._init_seed(kwargs)

        # Assign the identifier (explicit sid wins over one found in the payload).
        if sid or not getattr(self, "sid", None):
            self._init_id(sid)

        # Attach the parameter object.
        self.params = SeedParams(**seed_params)

    def __getattr__(self, name: str) -> Any:
        """Return None for unknown attributes instead of raising."""
        # Special-method probes (``__deepcopy__``, ``__setstate__``, ...) must
        # fail normally, otherwise copy/pickle try to call the returned None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None

    def __setitem__(self, key: str, value: Any):
        """Dict-style attribute assignment."""
        setattr(self, key, value)

    def __getitem__(self, key: str) -> Any:
        """Dict-style attribute access; missing attributes yield None."""
        return getattr(self, key, None)

    def __str__(self) -> str:
        """Compact JSON representation of the payload."""
        return self.to_string

    def __repr__(self) -> str:
        """Debug representation listing every instance attribute."""
        attrs = [f"{k}={v}" for k, v in self.__dict__.items()]
        return f"{self.__class__.__name__}({', '.join(attrs)})"

    def _init_seed(self, seed_info: Dict[str, Any]):
        """Copy every non-parameter key of ``seed_info`` onto the instance."""
        for key, value in seed_info.items():
            if key not in self.__SEED_PARAMS__:
                self.__setattr__(key, value)

    def _init_id(self, sid: Optional[str]):
        """Set ``sid``; when not given, use the MD5 of the current JSON payload."""
        if not sid:
            sid = hashlib.md5(self.to_string.encode()).hexdigest()
        self.__setattr__("sid", sid)

    @property
    def to_dict(self) -> Dict[str, Any]:
        """Payload attributes as a dict (``params`` excluded)."""
        seed = self.__dict__.copy()
        seed.pop("params", None)
        return seed

    @property
    def to_string(self) -> str:
        """Compact JSON string of ``to_dict``."""
        return json.dumps(
            self.to_dict,
            ensure_ascii=False,
            separators=(",", ":")
        )

    @property
    def get_all(self) -> str:
        """Compact JSON string of all attributes, including ``params``."""
        payload = self.__dict__.copy()
        params = payload.get("params")
        # ``SeedParams`` is a plain object and not JSON-serialisable; dump its
        # attributes as a nested object instead.
        if isinstance(params, SeedParams):
            payload["params"] = dict(vars(params))
        return json.dumps(
            payload,
            ensure_ascii=False,
            separators=(",", ":")
        )
|
@@ -57,7 +57,8 @@ class ApiDB:
|
|
57
57
|
return self._get_response(api="/zcard", params=dict(name=name))
|
58
58
|
|
59
59
|
def zadd(self, name, item: dict, **kwargs):
    """
    Post ``item`` to the ``/zadd`` endpoint as the mapping for ``name``.

    Returns None without sending anything when ``item`` is empty; otherwise
    returns whatever ``_post_response`` returns.
    """
    if not item:
        return None
    payload = dict(name=name, mapping=item, **kwargs)
    return self._post_response(api="/zadd", data=payload)
|
61
62
|
|
62
63
|
def zrem(self, name, *values):
    """Post ``values`` to the ``/zrem`` endpoint for ``name`` and return the response."""
    body = dict(name=name, values=values)
    return self._post_response(api="/zrem", data=body)
|
@@ -120,7 +120,8 @@ class RedisDB:
|
|
120
120
|
def zadd(self, name, item: dict, **kwargs):
    """
    Run ``zadd`` for ``name`` with ``item`` through ``execute_command``.

    Returns None without issuing a command when ``item`` is empty; otherwise
    returns whatever ``execute_command`` returns.
    """
    # An earlier variant called ``client.zadd`` on a connection obtained from
    # ``self.get_connection()``; the command now goes through execute_command.
    if not item:
        return None
    return self.execute_command("zadd", name, item, **kwargs)
|
124
125
|
|
125
126
|
def zrem(self, name, *value):
|
126
127
|
# with self.get_connection() as client:
|