cobweb-launcher 1.2.25__py3-none-any.whl → 3.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/__init__.py +4 -1
- cobweb/base/__init__.py +3 -3
- cobweb/base/common_queue.py +37 -16
- cobweb/base/item.py +35 -16
- cobweb/base/{log.py → logger.py} +3 -3
- cobweb/base/request.py +741 -54
- cobweb/base/response.py +380 -13
- cobweb/base/seed.py +96 -48
- cobweb/base/task_queue.py +180 -0
- cobweb/base/test.py +257 -0
- cobweb/constant.py +10 -1
- cobweb/crawlers/crawler.py +12 -155
- cobweb/db/api_db.py +3 -2
- cobweb/db/redis_db.py +117 -28
- cobweb/launchers/__init__.py +4 -3
- cobweb/launchers/distributor.py +141 -0
- cobweb/launchers/launcher.py +95 -157
- cobweb/launchers/uploader.py +68 -0
- cobweb/log_dots/__init__.py +2 -0
- cobweb/log_dots/dot.py +258 -0
- cobweb/log_dots/loghub_dot.py +53 -0
- cobweb/pipelines/__init__.py +1 -1
- cobweb/pipelines/pipeline.py +5 -55
- cobweb/pipelines/pipeline_csv.py +25 -0
- cobweb/pipelines/pipeline_loghub.py +32 -12
- cobweb/schedulers/__init__.py +1 -0
- cobweb/schedulers/scheduler.py +66 -0
- cobweb/schedulers/scheduler_with_redis.py +189 -0
- cobweb/setting.py +27 -40
- cobweb/utils/__init__.py +5 -3
- cobweb/utils/bloom.py +58 -58
- cobweb/{base → utils}/decorators.py +14 -12
- cobweb/utils/dotting.py +300 -0
- cobweb/utils/oss.py +113 -94
- cobweb/utils/tools.py +3 -15
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/METADATA +31 -43
- cobweb_launcher-3.2.20.dist-info/RECORD +44 -0
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/WHEEL +1 -1
- cobweb/crawlers/base_crawler.py +0 -144
- cobweb/crawlers/file_crawler.py +0 -98
- cobweb/launchers/launcher_air.py +0 -88
- cobweb/launchers/launcher_api.py +0 -221
- cobweb/launchers/launcher_pro.py +0 -222
- cobweb/pipelines/base_pipeline.py +0 -54
- cobweb/pipelines/loghub_pipeline.py +0 -34
- cobweb/pipelines/pipeline_console.py +0 -22
- cobweb_launcher-1.2.25.dist-info/RECORD +0 -40
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/top_level.txt +0 -0
cobweb/base/response.py
CHANGED
|
@@ -1,23 +1,390 @@
|
|
|
1
|
+
from typing import Any, Dict, Union
|
|
1
2
|
|
|
2
3
|
|
|
3
4
|
class Response:
    """
    Pair a seed with the response fetched for it.

    Besides the two fixed fields (``seed`` and ``response``) an instance
    carries arbitrary extra attributes that can be read and written either
    as attributes (``resp.foo``) or as mapping items (``resp["foo"]``).
    Names that are not extra attributes are looked up on the seed.

    Implementation notes:
        1. ``__slots__`` holds the fixed state; extra attributes live in the
           ``_extra_attrs`` dict.
        2. ``to_dict`` caches its result until the object is modified through
           this class or ``invalidate_cache()`` is called.
        3. ``__weakref__`` is declared so instances can be weakly referenced.
        4. No locking is performed; synchronise externally if an instance is
           shared between threads.
    """

    __slots__ = ('_seed', '_response', '_extra_attrs', '_dict_cache', '__weakref__')

    # Public names that are exposed through read properties, mapped to the
    # slot that actually stores them.
    _FIELD_SLOTS = {'seed': '_seed', 'response': '_response'}

    def __init__(
        self,
        seed: Any,
        response: Any,
        **kwargs: Any
    ) -> None:
        """
        Initialise the Response object.

        Args:
            seed: Seed object; also used as fallback for attribute lookups.
            response: The raw response object.
            **kwargs: Extra attributes stored on the instance.
        """
        # object.__setattr__ bypasses the custom __setattr__ below, which
        # needs ``_extra_attrs`` to exist already.
        object.__setattr__(self, '_seed', seed)
        object.__setattr__(self, '_response', response)
        object.__setattr__(self, '_extra_attrs', dict(kwargs))
        object.__setattr__(self, '_dict_cache', None)

    @property
    def seed(self) -> Any:
        """Return the seed object."""
        return self._seed

    @property
    def response(self) -> Any:
        """Return the response object."""
        return self._response

    @property
    def to_dict(self) -> Dict[str, Any]:
        """
        Return the extra attributes merged with the seed's fields.

        The merged dict is cached; changes made directly on the seed after
        the first call are not reflected until ``invalidate_cache()`` is
        called or this object is modified.

        Returns:
            A fresh copy of the merged dictionary.
        """
        if self._dict_cache is None:
            merged = self._extra_attrs.copy()
            seed = self._seed

            # NOTE: seed values overwrite extra attributes with the same key
            # here, whereas attribute access prefers the extra attribute.
            if hasattr(seed, 'to_dict'):
                seed_view = seed.to_dict
                if callable(seed_view):
                    try:
                        merged.update(seed_view())
                    except Exception as e:
                        # Best effort: record the failure instead of raising.
                        merged['_seed_to_dict_error'] = str(e)
                else:
                    # ``to_dict`` is a property/attribute holding a mapping.
                    merged.update(seed_view)
            elif hasattr(seed, '__dict__'):
                merged.update(seed.__dict__)
            elif isinstance(seed, dict):
                merged.update(seed)

            object.__setattr__(self, '_dict_cache', merged)

        # Hand out a copy so callers cannot corrupt the cache.
        return self._dict_cache.copy()

    def invalidate_cache(self) -> None:
        """Drop the cached ``to_dict`` result."""
        object.__setattr__(self, '_dict_cache', None)

    def __getattr__(self, name: str) -> Any:
        """
        Resolve attributes that normal lookup did not find.

        Lookup order:
            1. extra attributes
            2. the seed object

        Args:
            name: Attribute name.

        Returns:
            The attribute value.

        Raises:
            AttributeError: If the attribute cannot be resolved.
        """
        # Private names are never delegated; this also prevents infinite
        # recursion while the slots are not populated yet.
        if name.startswith('_'):
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

        if name in self._extra_attrs:
            return self._extra_attrs[name]

        return self._get_from_seed(name)

    def _get_from_seed(self, name: str) -> Any:
        """
        Fetch ``name`` from the seed, trying item access then attribute access.

        Args:
            name: Attribute name.

        Returns:
            The value found on the seed.

        Raises:
            AttributeError: If the seed is None, lacks the name, or raises an
                unexpected error while being accessed.
        """
        seed = self._seed
        if seed is None:
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}' (seed is None)")

        try:
            # 1) mapping-style access (covers plain dict seeds as well)
            if hasattr(seed, '__getitem__'):
                try:
                    return seed[name]
                except (KeyError, TypeError):
                    pass

            # 2) attribute access
            if hasattr(seed, name):
                return getattr(seed, name)

        except Exception as e:
            raise AttributeError(
                f"Error accessing '{name}' from seed: {e}"
            ) from e

        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )

    def __getitem__(self, key: str) -> Any:
        """
        Mapping-style read access.

        Args:
            key: Attribute name.

        Returns:
            The attribute value.

        Raises:
            KeyError: If the attribute cannot be resolved.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key) from None

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Set an attribute.

        Private names and slot names are stored on the object itself,
        ``seed`` / ``response`` replace the wrapped objects, and every other
        name is stored as an extra attribute.

        Args:
            name: Attribute name.
            value: Attribute value.
        """
        if name.startswith('_') or name in self.__slots__:
            object.__setattr__(self, name, value)
            return

        slot = self._FIELD_SLOTS.get(name)
        if slot is not None:
            # ``seed`` / ``response`` are read through properties; storing
            # them as extra attributes would make the assignment invisible
            # to readers, so write the backing slot instead.
            object.__setattr__(self, slot, value)
        else:
            self._extra_attrs[name] = value
        self.invalidate_cache()

    def __setitem__(self, key: str, value: Any) -> None:
        """Mapping-style write access."""
        setattr(self, key, value)

    def __delattr__(self, name: str) -> None:
        """
        Delete an attribute.

        Args:
            name: Attribute name.

        Raises:
            AttributeError: If ``name`` is not a private/slot name and is not
                an extra attribute.
        """
        if name.startswith('_') or name in self.__slots__:
            object.__delattr__(self, name)
        elif name in self._extra_attrs:
            del self._extra_attrs[name]
            self.invalidate_cache()
        else:
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

    def __delitem__(self, key: str) -> None:
        """Mapping-style delete."""
        delattr(self, key)

    def __contains__(self, key: str) -> bool:
        """
        Report whether ``key`` resolves to an attribute.

        Args:
            key: Attribute name.

        Returns:
            True if ``getattr(self, key)`` succeeds, False otherwise.
        """
        try:
            getattr(self, key)
        except AttributeError:
            return False
        return True

    def __iter__(self):
        """Iterate over the keys of ``to_dict``."""
        return iter(self.to_dict.keys())

    def keys(self):
        """Return the keys of ``to_dict``."""
        return self.to_dict.keys()

    def values(self):
        """Return the values of ``to_dict``."""
        return self.to_dict.values()

    def items(self):
        """Return the key/value pairs of ``to_dict``."""
        return self.to_dict.items()

    def get(self, key: str, default: Any = None) -> Any:
        """
        Return the attribute ``key`` or ``default`` if it cannot be resolved.

        Args:
            key: Attribute name.
            default: Value returned when the lookup fails.

        Returns:
            The attribute value or ``default``.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            return default

    def update(self, other: Union[Dict[str, Any], 'Response', None] = None, **kwargs: Any) -> None:
        """
        Merge values into the extra attributes.

        Args:
            other: A dict, another Response (only its extra attributes are
                copied) or any object exposing ``items()``. May be omitted
                when only keyword arguments are given.
            **kwargs: Additional attributes; applied after ``other``.
        """
        if isinstance(other, dict):
            self._extra_attrs.update(other)
        elif isinstance(other, Response):
            self._extra_attrs.update(other._extra_attrs)
        elif hasattr(other, 'items'):
            self._extra_attrs.update(dict(other.items()))

        self._extra_attrs.update(kwargs)
        self.invalidate_cache()

    def copy(self) -> 'Response':
        """
        Create a shallow copy.

        Returns:
            A new Response sharing the same seed and response objects and
            holding a copy of the extra-attribute dict.
        """
        return Response(self._seed, self._response, **self._extra_attrs)

    def __repr__(self) -> str:
        """Debug representation showing at most three extra attributes."""
        parts = [f"seed={self._seed!r}", f"response={self._response!r}"]
        shown = list(self._extra_attrs.items())[:3]
        parts.extend(f"{k}={v!r}" for k, v in shown)
        hidden = len(self._extra_attrs) - len(shown)
        if hidden > 0:
            parts.append(f"... (+{hidden} more)")
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __str__(self) -> str:
        """Short human-readable summary."""
        return f"{self.__class__.__name__} with {len(self._extra_attrs)} extra attributes"

    def __eq__(self, other: Any) -> bool:
        """Compare seed, response and extra attributes by value."""
        if not isinstance(other, Response):
            return NotImplemented

        return (
            self._seed == other._seed and
            self._response == other._response and
            self._extra_attrs == other._extra_attrs
        )

    def __hash__(self) -> int:
        """
        Return a hash that is consistent with ``__eq__``.

        Equal objects have equal extra-attribute dicts and therefore the same
        key set, so hashing the key set never gives two equal objects
        different hashes. Seed, response and attribute values are left out
        because they may be unhashable. The hash changes when attributes are
        added or removed, so do not mutate instances used as dict keys or
        set members.
        """
        return hash(frozenset(self._extra_attrs))
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# 扩展版本:支持更多高级特性
|
|
317
|
+
class AdvancedResponse(Response):
    """
    Response variant that can be frozen and can notify observers.

    While frozen, public attributes cannot be set, deleted or bulk-updated.
    Observers are called as ``callback(attr_name, old_value, new_value)``
    after every attribute assignment made through ``__setattr__``.
    """

    __slots__ = ('_observers', '_frozen')

    def __init__(self, seed: Any, response: Any, frozen: bool = False, **kwargs: Any) -> None:
        """
        Initialise the advanced response object.

        Args:
            seed: Seed object.
            response: Response object.
            frozen: Whether the object starts frozen (read-only).
            **kwargs: Extra attributes.
        """
        super().__init__(seed, response, **kwargs)
        # Bypass the custom __setattr__, which itself reads these slots.
        object.__setattr__(self, '_observers', [])
        object.__setattr__(self, '_frozen', frozen)

    def freeze(self) -> None:
        """Freeze the object so public attributes can no longer be modified."""
        object.__setattr__(self, '_frozen', True)

    def unfreeze(self) -> None:
        """Unfreeze the object so it can be modified again."""
        object.__setattr__(self, '_frozen', False)

    @property
    def is_frozen(self) -> bool:
        """Return True while the object is frozen."""
        return self._frozen

    def _ensure_mutable(self) -> None:
        """Raise AttributeError if the object is currently frozen."""
        if self._frozen:
            raise AttributeError(f"Cannot modify frozen {self.__class__.__name__} object")

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Set an attribute, honouring the frozen flag and notifying observers.

        Raises:
            AttributeError: If the object is frozen and ``name`` is public.
        """
        if not name.startswith('_'):
            self._ensure_mutable()

        old_value = getattr(self, name, None)
        super().__setattr__(name, value)

        self._notify_observers(name, old_value, value)

    def __delattr__(self, name: str) -> None:
        """
        Delete an attribute, honouring the frozen flag.

        Raises:
            AttributeError: If the object is frozen and ``name`` is public,
                or if the attribute does not exist.
        """
        if not name.startswith('_'):
            self._ensure_mutable()
        super().__delattr__(name)

    def update(self, *args: Any, **kwargs: Any) -> None:
        """
        Bulk-update extra attributes, honouring the frozen flag.

        Arguments are forwarded unchanged to ``Response.update``.

        Raises:
            AttributeError: If the object is frozen.
        """
        # Without this guard a frozen object could still be modified,
        # because the base implementation writes to the dict directly.
        self._ensure_mutable()
        super().update(*args, **kwargs)

    def add_observer(self, callback: callable) -> None:
        """
        Register an observer (ignored if already registered).

        Args:
            callback: Called as ``callback(attr_name, old_value, new_value)``.
        """
        if callback not in self._observers:
            self._observers.append(callback)

    def remove_observer(self, callback: callable) -> None:
        """
        Unregister an observer (ignored if not registered).

        Args:
            callback: The callback to remove.
        """
        if callback in self._observers:
            self._observers.remove(callback)

    def _notify_observers(self, attr_name: str, old_value: Any, new_value: Any) -> None:
        """Call every observer; a failing observer does not stop the others."""
        # Iterate over a snapshot so observers may add/remove observers
        # (including themselves) without disturbing this loop.
        for observer in tuple(self._observers):
            try:
                observer(attr_name, old_value, new_value)
            except Exception as e:
                # Best effort: report and continue with the next observer.
                print(f"Observer error: {e}")
|
|
23
390
|
|
cobweb/base/seed.py
CHANGED
|
@@ -1,103 +1,151 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import time
|
|
3
3
|
import hashlib
|
|
4
|
+
from typing import Any, Dict, Optional, Union
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class SeedParams:
    """
    Container for a seed's meta information (retry count, priority, version,
    status and proxy settings).

    Unknown public attributes read as ``None`` instead of raising.
    """

    def __init__(
        self,
        retry: Optional[int] = None,
        priority: Optional[int] = None,
        seed_version: Optional[int] = None,
        seed_status: Optional[str] = None,
        proxy_type: Optional[str] = None,
        proxy: Optional[str] = None,
    ):
        """
        :param retry: retry count; falsy values become 0.
        :param priority: priority; falsy values (including 0) become 300.
        :param seed_version: seed version; falsy values become the current
            Unix timestamp in seconds.
        :param seed_status: seed status, stored as given.
        :param proxy_type: proxy type, stored as given.
        :param proxy: proxy address, stored as given.
        """
        self.retry = retry or 0
        self.priority = priority or 300
        self.seed_version = seed_version or int(time.time())
        self.seed_status = seed_status
        self.proxy_type = proxy_type
        self.proxy = proxy

    def __getattr__(self, name: str) -> Any:
        """
        Return ``None`` for undefined public attributes.

        Dunder names still raise AttributeError: protocols such as ``copy``
        and ``pickle`` probe for optional hooks (e.g. ``__setstate__``) and
        would otherwise try to call the ``None`` returned here.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )
        return None
|
|
13
31
|
|
|
14
32
|
|
|
15
33
|
class Seed:
|
|
34
|
+
"""
|
|
35
|
+
种子类,用于表示一个种子对象,包含种子的基本属性和方法。
|
|
36
|
+
"""
|
|
16
37
|
|
|
17
38
|
__SEED_PARAMS__ = [
|
|
18
39
|
"retry",
|
|
19
40
|
"priority",
|
|
20
41
|
"seed_version",
|
|
21
|
-
"seed_status"
|
|
42
|
+
"seed_status",
|
|
43
|
+
"proxy_type",
|
|
44
|
+
"proxy",
|
|
22
45
|
]
|
|
23
46
|
|
|
24
47
|
def __init__(
    self,
    seed: Optional[Union[str, bytes, Dict[str, Any]]] = None,
    sid: Optional[str] = None,
    retry: Optional[int] = None,
    priority: Optional[int] = None,
    seed_version: Optional[int] = None,
    seed_status: Optional[str] = None,
    proxy_type: Optional[str] = None,
    proxy: Optional[str] = None,
    **kwargs,
):
    """
    Initialise the seed object.

    :param seed: seed data; a dict, or a str/bytes JSON document that
        decodes to an object.
    :param sid: unique identifier; derived from the seed content when
        omitted and the seed data does not supply one.
    :param retry: retry count.
    :param priority: priority.
    :param seed_version: seed version.
    :param seed_status: seed status.
    :param proxy_type: proxy type.
    :param proxy: proxy address.
    :param kwargs: additional seed fields, stored as attributes.
    :raises ValueError: if ``seed`` is a string/bytes value that is not
        valid JSON.
    :raises TypeError: if ``seed`` is neither str, bytes nor dict, or if
        the JSON document does not decode to a dict.
    """
    # Load the seed payload (empty/None seeds are simply skipped).
    if seed:
        if isinstance(seed, (str, bytes)):
            try:
                item = json.loads(seed)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON format for seed: {seed}") from e
            # Valid JSON may still be a list/number/string, which cannot
            # be mapped onto attributes; reject it with a clear error.
            if not isinstance(item, dict):
                raise TypeError(f"Seed type error, JSON seed must decode to a dict! Seed: {seed}")
            self._init_seed(item)
        elif isinstance(seed, dict):
            self._init_seed(seed)
        else:
            raise TypeError(f"Seed type error, must be str, bytes, or dict! Seed: {seed}")

    # Collect the parameter values handed to SeedParams.
    seed_params = {
        "retry": retry,
        "priority": priority,
        "seed_version": seed_version,
        "seed_status": seed_status,
        "proxy_type": proxy_type,
        "proxy": proxy,
    }

    # Merge the extra keyword arguments.
    if kwargs:
        self._init_seed(kwargs)
        # Names declared in this signature never reach ``kwargs``, so this
        # only matters if __SEED_PARAMS__ lists names beyond the signature.
        seed_params.update({k: v for k, v in kwargs.items() if k in self.__SEED_PARAMS__})

    # Assign the identifier: an explicit sid always wins, otherwise one is
    # generated unless the seed data already supplied a ``sid``.
    if sid or not getattr(self, "sid", None):
        self._init_id(sid)

    # Attach the parameter object last so it is not part of the hashed content.
    self.params = SeedParams(**seed_params)
|
|
65
105
|
|
|
66
|
-
def __getattr__(self, name):
|
|
106
|
+
def __getattr__(self, name: str) -> Any:
|
|
107
|
+
"""动态获取未定义的属性,返回 None"""
|
|
67
108
|
return None
|
|
68
109
|
|
|
69
|
-
def __setitem__(self, key, value):
|
|
110
|
+
def __setitem__(self, key: str, value: Any):
|
|
111
|
+
"""支持字典式设置属性"""
|
|
70
112
|
setattr(self, key, value)
|
|
71
113
|
|
|
72
|
-
def __getitem__(self,
|
|
73
|
-
|
|
114
|
+
def __getitem__(self, key: str) -> Any:
|
|
115
|
+
"""支持字典式获取属性"""
|
|
116
|
+
return getattr(self, key, None)
|
|
74
117
|
|
|
75
|
-
def __str__(self):
|
|
76
|
-
|
|
118
|
+
def __str__(self) -> str:
|
|
119
|
+
"""返回种子的 JSON 字符串表示"""
|
|
120
|
+
return self.to_string
|
|
77
121
|
|
|
78
|
-
def __repr__(self):
|
|
79
|
-
|
|
80
|
-
|
|
122
|
+
def __repr__(self) -> str:
|
|
123
|
+
"""返回种子的调试字符串表示"""
|
|
124
|
+
attrs = [f"{k}={v}" for k, v in self.__dict__.items()]
|
|
125
|
+
return f"{self.__class__.__name__}({', '.join(attrs)})"
|
|
81
126
|
|
|
82
|
-
def _init_seed(self, seed_info:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
127
|
+
def _init_seed(self, seed_info: Dict[str, Any]):
|
|
128
|
+
"""初始化种子数据"""
|
|
129
|
+
for key, value in seed_info.items():
|
|
130
|
+
if key not in self.__SEED_PARAMS__:
|
|
131
|
+
self.__setattr__(key, value)
|
|
86
132
|
|
|
87
|
-
def _init_id(self, sid):
|
|
133
|
+
def _init_id(self, sid: Optional[str]):
|
|
134
|
+
"""初始化种子的唯一标识符"""
|
|
88
135
|
if not sid:
|
|
89
136
|
sid = hashlib.md5(self.to_string.encode()).hexdigest()
|
|
90
137
|
self.__setattr__("sid", sid)
|
|
91
138
|
|
|
92
139
|
@property
|
|
93
|
-
def to_dict(self) ->
|
|
140
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
141
|
+
"""返回种子的字典表示(不包含 params 属性)"""
|
|
94
142
|
seed = self.__dict__.copy()
|
|
95
|
-
|
|
96
|
-
del seed["params"]
|
|
143
|
+
seed.pop("params", None)
|
|
97
144
|
return seed
|
|
98
145
|
|
|
99
146
|
@property
|
|
100
147
|
def to_string(self) -> str:
|
|
148
|
+
"""返回种子的紧凑 JSON 字符串表示"""
|
|
101
149
|
return json.dumps(
|
|
102
150
|
self.to_dict,
|
|
103
151
|
ensure_ascii=False,
|
|
@@ -105,10 +153,10 @@ class Seed:
|
|
|
105
153
|
)
|
|
106
154
|
|
|
107
155
|
@property
|
|
108
|
-
def get_all(self):
|
|
156
|
+
def get_all(self) -> str:
|
|
157
|
+
"""返回种子的所有属性(包括 params)的 JSON 字符串表示"""
|
|
109
158
|
return json.dumps(
|
|
110
159
|
self.__dict__,
|
|
111
160
|
ensure_ascii=False,
|
|
112
161
|
separators=(",", ":")
|
|
113
162
|
)
|
|
114
|
-
|