cobweb-launcher 3.1.13__py3-none-any.whl → 3.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/base/common_queue.py +37 -16
- cobweb/base/item.py +28 -23
- cobweb/base/request.py +36 -30
- cobweb/base/seed.py +91 -51
- cobweb/db/api_db.py +2 -1
- cobweb/db/redis_db.py +2 -1
- cobweb/launchers/launcher.py +27 -24
- cobweb/schedulers/scheduler.py +28 -11
- cobweb/schedulers/scheduler_with_redis.py +86 -82
- cobweb/setting.py +6 -6
- cobweb/utils/decorators.py +1 -2
- {cobweb_launcher-3.1.13.dist-info → cobweb_launcher-3.1.14.dist-info}/METADATA +15 -30
- {cobweb_launcher-3.1.13.dist-info → cobweb_launcher-3.1.14.dist-info}/RECORD +16 -16
- {cobweb_launcher-3.1.13.dist-info → cobweb_launcher-3.1.14.dist-info}/LICENSE +0 -0
- {cobweb_launcher-3.1.13.dist-info → cobweb_launcher-3.1.14.dist-info}/WHEEL +0 -0
- {cobweb_launcher-3.1.13.dist-info → cobweb_launcher-3.1.14.dist-info}/top_level.txt +0 -0
cobweb/base/common_queue.py
CHANGED
@@ -1,30 +1,51 @@
|
|
1
1
|
from collections import deque
|
2
|
+
from typing import Any, Iterable, Union
|
2
3
|
|
3
4
|
|
4
5
|
class Queue:
|
5
|
-
|
6
6
|
def __init__(self):
|
7
|
+
"""初始化队列"""
|
7
8
|
self._queue = deque()
|
8
9
|
|
9
10
|
@property
|
10
11
|
def length(self) -> int:
|
12
|
+
"""返回队列长度"""
|
11
13
|
return len(self._queue)
|
12
14
|
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
15
|
+
def empty(self) -> bool:
|
16
|
+
"""检查队列是否为空"""
|
17
|
+
return not self._queue
|
18
|
+
|
19
|
+
def push(self, data: Union[Any, Iterable], direct_insertion: bool = False):
|
20
|
+
"""
|
21
|
+
向队列中添加数据。
|
22
|
+
如果数据是可迭代对象(如列表、元组或集合),可以选择直接扩展队列。
|
23
|
+
"""
|
24
|
+
if not data:
|
25
|
+
return
|
26
|
+
|
27
|
+
if not direct_insertion and isinstance(data, (list, tuple, set)):
|
28
|
+
self._queue.extend(data)
|
29
|
+
else:
|
30
|
+
self._queue.append(data)
|
31
|
+
|
32
|
+
def pop(self) -> Any:
|
33
|
+
"""
|
34
|
+
从队列左侧弹出一个元素。
|
35
|
+
如果队列为空,返回 None。
|
36
|
+
"""
|
25
37
|
try:
|
26
|
-
return self._queue.popleft()
|
38
|
+
return self._queue.popleft()
|
27
39
|
except IndexError:
|
28
40
|
return None
|
29
|
-
|
30
|
-
|
41
|
+
|
42
|
+
def iter_items(self, limit: int = 1) -> Iterable:
|
43
|
+
"""
|
44
|
+
按指定数量从队列中弹出多个元素并生成它们。
|
45
|
+
如果队列为空或达到限制,则停止生成。
|
46
|
+
"""
|
47
|
+
for _ in range(limit):
|
48
|
+
item = self.pop()
|
49
|
+
if item is None:
|
50
|
+
break
|
51
|
+
yield item
|
cobweb/base/item.py
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
from .seed import Seed
|
2
|
+
from typing import Dict, Any
|
2
3
|
from collections import namedtuple
|
3
4
|
|
4
5
|
|
5
|
-
class
|
6
|
+
class ItemMeta(type):
|
6
7
|
|
7
|
-
def __new__(cls, name, bases, dct):
|
8
|
-
|
8
|
+
def __new__(cls, name: str, bases: tuple, dct: dict) -> type:
|
9
|
+
new_class = super().__new__(cls, name, bases, dct)
|
9
10
|
if name != "BaseItem":
|
10
|
-
table = getattr(
|
11
|
-
fields = getattr(
|
12
|
-
|
13
|
-
|
11
|
+
table = getattr(new_class, "__TABLE__")
|
12
|
+
fields = getattr(new_class, "__FIELDS__")
|
13
|
+
if not table or not fields:
|
14
|
+
raise ValueError(f"Missing required attributes '__TABLE__' or '__FIELDS__' in class {name}")
|
15
|
+
new_class.Data = namedtuple(table, fields)
|
16
|
+
return new_class
|
14
17
|
|
15
18
|
|
16
|
-
class BaseItem(metaclass=
|
19
|
+
class BaseItem(metaclass=ItemMeta):
|
17
20
|
|
18
21
|
__TABLE__ = ""
|
19
22
|
__FIELDS__ = ""
|
@@ -23,34 +26,36 @@ class BaseItem(metaclass=Item):
|
|
23
26
|
|
24
27
|
data = {}
|
25
28
|
for key, value in kwargs.items():
|
26
|
-
if key
|
27
|
-
self.__setattr__(key, value)
|
28
|
-
else:
|
29
|
+
if key in self.__FIELDS__:
|
29
30
|
data[key] = value
|
31
|
+
else:
|
32
|
+
setattr(self, key, value)
|
30
33
|
|
31
|
-
|
34
|
+
try:
|
35
|
+
self.data = self.Data(**data)
|
36
|
+
except TypeError as e:
|
37
|
+
raise ValueError(f"Invalid field values for Data: {e}") from e
|
32
38
|
|
33
39
|
@property
|
34
|
-
def to_dict(self):
|
40
|
+
def to_dict(self) -> Dict[str, Any]:
|
35
41
|
return self.data._asdict()
|
36
42
|
|
37
43
|
@property
|
38
|
-
def
|
39
|
-
return self.
|
44
|
+
def fields(self) -> tuple[str]:
|
45
|
+
return self.data._fields
|
40
46
|
|
41
47
|
@property
|
42
|
-
def
|
43
|
-
return self.
|
44
|
-
|
45
|
-
def __getattr__(self, name):
|
46
|
-
return None
|
48
|
+
def table(self) -> str:
|
49
|
+
return self.Data.__name__
|
47
50
|
|
48
|
-
def __setitem__(self, key, value):
|
51
|
+
def __setitem__(self, key: str, value: Any):
|
49
52
|
setattr(self, key, value)
|
50
53
|
|
51
|
-
def __getitem__(self,
|
52
|
-
return getattr(self,
|
54
|
+
def __getitem__(self, key: str) -> Any:
|
55
|
+
return getattr(self, key, None)
|
53
56
|
|
57
|
+
def __getattr__(self, name: str) -> Any:
|
58
|
+
return None
|
54
59
|
|
55
60
|
|
56
61
|
class CSVItem(BaseItem):
|
cobweb/base/request.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
import random
|
2
2
|
import requests
|
3
|
+
from typing import Any, Dict
|
3
4
|
|
4
5
|
|
5
6
|
class Request:
|
7
|
+
"""
|
8
|
+
请求类,用于封装 HTTP 请求并提供相关功能。
|
9
|
+
"""
|
6
10
|
|
7
11
|
__REQUEST_ATTRS__ = {
|
8
12
|
"params",
|
@@ -22,26 +26,35 @@ class Request:
|
|
22
26
|
}
|
23
27
|
|
24
28
|
def __init__(
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
self,
|
30
|
+
url: str,
|
31
|
+
seed: Any,
|
32
|
+
random_ua: bool = True,
|
33
|
+
check_status_code: bool = True,
|
34
|
+
**kwargs,
|
31
35
|
):
|
36
|
+
"""
|
37
|
+
初始化请求对象。
|
38
|
+
:param url: 请求的 URL。
|
39
|
+
:param seed: 种子对象或标识符。
|
40
|
+
:param random_ua: 是否使用随机 User-Agent,默认为 True。
|
41
|
+
:param check_status_code: 是否检查响应状态码,默认为 True。
|
42
|
+
:param kwargs: 其他扩展参数。
|
43
|
+
"""
|
32
44
|
self.url = url
|
33
45
|
self.seed = seed
|
34
46
|
self.check_status_code = check_status_code
|
35
|
-
self.request_setting = {}
|
47
|
+
self.request_setting: Dict[str, Any] = {}
|
36
48
|
|
37
|
-
for
|
38
|
-
if
|
39
|
-
self.request_setting[
|
40
|
-
|
41
|
-
|
49
|
+
for key, value in kwargs.items():
|
50
|
+
if key in self.__class__.__REQUEST_ATTRS__:
|
51
|
+
self.request_setting[key] = value
|
52
|
+
else:
|
53
|
+
setattr(self, key, value)
|
42
54
|
|
43
|
-
|
44
|
-
|
55
|
+
self.method = getattr(self, "method", None) or (
|
56
|
+
"POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"
|
57
|
+
)
|
45
58
|
|
46
59
|
if random_ua:
|
47
60
|
self._build_header()
|
@@ -54,15 +67,14 @@ class Request:
|
|
54
67
|
v4 = random.randint(533, 605)
|
55
68
|
v5 = random.randint(1000, 6000)
|
56
69
|
v6 = random.randint(10, 80)
|
57
|
-
|
58
|
-
|
59
|
-
|
70
|
+
return (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) "
|
71
|
+
f"AppleWebKit/{v4}.{v3} (KHTML, like Gecko) "
|
72
|
+
f"Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
|
60
73
|
|
61
74
|
def _build_header(self):
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
self.request_setting["headers"]["user-agent"] = self._random_ua
|
75
|
+
headers = self.request_setting.setdefault("headers", {})
|
76
|
+
if not headers.get("user-agent"):
|
77
|
+
headers["user-agent"] = self._random_ua
|
66
78
|
|
67
79
|
def download(self) -> requests.Response:
|
68
80
|
response = requests.request(self.method, self.url, **self.request_setting)
|
@@ -71,12 +83,6 @@ class Request:
|
|
71
83
|
return response
|
72
84
|
|
73
85
|
@property
|
74
|
-
def to_dict(self):
|
75
|
-
|
76
|
-
|
77
|
-
_dict.pop('seed')
|
78
|
-
_dict.pop('check_status_code')
|
79
|
-
_dict.pop('request_setting')
|
80
|
-
return _dict
|
81
|
-
|
82
|
-
|
86
|
+
def to_dict(self) -> Dict[str, Any]:
|
87
|
+
excluded_keys = {"url", "seed", "check_status_code", "request_setting"}
|
88
|
+
return {k: v for k, v in self.__dict__.items() if k not in excluded_keys}
|
cobweb/base/seed.py
CHANGED
@@ -1,11 +1,23 @@
|
|
1
1
|
import json
|
2
2
|
import time
|
3
3
|
import hashlib
|
4
|
+
from typing import Any, Dict, Optional, Union
|
4
5
|
|
5
6
|
|
6
7
|
class SeedParams:
|
8
|
+
"""
|
9
|
+
定义种子参数类,用于存储种子的元信息。
|
10
|
+
"""
|
7
11
|
|
8
|
-
def __init__(
|
12
|
+
def __init__(
|
13
|
+
self,
|
14
|
+
retry: Optional[int] = None,
|
15
|
+
priority: Optional[int] = None,
|
16
|
+
seed_version: Optional[int] = None,
|
17
|
+
seed_status: Optional[str] = None,
|
18
|
+
proxy_type: Optional[str] = None,
|
19
|
+
proxy: Optional[str] = None,
|
20
|
+
):
|
9
21
|
self.retry = retry or 0
|
10
22
|
self.priority = priority or 300
|
11
23
|
self.seed_version = seed_version or int(time.time())
|
@@ -13,8 +25,15 @@ class SeedParams:
|
|
13
25
|
self.proxy_type = proxy_type
|
14
26
|
self.proxy = proxy
|
15
27
|
|
28
|
+
def __getattr__(self, name: str) -> Any:
|
29
|
+
"""动态获取未定义的属性,返回 None"""
|
30
|
+
return None
|
31
|
+
|
16
32
|
|
17
33
|
class Seed:
|
34
|
+
"""
|
35
|
+
种子类,用于表示一个种子对象,包含种子的基本属性和方法。
|
36
|
+
"""
|
18
37
|
|
19
38
|
__SEED_PARAMS__ = [
|
20
39
|
"retry",
|
@@ -22,90 +41,111 @@ class Seed:
|
|
22
41
|
"seed_version",
|
23
42
|
"seed_status",
|
24
43
|
"proxy_type",
|
25
|
-
"proxy"
|
44
|
+
"proxy",
|
26
45
|
]
|
27
46
|
|
28
47
|
def __init__(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
48
|
+
self,
|
49
|
+
seed: Union[str, bytes, Dict[str, Any]] = None,
|
50
|
+
sid: Optional[str] = None,
|
51
|
+
retry: Optional[int] = None,
|
52
|
+
priority: Optional[int] = None,
|
53
|
+
seed_version: Optional[int] = None,
|
54
|
+
seed_status: Optional[str] = None,
|
55
|
+
proxy_type: Optional[str] = None,
|
56
|
+
proxy: Optional[str] = None,
|
57
|
+
**kwargs,
|
39
58
|
):
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
59
|
+
"""
|
60
|
+
初始化种子对象。
|
61
|
+
:param seed: 种子数据,可以是字符串、字节或字典。
|
62
|
+
:param sid: 种子的唯一标识符。
|
63
|
+
:param retry: 重试次数。
|
64
|
+
:param priority: 优先级。
|
65
|
+
:param seed_version: 种子版本。
|
66
|
+
:param seed_status: 种子状态。
|
67
|
+
:param proxy_type: 代理类型。
|
68
|
+
:param proxy: 代理地址。
|
69
|
+
:param kwargs: 其他扩展参数。
|
70
|
+
"""
|
71
|
+
# 初始化种子数据
|
72
|
+
if seed:
|
73
|
+
if isinstance(seed, (str, bytes)):
|
74
|
+
try:
|
75
|
+
item = json.loads(seed)
|
76
|
+
self._init_seed(item)
|
77
|
+
except json.JSONDecodeError as e:
|
78
|
+
raise ValueError(f"Invalid JSON format for seed: {seed}") from e
|
79
|
+
elif isinstance(seed, dict):
|
80
|
+
self._init_seed(seed)
|
81
|
+
else:
|
82
|
+
raise TypeError(f"Seed type error, must be str, bytes, or dict! Seed: {seed}")
|
83
|
+
|
84
|
+
# 初始化种子参数
|
55
85
|
seed_params = {
|
56
86
|
"retry": retry,
|
57
87
|
"priority": priority,
|
58
88
|
"seed_version": seed_version,
|
59
89
|
"seed_status": seed_status,
|
60
90
|
"proxy_type": proxy_type,
|
61
|
-
"proxy": proxy
|
91
|
+
"proxy": proxy,
|
62
92
|
}
|
63
93
|
|
94
|
+
# 合并扩展参数
|
64
95
|
if kwargs:
|
65
96
|
self._init_seed(kwargs)
|
66
|
-
seed_params.update({
|
67
|
-
|
68
|
-
|
69
|
-
})
|
97
|
+
seed_params.update({k: v for k, v in kwargs.items() if k in self.__SEED_PARAMS__})
|
98
|
+
|
99
|
+
# 初始化唯一标识符
|
70
100
|
if sid or not getattr(self, "sid", None):
|
71
101
|
self._init_id(sid)
|
102
|
+
|
103
|
+
# 设置参数对象
|
72
104
|
self.params = SeedParams(**seed_params)
|
73
105
|
|
74
|
-
def __getattr__(self, name):
|
106
|
+
def __getattr__(self, name: str) -> Any:
|
107
|
+
"""动态获取未定义的属性,返回 None"""
|
75
108
|
return None
|
76
109
|
|
77
|
-
def __setitem__(self, key, value):
|
110
|
+
def __setitem__(self, key: str, value: Any):
|
111
|
+
"""支持字典式设置属性"""
|
78
112
|
setattr(self, key, value)
|
79
113
|
|
80
|
-
def __getitem__(self,
|
81
|
-
|
114
|
+
def __getitem__(self, key: str) -> Any:
|
115
|
+
"""支持字典式获取属性"""
|
116
|
+
return getattr(self, key, None)
|
82
117
|
|
83
|
-
def __str__(self):
|
84
|
-
|
118
|
+
def __str__(self) -> str:
|
119
|
+
"""返回种子的 JSON 字符串表示"""
|
120
|
+
return self.to_string
|
85
121
|
|
86
|
-
def __repr__(self):
|
87
|
-
|
88
|
-
|
122
|
+
def __repr__(self) -> str:
|
123
|
+
"""返回种子的调试字符串表示"""
|
124
|
+
attrs = [f"{k}={v}" for k, v in self.__dict__.items()]
|
125
|
+
return f"{self.__class__.__name__}({', '.join(attrs)})"
|
89
126
|
|
90
|
-
def _init_seed(self, seed_info:
|
91
|
-
|
92
|
-
|
93
|
-
|
127
|
+
def _init_seed(self, seed_info: Dict[str, Any]):
|
128
|
+
"""初始化种子数据"""
|
129
|
+
for key, value in seed_info.items():
|
130
|
+
if key not in self.__SEED_PARAMS__:
|
131
|
+
self.__setattr__(key, value)
|
94
132
|
|
95
|
-
def _init_id(self, sid):
|
133
|
+
def _init_id(self, sid: Optional[str]):
|
134
|
+
"""初始化种子的唯一标识符"""
|
96
135
|
if not sid:
|
97
136
|
sid = hashlib.md5(self.to_string.encode()).hexdigest()
|
98
137
|
self.__setattr__("sid", sid)
|
99
138
|
|
100
139
|
@property
|
101
|
-
def to_dict(self) ->
|
140
|
+
def to_dict(self) -> Dict[str, Any]:
|
141
|
+
"""返回种子的字典表示(不包含 params 属性)"""
|
102
142
|
seed = self.__dict__.copy()
|
103
|
-
|
104
|
-
del seed["params"]
|
143
|
+
seed.pop("params", None)
|
105
144
|
return seed
|
106
145
|
|
107
146
|
@property
|
108
147
|
def to_string(self) -> str:
|
148
|
+
"""返回种子的紧凑 JSON 字符串表示"""
|
109
149
|
return json.dumps(
|
110
150
|
self.to_dict,
|
111
151
|
ensure_ascii=False,
|
@@ -113,10 +153,10 @@ class Seed:
|
|
113
153
|
)
|
114
154
|
|
115
155
|
@property
|
116
|
-
def get_all(self):
|
156
|
+
def get_all(self) -> str:
|
157
|
+
"""返回种子的所有属性(包括 params)的 JSON 字符串表示"""
|
117
158
|
return json.dumps(
|
118
159
|
self.__dict__,
|
119
160
|
ensure_ascii=False,
|
120
161
|
separators=(",", ":")
|
121
162
|
)
|
122
|
-
|
cobweb/db/api_db.py
CHANGED
@@ -57,7 +57,8 @@ class ApiDB:
|
|
57
57
|
return self._get_response(api="/zcard", params=dict(name=name))
|
58
58
|
|
59
59
|
def zadd(self, name, item: dict, **kwargs):
|
60
|
-
|
60
|
+
if item:
|
61
|
+
return self._post_response(api="/zadd", data=dict(name=name, mapping=item, **kwargs))
|
61
62
|
|
62
63
|
def zrem(self, name, *values):
|
63
64
|
return self._post_response(api="/zrem", data=dict(name=name, values=values))
|
cobweb/db/redis_db.py
CHANGED
@@ -120,7 +120,8 @@ class RedisDB:
|
|
120
120
|
def zadd(self, name, item: dict, **kwargs):
|
121
121
|
# with self.get_connection() as client:
|
122
122
|
# return client.zadd(name, item, **kwargs)
|
123
|
-
|
123
|
+
if item:
|
124
|
+
return self.execute_command("zadd", name, item, **kwargs)
|
124
125
|
|
125
126
|
def zrem(self, name, *value):
|
126
127
|
# with self.get_connection() as client:
|
cobweb/launchers/launcher.py
CHANGED
@@ -9,6 +9,7 @@ from cobweb.base import Seed, Queue, logger
|
|
9
9
|
from cobweb.utils.tools import dynamic_load_class
|
10
10
|
from cobweb.launchers.distributor import Distributor
|
11
11
|
from cobweb.launchers.uploader import Uploader
|
12
|
+
from typing import Optional, Union, Dict, Any, Callable
|
12
13
|
|
13
14
|
|
14
15
|
class Launcher:
|
@@ -18,8 +19,8 @@ class Launcher:
|
|
18
19
|
_DONE_QUEUE_ = Queue()
|
19
20
|
_UPLOAD_QUEUE_ = Queue()
|
20
21
|
|
21
|
-
|
22
|
-
|
22
|
+
__REGISTER_FUNC__: Dict[str, Callable] = {}
|
23
|
+
__WORKER_THREAD__: Dict[str, threading.Thread] = {}
|
23
24
|
|
24
25
|
def __init__(self, task, project, custom_setting=None, **kwargs):
|
25
26
|
super().__init__()
|
@@ -31,24 +32,10 @@ class Launcher:
|
|
31
32
|
self._stop = threading.Event() # 结束事件
|
32
33
|
self._pause = threading.Event() # 暂停事件
|
33
34
|
|
34
|
-
_setting =
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
_setting = custom_setting
|
39
|
-
else:
|
40
|
-
if isinstance(custom_setting, str):
|
41
|
-
custom_setting = importlib.import_module(custom_setting)
|
42
|
-
if not inspect.ismodule(custom_setting):
|
43
|
-
raise Exception
|
44
|
-
for k, v in custom_setting.__dict__.items():
|
45
|
-
if not k.startswith("__") and not inspect.ismodule(v):
|
46
|
-
_setting[k] = v
|
47
|
-
|
48
|
-
_setting.update(**kwargs)
|
49
|
-
|
50
|
-
for k, v in _setting.items():
|
51
|
-
setattr(setting, k.upper(), v)
|
35
|
+
_setting = self._load_custom_settings(custom_setting)
|
36
|
+
_setting.update(kwargs)
|
37
|
+
for key, value in _setting.items():
|
38
|
+
setattr(setting, key.upper(), value)
|
52
39
|
|
53
40
|
self._done_model = setting.DONE_MODEL
|
54
41
|
self._task_model = setting.TASK_MODEL
|
@@ -57,6 +44,23 @@ class Launcher:
|
|
57
44
|
self.SpiderCrawler = dynamic_load_class(setting.CRAWLER)
|
58
45
|
self.SpiderPipeline = dynamic_load_class(setting.PIPELINE)
|
59
46
|
|
47
|
+
@staticmethod
|
48
|
+
def _load_custom_settings(custom_setting: Optional[Union[str, Dict]]) -> Dict[str, Any]:
|
49
|
+
_setting = {}
|
50
|
+
if custom_setting:
|
51
|
+
if isinstance(custom_setting, dict):
|
52
|
+
_setting = custom_setting
|
53
|
+
elif isinstance(custom_setting, str):
|
54
|
+
module = importlib.import_module(custom_setting)
|
55
|
+
_setting = {
|
56
|
+
k: v
|
57
|
+
for k, v in module.__dict__.items()
|
58
|
+
if not k.startswith("__") and not inspect.ismodule(v)
|
59
|
+
}
|
60
|
+
else:
|
61
|
+
raise ValueError("custom_setting must be a dictionary or a module path.")
|
62
|
+
return _setting
|
63
|
+
|
60
64
|
@property
|
61
65
|
def request(self):
|
62
66
|
"""
|
@@ -102,18 +106,17 @@ class Launcher:
|
|
102
106
|
self.SpiderCrawler.parse = func
|
103
107
|
return decorator
|
104
108
|
|
105
|
-
def start_seeds(self, seeds: list):
|
109
|
+
def start_seeds(self, seeds: list[Union[str, Dict]]) -> list[Seed]:
|
106
110
|
seed_list = [Seed(seed) for seed in seeds]
|
107
111
|
self._TODO_QUEUE_.push(seed_list)
|
108
112
|
return seed_list
|
109
113
|
|
110
|
-
def _register(self, func, tag: str = "launcher"):
|
111
|
-
name = tag
|
114
|
+
def _register(self, func: Callable, tag: str = "launcher"):
|
115
|
+
name = f"{tag}:{func.__name__}_{uuid.uuid4()}"
|
112
116
|
self.__REGISTER_FUNC__[name] = func
|
113
117
|
if not self.__WORKER_THREAD__.get(name):
|
114
118
|
worker_thread = threading.Thread(name=name, target=func)
|
115
119
|
self.__WORKER_THREAD__[name] = worker_thread
|
116
|
-
# worker_thread.start()
|
117
120
|
|
118
121
|
def _monitor(self):
|
119
122
|
while not self._stop.is_set():
|
cobweb/schedulers/scheduler.py
CHANGED
@@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
|
|
8
8
|
|
9
9
|
class Scheduler(ABC, threading.Thread):
|
10
10
|
|
11
|
+
__WORKING_ITEMS__ = {}
|
11
12
|
__LAUNCHER_FUNC__ = ["_reset", "_scheduler", "_insert", "_refresh", "_delete"]
|
12
13
|
|
13
14
|
def __init__(
|
@@ -49,19 +50,35 @@ class Scheduler(ABC, threading.Thread):
|
|
49
50
|
self.upload = upload
|
50
51
|
|
51
52
|
self.callback_register = callback_register
|
52
|
-
|
53
|
-
self.working_seeds = dict()
|
53
|
+
self.lock = threading.Lock()
|
54
54
|
|
55
55
|
def is_empty(self):
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
56
|
+
return all(queue.empty() for queue in (self.new, self.todo, self.done, self.upload))
|
57
|
+
|
58
|
+
def set_working_items(self, item_info: dict = None):
|
59
|
+
if not item_info:
|
60
|
+
return
|
61
|
+
with self.lock:
|
62
|
+
self.__WORKING_ITEMS__.update(item_info)
|
63
|
+
|
64
|
+
def get_working_items(self) -> dict:
|
65
|
+
with self.lock:
|
66
|
+
return self.__WORKING_ITEMS__.copy()
|
67
|
+
|
68
|
+
def remove_working_items(self, items: list[str] = None):
|
69
|
+
if not items:
|
70
|
+
return
|
71
|
+
with self.lock:
|
72
|
+
for item in items:
|
73
|
+
self.__WORKING_ITEMS__.pop(item, None)
|
74
|
+
|
75
|
+
def get_working_items_count(self) -> int:
|
76
|
+
with self.lock:
|
77
|
+
return len(self.__WORKING_ITEMS__)
|
78
|
+
|
79
|
+
def clear_working_items(self):
|
80
|
+
with self.lock:
|
81
|
+
self.__WORKING_ITEMS__.clear()
|
65
82
|
|
66
83
|
@abstractmethod
|
67
84
|
def reset(self):
|
@@ -25,28 +25,26 @@ class RedisScheduler(Scheduler):
|
|
25
25
|
callback_register: Callable
|
26
26
|
):
|
27
27
|
super().__init__(task, project, stop, pause, new, todo, done, upload, callback_register)
|
28
|
-
self.todo_key = "{
|
29
|
-
self.done_key = "{
|
30
|
-
self.fail_key = "{
|
31
|
-
self.heartbeat_key = "heartbeat
|
32
|
-
self.speed_control_key = "speed_control
|
33
|
-
self.reset_lock_key = "lock:reset
|
34
|
-
# self.redis_queue_empty_event = threading.Event()
|
28
|
+
self.todo_key = f"{{{project}:{task}}}:todo"
|
29
|
+
self.done_key = f"{{{project}:{task}}}:done"
|
30
|
+
self.fail_key = f"{{{project}:{task}}}:fail"
|
31
|
+
self.heartbeat_key = f"heartbeat:{project}_{task}"
|
32
|
+
self.speed_control_key = f"speed_control:{project}_{task}"
|
33
|
+
self.reset_lock_key = f"lock:reset:{project}_{task}"
|
35
34
|
self.db = ApiDB() if use_api else RedisDB()
|
36
35
|
|
37
|
-
@check_pause
|
38
36
|
def reset(self):
|
39
37
|
"""
|
40
38
|
检查过期种子,重新添加到redis缓存中
|
41
39
|
"""
|
42
|
-
|
43
|
-
|
40
|
+
while not self.stop.is_set():
|
41
|
+
if self.db.lock(self.reset_lock_key, t=60):
|
44
42
|
|
45
|
-
|
46
|
-
|
47
|
-
|
43
|
+
_min = -int(time.time()) + self.seed_reset_seconds
|
44
|
+
self.db.members(self.todo_key, 0, _min=_min, _max="(0")
|
45
|
+
self.db.delete(self.reset_lock_key)
|
48
46
|
|
49
|
-
|
47
|
+
time.sleep(60)
|
50
48
|
|
51
49
|
@check_pause
|
52
50
|
def schedule(self):
|
@@ -55,109 +53,115 @@ class RedisScheduler(Scheduler):
|
|
55
53
|
"""
|
56
54
|
if not self.db.zcount(self.todo_key, 0, "(1000"):
|
57
55
|
time.sleep(self.scheduler_wait_seconds)
|
58
|
-
|
56
|
+
return
|
57
|
+
|
58
|
+
if self.todo.length >= self.todo_queue_size:
|
59
59
|
time.sleep(self.todo_queue_full_wait_seconds)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
60
|
+
return
|
61
|
+
|
62
|
+
members = self.db.members(
|
63
|
+
self.todo_key, int(time.time()),
|
64
|
+
count=self.todo_queue_size,
|
65
|
+
_min=0, _max="(1000"
|
66
|
+
)
|
67
|
+
|
68
|
+
logger.debug(f"Retrieved {len(members)} seeds from Redis.")
|
69
|
+
|
70
|
+
seeds, item_info = list(), dict()
|
71
|
+
for member, priority in members:
|
72
|
+
seed = Seed(member, priority=priority)
|
73
|
+
item_info[seed.to_string] = seed.params.priority
|
74
|
+
seeds.append(seed)
|
75
|
+
|
76
|
+
self.set_working_items(item_info)
|
77
|
+
self.todo.push(seeds)
|
70
78
|
|
71
79
|
@check_pause
|
72
80
|
def insert(self):
|
73
81
|
"""
|
74
82
|
添加新种子到redis队列中
|
75
83
|
"""
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
if new_seeds:
|
87
|
-
self.db.zadd(self.todo_key, new_seeds, nx=True)
|
88
|
-
if del_seeds:
|
89
|
-
self.done.push(list(del_seeds))
|
90
|
-
if status:
|
91
|
-
time.sleep(self.new_queue_wait_seconds)
|
84
|
+
seeds, delete_seeds = dict(), set()
|
85
|
+
for seed, new_seed in self.new.iter_items(limit=self.new_queue_max_size):
|
86
|
+
seeds[new_seed.to_string] = new_seed.params.priority
|
87
|
+
delete_seeds.add(seed)
|
88
|
+
|
89
|
+
self.db.zadd(self.todo_key, seeds, nx=True)
|
90
|
+
self.done.push(delete_seeds)
|
91
|
+
|
92
|
+
if self.new.length < self.new_queue_max_size:
|
93
|
+
time.sleep(self.scheduler_wait_seconds)
|
92
94
|
|
93
95
|
@check_pause
|
94
96
|
def refresh(self):
|
95
97
|
"""
|
96
98
|
刷新doing种子过期时间,防止reset重新消费
|
97
99
|
"""
|
98
|
-
if self.
|
100
|
+
if item_info := self.get_working_items():
|
99
101
|
refresh_time = int(time.time())
|
100
|
-
|
101
|
-
self.db.zadd(self.todo_key,
|
102
|
-
|
102
|
+
seed_info = {k: -refresh_time - v / 1000 for k, v in item_info.items()}
|
103
|
+
self.db.zadd(self.todo_key, seed_info, xx=True)
|
104
|
+
self.set_working_items(seed_info)
|
105
|
+
time.sleep(20)
|
103
106
|
|
104
107
|
@check_pause
|
105
108
|
def delete(self):
|
106
109
|
"""
|
107
110
|
删除队列种子,根据状态添加至成功或失败队列,移除doing字典种子索引
|
108
111
|
"""
|
109
|
-
|
110
|
-
status = self.done.length < self.done_queue_max_size
|
112
|
+
seeds = [seed.to_string for seed in self.done.iter_items(limit=self.done_queue_max_size)]
|
111
113
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
break
|
116
|
-
seed_list.append(seed.to_string)
|
114
|
+
if seeds:
|
115
|
+
self.db.zrem(name=self.todo_key, *seeds)
|
116
|
+
self.remove_working_items(seeds)
|
117
117
|
|
118
|
-
if
|
119
|
-
|
120
|
-
self.db.zrem(self.todo_key, *seed_list)
|
121
|
-
self.remove_working_seeds(seed_list)
|
122
|
-
|
123
|
-
if status:
|
118
|
+
if self.done.length < self.done_queue_max_size:
|
124
119
|
time.sleep(self.done_queue_wait_seconds)
|
125
120
|
|
126
121
|
def run(self):
|
127
122
|
start_time = int(time.time())
|
128
123
|
|
129
|
-
self.
|
130
|
-
|
131
|
-
self.callback_register(self.delete, tag="scheduler")
|
132
|
-
self.callback_register(self.refresh, tag="scheduler")
|
133
|
-
self.callback_register(self.schedule, tag="scheduler")
|
124
|
+
for func in [self.reset, self.insert, self.delete, self.refresh, self.schedule]:
|
125
|
+
self.callback_register(func, tag="scheduler")
|
134
126
|
|
135
127
|
while not self.stop.is_set():
|
136
|
-
working_count =
|
128
|
+
working_count = self.get_working_items_count()
|
137
129
|
memory_count = self.db.zcount(self.todo_key, "-inf", "(0")
|
138
130
|
todo_count = self.db.zcount(self.todo_key, 0, "(1000")
|
139
131
|
all_count = self.db.zcard(self.todo_key)
|
140
|
-
|
141
|
-
if self.
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
self.pause.clear()
|
150
|
-
else:
|
151
|
-
logger.info("pause! waiting for resume...")
|
152
|
-
elif all_count:
|
153
|
-
logger.info(f"todo seeds count: {todo_count}, queue length: {all_count}")
|
132
|
+
|
133
|
+
if self.pause.is_set():
|
134
|
+
execute_time = int(time.time()) - start_time
|
135
|
+
if not self.task_model and execute_time > self.before_scheduler_wait_seconds:
|
136
|
+
logger.info("Done! ready to close thread...")
|
137
|
+
self.stop.set()
|
138
|
+
elif todo_count:
|
139
|
+
logger.info(
|
140
|
+
f"Recovery {self.task} task run!Todo seeds count: {todo_count}, queue length: {all_count}")
|
154
141
|
self.pause.clear()
|
155
142
|
else:
|
156
|
-
logger.info("
|
157
|
-
|
158
|
-
|
159
|
-
|
143
|
+
logger.info("Pause! waiting for resume...")
|
144
|
+
|
145
|
+
elif self.is_empty():
|
146
|
+
|
147
|
+
if all_count:
|
148
|
+
logger.info(f"Todo seeds count: {todo_count}, queue length: {all_count}")
|
160
149
|
self.pause.clear()
|
150
|
+
else:
|
151
|
+
count = 0
|
152
|
+
for _ in range(3):
|
153
|
+
if not all_count:
|
154
|
+
count += 1
|
155
|
+
time.sleep(5)
|
156
|
+
logger.info("Checking count...")
|
157
|
+
else:
|
158
|
+
break
|
159
|
+
if count >= 3:
|
160
|
+
logger.info("Todo queue is empty! Pause set...")
|
161
|
+
self.clear_working_items()
|
162
|
+
self.pause.set()
|
163
|
+
|
164
|
+
else:
|
161
165
|
logger.info(LogTemplate.launcher_pro_polling.format(
|
162
166
|
task=self.task,
|
163
167
|
doing_len=working_count,
|
cobweb/setting.py
CHANGED
@@ -43,12 +43,12 @@ TASK_MODEL = 0
|
|
43
43
|
# 流控措施, 0:关闭,1:开启
|
44
44
|
SPEED_CONTROL = 1
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
46
|
+
# redis config
|
47
|
+
REDIS_CONFIG = {
|
48
|
+
"host": "127.0.0.1",
|
49
|
+
"port": 6379,
|
50
|
+
"db": 0
|
51
|
+
}
|
52
52
|
|
53
53
|
# loghub pipeline config
|
54
54
|
# os.getenv("LOGHUB_ENDPOINT"),
|
cobweb/utils/decorators.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import time
|
2
2
|
from functools import wraps
|
3
|
-
|
4
3
|
from cobweb.base import logger
|
5
4
|
|
6
5
|
|
@@ -38,6 +37,6 @@ def check_pause(func):
|
|
38
37
|
logger.info(f"{func.__name__}: " + str(e))
|
39
38
|
finally:
|
40
39
|
time.sleep(0.1)
|
41
|
-
logger.info(f"
|
40
|
+
logger.info(f"Pause detected: {func.__name__} thread closing...")
|
42
41
|
|
43
42
|
return wrapper
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cobweb-launcher
|
3
|
-
Version: 3.1.
|
3
|
+
Version: 3.1.14
|
4
4
|
Summary: spider_hole
|
5
5
|
Home-page: https://github.com/Juannie-PP/cobweb
|
6
6
|
Author: Juannie-PP
|
@@ -32,12 +32,12 @@ pip3 install --upgrade cobweb-launcher
|
|
32
32
|
```
|
33
33
|
## 使用方法介绍
|
34
34
|
### 1. 任务创建
|
35
|
-
-
|
35
|
+
- Launcher任务创建
|
36
36
|
```python
|
37
|
-
from cobweb import
|
37
|
+
from cobweb import Launcher
|
38
38
|
|
39
39
|
# 创建启动器
|
40
|
-
app =
|
40
|
+
app = Launcher(task="test", project="test")
|
41
41
|
|
42
42
|
# 设置采集种子
|
43
43
|
app.SEEDS = [{
|
@@ -47,29 +47,15 @@ app.SEEDS = [{
|
|
47
47
|
# 启动任务
|
48
48
|
app.start()
|
49
49
|
```
|
50
|
-
- LauncherPro任务创建
|
51
|
-
LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
|
52
|
-
```python
|
53
|
-
from cobweb import LauncherPro
|
54
|
-
|
55
|
-
# 创建启动器
|
56
|
-
app = LauncherPro(
|
57
|
-
task="test",
|
58
|
-
project="test"
|
59
|
-
)
|
60
|
-
...
|
61
|
-
# 启动任务
|
62
|
-
app.start()
|
63
|
-
```
|
64
50
|
### 2. 自定义配置文件参数
|
65
51
|
- 通过自定义setting文件,配置文件导入字符串方式
|
66
52
|
> 默认配置文件:import cobweb.setting
|
67
53
|
> 不推荐!!!目前有bug,随缘使用...
|
68
54
|
例如:同级目录下自定义创建了setting.py文件。
|
69
55
|
```python
|
70
|
-
from cobweb import
|
56
|
+
from cobweb import Launcher
|
71
57
|
|
72
|
-
app =
|
58
|
+
app = Launcher(
|
73
59
|
task="test",
|
74
60
|
project="test",
|
75
61
|
setting="import setting"
|
@@ -81,10 +67,10 @@ app.start()
|
|
81
67
|
```
|
82
68
|
- 自定义修改setting中对象值
|
83
69
|
```python
|
84
|
-
from cobweb import
|
70
|
+
from cobweb import Launcher
|
85
71
|
|
86
72
|
# 创建启动器
|
87
|
-
app =
|
73
|
+
app = Launcher(
|
88
74
|
task="test",
|
89
75
|
project="test",
|
90
76
|
REDIS_CONFIG = {
|
@@ -102,10 +88,10 @@ app.start()
|
|
102
88
|
`@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
|
103
89
|
```python
|
104
90
|
from typing import Union
|
105
|
-
from cobweb import
|
91
|
+
from cobweb import Launcher
|
106
92
|
from cobweb.base import Seed, Request, BaseItem
|
107
93
|
|
108
|
-
app =
|
94
|
+
app = Launcher(
|
109
95
|
task="test",
|
110
96
|
project="test"
|
111
97
|
)
|
@@ -130,10 +116,10 @@ app.start()
|
|
130
116
|
`@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制请求参数。
|
131
117
|
```python
|
132
118
|
from typing import Union
|
133
|
-
from cobweb import
|
119
|
+
from cobweb import Launcher
|
134
120
|
from cobweb.base import Request, Response, BaseItem
|
135
121
|
|
136
|
-
app =
|
122
|
+
app = Launcher(
|
137
123
|
task="test",
|
138
124
|
project="test"
|
139
125
|
)
|
@@ -161,14 +147,14 @@ app.start()
|
|
161
147
|
解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
|
162
148
|
```python
|
163
149
|
from typing import Union
|
164
|
-
from cobweb import
|
150
|
+
from cobweb import Launcher
|
165
151
|
from cobweb.base import Seed, Response, BaseItem
|
166
152
|
|
167
153
|
class TestItem(BaseItem):
|
168
154
|
__TABLE__ = "test_data" # 表名
|
169
155
|
__FIELDS__ = "field1, field2, field3" # 字段名
|
170
156
|
|
171
|
-
app =
|
157
|
+
app = Launcher(
|
172
158
|
task="test",
|
173
159
|
project="test"
|
174
160
|
)
|
@@ -190,12 +176,11 @@ app.start()
|
|
190
176
|
> upload_item = item.to_dict
|
191
177
|
> upload_item["text"] = item.response.text
|
192
178
|
> yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
|
193
|
-
##
|
179
|
+
## todo
|
194
180
|
- 队列优化完善,使用queue的机制wait()同步各模块执行?
|
195
181
|
- 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
|
196
182
|
- 去重过滤(布隆过滤器等)
|
197
183
|
- 单机防丢失
|
198
|
-
- excel、mysql、redis数据完善
|
199
184
|
|
200
185
|
> 未更新流程图!!!
|
201
186
|

|
@@ -1,39 +1,39 @@
|
|
1
1
|
cobweb/__init__.py,sha256=YdBi3uytEFRXan155xU1kKMpiUKUupO2RGeJyXmH0zk,129
|
2
2
|
cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
|
3
|
-
cobweb/setting.py,sha256=
|
3
|
+
cobweb/setting.py,sha256=rHPQfc4a1xMTbkt3_KXBfUomhYcbTXogsz7ew-QsqHw,1670
|
4
4
|
cobweb/base/__init__.py,sha256=c1qwQrpe5atW_OK_Qw9kaiZpVDey3t_nvRwlH7QRIqY,179
|
5
|
-
cobweb/base/common_queue.py,sha256=
|
6
|
-
cobweb/base/item.py,sha256=
|
5
|
+
cobweb/base/common_queue.py,sha256=hYdaM70KrWjvACuLKaGhkI2VqFCnd87NVvWzmnfIg8Q,1423
|
6
|
+
cobweb/base/item.py,sha256=1bS4U_3vzI2jzSSeoEbLoLT_5CfgLPopWiEYtaahbvw,1674
|
7
7
|
cobweb/base/logger.py,sha256=Vsg1bD4LXW91VgY-ANsmaUu-mD88hU_WS83f7jX3qF8,2011
|
8
|
-
cobweb/base/request.py,sha256=
|
8
|
+
cobweb/base/request.py,sha256=MBYYjWpbRQRulPG0zPbK0DO3LKmScqQ4tBzFXekYkao,2652
|
9
9
|
cobweb/base/response.py,sha256=g8e5H0hEiRfqseh3nD7t6a1rhIJYRMV7nI47kqNOd-U,446
|
10
|
-
cobweb/base/seed.py,sha256=
|
10
|
+
cobweb/base/seed.py,sha256=ddaWCq_KaWwpmPl1CToJlfCxEEnoJ16kjo6azJs9uls,5000
|
11
11
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
12
12
|
cobweb/crawlers/crawler.py,sha256=ZZVZJ17RWuvzUFGLjqdvyVZpmuq-ynslJwXQzdm_UdQ,709
|
13
13
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
14
|
-
cobweb/db/api_db.py,sha256=
|
15
|
-
cobweb/db/redis_db.py,sha256=
|
14
|
+
cobweb/db/api_db.py,sha256=qIhEGB-reKPVFtWPIJYFVK16Us32GBgYjgFjcF-V0GM,3036
|
15
|
+
cobweb/db/redis_db.py,sha256=X7dUpW50QcmRPjYlYg7b-fXF_fcjuRRk3DBx2ggetXk,7687
|
16
16
|
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
17
17
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
18
18
|
cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
|
19
19
|
cobweb/launchers/distributor.py,sha256=ALvu7MVZLSQPmWJc_FR-UUIlTMv4PAu8q7tt-KzK1v8,6810
|
20
|
-
cobweb/launchers/launcher.py,sha256=
|
20
|
+
cobweb/launchers/launcher.py,sha256=L75eYKemPVqT0cuwfBy_Vh0CObWilDpJ9ibD29g5L38,5742
|
21
21
|
cobweb/launchers/uploader.py,sha256=5Hm1pmco8PsFrtBDRN9aw6IjAElfX2wdN1yaILtp03w,2059
|
22
22
|
cobweb/pipelines/__init__.py,sha256=rtkaaCZ4u1XcxpkDLHztETQjEcLZ_6DXTHjdfcJlyxQ,97
|
23
23
|
cobweb/pipelines/pipeline.py,sha256=OgSEZ2DdqofpZcer1Wj1tuBqn8OHVjrYQ5poqt75czQ,357
|
24
24
|
cobweb/pipelines/pipeline_csv.py,sha256=TFqxqgVUqkBF6Jott4zd6fvCSxzG67lpafRQtXPw1eg,807
|
25
25
|
cobweb/pipelines/pipeline_loghub.py,sha256=zwIa_pcWBB2UNGd32Cu-i1jKGNruTbo2STdxl1WGwZ0,1829
|
26
26
|
cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
|
27
|
-
cobweb/schedulers/scheduler.py,sha256=
|
28
|
-
cobweb/schedulers/scheduler_with_redis.py,sha256=
|
27
|
+
cobweb/schedulers/scheduler.py,sha256=sFlxQY-ehd-Lbskvr21v0wjkb88bs551djAY96Bz1Qs,2777
|
28
|
+
cobweb/schedulers/scheduler_with_redis.py,sha256=x68CVJqcMkoUD5_AraG7lcJ6M1UdMCjt1e3i1K8Ea40,6286
|
29
29
|
cobweb/utils/__init__.py,sha256=TRFJyyBjaQH_sejU6G_msOeHpjc3ZXU0dUOO5GQfknM,171
|
30
30
|
cobweb/utils/bloom.py,sha256=A8xqtHXp7jgRoBuUlpovmq8lhU5y7IEF0FOCjfQDb6s,1855
|
31
|
-
cobweb/utils/decorators.py,sha256=
|
31
|
+
cobweb/utils/decorators.py,sha256=ZwVQlz-lYHgXgKf9KRCp15EWPzTDdhoikYUNUCIqNeM,1140
|
32
32
|
cobweb/utils/dotting.py,sha256=L-jGSApdnFIP4jUWH6p5qIme0aJ1vyDrxAx8wOJWvcs,1960
|
33
33
|
cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
|
34
34
|
cobweb/utils/tools.py,sha256=14TCedqt07m4z6bCnFAsITOFixeGr8V3aOKk--L7Cr0,879
|
35
|
-
cobweb_launcher-3.1.
|
36
|
-
cobweb_launcher-3.1.
|
37
|
-
cobweb_launcher-3.1.
|
38
|
-
cobweb_launcher-3.1.
|
39
|
-
cobweb_launcher-3.1.
|
35
|
+
cobweb_launcher-3.1.14.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
36
|
+
cobweb_launcher-3.1.14.dist-info/METADATA,sha256=OX4pI6FO7KD_XCkpBj2efE674q2E2PVpem_Xnaq2Z_0,5998
|
37
|
+
cobweb_launcher-3.1.14.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
38
|
+
cobweb_launcher-3.1.14.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
39
|
+
cobweb_launcher-3.1.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|