cobweb-launcher 1.3.15__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/__init__.py +1 -1
- cobweb/base/__init__.py +4 -149
- cobweb/base/common_queue.py +0 -13
- cobweb/base/request.py +2 -14
- cobweb/base/seed.py +16 -12
- cobweb/constant.py +0 -16
- cobweb/crawlers/crawler.py +3 -85
- cobweb/db/redis_db.py +109 -52
- cobweb/launchers/__init__.py +8 -2
- cobweb/launchers/distributor.py +171 -0
- cobweb/launchers/launcher.py +87 -131
- cobweb/launchers/uploader.py +65 -0
- cobweb/pipelines/pipeline.py +3 -36
- cobweb/schedulers/__init__.py +1 -3
- cobweb/schedulers/launcher_air.py +93 -0
- cobweb/schedulers/launcher_api.py +225 -0
- cobweb/schedulers/scheduler.py +85 -0
- cobweb/schedulers/scheduler_with_redis.py +177 -0
- cobweb/setting.py +15 -32
- cobweb/utils/__init__.py +2 -1
- cobweb/utils/decorators.py +43 -0
- cobweb/utils/dotting.py +55 -0
- cobweb/utils/oss.py +28 -9
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/METADATA +1 -1
- cobweb_launcher-3.1.0.dist-info/RECORD +41 -0
- cobweb/base/basic.py +0 -297
- cobweb/base/dotting.py +0 -35
- cobweb/launchers/launcher_air.py +0 -88
- cobweb/launchers/launcher_api.py +0 -89
- cobweb/launchers/launcher_pro.py +0 -88
- cobweb/schedulers/scheduler_api.py +0 -72
- cobweb/schedulers/scheduler_redis.py +0 -72
- cobweb_launcher-1.3.15.dist-info/RECORD +0 -40
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/top_level.txt +0 -0
cobweb/utils/oss.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
from cobweb import setting
|
3
3
|
from requests import Response
|
4
4
|
from oss2 import Auth, Bucket, models, PartIterator
|
5
5
|
from cobweb.exceptions import oss_db_exception
|
6
|
-
from cobweb.base import
|
6
|
+
from cobweb.base.decorators import decorator_oss_db
|
7
7
|
|
8
8
|
|
9
9
|
class OssUtil:
|
@@ -23,6 +23,9 @@ class OssUtil:
|
|
23
23
|
self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
|
24
24
|
self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
|
25
25
|
|
26
|
+
self.failed_count = 0
|
27
|
+
self._kw = kwargs
|
28
|
+
|
26
29
|
self._auth = Auth(
|
27
30
|
access_key_id=access_key or setting.OSS_ACCESS_KEY,
|
28
31
|
access_key_secret=secret_key or setting.OSS_SECRET_KEY
|
@@ -31,26 +34,42 @@ class OssUtil:
|
|
31
34
|
auth=self._auth,
|
32
35
|
endpoint=self.endpoint,
|
33
36
|
bucket_name=self.bucket,
|
34
|
-
**
|
37
|
+
**self._kw
|
35
38
|
)
|
36
39
|
|
40
|
+
def failed(self):
|
41
|
+
self.failed_count += 1
|
42
|
+
if self.failed_count >= 5:
|
43
|
+
self._client = Bucket(
|
44
|
+
auth=self._auth,
|
45
|
+
endpoint=self.endpoint,
|
46
|
+
bucket_name=self.bucket,
|
47
|
+
**self._kw
|
48
|
+
)
|
49
|
+
|
37
50
|
def exists(self, key: str) -> bool:
|
38
|
-
|
51
|
+
try:
|
52
|
+
result = self._client.object_exists(key)
|
53
|
+
self.failed_count = 0
|
54
|
+
return result
|
55
|
+
except Exception as e:
|
56
|
+
self.failed()
|
57
|
+
raise e
|
39
58
|
|
40
59
|
def head(self, key: str) -> models.HeadObjectResult:
|
41
60
|
return self._client.head_object(key)
|
42
61
|
|
43
|
-
@
|
62
|
+
@decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
|
44
63
|
def init_part(self, key) -> models.InitMultipartUploadResult:
|
45
64
|
"""初始化分片上传"""
|
46
65
|
return self._client.init_multipart_upload(key)
|
47
66
|
|
48
|
-
@
|
67
|
+
@decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
|
49
68
|
def put(self, key, data) -> models.PutObjectResult:
|
50
69
|
"""文件上传"""
|
51
70
|
return self._client.put_object(key, data)
|
52
71
|
|
53
|
-
@
|
72
|
+
@decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
|
54
73
|
def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
|
55
74
|
"""分片上传"""
|
56
75
|
return self._client.upload_part(key, upload_id, position, data)
|
@@ -59,13 +78,13 @@ class OssUtil:
|
|
59
78
|
"""获取分片列表"""
|
60
79
|
return [part_info for part_info in PartIterator(self._client, key, upload_id)]
|
61
80
|
|
62
|
-
@
|
81
|
+
@decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
|
63
82
|
def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
|
64
83
|
"""合并分片"""
|
65
84
|
headers = None if parts else {"x-oss-complete-all": "yes"}
|
66
85
|
return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
|
67
86
|
|
68
|
-
@
|
87
|
+
@decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
|
69
88
|
def append(self, key, position, data) -> models.AppendObjectResult:
|
70
89
|
"""追加上传"""
|
71
90
|
return self._client.append_object(key, position, data)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
cobweb/__init__.py,sha256=UfNq1asNq7_a8IKf5WCbk0ju5fxT4wElAyaItf_a-d8,67
|
2
|
+
cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
|
3
|
+
cobweb/setting.py,sha256=yP9ZkVShTY4sCZ4DpzI_hO-FMSf0vGrQvkp7UCmZaa0,2338
|
4
|
+
cobweb/base/__init__.py,sha256=epVQttTHQcux9kAtrGuO9HB_wP74L-pi74vhzc4QOCw,224
|
5
|
+
cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
|
6
|
+
cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
|
7
|
+
cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
|
8
|
+
cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
9
|
+
cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
10
|
+
cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
|
11
|
+
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
12
|
+
cobweb/crawlers/crawler.py,sha256=73WFGVNvIFvH8cP5RfDhAhM-WaFL_mdwnAhNohDnBO0,696
|
13
|
+
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
14
|
+
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
15
|
+
cobweb/db/redis_db.py,sha256=rK6PnKLLVTeg6HMGVtlYl4_b5og3aO2J5jZrqU2Aoso,7721
|
16
|
+
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
17
|
+
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
18
|
+
cobweb/launchers/__init__.py,sha256=XEW4hmBMPKp2bRkHBxxlplXwvKdlLVYBFg3etjtbRXo,222
|
19
|
+
cobweb/launchers/distributor.py,sha256=Br5G6Jn6aJeaAX9y3yBfNVRvq2-kIIz9LsjRCXLmzGg,6447
|
20
|
+
cobweb/launchers/launcher.py,sha256=WTyv5bgKE_VwZXuWia_F26fH2zi6mps0wsnC16bks9k,5253
|
21
|
+
cobweb/launchers/uploader.py,sha256=2zAYMEpfUYo8zCzH6Jhwkj9Y4_aJYVNnx0D5H8srKW4,1806
|
22
|
+
cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
|
23
|
+
cobweb/pipelines/pipeline.py,sha256=qwoOYMhlAB-MnEmMNpNeauTHoRTOr2wyBDYS4MF6B1c,261
|
24
|
+
cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
|
25
|
+
cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
|
26
|
+
cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
|
27
|
+
cobweb/schedulers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
|
28
|
+
cobweb/schedulers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
|
29
|
+
cobweb/schedulers/scheduler.py,sha256=mN9XvaOCzNnBWQfzslTIM860ZGq2gyLtxpbVUd0Slqs,2240
|
30
|
+
cobweb/schedulers/scheduler_with_redis.py,sha256=pCimXqqpJ2xI1fs-6Ecy2vs9vHC-PPFX4IBMLDtpbho,6394
|
31
|
+
cobweb/utils/__init__.py,sha256=8Bu5iZrIOUMS4jv4hi0inRPtscf6MK0ZFa7gQ7ZFoqw,145
|
32
|
+
cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
33
|
+
cobweb/utils/decorators.py,sha256=066JCY_RNMr2mXkhEv8XTtOOKkv9CFiBm0ZNCcC-2ag,1131
|
34
|
+
cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
|
35
|
+
cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
|
36
|
+
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
37
|
+
cobweb_launcher-3.1.0.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
38
|
+
cobweb_launcher-3.1.0.dist-info/METADATA,sha256=Cl_g5S9zmKx6qhAPllDGYYpow_slpwwXYxJxtZoOzck,6509
|
39
|
+
cobweb_launcher-3.1.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
40
|
+
cobweb_launcher-3.1.0.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
41
|
+
cobweb_launcher-3.1.0.dist-info/RECORD,,
|
cobweb/base/basic.py
DELETED
@@ -1,297 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import random
|
3
|
-
import time
|
4
|
-
import hashlib
|
5
|
-
import requests
|
6
|
-
|
7
|
-
|
8
|
-
class Params:
|
9
|
-
|
10
|
-
def __init__(self, retry=None, priority=None, version=None, status=None):
|
11
|
-
self.retry = retry or 0
|
12
|
-
self.priority = priority or 300
|
13
|
-
self.version = version or int(time.time())
|
14
|
-
self.status = status
|
15
|
-
|
16
|
-
|
17
|
-
class Seed:
|
18
|
-
__SEED_PARAMS__ = [
|
19
|
-
"retry",
|
20
|
-
"priority",
|
21
|
-
"version",
|
22
|
-
"status"
|
23
|
-
]
|
24
|
-
|
25
|
-
def __init__(
|
26
|
-
self,
|
27
|
-
seed,
|
28
|
-
sid=None,
|
29
|
-
retry=None,
|
30
|
-
priority=None,
|
31
|
-
version=None,
|
32
|
-
status=None,
|
33
|
-
**kwargs
|
34
|
-
):
|
35
|
-
if any(isinstance(seed, t) for t in (str, bytes)):
|
36
|
-
try:
|
37
|
-
item = json.loads(seed)
|
38
|
-
self._init_seed(item)
|
39
|
-
except json.JSONDecodeError:
|
40
|
-
self.__setattr__("url", seed)
|
41
|
-
elif isinstance(seed, dict):
|
42
|
-
self._init_seed(seed)
|
43
|
-
else:
|
44
|
-
raise TypeError(Exception(
|
45
|
-
f"seed type error, "
|
46
|
-
f"must be str or dict! "
|
47
|
-
f"seed: {seed}"
|
48
|
-
))
|
49
|
-
|
50
|
-
seed_params = {
|
51
|
-
"retry": retry,
|
52
|
-
"priority": priority,
|
53
|
-
"version": version,
|
54
|
-
"status": status,
|
55
|
-
}
|
56
|
-
|
57
|
-
if kwargs:
|
58
|
-
# for k, v in kwargs.items():
|
59
|
-
# if k in seed_params.keys():
|
60
|
-
# seed_params[k] = v
|
61
|
-
# else:
|
62
|
-
# self.__setattr__(k, v)
|
63
|
-
self._init_seed(kwargs)
|
64
|
-
seed_params.update({
|
65
|
-
k: v for k, v in kwargs.items()
|
66
|
-
if k in self.__SEED_PARAMS__
|
67
|
-
})
|
68
|
-
if sid or not getattr(self, "sid", None):
|
69
|
-
self._init_id(sid)
|
70
|
-
self.params = Params(**seed_params)
|
71
|
-
|
72
|
-
def __getattr__(self, name):
|
73
|
-
return None
|
74
|
-
|
75
|
-
def __setitem__(self, key, value):
|
76
|
-
setattr(self, key, value)
|
77
|
-
|
78
|
-
def __getitem__(self, item):
|
79
|
-
return getattr(self, item)
|
80
|
-
|
81
|
-
def __str__(self):
|
82
|
-
return json.dumps(self.__dict__, ensure_ascii=False)
|
83
|
-
|
84
|
-
def __repr__(self):
|
85
|
-
chars = [f"{k}={v}" for k, v in self.__dict__.items()]
|
86
|
-
return f'{self.__class__.__name__}({", ".join(chars)})'
|
87
|
-
|
88
|
-
def _init_seed(self, seed_info: dict):
|
89
|
-
for k, v in seed_info.items():
|
90
|
-
if k not in self.__SEED_PARAMS__:
|
91
|
-
self.__setattr__(k, v)
|
92
|
-
|
93
|
-
def _init_id(self, sid):
|
94
|
-
if not sid:
|
95
|
-
sid = hashlib.md5(self.to_string.encode()).hexdigest()
|
96
|
-
self.__setattr__("sid", sid)
|
97
|
-
|
98
|
-
@property
|
99
|
-
def to_dict(self) -> dict:
|
100
|
-
seed = self.__dict__.copy()
|
101
|
-
if seed.get("params"):
|
102
|
-
del seed["params"]
|
103
|
-
return seed
|
104
|
-
|
105
|
-
@property
|
106
|
-
def to_string(self) -> str:
|
107
|
-
return json.dumps(
|
108
|
-
self.to_dict,
|
109
|
-
ensure_ascii=False,
|
110
|
-
separators=(",", ":")
|
111
|
-
)
|
112
|
-
|
113
|
-
@property
|
114
|
-
def seed(self):
|
115
|
-
return self.to_string
|
116
|
-
|
117
|
-
|
118
|
-
class Request:
|
119
|
-
__SEED_PARAMS__ = [
|
120
|
-
"retry",
|
121
|
-
"priority",
|
122
|
-
"version",
|
123
|
-
"status"
|
124
|
-
]
|
125
|
-
|
126
|
-
__REQUEST_ATTRS__ = {
|
127
|
-
"params",
|
128
|
-
"headers",
|
129
|
-
"cookies",
|
130
|
-
"data",
|
131
|
-
"json",
|
132
|
-
"files",
|
133
|
-
"auth",
|
134
|
-
"timeout",
|
135
|
-
"proxies",
|
136
|
-
"hooks",
|
137
|
-
"stream",
|
138
|
-
"verify",
|
139
|
-
"cert",
|
140
|
-
"allow_redirects",
|
141
|
-
}
|
142
|
-
|
143
|
-
def __init__(
|
144
|
-
self,
|
145
|
-
# url,
|
146
|
-
seed,
|
147
|
-
random_ua=True,
|
148
|
-
check_status_code=True,
|
149
|
-
retry=None,
|
150
|
-
priority=None,
|
151
|
-
version=None,
|
152
|
-
status=None,
|
153
|
-
**kwargs
|
154
|
-
):
|
155
|
-
# self.url = url
|
156
|
-
self.check_status_code = check_status_code
|
157
|
-
self.request_setting = {}
|
158
|
-
|
159
|
-
seed_params = {
|
160
|
-
"retry": retry,
|
161
|
-
"priority": priority,
|
162
|
-
"version": version,
|
163
|
-
"status": status,
|
164
|
-
}
|
165
|
-
|
166
|
-
if isinstance(seed, Seed):
|
167
|
-
kwargs.update(**seed.to_dict)
|
168
|
-
elif isinstance(seed, str):
|
169
|
-
kwargs.update(**json.loads(seed))
|
170
|
-
elif isinstance(seed, dict):
|
171
|
-
kwargs.update(**seed)
|
172
|
-
|
173
|
-
for k, v in kwargs.items():
|
174
|
-
if k in self.__class__.__REQUEST_ATTRS__:
|
175
|
-
self.request_setting[k] = v
|
176
|
-
continue
|
177
|
-
elif k in self.__SEED_PARAMS__:
|
178
|
-
seed_params[k] = v
|
179
|
-
self.__setattr__(k, v)
|
180
|
-
|
181
|
-
if not getattr(self, "method", None):
|
182
|
-
self.method = "POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"
|
183
|
-
|
184
|
-
if random_ua:
|
185
|
-
self._build_header()
|
186
|
-
|
187
|
-
self.params = Params(**seed_params)
|
188
|
-
# self.seed = self.to_string
|
189
|
-
|
190
|
-
@property
|
191
|
-
def _random_ua(self) -> str:
|
192
|
-
v1 = random.randint(4, 15)
|
193
|
-
v2 = random.randint(3, 11)
|
194
|
-
v3 = random.randint(1, 16)
|
195
|
-
v4 = random.randint(533, 605)
|
196
|
-
v5 = random.randint(1000, 6000)
|
197
|
-
v6 = random.randint(10, 80)
|
198
|
-
user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
|
199
|
-
f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
|
200
|
-
return user_agent
|
201
|
-
|
202
|
-
def _build_header(self) -> dict:
|
203
|
-
if not self.request_setting.get("headers"):
|
204
|
-
self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
|
205
|
-
elif "user-agent" not in [key.lower() for key in self.request_setting["headers"].keys()]:
|
206
|
-
self.request_setting["headers"]["user-agent"] = self._random_ua
|
207
|
-
|
208
|
-
def download(self) -> requests.Response:
|
209
|
-
response = requests.request(self.method, self.url, **self.request_setting)
|
210
|
-
if self.check_status_code:
|
211
|
-
response.raise_for_status()
|
212
|
-
return response
|
213
|
-
|
214
|
-
def __getattr__(self, name):
|
215
|
-
return None
|
216
|
-
|
217
|
-
def __setitem__(self, key, value):
|
218
|
-
setattr(self, key, value)
|
219
|
-
|
220
|
-
def __getitem__(self, item):
|
221
|
-
return getattr(self, item)
|
222
|
-
|
223
|
-
@property
|
224
|
-
def to_dict(self):
|
225
|
-
_dict = self.__dict__.copy()
|
226
|
-
# _dict.pop('seed')
|
227
|
-
_dict.pop('params')
|
228
|
-
_dict.pop('check_status_code')
|
229
|
-
# _dict.pop('request_setting')
|
230
|
-
return _dict
|
231
|
-
|
232
|
-
@property
|
233
|
-
def to_string(self) -> str:
|
234
|
-
return json.dumps(
|
235
|
-
self.to_dict,
|
236
|
-
ensure_ascii=False,
|
237
|
-
separators=(",", ":")
|
238
|
-
)
|
239
|
-
|
240
|
-
@property
|
241
|
-
def seed(self):
|
242
|
-
return self.to_string
|
243
|
-
|
244
|
-
|
245
|
-
class Response:
|
246
|
-
|
247
|
-
def __init__(
|
248
|
-
self,
|
249
|
-
seed,
|
250
|
-
response,
|
251
|
-
retry=None,
|
252
|
-
priority=None,
|
253
|
-
version=None,
|
254
|
-
status=None,
|
255
|
-
**kwargs
|
256
|
-
):
|
257
|
-
self.seed = seed
|
258
|
-
self.response = response
|
259
|
-
seed_params = {
|
260
|
-
"retry": retry,
|
261
|
-
"priority": priority,
|
262
|
-
"version": version,
|
263
|
-
"status": status,
|
264
|
-
}
|
265
|
-
for k, v in kwargs.items():
|
266
|
-
if k in seed_params.keys():
|
267
|
-
seed_params[k] = v
|
268
|
-
else:
|
269
|
-
self.__setattr__(k, v)
|
270
|
-
self.params = Params(**seed_params)
|
271
|
-
|
272
|
-
@property
|
273
|
-
def to_dict(self):
|
274
|
-
_dict = self.__dict__.copy()
|
275
|
-
_dict.pop('seed')
|
276
|
-
_dict.pop('response')
|
277
|
-
_dict.pop('method')
|
278
|
-
_dict.pop('params')
|
279
|
-
_dict.pop('request_setting')
|
280
|
-
return _dict
|
281
|
-
|
282
|
-
@property
|
283
|
-
def to_string(self) -> str:
|
284
|
-
return json.dumps(
|
285
|
-
self.to_dict,
|
286
|
-
ensure_ascii=False,
|
287
|
-
separators=(",", ":")
|
288
|
-
)
|
289
|
-
|
290
|
-
def __getattr__(self, name):
|
291
|
-
return None
|
292
|
-
|
293
|
-
def __setitem__(self, key, value):
|
294
|
-
setattr(self, key, value)
|
295
|
-
|
296
|
-
def __getitem__(self, item):
|
297
|
-
return getattr(self, item)
|
cobweb/base/dotting.py
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import json
|
3
|
-
from aliyun.log import LogClient, LogItem, PutLogsRequest
|
4
|
-
|
5
|
-
|
6
|
-
class LoghubDot:
|
7
|
-
|
8
|
-
def __init__(self):
|
9
|
-
endpoint = os.getenv("DOTTING_ENDPOINT", "")
|
10
|
-
accessKeyId = os.getenv("DOTTING_ACCESS_KEY", "")
|
11
|
-
accessKey = os.getenv("DOTTING_SECRET_KEY", "")
|
12
|
-
self.client = LogClient(endpoint=endpoint, accessKeyId=accessKeyId, accessKey=accessKey) \
|
13
|
-
if endpoint and accessKeyId and accessKey else None
|
14
|
-
|
15
|
-
def build(self, topic, **kwargs):
|
16
|
-
if self.client:
|
17
|
-
temp = {}
|
18
|
-
log_items = []
|
19
|
-
log_item = LogItem()
|
20
|
-
for key, value in kwargs.items():
|
21
|
-
if not isinstance(value, str):
|
22
|
-
temp[key] = json.dumps(value, ensure_ascii=False)
|
23
|
-
else:
|
24
|
-
temp[key] = value
|
25
|
-
contents = sorted(temp.items())
|
26
|
-
log_item.set_contents(contents)
|
27
|
-
log_items.append(log_item)
|
28
|
-
request = PutLogsRequest(
|
29
|
-
project="databee-download-log",
|
30
|
-
logstore="download-logging",
|
31
|
-
topic=topic,
|
32
|
-
logitems=log_items,
|
33
|
-
compress=True
|
34
|
-
)
|
35
|
-
self.client.put_logs(request=request)
|
cobweb/launchers/launcher_air.py
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
# import time
|
2
|
-
#
|
3
|
-
# from cobweb.base import logger
|
4
|
-
# from cobweb.constant import LogTemplate
|
5
|
-
# from .launcher import Launcher, check_pause
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# class LauncherAir(Launcher):
|
9
|
-
#
|
10
|
-
# # def _scheduler(self):
|
11
|
-
# # if self.start_seeds:
|
12
|
-
# # self.__LAUNCHER_QUEUE__['todo'].push(self.start_seeds)
|
13
|
-
#
|
14
|
-
# @check_pause
|
15
|
-
# def _insert(self):
|
16
|
-
# seeds = {}
|
17
|
-
# status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
18
|
-
# for _ in range(self._new_queue_max_size):
|
19
|
-
# seed = self.__LAUNCHER_QUEUE__['new'].pop()
|
20
|
-
# if not seed:
|
21
|
-
# break
|
22
|
-
# seeds[seed.to_string] = seed.params.priority
|
23
|
-
# if seeds:
|
24
|
-
# self.__LAUNCHER_QUEUE__['todo'].push(seeds)
|
25
|
-
# if status:
|
26
|
-
# time.sleep(self._new_queue_wait_seconds)
|
27
|
-
#
|
28
|
-
# @check_pause
|
29
|
-
# def _delete(self):
|
30
|
-
# seeds = []
|
31
|
-
# status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
|
32
|
-
#
|
33
|
-
# for _ in range(self._done_queue_max_size):
|
34
|
-
# seed = self.__LAUNCHER_QUEUE__['done'].pop()
|
35
|
-
# if not seed:
|
36
|
-
# break
|
37
|
-
# seeds.append(seed.to_string)
|
38
|
-
#
|
39
|
-
# if seeds:
|
40
|
-
# self._remove_doing_seeds(seeds)
|
41
|
-
#
|
42
|
-
# if status:
|
43
|
-
# time.sleep(self._done_queue_wait_seconds)
|
44
|
-
#
|
45
|
-
# def _polling(self):
|
46
|
-
#
|
47
|
-
# check_emtpy_times = 0
|
48
|
-
#
|
49
|
-
# while not self._stop.is_set():
|
50
|
-
#
|
51
|
-
# queue_not_empty_count = 0
|
52
|
-
# pooling_wait_seconds = 30
|
53
|
-
#
|
54
|
-
# for q in self.__LAUNCHER_QUEUE__.values():
|
55
|
-
# if q.length != 0:
|
56
|
-
# queue_not_empty_count += 1
|
57
|
-
#
|
58
|
-
# if queue_not_empty_count == 0:
|
59
|
-
# pooling_wait_seconds = 3
|
60
|
-
# if self._pause.is_set():
|
61
|
-
# check_emtpy_times = 0
|
62
|
-
# if not self._task_model:
|
63
|
-
# logger.info("Done! Ready to close thread...")
|
64
|
-
# self._stop.set()
|
65
|
-
# elif check_emtpy_times > 2:
|
66
|
-
# self.__DOING__ = {}
|
67
|
-
# self._pause.set()
|
68
|
-
# else:
|
69
|
-
# logger.info(
|
70
|
-
# "check whether the task is complete, "
|
71
|
-
# f"reset times {3 - check_emtpy_times}"
|
72
|
-
# )
|
73
|
-
# check_emtpy_times += 1
|
74
|
-
# elif self._pause.is_set():
|
75
|
-
# self._pause.clear()
|
76
|
-
# self._execute()
|
77
|
-
# else:
|
78
|
-
# logger.info(LogTemplate.launcher_air_polling.format(
|
79
|
-
# task=self.task,
|
80
|
-
# doing_len=len(self.__DOING__.keys()),
|
81
|
-
# todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
|
82
|
-
# done_len=self.__LAUNCHER_QUEUE__['done'].length,
|
83
|
-
# upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
|
84
|
-
# ))
|
85
|
-
#
|
86
|
-
# time.sleep(pooling_wait_seconds)
|
87
|
-
#
|
88
|
-
#
|
cobweb/launchers/launcher_api.py
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
|
3
|
-
from cobweb.base import TaskQueue, Decorators, Seed, Request
|
4
|
-
from cobweb.schedulers import ApiScheduler
|
5
|
-
from .launcher import Launcher
|
6
|
-
|
7
|
-
|
8
|
-
class LauncherApi(Launcher):
|
9
|
-
|
10
|
-
def __init__(self, task, project, custom_setting=None, **kwargs):
|
11
|
-
super().__init__(task, project, custom_setting, **kwargs)
|
12
|
-
self._redis_download = "{%s:%s}:download" % (project, task)
|
13
|
-
self._redis_todo = "{%s:%s}:todo" % (project, task)
|
14
|
-
self._scheduler = ApiScheduler(task, project)
|
15
|
-
|
16
|
-
@Decorators.stop
|
17
|
-
def _schedule(self):
|
18
|
-
thread_sleep = self.scheduling_wait_time
|
19
|
-
for q, key, size, item_info, Cls in [
|
20
|
-
(TaskQueue.TODO, self._redis_todo, self.todo_queue_size, self._task_info["todo"], Seed),
|
21
|
-
(TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size, self._task_info["download"], Request),
|
22
|
-
]:
|
23
|
-
if q.length < size:
|
24
|
-
for member, priority in self._scheduler.schedule(key, self.scheduling_size):
|
25
|
-
item = Cls(member, priority=priority)
|
26
|
-
q.push(item)
|
27
|
-
self.add_working_item(key.split(":")[-1], item.seed, priority)
|
28
|
-
thread_sleep = 0.1
|
29
|
-
time.sleep(thread_sleep)
|
30
|
-
|
31
|
-
@Decorators.stop
|
32
|
-
def _heartbeat(self):
|
33
|
-
if self._scheduler.working.is_set():
|
34
|
-
self._scheduler.set_heartbeat()
|
35
|
-
time.sleep(3)
|
36
|
-
|
37
|
-
@Decorators.stop
|
38
|
-
def _reset(self):
|
39
|
-
self._scheduler.reset(
|
40
|
-
keys=[self._redis_todo, self._redis_download],
|
41
|
-
reset_time=self.seed_reset_seconds
|
42
|
-
)
|
43
|
-
time.sleep(30)
|
44
|
-
|
45
|
-
@Decorators.pause
|
46
|
-
def _insert(self):
|
47
|
-
thread_sleep = 0.1
|
48
|
-
for q, key, size in [
|
49
|
-
(TaskQueue.SEED, self._redis_todo, self.seed_queue_size),
|
50
|
-
(TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
|
51
|
-
]:
|
52
|
-
item_info = {}
|
53
|
-
while (item := q.pop()) and len(item_info.keys()) < self.inserting_size:
|
54
|
-
item_info[item.seed] = item.params.priority
|
55
|
-
if q.length >= size:
|
56
|
-
thread_sleep = self.inserting_wait_time
|
57
|
-
self._scheduler.insert(key, item_info)
|
58
|
-
time.sleep(thread_sleep)
|
59
|
-
|
60
|
-
@Decorators.pause
|
61
|
-
def _refresh(self):
|
62
|
-
self._scheduler.refresh(self._redis_todo, self._task_info["todo"])
|
63
|
-
self._scheduler.refresh(self._redis_download, self._task_info["download"])
|
64
|
-
time.sleep(10)
|
65
|
-
|
66
|
-
@Decorators.pause
|
67
|
-
def _remove(self):
|
68
|
-
thread_sleep = self.removing_wait_time
|
69
|
-
for q, key, size in [
|
70
|
-
(TaskQueue.DELETE, self._redis_todo, self.delete_queue_size),
|
71
|
-
(TaskQueue.DONE, self._redis_download, self.done_queue_size),
|
72
|
-
]:
|
73
|
-
items = []
|
74
|
-
while (item := q.pop()) and len(items) < self.removing_size:
|
75
|
-
items.append(item)
|
76
|
-
self._scheduler.delete(key, items)
|
77
|
-
self.remove_working_items(key.split(":")[-1], items)
|
78
|
-
if q.length >= size:
|
79
|
-
thread_sleep = 0.1
|
80
|
-
time.sleep(thread_sleep)
|
81
|
-
|
82
|
-
def _init_schedule_thread(self):
|
83
|
-
self._add_thread(func=self._heartbeat)
|
84
|
-
self._add_thread(func=self._reset)
|
85
|
-
self._add_thread(func=self._refresh)
|
86
|
-
self._add_thread(func=self._schedule)
|
87
|
-
self._add_thread(func=self._insert)
|
88
|
-
self._add_thread(func=self._remove)
|
89
|
-
# self._add_thread(func=self._polling)
|