cobweb-launcher 1.2.60__py3-none-any.whl → 1.2.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/crawlers/crawler.py +1 -1
- cobweb/launchers/launcher.py +2 -2
- cobweb/launchers/launcher_air.py +11 -6
- cobweb/launchers/launcher_api.py +12 -6
- cobweb/launchers/launcher_pro.py +12 -6
- cobweb/utils/oss.py +22 -3
- {cobweb_launcher-1.2.60.dist-info → cobweb_launcher-1.2.62.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.2.60.dist-info → cobweb_launcher-1.2.62.dist-info}/RECORD +11 -11
- {cobweb_launcher-1.2.60.dist-info → cobweb_launcher-1.2.62.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.60.dist-info → cobweb_launcher-1.2.62.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.2.60.dist-info → cobweb_launcher-1.2.62.dist-info}/top_level.txt +0 -0
cobweb/crawlers/crawler.py
CHANGED
@@ -83,7 +83,7 @@ class Crawler(threading.Thread):
|
|
83
83
|
if isinstance(item, BaseItem):
|
84
84
|
self._upload_data(item)
|
85
85
|
elif isinstance(item, Seed):
|
86
|
-
self._add_seed(item)
|
86
|
+
self._add_seed((seed, item))
|
87
87
|
elif isinstance(item, str) and item == DealModel.poll:
|
88
88
|
self._set_seed(seed)
|
89
89
|
elif isinstance(item, str) and item == DealModel.done:
|
cobweb/launchers/launcher.py
CHANGED
@@ -173,8 +173,8 @@ class Launcher(threading.Thread):
|
|
173
173
|
def _upload_data(self, data, **kwargs):
|
174
174
|
self.__LAUNCHER_QUEUE__["upload"].push(data, **kwargs)
|
175
175
|
|
176
|
-
def _add_seed(self,
|
177
|
-
self.__LAUNCHER_QUEUE__["new"].push(
|
176
|
+
def _add_seed(self, seeds, **kwargs):
|
177
|
+
self.__LAUNCHER_QUEUE__["new"].push(seeds, direct_insertion=True, **kwargs)
|
178
178
|
|
179
179
|
def _delete_seed(self, seed, **kwargs):
|
180
180
|
self.__LAUNCHER_QUEUE__["done"].push(seed, **kwargs)
|
cobweb/launchers/launcher_air.py
CHANGED
@@ -13,15 +13,20 @@ class LauncherAir(Launcher):
|
|
13
13
|
|
14
14
|
@check_pause
|
15
15
|
def _insert(self):
|
16
|
-
|
16
|
+
new_seeds = {}
|
17
|
+
del_seeds = set()
|
17
18
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
18
19
|
for _ in range(self._new_queue_max_size):
|
19
|
-
|
20
|
-
if not
|
20
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
21
|
+
if not seed_tuple:
|
21
22
|
break
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
seed, new_seed = seed_tuple
|
24
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
25
|
+
del_seeds.add(seed.to_string)
|
26
|
+
if new_seeds:
|
27
|
+
self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
|
28
|
+
if del_seeds:
|
29
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
25
30
|
if status:
|
26
31
|
time.sleep(self._new_queue_wait_seconds)
|
27
32
|
|
cobweb/launchers/launcher_api.py
CHANGED
@@ -106,14 +106,20 @@ class LauncherApi(Launcher):
|
|
106
106
|
"""
|
107
107
|
添加新种子到redis队列中
|
108
108
|
"""
|
109
|
-
|
109
|
+
new_seeds = {}
|
110
|
+
del_seeds = set()
|
110
111
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
111
112
|
for _ in range(self._new_queue_max_size):
|
112
|
-
|
113
|
-
if
|
114
|
-
|
115
|
-
|
116
|
-
|
113
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
114
|
+
if not seed_tuple:
|
115
|
+
break
|
116
|
+
seed, new_seed = seed_tuple
|
117
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
118
|
+
del_seeds.add(seed.to_string)
|
119
|
+
if new_seeds:
|
120
|
+
self._db.zadd(self._todo_key, new_seeds, nx=True)
|
121
|
+
if del_seeds:
|
122
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
117
123
|
if status:
|
118
124
|
time.sleep(self._new_queue_wait_seconds)
|
119
125
|
|
cobweb/launchers/launcher_pro.py
CHANGED
@@ -108,14 +108,20 @@ class LauncherPro(Launcher):
|
|
108
108
|
"""
|
109
109
|
添加新种子到redis队列中
|
110
110
|
"""
|
111
|
-
|
111
|
+
new_seeds = {}
|
112
|
+
del_seeds = set()
|
112
113
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
113
114
|
for _ in range(self._new_queue_max_size):
|
114
|
-
|
115
|
-
if
|
116
|
-
|
117
|
-
|
118
|
-
|
115
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
116
|
+
if not seed_tuple:
|
117
|
+
break
|
118
|
+
seed, new_seed = seed_tuple
|
119
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
120
|
+
del_seeds.add(seed.to_string)
|
121
|
+
if new_seeds:
|
122
|
+
self._db.zadd(self._todo_key, new_seeds, nx=True)
|
123
|
+
if del_seeds:
|
124
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
119
125
|
if status:
|
120
126
|
time.sleep(self._new_queue_wait_seconds)
|
121
127
|
|
cobweb/utils/oss.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
from cobweb import setting
|
3
3
|
from requests import Response
|
4
4
|
from oss2 import Auth, Bucket, models, PartIterator
|
@@ -23,6 +23,9 @@ class OssUtil:
|
|
23
23
|
self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
|
24
24
|
self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
|
25
25
|
|
26
|
+
self.failed_count = 0
|
27
|
+
self._kw = kwargs
|
28
|
+
|
26
29
|
self._auth = Auth(
|
27
30
|
access_key_id=access_key or setting.OSS_ACCESS_KEY,
|
28
31
|
access_key_secret=secret_key or setting.OSS_SECRET_KEY
|
@@ -31,11 +34,27 @@ class OssUtil:
|
|
31
34
|
auth=self._auth,
|
32
35
|
endpoint=self.endpoint,
|
33
36
|
bucket_name=self.bucket,
|
34
|
-
**
|
37
|
+
**self._kw
|
35
38
|
)
|
36
39
|
|
40
|
+
def failed(self):
|
41
|
+
self.failed_count += 1
|
42
|
+
if self.failed_count >= 5:
|
43
|
+
self._client = Bucket(
|
44
|
+
auth=self._auth,
|
45
|
+
endpoint=self.endpoint,
|
46
|
+
bucket_name=self.bucket,
|
47
|
+
**self._kw
|
48
|
+
)
|
49
|
+
|
37
50
|
def exists(self, key: str) -> bool:
|
38
|
-
|
51
|
+
try:
|
52
|
+
result = self._client.object_exists(key)
|
53
|
+
self.failed_count = 0
|
54
|
+
return result
|
55
|
+
except Exception as e:
|
56
|
+
self.failed()
|
57
|
+
raise e
|
39
58
|
|
40
59
|
def head(self, key: str) -> models.HeadObjectResult:
|
41
60
|
return self._client.head_object(key)
|
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
13
13
|
cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
|
14
14
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
15
|
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=
|
16
|
+
cobweb/crawlers/crawler.py,sha256=kbpgBllmA2ve3Hp-XvVH89t2q5G39i-m7it5xJ1p1WE,8973
|
17
17
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
18
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
19
|
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
@@ -22,10 +22,10 @@ cobweb/db/redis_db_new.py,sha256=F09LWVjtC2JFdCaKatZ2bAOLKbsnes85_nZRe2dtSIc,469
|
|
22
22
|
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
23
23
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
24
24
|
cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
|
25
|
-
cobweb/launchers/launcher.py,sha256=
|
26
|
-
cobweb/launchers/launcher_air.py,sha256=
|
27
|
-
cobweb/launchers/launcher_api.py,sha256=
|
28
|
-
cobweb/launchers/launcher_pro.py,sha256=
|
25
|
+
cobweb/launchers/launcher.py,sha256=aR1cnQymjQQUJe-W_dRoQTd9t3Qav9QkLDzeWW7i0xE,7921
|
26
|
+
cobweb/launchers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
|
27
|
+
cobweb/launchers/launcher_api.py,sha256=RlWuGSb_izevqJBVlyi_D8mlUXMKReTSOHTHHMEn8FE,8453
|
28
|
+
cobweb/launchers/launcher_pro.py,sha256=Jn5T89eWZafr4wGTllqrXB5IDyZ8G9hiuk7dW3Xl1VA,8395
|
29
29
|
cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
|
30
30
|
cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
|
31
31
|
cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
|
@@ -36,10 +36,10 @@ cobweb/schedulers/scheduler_redis.py,sha256=E5fjc3nNld8GbUhUGT7uY4smRejj2J2ZIzp2
|
|
36
36
|
cobweb/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
|
37
37
|
cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
38
38
|
cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
|
39
|
-
cobweb/utils/oss.py,sha256=
|
39
|
+
cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
|
40
40
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
41
|
-
cobweb_launcher-1.2.
|
42
|
-
cobweb_launcher-1.2.
|
43
|
-
cobweb_launcher-1.2.
|
44
|
-
cobweb_launcher-1.2.
|
45
|
-
cobweb_launcher-1.2.
|
41
|
+
cobweb_launcher-1.2.62.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
42
|
+
cobweb_launcher-1.2.62.dist-info/METADATA,sha256=fgQXgBFjxcRQdCLc4m_2qmM11V8QHleCbROVG2dPIFo,6510
|
43
|
+
cobweb_launcher-1.2.62.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
44
|
+
cobweb_launcher-1.2.62.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
45
|
+
cobweb_launcher-1.2.62.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|