cobweb-launcher: cobweb_launcher-1.1.4-py3-none-any.whl → cobweb_launcher-1.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/crawlers/base_crawler.py +23 -18
- cobweb/db/redis_db.py +3 -2
- cobweb/launchers/launcher_pro.py +17 -16
- {cobweb_launcher-1.1.4.dist-info → cobweb_launcher-1.1.6.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.1.4.dist-info → cobweb_launcher-1.1.6.dist-info}/RECORD +8 -8
- {cobweb_launcher-1.1.4.dist-info → cobweb_launcher-1.1.6.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.1.4.dist-info → cobweb_launcher-1.1.6.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.1.4.dist-info → cobweb_launcher-1.1.6.dist-info}/top_level.txt +0 -0
cobweb/crawlers/base_crawler.py
CHANGED
|
@@ -58,18 +58,18 @@ class Crawler(threading.Thread):
|
|
|
58
58
|
self.launcher_queue['done'].push(seed)
|
|
59
59
|
continue
|
|
60
60
|
|
|
61
|
-
|
|
61
|
+
seed_detail_log_info = download_log_info(seed.to_dict)
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
try:
|
|
64
|
+
item = self.request(seed)
|
|
64
65
|
|
|
65
|
-
|
|
66
|
+
if isinstance(item, Request):
|
|
66
67
|
|
|
67
|
-
|
|
68
|
-
raise TypeError("download function isn't a generator")
|
|
68
|
+
download_iterators = self.download(item)
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
if not isgenerator(download_iterators):
|
|
71
|
+
raise TypeError("download function isn't a generator")
|
|
71
72
|
|
|
72
|
-
try:
|
|
73
73
|
for it in download_iterators:
|
|
74
74
|
if isinstance(it, Response):
|
|
75
75
|
response_detail_log_info = download_log_info(it.to_dict)
|
|
@@ -104,17 +104,22 @@ class Crawler(threading.Thread):
|
|
|
104
104
|
else:
|
|
105
105
|
raise TypeError("yield value type error!")
|
|
106
106
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
107
|
+
elif isinstance(item, BaseItem):
|
|
108
|
+
self.upload_queue.push(item)
|
|
109
|
+
else:
|
|
110
|
+
raise TypeError(
|
|
111
|
+
f"request func return value type error!"
|
|
112
|
+
f"item.__class__ is {item.__class__.__name__}"
|
|
113
|
+
)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
logger.info(LogTemplate.download_exception.format(
|
|
116
|
+
detail=seed_detail_log_info, retry=seed.params.retry,
|
|
117
|
+
priority=seed.params.priority, seed_version=seed.params.seed_version,
|
|
118
|
+
identifier=seed.params.identifier, exception=e
|
|
119
|
+
))
|
|
120
|
+
seed.params.retry += 1
|
|
121
|
+
self.launcher_queue['todo'].push(seed)
|
|
122
|
+
|
|
118
123
|
|
|
119
124
|
def run(self):
|
|
120
125
|
for index in range(self.spider_thread_num):
|
cobweb/db/redis_db.py
CHANGED
|
@@ -4,8 +4,9 @@ from cobweb import setting
|
|
|
4
4
|
|
|
5
5
|
class RedisDB:
|
|
6
6
|
|
|
7
|
-
def __init__(self):
|
|
8
|
-
|
|
7
|
+
def __init__(self, **kwargs):
|
|
8
|
+
redis_config = kwargs or setting.REDIS_CONFIG
|
|
9
|
+
pool = redis.ConnectionPool(**redis_config)
|
|
9
10
|
self._client = redis.Redis(connection_pool=pool)
|
|
10
11
|
|
|
11
12
|
def setnx(self, name, value=""):
|
cobweb/launchers/launcher_pro.py
CHANGED
|
@@ -140,22 +140,23 @@ class LauncherPro(Launcher):
|
|
|
140
140
|
while not self._stop.is_set():
|
|
141
141
|
queue_not_empty_count = 0
|
|
142
142
|
pooling_wait_seconds = 30
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
143
|
+
for q in self.__LAUNCHER_QUEUE__.values():
|
|
144
|
+
if q.length != 0:
|
|
145
|
+
queue_not_empty_count += 1
|
|
146
|
+
if self._pause.is_set():
|
|
147
|
+
self._pause.clear()
|
|
148
|
+
self._execute()
|
|
149
|
+
elif queue_not_empty_count == 0:
|
|
150
|
+
pooling_wait_seconds = 3
|
|
151
|
+
check_emtpy_times += 1
|
|
152
|
+
else:
|
|
153
|
+
check_emtpy_times = 0
|
|
154
|
+
|
|
155
|
+
if not self._db.zcard(self._todo) and check_emtpy_times > 2:
|
|
156
|
+
check_emtpy_times = 0
|
|
157
|
+
self.__DOING__ = {}
|
|
158
|
+
self._pause.set()
|
|
159
|
+
|
|
159
160
|
if not self._pause.is_set():
|
|
160
161
|
logger.info(LogTemplate.launcher_pro_polling.format(
|
|
161
162
|
task=self.task,
|
|
@@ -10,23 +10,23 @@ cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
|
|
10
10
|
cobweb/base/response.py,sha256=7h9TwCNqRlwM_fvNmid9zOoRfHbKB8ABSU0eaVUJdVo,405
|
|
11
11
|
cobweb/base/seed.py,sha256=XswH16eEd6iwIBpt71E2S_AsV5UVCcOEOBFoP0r5QRo,2900
|
|
12
12
|
cobweb/crawlers/__init__.py,sha256=_HAXBg7Sq8fsDGSjDm3AQz9aQtLZONpt5b8dSe607mI,91
|
|
13
|
-
cobweb/crawlers/base_crawler.py,sha256=
|
|
13
|
+
cobweb/crawlers/base_crawler.py,sha256=uR1wQ2sJpFovNoAK52293rF03O-jNbv24P5QoNt1tW0,5169
|
|
14
14
|
cobweb/crawlers/file_crawler.py,sha256=LTiHaxhEiJyiAGgodO3an8AYf_y88AeMoFcKae3Vx_M,8381
|
|
15
15
|
cobweb/db/__init__.py,sha256=ut0iEyBLjcJL06WNG_5_d4hO5PJWvDrKWMkDOdmgh2M,30
|
|
16
|
-
cobweb/db/redis_db.py,sha256=
|
|
16
|
+
cobweb/db/redis_db.py,sha256=NNI2QkRV1hEZI-z-COEncXt88z3pZN6wusKlcQzc8V4,4304
|
|
17
17
|
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
|
18
18
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
|
19
19
|
cobweb/launchers/__init__.py,sha256=qwlkEJVri7dvCgi45aX3lqAmQS0HrPicAipDvH75kew,69
|
|
20
20
|
cobweb/launchers/launcher.py,sha256=O6Kkvqk-0kOxJom8YO9zW18e_2eMYrA5RTS9Xy4TW5k,5665
|
|
21
|
-
cobweb/launchers/launcher_pro.py,sha256=
|
|
21
|
+
cobweb/launchers/launcher_pro.py,sha256=2N_GU3PVv1CP-ZBPOhh2xAm_KkPHx4SVC0JPW1t3JB4,6550
|
|
22
22
|
cobweb/pipelines/__init__.py,sha256=xanY-Z1d7zRR5JhCdW2htzrAywnKBkigiaUlTFa6of0,80
|
|
23
23
|
cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
|
|
24
24
|
cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
|
|
25
25
|
cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
|
|
26
26
|
cobweb/utils/oss.py,sha256=uD5aN2oVYImit3amE6TjxWMaTAcbAh9dCnpIQhf4M9Q,3238
|
|
27
27
|
cobweb/utils/tools.py,sha256=bVd3iRGBvwhohQAH7AXTTjbmQ54Z35K0O-fatEyhePU,1249
|
|
28
|
-
cobweb_launcher-1.1.
|
|
29
|
-
cobweb_launcher-1.1.
|
|
30
|
-
cobweb_launcher-1.1.
|
|
31
|
-
cobweb_launcher-1.1.
|
|
32
|
-
cobweb_launcher-1.1.
|
|
28
|
+
cobweb_launcher-1.1.6.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
29
|
+
cobweb_launcher-1.1.6.dist-info/METADATA,sha256=U7bo6LBCeF5nZMKm7zUNyUHt40cNvlkhr2lgK7LSkQQ,1245
|
|
30
|
+
cobweb_launcher-1.1.6.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
31
|
+
cobweb_launcher-1.1.6.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
32
|
+
cobweb_launcher-1.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|