cobweb-launcher 1.1.19__py3-none-any.whl → 1.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/crawlers/base_crawler.py +15 -0
- cobweb/launchers/launcher_pro.py +3 -2
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.21.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.21.dist-info}/RECORD +7 -7
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.21.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.21.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.21.dist-info}/top_level.txt +0 -0
cobweb/crawlers/base_crawler.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import threading
|
|
2
|
+
import time
|
|
2
3
|
|
|
3
4
|
from inspect import isgenerator
|
|
4
5
|
from typing import Union, Callable, Mapping
|
|
@@ -81,15 +82,22 @@ class Crawler(threading.Thread):
|
|
|
81
82
|
if not isgenerator(request_iterators):
|
|
82
83
|
raise TypeError("request function isn't a generator!")
|
|
83
84
|
|
|
85
|
+
iterator_status = False
|
|
86
|
+
|
|
84
87
|
for request_item in request_iterators:
|
|
85
88
|
|
|
89
|
+
iterator_status = True
|
|
90
|
+
|
|
86
91
|
if isinstance(request_item, Request):
|
|
92
|
+
iterator_status = False
|
|
87
93
|
download_iterators = self.download(request_item)
|
|
88
94
|
if not isgenerator(download_iterators):
|
|
89
95
|
raise TypeError("download function isn't a generator")
|
|
90
96
|
|
|
91
97
|
for download_item in download_iterators:
|
|
98
|
+
iterator_status = True
|
|
92
99
|
if isinstance(download_item, Response):
|
|
100
|
+
iterator_status = False
|
|
93
101
|
logger.info(LogTemplate.download_info.format(
|
|
94
102
|
detail=seed_detail_log_info,
|
|
95
103
|
retry=seed.params.retry,
|
|
@@ -103,6 +111,7 @@ class Crawler(threading.Thread):
|
|
|
103
111
|
if not isgenerator(parse_iterators):
|
|
104
112
|
raise TypeError("parse function isn't a generator")
|
|
105
113
|
for parse_item in parse_iterators:
|
|
114
|
+
iterator_status = True
|
|
106
115
|
if isinstance(parse_item, Response):
|
|
107
116
|
raise TypeError("upload_item can't be a Response instance")
|
|
108
117
|
self.distribute(parse_item, seed)
|
|
@@ -110,6 +119,10 @@ class Crawler(threading.Thread):
|
|
|
110
119
|
self.distribute(download_item, seed)
|
|
111
120
|
else:
|
|
112
121
|
self.distribute(request_item, seed)
|
|
122
|
+
|
|
123
|
+
if not iterator_status:
|
|
124
|
+
raise ValueError("request/download/parse function yield value error!")
|
|
125
|
+
|
|
113
126
|
except Exception as e:
|
|
114
127
|
logger.info(LogTemplate.download_exception.format(
|
|
115
128
|
detail=seed_detail_log_info,
|
|
@@ -120,6 +133,8 @@ class Crawler(threading.Thread):
|
|
|
120
133
|
))
|
|
121
134
|
seed.params.retry += 1
|
|
122
135
|
self.launcher_queue['todo'].push(seed)
|
|
136
|
+
finally:
|
|
137
|
+
time.sleep(0.1)
|
|
123
138
|
|
|
124
139
|
def run(self):
|
|
125
140
|
for index in range(self.spider_thread_num):
|
cobweb/launchers/launcher_pro.py
CHANGED
|
@@ -125,12 +125,13 @@ class LauncherPro(Launcher):
|
|
|
125
125
|
seeds.append(seed.to_string)
|
|
126
126
|
if seeds:
|
|
127
127
|
self._db.zrem(self._todo, *seeds)
|
|
128
|
+
self._remove_doing_seeds(seeds)
|
|
128
129
|
if s_seeds:
|
|
129
130
|
self._db.done([self._todo, self._done], *s_seeds)
|
|
131
|
+
self._remove_doing_seeds(s_seeds)
|
|
130
132
|
if f_seeds:
|
|
131
133
|
self._db.done([self._todo, self._fail], *f_seeds)
|
|
132
|
-
|
|
133
|
-
self._remove_doing_seeds(seeds)
|
|
134
|
+
self._remove_doing_seeds(f_seeds)
|
|
134
135
|
|
|
135
136
|
if status:
|
|
136
137
|
time.sleep(self._done_queue_wait_seconds)
|
|
@@ -10,7 +10,7 @@ cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
|
|
10
10
|
cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
11
11
|
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
|
12
12
|
cobweb/crawlers/__init__.py,sha256=Rr3DTjD-abMA1_FYcQJZYNvQvcWMuEVcsIU6duqHrw4,75
|
|
13
|
-
cobweb/crawlers/base_crawler.py,sha256=
|
|
13
|
+
cobweb/crawlers/base_crawler.py,sha256=05uIIPs_1RdRKPfEcS8zAwgXbBODOJqAJUjWsMw7jQo,5651
|
|
14
14
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
|
15
15
|
cobweb/db/__init__.py,sha256=ut0iEyBLjcJL06WNG_5_d4hO5PJWvDrKWMkDOdmgh2M,30
|
|
16
16
|
cobweb/db/redis_db.py,sha256=NNI2QkRV1hEZI-z-COEncXt88z3pZN6wusKlcQzc8V4,4304
|
|
@@ -18,15 +18,15 @@ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk
|
|
|
18
18
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
|
19
19
|
cobweb/launchers/__init__.py,sha256=qwlkEJVri7dvCgi45aX3lqAmQS0HrPicAipDvH75kew,69
|
|
20
20
|
cobweb/launchers/launcher.py,sha256=LQdlpaF4fafEX01_3B9CB6hD-6YMPag6QOGl9rDprNE,5707
|
|
21
|
-
cobweb/launchers/launcher_pro.py,sha256=
|
|
21
|
+
cobweb/launchers/launcher_pro.py,sha256=VH_HnSQGAzxod6aFB3ZySi6-5Yp4vI-VE3u1XC1dfWg,6697
|
|
22
22
|
cobweb/pipelines/__init__.py,sha256=xanY-Z1d7zRR5JhCdW2htzrAywnKBkigiaUlTFa6of0,80
|
|
23
23
|
cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
|
|
24
24
|
cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
|
|
25
25
|
cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
|
|
26
26
|
cobweb/utils/oss.py,sha256=qAl05ybL2Jp6KFjHDHVMfmeBHQmDKPtZleHjHAY7LZc,3277
|
|
27
27
|
cobweb/utils/tools.py,sha256=bVd3iRGBvwhohQAH7AXTTjbmQ54Z35K0O-fatEyhePU,1249
|
|
28
|
-
cobweb_launcher-1.1.
|
|
29
|
-
cobweb_launcher-1.1.
|
|
30
|
-
cobweb_launcher-1.1.
|
|
31
|
-
cobweb_launcher-1.1.
|
|
32
|
-
cobweb_launcher-1.1.
|
|
28
|
+
cobweb_launcher-1.1.21.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
29
|
+
cobweb_launcher-1.1.21.dist-info/METADATA,sha256=jPQSJUuw_07x61hjH52AhxralQQjb8yDVnR0ajtccmU,1246
|
|
30
|
+
cobweb_launcher-1.1.21.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
31
|
+
cobweb_launcher-1.1.21.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
32
|
+
cobweb_launcher-1.1.21.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|