cobweb-launcher 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/crawlers/base_crawler.py +12 -0
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.20.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.20.dist-info}/RECORD +6 -6
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.20.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.20.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.1.19.dist-info → cobweb_launcher-1.1.20.dist-info}/top_level.txt +0 -0
cobweb/crawlers/base_crawler.py
CHANGED
|
@@ -81,15 +81,22 @@ class Crawler(threading.Thread):
|
|
|
81
81
|
if not isgenerator(request_iterators):
|
|
82
82
|
raise TypeError("request function isn't a generator!")
|
|
83
83
|
|
|
84
|
+
iterator_status = False
|
|
85
|
+
|
|
84
86
|
for request_item in request_iterators:
|
|
85
87
|
|
|
88
|
+
iterator_status = True
|
|
89
|
+
|
|
86
90
|
if isinstance(request_item, Request):
|
|
91
|
+
iterator_status = False
|
|
87
92
|
download_iterators = self.download(request_item)
|
|
88
93
|
if not isgenerator(download_iterators):
|
|
89
94
|
raise TypeError("download function isn't a generator")
|
|
90
95
|
|
|
91
96
|
for download_item in download_iterators:
|
|
97
|
+
iterator_status = True
|
|
92
98
|
if isinstance(download_item, Response):
|
|
99
|
+
iterator_status = False
|
|
93
100
|
logger.info(LogTemplate.download_info.format(
|
|
94
101
|
detail=seed_detail_log_info,
|
|
95
102
|
retry=seed.params.retry,
|
|
@@ -103,6 +110,7 @@ class Crawler(threading.Thread):
|
|
|
103
110
|
if not isgenerator(parse_iterators):
|
|
104
111
|
raise TypeError("parse function isn't a generator")
|
|
105
112
|
for parse_item in parse_iterators:
|
|
113
|
+
iterator_status = True
|
|
106
114
|
if isinstance(parse_item, Response):
|
|
107
115
|
raise TypeError("upload_item can't be a Response instance")
|
|
108
116
|
self.distribute(parse_item, seed)
|
|
@@ -110,6 +118,10 @@ class Crawler(threading.Thread):
|
|
|
110
118
|
self.distribute(download_item, seed)
|
|
111
119
|
else:
|
|
112
120
|
self.distribute(request_item, seed)
|
|
121
|
+
|
|
122
|
+
if not iterator_status:
|
|
123
|
+
raise ValueError("request/download/parse function yield value error!")
|
|
124
|
+
|
|
113
125
|
except Exception as e:
|
|
114
126
|
logger.info(LogTemplate.download_exception.format(
|
|
115
127
|
detail=seed_detail_log_info,
|
|
@@ -10,7 +10,7 @@ cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
|
|
10
10
|
cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
11
11
|
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
|
12
12
|
cobweb/crawlers/__init__.py,sha256=Rr3DTjD-abMA1_FYcQJZYNvQvcWMuEVcsIU6duqHrw4,75
|
|
13
|
-
cobweb/crawlers/base_crawler.py,sha256=
|
|
13
|
+
cobweb/crawlers/base_crawler.py,sha256=8x7MF-GZOWntP4D8llKsOync5_pUganHf0dN2lzN2LA,5586
|
|
14
14
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
|
15
15
|
cobweb/db/__init__.py,sha256=ut0iEyBLjcJL06WNG_5_d4hO5PJWvDrKWMkDOdmgh2M,30
|
|
16
16
|
cobweb/db/redis_db.py,sha256=NNI2QkRV1hEZI-z-COEncXt88z3pZN6wusKlcQzc8V4,4304
|
|
@@ -25,8 +25,8 @@ cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzX
|
|
|
25
25
|
cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
|
|
26
26
|
cobweb/utils/oss.py,sha256=qAl05ybL2Jp6KFjHDHVMfmeBHQmDKPtZleHjHAY7LZc,3277
|
|
27
27
|
cobweb/utils/tools.py,sha256=bVd3iRGBvwhohQAH7AXTTjbmQ54Z35K0O-fatEyhePU,1249
|
|
28
|
-
cobweb_launcher-1.1.
|
|
29
|
-
cobweb_launcher-1.1.
|
|
30
|
-
cobweb_launcher-1.1.
|
|
31
|
-
cobweb_launcher-1.1.
|
|
32
|
-
cobweb_launcher-1.1.
|
|
28
|
+
cobweb_launcher-1.1.20.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
29
|
+
cobweb_launcher-1.1.20.dist-info/METADATA,sha256=ZYpoRx19wG_i5ssXYkgpjXPVRXNV3-aOZO2MMIKluVY,1246
|
|
30
|
+
cobweb_launcher-1.1.20.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
31
|
+
cobweb_launcher-1.1.20.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
32
|
+
cobweb_launcher-1.1.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|