cobweb-launcher 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- cobweb/__init__.py +1 -0
- cobweb/constant.py +6 -0
- cobweb/crawlers/__init__.py +1 -1
- cobweb/crawlers/base_crawler.py +23 -18
- cobweb/db/redis_db.py +3 -2
- {cobweb_launcher-1.1.3.dist-info → cobweb_launcher-1.1.5.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.1.3.dist-info → cobweb_launcher-1.1.5.dist-info}/RECORD +10 -10
- {cobweb_launcher-1.1.3.dist-info → cobweb_launcher-1.1.5.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.1.3.dist-info → cobweb_launcher-1.1.5.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.1.3.dist-info → cobweb_launcher-1.1.5.dist-info}/top_level.txt +0 -0
cobweb/__init__.py
CHANGED
cobweb/constant.py
CHANGED
cobweb/crawlers/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
from .base_crawler import Crawler
|
|
2
|
-
from .file_crawler import FileCrawlerAir
|
|
2
|
+
from .file_crawler import FileCrawlerAir, FileCrawlerPro
|
cobweb/crawlers/base_crawler.py
CHANGED
|
@@ -58,18 +58,18 @@ class Crawler(threading.Thread):
|
|
|
58
58
|
self.launcher_queue['done'].push(seed)
|
|
59
59
|
continue
|
|
60
60
|
|
|
61
|
-
|
|
61
|
+
seed_detail_log_info = download_log_info(seed.to_dict)
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
try:
|
|
64
|
+
item = self.request(seed)
|
|
64
65
|
|
|
65
|
-
|
|
66
|
+
if isinstance(item, Request):
|
|
66
67
|
|
|
67
|
-
|
|
68
|
-
raise TypeError("download function isn't a generator")
|
|
68
|
+
download_iterators = self.download(item)
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
if not isgenerator(download_iterators):
|
|
71
|
+
raise TypeError("download function isn't a generator")
|
|
71
72
|
|
|
72
|
-
try:
|
|
73
73
|
for it in download_iterators:
|
|
74
74
|
if isinstance(it, Response):
|
|
75
75
|
response_detail_log_info = download_log_info(it.to_dict)
|
|
@@ -104,17 +104,22 @@ class Crawler(threading.Thread):
|
|
|
104
104
|
else:
|
|
105
105
|
raise TypeError("yield value type error!")
|
|
106
106
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
107
|
+
elif isinstance(item, BaseItem):
|
|
108
|
+
self.upload_queue.push(item)
|
|
109
|
+
else:
|
|
110
|
+
raise TypeError(
|
|
111
|
+
f"request func return value type error!"
|
|
112
|
+
f"item.__class__ is {item.__class__.__name__}"
|
|
113
|
+
)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
logger.info(LogTemplate.download_exception.format(
|
|
116
|
+
detail=seed_detail_log_info, retry=seed.params.retry,
|
|
117
|
+
priority=seed.params.priority, seed_version=seed.params.seed_version,
|
|
118
|
+
identifier=seed.params.identifier, exception=e
|
|
119
|
+
))
|
|
120
|
+
seed.params.retry += 1
|
|
121
|
+
self.launcher_queue['todo'].push(seed)
|
|
122
|
+
|
|
118
123
|
|
|
119
124
|
def run(self):
|
|
120
125
|
for index in range(self.spider_thread_num):
|
cobweb/db/redis_db.py
CHANGED
|
@@ -4,8 +4,9 @@ from cobweb import setting
|
|
|
4
4
|
|
|
5
5
|
class RedisDB:
|
|
6
6
|
|
|
7
|
-
def __init__(self):
|
|
8
|
-
|
|
7
|
+
def __init__(self, **kwargs):
|
|
8
|
+
redis_config = kwargs or setting.REDIS_CONFIG
|
|
9
|
+
pool = redis.ConnectionPool(**redis_config)
|
|
9
10
|
self._client = redis.Redis(connection_pool=pool)
|
|
10
11
|
|
|
11
12
|
def setnx(self, name, value=""):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
cobweb/__init__.py,sha256=
|
|
2
|
-
cobweb/constant.py,sha256
|
|
1
|
+
cobweb/__init__.py,sha256=UjV-xEAOr7IDUEWp8mkGvbQlOxF8JfG677Ru_OkAZZc,75
|
|
2
|
+
cobweb/constant.py,sha256=-h7w2xN7WV9vrikN54TW4oSHUk0HruWpg9g2wjUoeDk,2020
|
|
3
3
|
cobweb/setting.py,sha256=MRjqm75EAhqFi_6We5piF_cTCfRJojRXDKkLhP3yhvo,1951
|
|
4
4
|
cobweb/base/__init__.py,sha256=diiK5MygQaWjlWNLbW6eUIg-93O6glMGC9WLNM5jyOc,209
|
|
5
5
|
cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
|
|
@@ -9,11 +9,11 @@ cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
|
|
|
9
9
|
cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
|
10
10
|
cobweb/base/response.py,sha256=7h9TwCNqRlwM_fvNmid9zOoRfHbKB8ABSU0eaVUJdVo,405
|
|
11
11
|
cobweb/base/seed.py,sha256=XswH16eEd6iwIBpt71E2S_AsV5UVCcOEOBFoP0r5QRo,2900
|
|
12
|
-
cobweb/crawlers/__init__.py,sha256=
|
|
13
|
-
cobweb/crawlers/base_crawler.py,sha256=
|
|
12
|
+
cobweb/crawlers/__init__.py,sha256=_HAXBg7Sq8fsDGSjDm3AQz9aQtLZONpt5b8dSe607mI,91
|
|
13
|
+
cobweb/crawlers/base_crawler.py,sha256=uR1wQ2sJpFovNoAK52293rF03O-jNbv24P5QoNt1tW0,5169
|
|
14
14
|
cobweb/crawlers/file_crawler.py,sha256=LTiHaxhEiJyiAGgodO3an8AYf_y88AeMoFcKae3Vx_M,8381
|
|
15
15
|
cobweb/db/__init__.py,sha256=ut0iEyBLjcJL06WNG_5_d4hO5PJWvDrKWMkDOdmgh2M,30
|
|
16
|
-
cobweb/db/redis_db.py,sha256=
|
|
16
|
+
cobweb/db/redis_db.py,sha256=NNI2QkRV1hEZI-z-COEncXt88z3pZN6wusKlcQzc8V4,4304
|
|
17
17
|
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
|
18
18
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
|
19
19
|
cobweb/launchers/__init__.py,sha256=qwlkEJVri7dvCgi45aX3lqAmQS0HrPicAipDvH75kew,69
|
|
@@ -25,8 +25,8 @@ cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzX
|
|
|
25
25
|
cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
|
|
26
26
|
cobweb/utils/oss.py,sha256=uD5aN2oVYImit3amE6TjxWMaTAcbAh9dCnpIQhf4M9Q,3238
|
|
27
27
|
cobweb/utils/tools.py,sha256=bVd3iRGBvwhohQAH7AXTTjbmQ54Z35K0O-fatEyhePU,1249
|
|
28
|
-
cobweb_launcher-1.1.
|
|
29
|
-
cobweb_launcher-1.1.
|
|
30
|
-
cobweb_launcher-1.1.
|
|
31
|
-
cobweb_launcher-1.1.
|
|
32
|
-
cobweb_launcher-1.1.
|
|
28
|
+
cobweb_launcher-1.1.5.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
|
29
|
+
cobweb_launcher-1.1.5.dist-info/METADATA,sha256=F5vvm0pngBeivTOVQ_srp-7epa3gHadaovtottKAIko,1245
|
|
30
|
+
cobweb_launcher-1.1.5.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
31
|
+
cobweb_launcher-1.1.5.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
|
32
|
+
cobweb_launcher-1.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|