cobweb-launcher 1.1.14__tar.gz → 1.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.1.14/cobweb_launcher.egg-info → cobweb-launcher-1.1.15}/PKG-INFO +1 -1
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/crawlers/base_crawler.py +19 -20
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/setup.py +1 -1
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/LICENSE +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/README.md +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/launchers/launcher.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/launchers/launcher_pro.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/pipelines/base_pipeline.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/pipelines/loghub_pipeline.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/setup.cfg +0 -0
|
@@ -75,24 +75,23 @@ class Crawler(threading.Thread):
|
|
|
75
75
|
|
|
76
76
|
seed_detail_log_info = download_log_info(seed.to_dict)
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
try:
|
|
79
|
+
request_iterators = self.request(seed)
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
if not isgenerator(request_iterators):
|
|
82
|
+
raise TypeError("request function isn't a generator!")
|
|
82
83
|
|
|
83
|
-
|
|
84
|
+
for request_item in request_iterators:
|
|
84
85
|
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
if isinstance(request_item, BaseItem):
|
|
87
|
+
self.upload_queue.push(request_item)
|
|
87
88
|
|
|
88
|
-
|
|
89
|
-
try:
|
|
89
|
+
elif isinstance(request_item, Request):
|
|
90
90
|
download_iterators = self.download(request_item)
|
|
91
91
|
if not isgenerator(download_iterators):
|
|
92
92
|
raise TypeError("download function isn't a generator")
|
|
93
93
|
for download_item in download_iterators:
|
|
94
94
|
if isinstance(download_item, Response):
|
|
95
|
-
response_detail_log_info = download_log_info(download_item.to_dict)
|
|
96
95
|
logger.info(LogTemplate.download_info.format(
|
|
97
96
|
detail=seed_detail_log_info,
|
|
98
97
|
retry=seed.params.retry,
|
|
@@ -100,7 +99,7 @@ class Crawler(threading.Thread):
|
|
|
100
99
|
seed_version=seed.params.seed_version,
|
|
101
100
|
identifier=seed.identifier or "",
|
|
102
101
|
status=download_item.response,
|
|
103
|
-
response=
|
|
102
|
+
response=download_log_info(download_item.to_dict)
|
|
104
103
|
))
|
|
105
104
|
parse_iterators = self.parse(download_item)
|
|
106
105
|
if not isgenerator(parse_iterators):
|
|
@@ -111,16 +110,16 @@ class Crawler(threading.Thread):
|
|
|
111
110
|
self.distribute(parse_item, seed)
|
|
112
111
|
else:
|
|
113
112
|
self.distribute(download_item, seed)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
113
|
+
except Exception as e:
|
|
114
|
+
logger.info(LogTemplate.download_exception.format(
|
|
115
|
+
detail=seed_detail_log_info,
|
|
116
|
+
retry=seed.params.retry,
|
|
117
|
+
priority=seed.params.priority,
|
|
118
|
+
seed_version=seed.params.seed_version,
|
|
119
|
+
identifier=seed.identifier or "", exception=e
|
|
120
|
+
))
|
|
121
|
+
seed.params.retry += 1
|
|
122
|
+
self.launcher_queue['todo'].push(seed)
|
|
124
123
|
|
|
125
124
|
def run(self):
|
|
126
125
|
for index in range(self.spider_thread_num):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.1.14 → cobweb-launcher-1.1.15}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|