cobweb-launcher 1.2.48__tar.gz → 1.2.50__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.2.48/cobweb_launcher.egg-info → cobweb-launcher-1.2.50}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/seed.py +10 -2
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/crawlers/crawler.py +8 -3
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/utils/dotting.py +0 -9
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/setup.py +1 -1
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/LICENSE +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/README.md +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/crawlers/base_crawler.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/launchers/launcher.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/launchers/launcher_api.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/launchers/launcher_pro.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/setup.cfg +0 -0
- {cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/test/test.py +0 -0
|
@@ -5,11 +5,13 @@ import hashlib
|
|
|
5
5
|
|
|
6
6
|
class SeedParams:
|
|
7
7
|
|
|
8
|
-
def __init__(self, retry, priority, seed_version, seed_status=None):
|
|
8
|
+
def __init__(self, retry, priority, seed_version, seed_status=None, proxy_type=None, proxy=None):
|
|
9
9
|
self.retry = retry or 0
|
|
10
10
|
self.priority = priority or 300
|
|
11
11
|
self.seed_version = seed_version or int(time.time())
|
|
12
12
|
self.seed_status = seed_status
|
|
13
|
+
self.proxy_type = proxy_type
|
|
14
|
+
self.proxy = proxy
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class Seed:
|
|
@@ -18,7 +20,9 @@ class Seed:
|
|
|
18
20
|
"retry",
|
|
19
21
|
"priority",
|
|
20
22
|
"seed_version",
|
|
21
|
-
"seed_status"
|
|
23
|
+
"seed_status",
|
|
24
|
+
"proxy_type",
|
|
25
|
+
"proxy"
|
|
22
26
|
]
|
|
23
27
|
|
|
24
28
|
def __init__(
|
|
@@ -29,6 +33,8 @@ class Seed:
|
|
|
29
33
|
priority=None,
|
|
30
34
|
seed_version=None,
|
|
31
35
|
seed_status=None,
|
|
36
|
+
proxy_type=None,
|
|
37
|
+
proxy=None,
|
|
32
38
|
**kwargs
|
|
33
39
|
):
|
|
34
40
|
if any(isinstance(seed, t) for t in (str, bytes)):
|
|
@@ -51,6 +57,8 @@ class Seed:
|
|
|
51
57
|
"priority": priority,
|
|
52
58
|
"seed_version": seed_version,
|
|
53
59
|
"seed_status": seed_status,
|
|
60
|
+
"proxy_type": proxy_type,
|
|
61
|
+
"proxy": proxy
|
|
54
62
|
}
|
|
55
63
|
|
|
56
64
|
if kwargs:
|
|
@@ -19,7 +19,6 @@ from cobweb.base import (
|
|
|
19
19
|
logger
|
|
20
20
|
)
|
|
21
21
|
from cobweb.utils import LoghubDot
|
|
22
|
-
proxy_type = os.getenv("PROXY_TYPE", "")
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class Crawler(threading.Thread):
|
|
@@ -149,7 +148,10 @@ class Crawler(threading.Thread):
|
|
|
149
148
|
topic=urlparse(download_item.response.request.url).netloc,
|
|
150
149
|
data_size=int(download_item.response.headers.get("content-length", 0)),
|
|
151
150
|
cost_time=end_time - start_time, status = 200,
|
|
152
|
-
url=download_item.response.url,
|
|
151
|
+
url=download_item.response.url,
|
|
152
|
+
proxy_type=seed.params.proxy_type,
|
|
153
|
+
proxy=seed.params.proxy,
|
|
154
|
+
project=self.project, task=self.task,
|
|
153
155
|
)
|
|
154
156
|
parse_iterators = self.parse(download_item)
|
|
155
157
|
if not isgenerator(parse_iterators):
|
|
@@ -177,7 +179,10 @@ class Crawler(threading.Thread):
|
|
|
177
179
|
topic=urlparse(url).netloc,
|
|
178
180
|
data_size=-1, cost_time=-1,
|
|
179
181
|
status=status, url=url,
|
|
180
|
-
proxy_type=proxy_type,
|
|
182
|
+
proxy_type=seed.params.proxy_type,
|
|
183
|
+
proxy=seed.params.proxy,
|
|
184
|
+
project=self.project,
|
|
185
|
+
task=self.task,
|
|
181
186
|
msg=exception_msg,
|
|
182
187
|
)
|
|
183
188
|
logger.info(LogTemplate.download_exception.format(
|
|
@@ -24,16 +24,7 @@ class LoghubDot:
|
|
|
24
24
|
temp[key] = value
|
|
25
25
|
contents = sorted(temp.items())
|
|
26
26
|
log_item.set_contents(contents)
|
|
27
|
-
# log_items.append(log_item)
|
|
28
|
-
# request = PutLogsRequest(
|
|
29
|
-
# project="databee-download-log",
|
|
30
|
-
# logstore="log",
|
|
31
|
-
# topic=topic,
|
|
32
|
-
# logitems=log_items,
|
|
33
|
-
# compress=True
|
|
34
|
-
# )
|
|
35
27
|
self.queue.push((topic, log_item), direct_insertion=True)
|
|
36
|
-
# self.client.put_logs(request=request)
|
|
37
28
|
|
|
38
29
|
def build_run(self):
|
|
39
30
|
while True:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.2.48 → cobweb-launcher-1.2.50}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|