cobweb-launcher 1.2.49__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/base/__init__.py +141 -4
- cobweb/base/basic.py +28 -82
- cobweb/base/common_queue.py +13 -0
- cobweb/base/dotting.py +1 -1
- cobweb/base/request.py +14 -2
- cobweb/base/seed.py +10 -6
- cobweb/constant.py +16 -0
- cobweb/crawlers/crawler.py +51 -181
- cobweb/db/redis_db.py +28 -0
- cobweb/launchers/__init__.py +2 -2
- cobweb/launchers/launcher.py +110 -141
- cobweb/launchers/launcher_api.py +66 -114
- cobweb/launchers/launcher_pro.py +76 -194
- cobweb/pipelines/base_pipeline.py +54 -0
- cobweb/pipelines/loghub_pipeline.py +34 -0
- cobweb/pipelines/pipeline.py +25 -49
- cobweb/schedulers/__init__.py +0 -2
- cobweb/schedulers/scheduler_redis.py +5 -8
- cobweb/setting.py +29 -6
- cobweb/utils/dotting.py +10 -42
- cobweb_/__init__.py +2 -0
- cobweb_/base/__init__.py +9 -0
- cobweb_/base/common_queue.py +30 -0
- cobweb_/base/decorators.py +40 -0
- cobweb_/base/item.py +46 -0
- cobweb_/base/log.py +94 -0
- cobweb_/base/request.py +82 -0
- cobweb_/base/response.py +23 -0
- cobweb_/base/seed.py +114 -0
- cobweb_/constant.py +94 -0
- cobweb_/crawlers/__init__.py +1 -0
- cobweb_/crawlers/crawler.py +184 -0
- cobweb_/db/__init__.py +2 -0
- cobweb_/db/api_db.py +82 -0
- cobweb_/db/redis_db.py +130 -0
- cobweb_/exceptions/__init__.py +1 -0
- cobweb_/exceptions/oss_db_exception.py +28 -0
- cobweb_/launchers/__init__.py +3 -0
- cobweb_/launchers/launcher.py +235 -0
- cobweb_/launchers/launcher_air.py +88 -0
- cobweb_/launchers/launcher_api.py +221 -0
- cobweb_/launchers/launcher_pro.py +222 -0
- cobweb_/pipelines/__init__.py +3 -0
- cobweb_/pipelines/pipeline.py +69 -0
- cobweb_/pipelines/pipeline_console.py +22 -0
- cobweb_/pipelines/pipeline_loghub.py +34 -0
- cobweb_/setting.py +74 -0
- cobweb_/utils/__init__.py +5 -0
- cobweb_/utils/bloom.py +58 -0
- cobweb_/utils/dotting.py +32 -0
- cobweb_/utils/oss.py +94 -0
- cobweb_/utils/tools.py +42 -0
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/METADATA +1 -1
- cobweb_launcher-1.3.2.dist-info/RECORD +110 -0
- cobweb_launcher-1.3.2.dist-info/top_level.txt +2 -0
- cobweb_new/__init__.py +2 -0
- cobweb_new/base/__init__.py +72 -0
- cobweb_new/base/common_queue.py +53 -0
- cobweb_new/base/decorators.py +72 -0
- cobweb_new/base/item.py +46 -0
- cobweb_new/base/log.py +94 -0
- cobweb_new/base/request.py +82 -0
- cobweb_new/base/response.py +23 -0
- cobweb_new/base/seed.py +118 -0
- cobweb_new/constant.py +105 -0
- cobweb_new/crawlers/__init__.py +1 -0
- cobweb_new/crawlers/crawler-new.py +85 -0
- cobweb_new/crawlers/crawler.py +170 -0
- cobweb_new/db/__init__.py +2 -0
- cobweb_new/db/api_db.py +82 -0
- cobweb_new/db/redis_db.py +158 -0
- cobweb_new/exceptions/__init__.py +1 -0
- cobweb_new/exceptions/oss_db_exception.py +28 -0
- cobweb_new/launchers/__init__.py +3 -0
- cobweb_new/launchers/launcher.py +237 -0
- cobweb_new/launchers/launcher_air.py +88 -0
- cobweb_new/launchers/launcher_api.py +161 -0
- cobweb_new/launchers/launcher_pro.py +96 -0
- cobweb_new/launchers/tesss.py +47 -0
- cobweb_new/pipelines/__init__.py +3 -0
- cobweb_new/pipelines/pipeline.py +68 -0
- cobweb_new/pipelines/pipeline_console.py +22 -0
- cobweb_new/pipelines/pipeline_loghub.py +34 -0
- cobweb_new/setting.py +95 -0
- cobweb_new/utils/__init__.py +5 -0
- cobweb_new/utils/bloom.py +58 -0
- cobweb_new/utils/oss.py +94 -0
- cobweb_new/utils/tools.py +42 -0
- cobweb/schedulers/scheduler_api.py +0 -72
- cobweb_launcher-1.2.49.dist-info/RECORD +0 -44
- cobweb_launcher-1.2.49.dist-info/top_level.txt +0 -1
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/WHEEL +0 -0
cobweb/pipelines/pipeline.py
CHANGED
@@ -2,68 +2,44 @@ import time
|
|
2
2
|
import threading
|
3
3
|
|
4
4
|
from abc import ABC, abstractmethod
|
5
|
-
from cobweb.base import BaseItem,
|
5
|
+
from cobweb.base import BaseItem, TaskQueue, logger, Decorators
|
6
|
+
from cobweb import setting
|
6
7
|
|
7
8
|
|
8
|
-
class Pipeline(
|
9
|
+
class Pipeline(ABC):
    """Abstract upload pipeline that drains ``TaskQueue.UPLOAD`` in batches.

    Subclasses provide :meth:`build` (item -> row dict) and :meth:`upload`
    (send the rows collected for one table).
    """

    def __init__(
            self,
            pause: threading.Event,
    ):
        """Store the pause event and read the batching limits from ``setting``."""
        super().__init__()
        self.pause = pause
        self.upload_queue_size = setting.UPLOAD_QUEUE_SIZE
        self.upload_wait_time = setting.UPLOAD_WAIT_TIME

    @abstractmethod
    def build(self, item: BaseItem) -> dict:
        """Convert one item into the dict that will be uploaded."""
        ...

    @abstractmethod
    def upload(self, table: str, data: list) -> bool:
        """Upload every row collected for ``table``."""
        ...

    @Decorators.pause
    def run(self):
        """Collect one batch from ``TaskQueue.UPLOAD``, upload it, then sleep.

        NOTE(review): ``Decorators.pause`` is defined elsewhere; presumably it
        re-invokes this method while ``self.pause`` allows - confirm.
        """
        data_info, seeds = {}, []
        # Sleep the full wait time only when the upload queue is below the
        # batch size; otherwise come back almost immediately.
        thread_sleep = (
            self.upload_wait_time
            if TaskQueue.UPLOAD.length < self.upload_queue_size
            else 0.1
        )
        try:
            # The walrus must be parenthesised: without the parentheses
            # ``item`` is bound to the boolean result of the whole ``and``
            # expression instead of the popped item. The size check comes
            # first so an item is never popped and then thrown away.
            while (
                len(seeds) <= self.upload_queue_size
                and (item := TaskQueue.UPLOAD.pop())
            ):
                data = self.build(item)
                data_info.setdefault(item.table, []).append(data)
                seeds.append(item.seed)
            for table, datas in data_info.items():
                self.upload(table, datas)
        except Exception as e:
            logger.info(e)
            # NOTE(review): after a failure nothing is handed to DONE;
            # confirm that dropping the batch's seeds here is intended.
            seeds = None
        finally:
            TaskQueue.DONE.push(seeds)

        time.sleep(thread_sleep)
|
cobweb/schedulers/__init__.py
CHANGED
@@ -33,7 +33,7 @@ class RedisScheduler:
|
|
33
33
|
members = self._db.members(key, source, count=count, _min=0, _max="(1000")
|
34
34
|
for member, priority in members:
|
35
35
|
# seed = Seed(member, priority=priority)
|
36
|
-
yield member, priority
|
36
|
+
yield member.decode(), priority
|
37
37
|
|
38
38
|
def insert(self, key, items):
|
39
39
|
if items:
|
@@ -48,8 +48,7 @@ class RedisScheduler:
|
|
48
48
|
_min = reset_time - int(time.time()) if self.heartbeat else "-inf"
|
49
49
|
|
50
50
|
for key in keys:
|
51
|
-
|
52
|
-
self._db.members(key, 0, _min=_min, _max="(0")
|
51
|
+
self._db.members(key, 0, _min=_min, _max="(0")
|
53
52
|
|
54
53
|
if not self.heartbeat:
|
55
54
|
self.working.set()
|
@@ -59,13 +58,11 @@ class RedisScheduler:
|
|
59
58
|
|
60
59
|
def refresh(self, key, items: dict[str, int]):
    """Rewrite the scores of existing members of the sorted set ``key``.

    Args:
        key: Name of the sorted set passed to ``self._db.zadd``.
        items: Mapping of member -> value (an iterable of ``(member, value)``
            pairs is accepted as well).

    Each member's new score is ``-int(time.time()) - value / 1000``; the
    write uses ``xx=True``.
    """
    refresh_time = int(time.time())
    # Iterating a dict directly yields only its keys, which cannot be
    # unpacked into (member, value); go through .items() for mappings.
    pairs = items.items() if isinstance(items, dict) else items
    its = {k: -refresh_time - v / 1000 for k, v in pairs}
    self._db.zadd(key, item=its, xx=True)
|
65
63
|
|
66
64
|
def delete(self, key, values):
    """Remove ``values`` from the sorted set ``key`` via ``self._db.zrem``.

    Nothing is sent when ``values`` is empty, mirroring the ``if items:``
    guard used by ``insert``; a ZREM without members is not a valid command.
    """
    if values:
        self._db.zrem(key, *values)
|
69
66
|
|
70
67
|
|
71
68
|
|
cobweb/setting.py
CHANGED
@@ -37,8 +37,14 @@ PIPELINE = "cobweb.pipelines.pipeline_console.Console"
|
|
37
37
|
# Launcher 等待时间
|
38
38
|
|
39
39
|
BEFORE_SCHEDULER_WAIT_SECONDS = 60 # 调度前等待时间,只作用于单次任务
|
40
|
-
|
41
|
-
|
40
|
+
|
41
|
+
SCHEDULING_WAIT_TIME = 15 # SCHEDULER ITEM 调度等待时间
|
42
|
+
INSERTING_WAIT_TIME = 30 # INSERT ITEM 等待时间
|
43
|
+
REMOVING_WAIT_TIME = 5 # REMOVE ITEM 等待时间
|
44
|
+
RESET_WAIT_TIME = 30 # RESET ITEM 等待时间
|
45
|
+
UPLOAD_WAIT_TIME = 15 # 上传等待时间
|
46
|
+
|
47
|
+
TODO_QUEUE_FULL_WAIT_SECONDS = 5 # 队列已满时等待时间
|
42
48
|
NEW_QUEUE_WAIT_SECONDS = 30 # new队列等待时间
|
43
49
|
DONE_QUEUE_WAIT_SECONDS = 5 # done队列等待时间
|
44
50
|
UPLOAD_QUEUE_WAIT_SECONDS = 15 # upload队列等待时间
|
@@ -46,10 +52,27 @@ SEED_RESET_SECONDS = 30 # 种子重制时间
|
|
46
52
|
|
47
53
|
|
48
54
|
# Launcher 队列长度
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
55
|
+
SCHEDULING_SIZE = 100 # 调度队列长度
|
56
|
+
INSERTING_SIZE = 100 # INSERT 长度
|
57
|
+
REMOVING_SIZE = 100 # REMOVE 长度
|
58
|
+
|
59
|
+
# SEED = Queue() # 添加任务种子队列
|
60
|
+
# TODO = Queue() # 任务种子队列
|
61
|
+
# REQUEST = Queue() # 请求队列
|
62
|
+
# DOWNLOAD = Queue() # 下载任务队列
|
63
|
+
# RESPONSE = Queue() # 响应队列
|
64
|
+
# DONE = Queue() # 下载完成队列
|
65
|
+
# UPLOAD = Queue() # 任务上传队列
|
66
|
+
# DELETE = Queue() # 任务删除队列
|
67
|
+
|
68
|
+
SEED_QUEUE_SIZE = 100 # SEED 队列长度
|
69
|
+
TODO_QUEUE_SIZE = 100 # TODO 队列长度
|
70
|
+
REQUEST_QUEUE_SIZE = 100 # REQUEST 队列长度
|
71
|
+
DOWNLOAD_QUEUE_SIZE = 100 # DOWNLOAD 队列长度
|
72
|
+
RESPONSE_QUEUE_SIZE = 100 # RESPONSE 队列长度
|
73
|
+
DONE_QUEUE_SIZE = 100 # DONE 队列长度
|
74
|
+
UPLOAD_QUEUE_SIZE = 100 # upload队列长度
|
75
|
+
DELETE_QUEUE_SIZE = 100 # DELETE 队列长度
|
53
76
|
|
54
77
|
# DONE_MODEL IN (0, 1), 种子完成模式
|
55
78
|
DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加至失败队列;1:种子消费成功添加至成功队列,失败添加至失败队列
|
cobweb/utils/dotting.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
import json
|
2
|
-
import time
|
3
2
|
|
4
3
|
from aliyun.log import LogClient, LogItem, PutLogsRequest
|
5
|
-
|
6
|
-
from cobweb.base import Queue, logger
|
7
4
|
from cobweb import setting
|
8
5
|
|
9
6
|
|
@@ -11,11 +8,11 @@ class LoghubDot:
|
|
11
8
|
|
12
9
|
def __init__(self):
|
13
10
|
self.client = LogClient(**setting.LOGHUB_CONFIG)
|
14
|
-
self.queue = Queue()
|
15
11
|
|
16
12
|
def build(self, topic, **kwargs):
|
17
13
|
|
18
14
|
temp = {}
|
15
|
+
log_items = []
|
19
16
|
log_item = LogItem()
|
20
17
|
for key, value in kwargs.items():
|
21
18
|
if not isinstance(value, str):
|
@@ -24,41 +21,12 @@ class LoghubDot:
|
|
24
21
|
temp[key] = value
|
25
22
|
contents = sorted(temp.items())
|
26
23
|
log_item.set_contents(contents)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
self.
|
36
|
-
# self.client.put_logs(request=request)
|
37
|
-
|
38
|
-
def build_run(self):
|
39
|
-
while True:
|
40
|
-
start_time = int(time.time())
|
41
|
-
while True:
|
42
|
-
cost_time = int(time.time()) - start_time
|
43
|
-
if self.queue.length >= 1000 or cost_time > 10:
|
44
|
-
break
|
45
|
-
time.sleep(0.5)
|
46
|
-
try:
|
47
|
-
log_item_info = {}
|
48
|
-
for _ in range(1000):
|
49
|
-
its = self.queue.pop()
|
50
|
-
if not its:
|
51
|
-
break
|
52
|
-
topic, item = its
|
53
|
-
log_item_info.setdefault(topic, []).append(item)
|
54
|
-
for topic, log_items in log_item_info.items():
|
55
|
-
request = PutLogsRequest(
|
56
|
-
project="databee-download-log",
|
57
|
-
logstore="log",
|
58
|
-
topic=topic,
|
59
|
-
logitems=log_items,
|
60
|
-
compress=True
|
61
|
-
)
|
62
|
-
self.client.put_logs(request=request)
|
63
|
-
except Exception as e:
|
64
|
-
logger.info(str(e))
|
24
|
+
log_items.append(log_item)
|
25
|
+
request = PutLogsRequest(
|
26
|
+
project="databee-download-log",
|
27
|
+
logstore="cobweb_log",
|
28
|
+
topic=topic,
|
29
|
+
logitems=log_items,
|
30
|
+
compress=True
|
31
|
+
)
|
32
|
+
self.client.put_logs(request=request)
|
cobweb_/__init__.py
ADDED
cobweb_/base/__init__.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
from collections import deque
|
2
|
+
|
3
|
+
|
4
|
+
class Queue:
    """Small double-ended queue wrapper around :class:`collections.deque`."""

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of entries currently stored."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add ``data`` to the queue; falsy ``data`` is ignored.

        A list or tuple is unpacked element by element unless
        ``direct_insertion`` is true, in which case it is stored as a single
        entry. ``left`` selects the left end of the deque (``extendleft``
        inserts the elements one at a time, so their order ends up reversed).
        """
        try:
            if not data:
                return None
            unpack = not direct_insertion and isinstance(data, (list, tuple))
            if unpack and left:
                self._queue.extendleft(data)
            elif unpack:
                self._queue.extend(data)
            elif left:
                self._queue.appendleft(data)
            else:
                self._queue.append(data)
        except AttributeError:
            pass

    def pop(self, left: bool = True):
        """Remove and return one entry, or ``None`` when nothing is available."""
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except (IndexError, AttributeError):
            return None
|
@@ -0,0 +1,40 @@
|
|
1
|
+
from functools import wraps
|
2
|
+
|
3
|
+
|
4
|
+
# def check_redis_status(func):
|
5
|
+
# @wraps(func)
|
6
|
+
# def wrapper(*args, **kwargs):
|
7
|
+
# try:
|
8
|
+
# result = func(*args, **kwargs)
|
9
|
+
# except Exception:
|
10
|
+
# result = False
|
11
|
+
# return result
|
12
|
+
#
|
13
|
+
# return wrapper
|
14
|
+
|
15
|
+
|
16
|
+
def decorator_oss_db(exception, retries=3):
    """Build a decorator that retries a call and wraps its final failure.

    Args:
        exception: Exception class to raise, wrapping the last error, once
            every attempt has failed.
        retries: Maximum number of attempts.

    Returns:
        A decorator. The wrapped callable is invoked as
        ``func(callback_func, *args, **kwargs)`` and its return value is passed
        through on the first successful attempt. With ``retries <= 0`` the
        function is never called and ``None`` is returned.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            last_attempt = retries - 1
            for attempt in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as err:
                    # Escalate only after the configured number of attempts;
                    # a fixed threshold would ignore ``retries`` entirely.
                    if attempt == last_attempt:
                        raise exception(err) from err
            return None

        return wrapper

    return decorator
|
38
|
+
|
39
|
+
|
40
|
+
|
cobweb_/base/item.py
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
from .seed import Seed
|
2
|
+
from collections import namedtuple
|
3
|
+
|
4
|
+
|
5
|
+
class Item(type):
    """Metaclass that attaches a ``Data`` namedtuple to every item class.

    Each class other than the one named ``BaseItem`` must define ``__TABLE__``
    (used as the namedtuple type name) and ``__FIELDS__`` (its field names).
    """

    def __new__(cls, name, bases, dct):
        new_class_instance = type.__new__(cls, name, bases, dct)
        if name != "BaseItem":
            table = getattr(new_class_instance, "__TABLE__")
            fields = getattr(new_class_instance, "__FIELDS__")
            new_class_instance.Data = namedtuple(table, fields)
        return new_class_instance


class BaseItem(metaclass=Item):
    """Base class for items: a seed plus one ``Data`` row.

    Subclasses set ``__TABLE__`` and ``__FIELDS__``; the metaclass turns them
    into the ``Data`` namedtuple used to hold the row.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: "Seed", **kwargs):
        """Split ``kwargs`` into row fields and plain instance attributes.

        Keyword arguments naming a ``Data`` field become part of ``self.data``;
        every other keyword is stored as an attribute on the instance.
        """
        self.seed = seed

        # Compare against the parsed field names. ``__FIELDS__`` may be a
        # single string, and ``key in "<string>"`` is a substring test that
        # would treat e.g. "at" as a field of "data".
        field_names = self.Data._fields
        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        self.data = self.Data(**data)

    @property
    def to_dict(self):
        """The row as a mapping of field name -> value."""
        return self.data._asdict()

    @property
    def table(self):
        """Table name, i.e. the name of the ``Data`` namedtuple type."""
        return self.Data.__name__


class ConsoleItem(BaseItem):
    """Item with table ``console`` and the single field ``data``."""

    __TABLE__ = "console"
    __FIELDS__ = "data"
|
46
|
+
|
cobweb_/base/log.py
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
|
4
|
+
class ColorCodes:
    """ANSI escape sequences for terminal colours and text styles."""

    # Reset all attributes
    RESET = "\033[0m"

    # Regular foreground colours
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Bright foreground colours
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_PURPLE = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    # Background colours
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_PURPLE = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright background colours
    BG_BRIGHT_RED = "\033[101m"
    BG_BRIGHT_GREEN = "\033[102m"
    BG_BRIGHT_YELLOW = "\033[103m"
    BG_BRIGHT_BLUE = "\033[104m"
    BG_BRIGHT_PURPLE = "\033[105m"
    BG_BRIGHT_CYAN = "\033[106m"
    BG_BRIGHT_WHITE = "\033[107m"

    # Text styles
    BOLD = "\033[1m"
    DIM = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    REVERSE = "\033[7m"
    HIDDEN = "\033[8m"
|
52
|
+
|
53
|
+
|
54
|
+
class Log:
    """Facade exposing the level methods of one logger shared by all instances."""

    # The statements below run once, when the class body is executed.
    logging.getLogger('oss2.api').setLevel(logging.WARNING)
    logging.basicConfig(
        format='%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
               ' %(levelname)s -> %(message)s',
        level=logging.INFO,
    )
    log = logging.getLogger()

    def set_log_name(self, name):
        """Point the class-wide logger at ``logging.getLogger(name)``."""
        type(self).log = logging.getLogger(name)

    @property
    def debug(self):
        """Bound ``debug`` method of the shared logger."""
        return type(self).log.debug

    @property
    def info(self):
        """Bound ``info`` method of the shared logger."""
        return type(self).log.info

    @property
    def warning(self):
        """Bound ``warning`` method of the shared logger."""
        return type(self).log.warning

    @property
    def exception(self):
        """Bound ``exception`` method of the shared logger."""
        return type(self).log.exception

    @property
    def error(self):
        """Bound ``error`` method of the shared logger."""
        return type(self).log.error

    @property
    def critical(self):
        """Bound ``critical`` method of the shared logger."""
        return type(self).log.critical


logger = Log()
|
92
|
+
|
93
|
+
|
94
|
+
|
cobweb_/base/request.py
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
import random
|
2
|
+
import requests
|
3
|
+
|
4
|
+
|
5
|
+
class Request:
    """Description of one HTTP request, bound to the seed it belongs to.

    Keyword arguments whose names appear in ``__REQUEST_ATTRS__`` are collected
    in ``request_setting`` and forwarded to ``requests.request``; all other
    keyword arguments are stored as attributes on the instance.
    """

    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
            self,
            url,
            seed,
            random_ua=True,
            check_status_code=True,
            **kwargs
    ):
        """Create the request.

        Args:
            url: Target URL.
            seed: Seed object this request was built from.
            random_ua: When true, add a generated user-agent header if the
                supplied headers do not already contain one.
            check_status_code: When true, :meth:`download` calls
                ``raise_for_status`` on the response.
            **kwargs: Request options and extra attributes (see class docs).
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting = {}

        for k, v in kwargs.items():
            if k in self.__class__.__REQUEST_ATTRS__:
                self.request_setting[k] = v
                continue
            self.__setattr__(k, v)

        # Without an explicit method, a request body implies POST.
        if not getattr(self, "method", None):
            has_body = self.request_setting.get("data") or self.request_setting.get("json")
            self.method = "POST" if has_body else "GET"

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """A user-agent string with randomised version numbers."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
        return user_agent

    def _build_header(self) -> None:
        """Ensure ``request_setting['headers']`` contains a user-agent."""
        headers = self.request_setting.get("headers")
        if not headers:
            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
        elif "user-agent" not in [key.lower() for key in headers.keys()]:
            # Work on a copy: the headers mapping belongs to the caller and
            # may be shared between requests, so it must not be modified.
            self.request_setting["headers"] = {**headers, "user-agent": self._random_ua}

    def download(self) -> "requests.Response":
        """Send the request and return the ``requests`` response.

        Raises:
            requests.HTTPError: from ``raise_for_status`` when
                ``check_status_code`` is true and the status is an error.
        """
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self):
        """Instance attributes other than url, seed, check_status_code and request_setting."""
        _dict = self.__dict__.copy()
        _dict.pop('url')
        _dict.pop('seed')
        _dict.pop('check_status_code')
        _dict.pop('request_setting')
        return _dict
|
81
|
+
|
82
|
+
|
cobweb_/base/response.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
class Response:
    """Pairs a seed with the response obtained for it, plus extra attributes."""

    def __init__(self, seed, response, **kwargs):
        """Store ``seed`` and ``response``; every other keyword becomes an attribute."""
        self.seed = seed
        self.response = response

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

    @property
    def to_dict(self):
        """Instance attributes other than ``seed`` and ``response``."""
        extras = dict(self.__dict__)
        for reserved in ("seed", "response"):
            del extras[reserved]
        return extras
|
23
|
+
|