cobweb-launcher 1.2.49__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. cobweb/base/__init__.py +141 -4
  2. cobweb/base/basic.py +28 -82
  3. cobweb/base/common_queue.py +13 -0
  4. cobweb/base/dotting.py +1 -1
  5. cobweb/base/request.py +14 -2
  6. cobweb/base/seed.py +10 -6
  7. cobweb/constant.py +16 -0
  8. cobweb/crawlers/crawler.py +51 -181
  9. cobweb/db/redis_db.py +28 -0
  10. cobweb/launchers/__init__.py +2 -2
  11. cobweb/launchers/launcher.py +110 -141
  12. cobweb/launchers/launcher_api.py +66 -114
  13. cobweb/launchers/launcher_pro.py +76 -194
  14. cobweb/pipelines/base_pipeline.py +54 -0
  15. cobweb/pipelines/loghub_pipeline.py +34 -0
  16. cobweb/pipelines/pipeline.py +25 -49
  17. cobweb/schedulers/__init__.py +0 -2
  18. cobweb/schedulers/scheduler_redis.py +5 -8
  19. cobweb/setting.py +29 -6
  20. cobweb/utils/dotting.py +10 -42
  21. cobweb_/__init__.py +2 -0
  22. cobweb_/base/__init__.py +9 -0
  23. cobweb_/base/common_queue.py +30 -0
  24. cobweb_/base/decorators.py +40 -0
  25. cobweb_/base/item.py +46 -0
  26. cobweb_/base/log.py +94 -0
  27. cobweb_/base/request.py +82 -0
  28. cobweb_/base/response.py +23 -0
  29. cobweb_/base/seed.py +114 -0
  30. cobweb_/constant.py +94 -0
  31. cobweb_/crawlers/__init__.py +1 -0
  32. cobweb_/crawlers/crawler.py +184 -0
  33. cobweb_/db/__init__.py +2 -0
  34. cobweb_/db/api_db.py +82 -0
  35. cobweb_/db/redis_db.py +130 -0
  36. cobweb_/exceptions/__init__.py +1 -0
  37. cobweb_/exceptions/oss_db_exception.py +28 -0
  38. cobweb_/launchers/__init__.py +3 -0
  39. cobweb_/launchers/launcher.py +235 -0
  40. cobweb_/launchers/launcher_air.py +88 -0
  41. cobweb_/launchers/launcher_api.py +221 -0
  42. cobweb_/launchers/launcher_pro.py +222 -0
  43. cobweb_/pipelines/__init__.py +3 -0
  44. cobweb_/pipelines/pipeline.py +69 -0
  45. cobweb_/pipelines/pipeline_console.py +22 -0
  46. cobweb_/pipelines/pipeline_loghub.py +34 -0
  47. cobweb_/setting.py +74 -0
  48. cobweb_/utils/__init__.py +5 -0
  49. cobweb_/utils/bloom.py +58 -0
  50. cobweb_/utils/dotting.py +32 -0
  51. cobweb_/utils/oss.py +94 -0
  52. cobweb_/utils/tools.py +42 -0
  53. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/METADATA +1 -1
  54. cobweb_launcher-1.3.2.dist-info/RECORD +110 -0
  55. cobweb_launcher-1.3.2.dist-info/top_level.txt +2 -0
  56. cobweb_new/__init__.py +2 -0
  57. cobweb_new/base/__init__.py +72 -0
  58. cobweb_new/base/common_queue.py +53 -0
  59. cobweb_new/base/decorators.py +72 -0
  60. cobweb_new/base/item.py +46 -0
  61. cobweb_new/base/log.py +94 -0
  62. cobweb_new/base/request.py +82 -0
  63. cobweb_new/base/response.py +23 -0
  64. cobweb_new/base/seed.py +118 -0
  65. cobweb_new/constant.py +105 -0
  66. cobweb_new/crawlers/__init__.py +1 -0
  67. cobweb_new/crawlers/crawler-new.py +85 -0
  68. cobweb_new/crawlers/crawler.py +170 -0
  69. cobweb_new/db/__init__.py +2 -0
  70. cobweb_new/db/api_db.py +82 -0
  71. cobweb_new/db/redis_db.py +158 -0
  72. cobweb_new/exceptions/__init__.py +1 -0
  73. cobweb_new/exceptions/oss_db_exception.py +28 -0
  74. cobweb_new/launchers/__init__.py +3 -0
  75. cobweb_new/launchers/launcher.py +237 -0
  76. cobweb_new/launchers/launcher_air.py +88 -0
  77. cobweb_new/launchers/launcher_api.py +161 -0
  78. cobweb_new/launchers/launcher_pro.py +96 -0
  79. cobweb_new/launchers/tesss.py +47 -0
  80. cobweb_new/pipelines/__init__.py +3 -0
  81. cobweb_new/pipelines/pipeline.py +68 -0
  82. cobweb_new/pipelines/pipeline_console.py +22 -0
  83. cobweb_new/pipelines/pipeline_loghub.py +34 -0
  84. cobweb_new/setting.py +95 -0
  85. cobweb_new/utils/__init__.py +5 -0
  86. cobweb_new/utils/bloom.py +58 -0
  87. cobweb_new/utils/oss.py +94 -0
  88. cobweb_new/utils/tools.py +42 -0
  89. cobweb/schedulers/scheduler_api.py +0 -72
  90. cobweb_launcher-1.2.49.dist-info/RECORD +0 -44
  91. cobweb_launcher-1.2.49.dist-info/top_level.txt +0 -1
  92. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/LICENSE +0 -0
  93. {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/WHEEL +0 -0
@@ -2,68 +2,44 @@ import time
2
2
  import threading
3
3
 
4
4
  from abc import ABC, abstractmethod
5
- from cobweb.base import BaseItem, Queue, logger
5
+ from cobweb.base import BaseItem, TaskQueue, logger, Decorators
6
+ from cobweb import setting
6
7
 
7
8
 
8
- class Pipeline(threading.Thread, ABC):
9
+ class Pipeline(ABC):
9
10
 
10
11
  def __init__(
11
12
  self,
12
- stop: threading.Event,
13
13
  pause: threading.Event,
14
- upload: Queue, done: Queue,
15
- upload_size: int,
16
- wait_seconds: int
17
14
  ):
18
15
  super().__init__()
19
- self._stop = stop
20
- self._pause = pause
21
- self._upload = upload
22
- self._done = done
23
-
24
- self.upload_size = upload_size
25
- self.wait_seconds = wait_seconds
16
+ self.pause = pause
17
+ self.upload_queue_size = setting.UPLOAD_QUEUE_SIZE
18
+ self.upload_wait_time = setting.UPLOAD_WAIT_TIME
26
19
 
27
20
  @abstractmethod
28
21
  def build(self, item: BaseItem) -> dict:
29
- pass
22
+ ...
30
23
 
31
24
  @abstractmethod
32
25
  def upload(self, table: str, data: list) -> bool:
33
- pass
26
+ ...
34
27
 
28
+ @Decorators.pause
35
29
  def run(self):
36
- while not self._stop.is_set():
37
- if not self._upload.length:
38
- time.sleep(self.wait_seconds)
39
- continue
40
- if self._upload.length < self.upload_size:
41
- time.sleep(self.wait_seconds)
42
- status = True
43
- data_info, seeds = {}, []
44
- try:
45
- for _ in range(self.upload_size):
46
- item = self._upload.pop()
47
- if not item:
48
- break
49
- seeds.append(item.seed)
50
- data = self.build(item)
51
- data_info.setdefault(item.table, []).append(data)
52
- for table, datas in data_info.items():
53
- try:
54
- self.upload(table, datas)
55
- except Exception as e:
56
- logger.info(e)
57
- status = False
58
- except Exception as e:
59
- logger.info(e)
60
- status = False
61
- if not status:
62
- for seed in seeds:
63
- seed.params.seed_status = "deal model: fail"
64
- if seeds:
65
- self._done.push(seeds)
66
-
67
- logger.info("upload pipeline close!")
68
-
69
-
30
+ data_info, seeds = {}, []
31
+ thread_sleep = self.upload_wait_time if TaskQueue.UPLOAD.length < self.upload_queue_size else 0.1
32
+ try:
33
+ while item := TaskQueue.UPLOAD.pop() and len(seeds) <= self.upload_queue_size:
34
+ data = self.build(item)
35
+ data_info.setdefault(item.table, []).append(data)
36
+ seeds.append(item.seed)
37
+ for table, datas in data_info.items():
38
+ self.upload(table, datas)
39
+ except Exception as e:
40
+ logger.info(e)
41
+ seeds = None
42
+ finally:
43
+ TaskQueue.DONE.push(seeds)
44
+
45
+ time.sleep(thread_sleep)
@@ -1,3 +1 @@
1
1
  from .scheduler_redis import RedisScheduler
2
- from .scheduler_api import ApiScheduler
3
-
@@ -33,7 +33,7 @@ class RedisScheduler:
33
33
  members = self._db.members(key, source, count=count, _min=0, _max="(1000")
34
34
  for member, priority in members:
35
35
  # seed = Seed(member, priority=priority)
36
- yield member, priority
36
+ yield member.decode(), priority
37
37
 
38
38
  def insert(self, key, items):
39
39
  if items:
@@ -48,8 +48,7 @@ class RedisScheduler:
48
48
  _min = reset_time - int(time.time()) if self.heartbeat else "-inf"
49
49
 
50
50
  for key in keys:
51
- if self._db.exists(key):
52
- self._db.members(key, 0, _min=_min, _max="(0")
51
+ self._db.members(key, 0, _min=_min, _max="(0")
53
52
 
54
53
  if not self.heartbeat:
55
54
  self.working.set()
@@ -59,13 +58,11 @@ class RedisScheduler:
59
58
 
60
59
  def refresh(self, key, items: dict[str, int]):
61
60
  refresh_time = int(time.time())
62
- its = {k: -refresh_time - v / 1000 for k, v in items.items()}
63
- if its:
64
- self._db.zadd(key, item=its, xx=True)
61
+ its = {k: -refresh_time - v / 1000 for k, v in items}
62
+ self._db.zadd(key, item=its, xx=True)
65
63
 
66
64
  def delete(self, key, values):
67
- if values:
68
- self._db.zrem(key, *values)
65
+ self._db.zrem(key, *values)
69
66
 
70
67
 
71
68
 
cobweb/setting.py CHANGED
@@ -37,8 +37,14 @@ PIPELINE = "cobweb.pipelines.pipeline_console.Console"
37
37
  # Launcher 等待时间
38
38
 
39
39
  BEFORE_SCHEDULER_WAIT_SECONDS = 60 # 调度前等待时间,只作用于单次任务
40
- SCHEDULER_WAIT_SECONDS = 15 # 调度等待时间
41
- TODO_QUEUE_FULL_WAIT_SECONDS = 5 # todo队列已满时等待时间
40
+
41
+ SCHEDULING_WAIT_TIME = 15 # SCHEDULER ITEM 调度等待时间
42
+ INSERTING_WAIT_TIME = 30 # INSERT ITEM 等待时间
43
+ REMOVING_WAIT_TIME = 5 # REMOVE ITEM 等待时间
44
+ RESET_WAIT_TIME = 30 # REST ITEM 等待时间
45
+ UPLOAD_WAIT_TIME = 15 # 上传等待时间
46
+
47
+ TODO_QUEUE_FULL_WAIT_SECONDS = 5 # 队列已满时等待时间
42
48
  NEW_QUEUE_WAIT_SECONDS = 30 # new队列等待时间
43
49
  DONE_QUEUE_WAIT_SECONDS = 5 # done队列等待时间
44
50
  UPLOAD_QUEUE_WAIT_SECONDS = 15 # upload队列等待时间
@@ -46,10 +52,27 @@ SEED_RESET_SECONDS = 30 # 种子重制时间
46
52
 
47
53
 
48
54
  # Launcher 队列长度
49
- TODO_QUEUE_SIZE = 100 # todo队列长度
50
- NEW_QUEUE_MAX_SIZE = 100 # new队列长度
51
- DONE_QUEUE_MAX_SIZE = 100 # done队列长度
52
- UPLOAD_QUEUE_MAX_SIZE = 100 # upload队列长度
55
+ SCHEDULING_SIZE = 100 # 调度队列长度
56
+ INSERTING_SIZE = 100 # INSERT 长度
57
+ REMOVING_SIZE = 100 # REMOVE 长度
58
+
59
+ # SEED = Queue() # 添加任务种子队列
60
+ # TODO = Queue() # 任务种子队列
61
+ # REQUEST = Queue() # 请求队列
62
+ # DOWNLOAD = Queue() # 下载任务队列
63
+ # RESPONSE = Queue() # 响应队列
64
+ # DONE = Queue() # 下载完成队列
65
+ # UPLOAD = Queue() # 任务上传队列
66
+ # DELETE = Queue() # 任务删除队列
67
+
68
+ SEED_QUEUE_SIZE = 100 # TODO 队列长度
69
+ TODO_QUEUE_SIZE = 100 # TODO 队列长度
70
+ REQUEST_QUEUE_SIZE = 100 # new队列长度
71
+ DOWNLOAD_QUEUE_SIZE = 100 # done队列长度
72
+ RESPONSE_QUEUE_SIZE = 100 # upload队列长度
73
+ DONE_QUEUE_SIZE = 100 # upload队列长度
74
+ UPLOAD_QUEUE_SIZE = 100 # upload队列长度
75
+ DELETE_QUEUE_SIZE = 100 # upload队列长度
53
76
 
54
77
  # DONE_MODEL IN (0, 1), 种子完成模式
55
78
  DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加至失败队列;1:种子消费成功添加至成功队列,失败添加至失败队列
cobweb/utils/dotting.py CHANGED
@@ -1,9 +1,6 @@
1
1
  import json
2
- import time
3
2
 
4
3
  from aliyun.log import LogClient, LogItem, PutLogsRequest
5
-
6
- from cobweb.base import Queue, logger
7
4
  from cobweb import setting
8
5
 
9
6
 
@@ -11,11 +8,11 @@ class LoghubDot:
11
8
 
12
9
  def __init__(self):
13
10
  self.client = LogClient(**setting.LOGHUB_CONFIG)
14
- self.queue = Queue()
15
11
 
16
12
  def build(self, topic, **kwargs):
17
13
 
18
14
  temp = {}
15
+ log_items = []
19
16
  log_item = LogItem()
20
17
  for key, value in kwargs.items():
21
18
  if not isinstance(value, str):
@@ -24,41 +21,12 @@ class LoghubDot:
24
21
  temp[key] = value
25
22
  contents = sorted(temp.items())
26
23
  log_item.set_contents(contents)
27
- # log_items.append(log_item)
28
- # request = PutLogsRequest(
29
- # project="databee-download-log",
30
- # logstore="log",
31
- # topic=topic,
32
- # logitems=log_items,
33
- # compress=True
34
- # )
35
- self.queue.push((topic, log_item), direct_insertion=True)
36
- # self.client.put_logs(request=request)
37
-
38
- def build_run(self):
39
- while True:
40
- start_time = int(time.time())
41
- while True:
42
- cost_time = int(time.time()) - start_time
43
- if self.queue.length >= 1000 or cost_time > 10:
44
- break
45
- time.sleep(0.5)
46
- try:
47
- log_item_info = {}
48
- for _ in range(1000):
49
- its = self.queue.pop()
50
- if not its:
51
- break
52
- topic, item = its
53
- log_item_info.setdefault(topic, []).append(item)
54
- for topic, log_items in log_item_info.items():
55
- request = PutLogsRequest(
56
- project="databee-download-log",
57
- logstore="log",
58
- topic=topic,
59
- logitems=log_items,
60
- compress=True
61
- )
62
- self.client.put_logs(request=request)
63
- except Exception as e:
64
- logger.info(str(e))
24
+ log_items.append(log_item)
25
+ request = PutLogsRequest(
26
+ project="databee-download-log",
27
+ logstore="cobweb_log",
28
+ topic=topic,
29
+ logitems=log_items,
30
+ compress=True
31
+ )
32
+ self.client.put_logs(request=request)
cobweb_/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .launchers import LauncherAir, LauncherPro, LauncherApi
2
+ from .constant import CrawlerModel
@@ -0,0 +1,9 @@
1
+ from .common_queue import Queue
2
+ from .response import Response
3
+ from .request import Request
4
+ from .item import BaseItem, ConsoleItem
5
+ from .seed import Seed
6
+
7
+ from .log import logger
8
+ from .decorators import decorator_oss_db
9
+
@@ -0,0 +1,30 @@
1
+ from collections import deque
2
+
3
+
4
class Queue:
    """In-memory FIFO/LIFO buffer backed by :class:`collections.deque`.

    Used as the in-process hand-off queue between launcher threads.
    """

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of items currently queued."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Enqueue *data*.

        A list/tuple is flattened into individual items unless
        ``direct_insertion`` is true, in which case it is stored as one item.
        ``left=True`` pushes to the head (note: ``extendleft`` reverses the
        order of a flattened sequence, matching deque semantics).
        Falsy data (None, 0, "", empty sequence) is silently dropped.
        """
        try:
            if not data:
                return None
            # isinstance accepts a tuple of types; no need for any()+genexp.
            if not direct_insertion and isinstance(data, (list, tuple)):
                self._queue.extendleft(data) if left else self._queue.extend(data)
            else:
                self._queue.appendleft(data) if left else self._queue.append(data)
        except AttributeError:
            # Best-effort: tolerate a missing/None _queue rather than crash.
            pass

    def pop(self, left: bool = True):
        """Dequeue one item (head by default); return None when empty."""
        try:
            return self._queue.popleft() if left else self._queue.pop()
        except (IndexError, AttributeError):
            # Empty queue or missing _queue both mean "nothing to hand out".
            return None
@@ -0,0 +1,40 @@
1
+ from functools import wraps
2
+
3
+
4
+ # def check_redis_status(func):
5
+ # @wraps(func)
6
+ # def wrapper(*args, **kwargs):
7
+ # try:
8
+ # result = func(*args, **kwargs)
9
+ # except Exception:
10
+ # result = False
11
+ # return result
12
+ #
13
+ # return wrapper
14
+
15
+
16
def decorator_oss_db(exception, retries=3):
    """Retry decorator for OSS DB operations.

    Calls the wrapped function up to ``retries`` times; the first successful
    call's value is returned immediately. If every attempt raises, the last
    error is wrapped in ``exception`` and raised.

    Fixes two defects of the previous version: the ``finally`` block could
    never fire (``result`` was never set to a truthy value), and the raise
    condition was hard-coded to ``i >= 2``, so any ``retries`` other than 3
    silently swallowed all failures and returned None.

    :param exception: exception class to raise after the final failed attempt.
    :param retries: maximum number of attempts (default 3).
    """
    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            last_error = None
            for _ in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as e:
                    last_error = e
            if last_error is not None:
                # All attempts failed: surface the last cause via the
                # caller-supplied exception type.
                raise exception(last_error)
            return None  # retries <= 0: nothing was attempted

        return wrapper

    return decorator
38
+
39
+
40
+
cobweb_/base/item.py ADDED
@@ -0,0 +1,46 @@
1
+ from .seed import Seed
2
+ from collections import namedtuple
3
+
4
+
5
class Item(type):
    """Metaclass that equips each concrete item class with a ``Data``
    namedtuple built from its ``__TABLE__`` (typename) and ``__FIELDS__``
    (field spec) class attributes."""

    def __new__(cls, name, bases, dct):
        klass = super().__new__(cls, name, bases, dct)
        # The abstract root declares no concrete table/fields, so it gets
        # no Data record type.
        if name != "BaseItem":
            klass.Data = namedtuple(klass.__TABLE__, klass.__FIELDS__)
        return klass
14
+
15
+
16
class BaseItem(metaclass=Item):
    """Base record produced by a crawl: a seed plus table field values.

    Subclasses set ``__TABLE__``/``__FIELDS__``; the metaclass turns those
    into the ``Data`` namedtuple used to hold field values.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: Seed, **kwargs):
        self.seed = seed

        field_values = {}
        for key, value in kwargs.items():
            # NOTE(review): membership is a substring test against the
            # __FIELDS__ string, mirroring the original behavior.
            if key in self.__FIELDS__:
                field_values[key] = value
            else:
                # Non-field keywords become plain instance attributes.
                setattr(self, key, value)

        self.data = self.Data(**field_values)

    @property
    def to_dict(self):
        """Field values as a dict (namedtuple ``_asdict``)."""
        return self.data._asdict()

    @property
    def table(self):
        """Destination table name (the Data namedtuple's typename)."""
        return self.Data.__name__
40
+
41
+
42
class ConsoleItem(BaseItem):
    # Minimal concrete item: a single "data" field routed to the
    # "console" table (used by the console pipeline).

    __TABLE__ = "console"
    __FIELDS__ = "data"
46
+
cobweb_/base/log.py ADDED
@@ -0,0 +1,94 @@
1
+ import logging
2
+
3
+
4
class ColorCodes:
    """ANSI SGR escape sequences for coloring/styling terminal log output."""

    # Text Reset
    RESET = "\033[0m"

    # Regular Colors
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Bright Colors
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_PURPLE = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    # Background Colors
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_PURPLE = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright Background Colors
    BG_BRIGHT_RED = "\033[101m"
    BG_BRIGHT_GREEN = "\033[102m"
    BG_BRIGHT_YELLOW = "\033[103m"
    BG_BRIGHT_BLUE = "\033[104m"
    BG_BRIGHT_PURPLE = "\033[105m"
    BG_BRIGHT_CYAN = "\033[106m"
    BG_BRIGHT_WHITE = "\033[107m"

    # Text Styles
    BOLD = "\033[1m"
    DIM = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    REVERSE = "\033[7m"
    HIDDEN = "\033[8m"
52
+
53
+
54
class Log:
    """Thin facade over the stdlib ``logging`` module.

    NOTE: the class body below runs at import time — it configures the root
    logger globally (basicConfig) and silences the noisy oss2 API logger.
    All instances share one class-level ``log`` attribute, so
    ``set_log_name`` swaps the logger for every user of this class.
    """
    # Suppress oss2's verbose per-request INFO logging.
    logging.getLogger('oss2.api').setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.INFO,
        format=f'%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
        f' %(levelname)s -> %(message)s'
    )
    # Shared logger for all instances (root logger by default).
    log = logging.getLogger()

    def set_log_name(self, name):
        # Rebinds the CLASS attribute: affects every Log instance.
        self.__class__.log = logging.getLogger(name)

    # Each property returns the bound logging method itself, so callers
    # write e.g. ``logger.info("msg")``.
    @property
    def debug(self):
        return self.__class__.log.debug

    @property
    def info(self):
        return self.__class__.log.info

    @property
    def warning(self):
        return self.__class__.log.warning

    @property
    def exception(self):
        return self.__class__.log.exception

    @property
    def error(self):
        return self.__class__.log.error

    @property
    def critical(self):
        return self.__class__.log.critical
89
+
90
+
91
+ logger = Log()
92
+
93
+
94
+
@@ -0,0 +1,82 @@
1
+ import random
2
+ import requests
3
+
4
+
5
class Request:
    """HTTP request descriptor bound to a crawl seed.

    Keyword arguments whose names appear in ``__REQUEST_ATTRS__`` are
    collected into ``request_setting`` and forwarded verbatim to
    ``requests.request``; every other keyword becomes a plain attribute on
    the instance (e.g. ``method``).
    """

    # Keywords accepted by requests.request / Session.request.
    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
        self,
        url,
        seed,
        random_ua=True,
        check_status_code=True,
        **kwargs
    ):
        """
        :param url: target URL.
        :param seed: originating seed object (kept for bookkeeping).
        :param random_ua: inject a randomized User-Agent when none is given.
        :param check_status_code: raise on non-2xx responses in download().
        :param kwargs: request options (see __REQUEST_ATTRS__) or extra
                       instance attributes.
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting = {}

        for k, v in kwargs.items():
            if k in self.__class__.__REQUEST_ATTRS__:
                self.request_setting[k] = v
                continue
            self.__setattr__(k, v)

        # Infer the verb when the caller did not set one: a body implies POST.
        if not getattr(self, "method", None):
            self.method = "POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """A plausible, randomized Chrome/Edge-on-macOS User-Agent string."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
        return user_agent

    def _build_header(self) -> None:
        """Ensure a User-Agent header exists, mutating request_setting in
        place. (Previously annotated ``-> dict`` but it never returned one.)"""
        if not self.request_setting.get("headers"):
            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
        elif "user-agent" not in [key.lower() for key in self.request_setting["headers"].keys()]:
            self.request_setting["headers"]["user-agent"] = self._random_ua

    def download(self) -> requests.Response:
        """Execute the request; optionally raise for HTTP error statuses."""
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self):
        """Extra instance attributes only — url/seed/internal state excluded."""
        _dict = self.__dict__.copy()
        _dict.pop('url')
        _dict.pop('seed')
        _dict.pop('check_status_code')
        _dict.pop('request_setting')
        return _dict
+ return _dict
81
+
82
+
@@ -0,0 +1,23 @@
1
+
2
+
3
class Response:
    """Pairs a seed with its downloaded response, plus arbitrary metadata.

    Any extra keyword arguments ride along as plain instance attributes and
    are the only things exposed by :attr:`to_dict`.
    """

    def __init__(self, seed, response, **kwargs):
        self.seed = seed
        self.response = response
        for key, value in kwargs.items():
            setattr(self, key, value)

    @property
    def to_dict(self):
        """Metadata attributes only — the seed and raw response are omitted."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("seed", "response")
        }
23
+