cobweb-launcher 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. cobweb/base/__init__.py +9 -9
  2. cobweb/base/dotting.py +1 -1
  3. cobweb/utils/oss.py +7 -7
  4. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/METADATA +1 -1
  5. cobweb_launcher-1.3.8.dist-info/RECORD +40 -0
  6. cobweb/base/decorators.py +0 -40
  7. cobweb/crawlers/base_crawler.py +0 -144
  8. cobweb/crawlers/file_crawler.py +0 -98
  9. cobweb/pipelines/base_pipeline.py +0 -54
  10. cobweb/pipelines/loghub_pipeline.py +0 -34
  11. cobweb/utils/dotting.py +0 -32
  12. cobweb_/__init__.py +0 -2
  13. cobweb_/base/__init__.py +0 -9
  14. cobweb_/base/common_queue.py +0 -30
  15. cobweb_/base/decorators.py +0 -40
  16. cobweb_/base/item.py +0 -46
  17. cobweb_/base/log.py +0 -94
  18. cobweb_/base/request.py +0 -82
  19. cobweb_/base/response.py +0 -23
  20. cobweb_/base/seed.py +0 -114
  21. cobweb_/constant.py +0 -94
  22. cobweb_/crawlers/__init__.py +0 -1
  23. cobweb_/crawlers/crawler.py +0 -184
  24. cobweb_/db/__init__.py +0 -2
  25. cobweb_/db/api_db.py +0 -82
  26. cobweb_/db/redis_db.py +0 -130
  27. cobweb_/exceptions/__init__.py +0 -1
  28. cobweb_/exceptions/oss_db_exception.py +0 -28
  29. cobweb_/launchers/__init__.py +0 -3
  30. cobweb_/launchers/launcher.py +0 -235
  31. cobweb_/launchers/launcher_air.py +0 -88
  32. cobweb_/launchers/launcher_api.py +0 -221
  33. cobweb_/launchers/launcher_pro.py +0 -222
  34. cobweb_/pipelines/__init__.py +0 -3
  35. cobweb_/pipelines/pipeline.py +0 -69
  36. cobweb_/pipelines/pipeline_console.py +0 -22
  37. cobweb_/pipelines/pipeline_loghub.py +0 -34
  38. cobweb_/setting.py +0 -74
  39. cobweb_/utils/__init__.py +0 -5
  40. cobweb_/utils/bloom.py +0 -58
  41. cobweb_/utils/dotting.py +0 -32
  42. cobweb_/utils/oss.py +0 -94
  43. cobweb_/utils/tools.py +0 -42
  44. cobweb_launcher-1.3.6.dist-info/RECORD +0 -111
  45. cobweb_new/__init__.py +0 -2
  46. cobweb_new/base/__init__.py +0 -72
  47. cobweb_new/base/common_queue.py +0 -53
  48. cobweb_new/base/decorators.py +0 -72
  49. cobweb_new/base/item.py +0 -46
  50. cobweb_new/base/log.py +0 -94
  51. cobweb_new/base/request.py +0 -82
  52. cobweb_new/base/response.py +0 -23
  53. cobweb_new/base/seed.py +0 -118
  54. cobweb_new/constant.py +0 -105
  55. cobweb_new/crawlers/__init__.py +0 -1
  56. cobweb_new/crawlers/crawler-new.py +0 -85
  57. cobweb_new/crawlers/crawler.py +0 -170
  58. cobweb_new/db/__init__.py +0 -2
  59. cobweb_new/db/api_db.py +0 -82
  60. cobweb_new/db/redis_db.py +0 -158
  61. cobweb_new/exceptions/__init__.py +0 -1
  62. cobweb_new/exceptions/oss_db_exception.py +0 -28
  63. cobweb_new/launchers/__init__.py +0 -3
  64. cobweb_new/launchers/launcher.py +0 -237
  65. cobweb_new/launchers/launcher_air.py +0 -88
  66. cobweb_new/launchers/launcher_api.py +0 -161
  67. cobweb_new/launchers/launcher_pro.py +0 -96
  68. cobweb_new/launchers/tesss.py +0 -47
  69. cobweb_new/pipelines/__init__.py +0 -3
  70. cobweb_new/pipelines/pipeline.py +0 -68
  71. cobweb_new/pipelines/pipeline_console.py +0 -22
  72. cobweb_new/pipelines/pipeline_loghub.py +0 -34
  73. cobweb_new/setting.py +0 -95
  74. cobweb_new/utils/__init__.py +0 -5
  75. cobweb_new/utils/bloom.py +0 -58
  76. cobweb_new/utils/oss.py +0 -94
  77. cobweb_new/utils/tools.py +0 -42
  78. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/LICENSE +0 -0
  79. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/WHEEL +0 -0
  80. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/top_level.txt +0 -0
cobweb_/base/item.py DELETED
@@ -1,46 +0,0 @@
1
- from .seed import Seed
2
- from collections import namedtuple
3
-
4
-
5
- class Item(type):
6
-
7
- def __new__(cls, name, bases, dct):
8
- new_class_instance = type.__new__(cls, name, bases, dct)
9
- if name != "BaseItem":
10
- table = getattr(new_class_instance, "__TABLE__")
11
- fields = getattr(new_class_instance, "__FIELDS__")
12
- new_class_instance.Data = namedtuple(table, fields)
13
- return new_class_instance
14
-
15
-
16
- class BaseItem(metaclass=Item):
17
-
18
- __TABLE__ = ""
19
- __FIELDS__ = ""
20
-
21
- def __init__(self, seed: Seed, **kwargs):
22
- self.seed = seed
23
-
24
- data = {}
25
- for key, value in kwargs.items():
26
- if key not in self.__FIELDS__:
27
- self.__setattr__(key, value)
28
- else:
29
- data[key] = value
30
-
31
- self.data = self.Data(**data)
32
-
33
- @property
34
- def to_dict(self):
35
- return self.data._asdict()
36
-
37
- @property
38
- def table(self):
39
- return self.Data.__name__
40
-
41
-
42
- class ConsoleItem(BaseItem):
43
-
44
- __TABLE__ = "console"
45
- __FIELDS__ = "data"
46
-
cobweb_/base/log.py DELETED
@@ -1,94 +0,0 @@
1
- import logging
2
-
3
-
4
- class ColorCodes:
5
- # Text Reset
6
- RESET = "\033[0m"
7
-
8
- # Regular Colors
9
- RED = "\033[31m"
10
- GREEN = "\033[32m"
11
- YELLOW = "\033[33m"
12
- BLUE = "\033[34m"
13
- PURPLE = "\033[35m"
14
- CYAN = "\033[36m"
15
- WHITE = "\033[37m"
16
-
17
- # Bright Colors
18
- BRIGHT_RED = "\033[91m"
19
- BRIGHT_GREEN = "\033[92m"
20
- BRIGHT_YELLOW = "\033[93m"
21
- BRIGHT_BLUE = "\033[94m"
22
- BRIGHT_PURPLE = "\033[95m"
23
- BRIGHT_CYAN = "\033[96m"
24
- BRIGHT_WHITE = "\033[97m"
25
-
26
- # Background Colors
27
- BG_RED = "\033[41m"
28
- BG_GREEN = "\033[42m"
29
- BG_YELLOW = "\033[43m"
30
- BG_BLUE = "\033[44m"
31
- BG_PURPLE = "\033[45m"
32
- BG_CYAN = "\033[46m"
33
- BG_WHITE = "\033[47m"
34
-
35
- # Bright Background Colors
36
- BG_BRIGHT_RED = "\033[101m"
37
- BG_BRIGHT_GREEN = "\033[102m"
38
- BG_BRIGHT_YELLOW = "\033[103m"
39
- BG_BRIGHT_BLUE = "\033[104m"
40
- BG_BRIGHT_PURPLE = "\033[105m"
41
- BG_BRIGHT_CYAN = "\033[106m"
42
- BG_BRIGHT_WHITE = "\033[107m"
43
-
44
- # Text Styles
45
- BOLD = "\033[1m"
46
- DIM = "\033[2m"
47
- ITALIC = "\033[3m"
48
- UNDERLINE = "\033[4m"
49
- BLINK = "\033[5m"
50
- REVERSE = "\033[7m"
51
- HIDDEN = "\033[8m"
52
-
53
-
54
- class Log:
55
- logging.getLogger('oss2.api').setLevel(logging.WARNING)
56
- logging.basicConfig(
57
- level=logging.INFO,
58
- format=f'%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
59
- f' %(levelname)s -> %(message)s'
60
- )
61
- log = logging.getLogger()
62
-
63
- def set_log_name(self, name):
64
- self.__class__.log = logging.getLogger(name)
65
-
66
- @property
67
- def debug(self):
68
- return self.__class__.log.debug
69
-
70
- @property
71
- def info(self):
72
- return self.__class__.log.info
73
-
74
- @property
75
- def warning(self):
76
- return self.__class__.log.warning
77
-
78
- @property
79
- def exception(self):
80
- return self.__class__.log.exception
81
-
82
- @property
83
- def error(self):
84
- return self.__class__.log.error
85
-
86
- @property
87
- def critical(self):
88
- return self.__class__.log.critical
89
-
90
-
91
- logger = Log()
92
-
93
-
94
-
cobweb_/base/request.py DELETED
@@ -1,82 +0,0 @@
1
- import random
2
- import requests
3
-
4
-
5
- class Request:
6
-
7
- __REQUEST_ATTRS__ = {
8
- "params",
9
- "headers",
10
- "cookies",
11
- "data",
12
- "json",
13
- "files",
14
- "auth",
15
- "timeout",
16
- "proxies",
17
- "hooks",
18
- "stream",
19
- "verify",
20
- "cert",
21
- "allow_redirects",
22
- }
23
-
24
- def __init__(
25
- self,
26
- url,
27
- seed,
28
- random_ua=True,
29
- check_status_code=True,
30
- **kwargs
31
- ):
32
- self.url = url
33
- self.seed = seed
34
- self.check_status_code = check_status_code
35
- self.request_setting = {}
36
-
37
- for k, v in kwargs.items():
38
- if k in self.__class__.__REQUEST_ATTRS__:
39
- self.request_setting[k] = v
40
- continue
41
- self.__setattr__(k, v)
42
-
43
- if not getattr(self, "method", None):
44
- self.method = "POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"
45
-
46
- if random_ua:
47
- self._build_header()
48
-
49
- @property
50
- def _random_ua(self) -> str:
51
- v1 = random.randint(4, 15)
52
- v2 = random.randint(3, 11)
53
- v3 = random.randint(1, 16)
54
- v4 = random.randint(533, 605)
55
- v5 = random.randint(1000, 6000)
56
- v6 = random.randint(10, 80)
57
- user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
58
- f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
59
- return user_agent
60
-
61
- def _build_header(self) -> dict:
62
- if not self.request_setting.get("headers"):
63
- self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
64
- elif "user-agent" not in [key.lower() for key in self.request_setting["headers"].keys()]:
65
- self.request_setting["headers"]["user-agent"] = self._random_ua
66
-
67
- def download(self) -> requests.Response:
68
- response = requests.request(self.method, self.url, **self.request_setting)
69
- if self.check_status_code:
70
- response.raise_for_status()
71
- return response
72
-
73
- @property
74
- def to_dict(self):
75
- _dict = self.__dict__.copy()
76
- _dict.pop('url')
77
- _dict.pop('seed')
78
- _dict.pop('check_status_code')
79
- _dict.pop('request_setting')
80
- return _dict
81
-
82
-
cobweb_/base/response.py DELETED
@@ -1,23 +0,0 @@
1
-
2
-
3
- class Response:
4
-
5
- def __init__(
6
- self,
7
- seed,
8
- response,
9
- **kwargs
10
- ):
11
- self.seed = seed
12
- self.response = response
13
-
14
- for k, v in kwargs.items():
15
- self.__setattr__(k, v)
16
-
17
- @property
18
- def to_dict(self):
19
- _dict = self.__dict__.copy()
20
- _dict.pop('seed')
21
- _dict.pop('response')
22
- return _dict
23
-
cobweb_/base/seed.py DELETED
@@ -1,114 +0,0 @@
1
- import json
2
- import time
3
- import hashlib
4
-
5
-
6
- class SeedParams:
7
-
8
- def __init__(self, retry, priority, seed_version, seed_status=None):
9
- self.retry = retry or 0
10
- self.priority = priority or 300
11
- self.seed_version = seed_version or int(time.time())
12
- self.seed_status = seed_status
13
-
14
-
15
- class Seed:
16
-
17
- __SEED_PARAMS__ = [
18
- "retry",
19
- "priority",
20
- "seed_version",
21
- "seed_status"
22
- ]
23
-
24
- def __init__(
25
- self,
26
- seed,
27
- sid=None,
28
- retry=None,
29
- priority=None,
30
- seed_version=None,
31
- seed_status=None,
32
- **kwargs
33
- ):
34
- if any(isinstance(seed, t) for t in (str, bytes)):
35
- try:
36
- item = json.loads(seed)
37
- self._init_seed(item)
38
- except json.JSONDecodeError:
39
- self.__setattr__("url", seed)
40
- elif isinstance(seed, dict):
41
- self._init_seed(seed)
42
- else:
43
- raise TypeError(Exception(
44
- f"seed type error, "
45
- f"must be str or dict! "
46
- f"seed: {seed}"
47
- ))
48
-
49
- seed_params = {
50
- "retry": retry,
51
- "priority": priority,
52
- "seed_version": seed_version,
53
- "seed_status": seed_status,
54
- }
55
-
56
- if kwargs:
57
- self._init_seed(kwargs)
58
- seed_params.update({
59
- k:v for k, v in kwargs.items()
60
- if k in self.__SEED_PARAMS__
61
- })
62
- if sid or not getattr(self, "sid", None):
63
- self._init_id(sid)
64
- self.params = SeedParams(**seed_params)
65
-
66
- def __getattr__(self, name):
67
- return None
68
-
69
- def __setitem__(self, key, value):
70
- setattr(self, key, value)
71
-
72
- def __getitem__(self, item):
73
- return getattr(self, item)
74
-
75
- def __str__(self):
76
- return json.dumps(self.__dict__, ensure_ascii=False)
77
-
78
- def __repr__(self):
79
- chars = [f"{k}={v}" for k, v in self.__dict__.items()]
80
- return f'{self.__class__.__name__}({", ".join(chars)})'
81
-
82
- def _init_seed(self, seed_info:dict):
83
- for k, v in seed_info.items():
84
- if k not in self.__SEED_PARAMS__:
85
- self.__setattr__(k, v)
86
-
87
- def _init_id(self, sid):
88
- if not sid:
89
- sid = hashlib.md5(self.to_string.encode()).hexdigest()
90
- self.__setattr__("sid", sid)
91
-
92
- @property
93
- def to_dict(self) -> dict:
94
- seed = self.__dict__.copy()
95
- if seed.get("params"):
96
- del seed["params"]
97
- return seed
98
-
99
- @property
100
- def to_string(self) -> str:
101
- return json.dumps(
102
- self.to_dict,
103
- ensure_ascii=False,
104
- separators=(",", ":")
105
- )
106
-
107
- @property
108
- def get_all(self):
109
- return json.dumps(
110
- self.__dict__,
111
- ensure_ascii=False,
112
- separators=(",", ":")
113
- )
114
-
cobweb_/constant.py DELETED
@@ -1,94 +0,0 @@
1
-
2
- class CrawlerModel:
3
-
4
- default = "cobweb.crawlers.Crawler"
5
- file_air = "cobweb.crawlers.FileCrawlerAir"
6
- file_pro = "cobweb.crawlers.FileCrawlerPro"
7
-
8
-
9
- class LauncherModel:
10
- task = "launcher model: task"
11
- resident = "launcher model: resident"
12
-
13
-
14
- class DownloadModel:
15
- common = "download model: common"
16
- file = "download model: file"
17
-
18
-
19
- class LogModel:
20
- simple = "log model: simple"
21
- common = "log model: common"
22
- detailed = "log model: detailed"
23
-
24
-
25
- class DealModel:
26
- fail = "deal model: fail"
27
- done = "deal model: done"
28
- poll = "deal model: poll"
29
-
30
-
31
- class LogTemplate:
32
-
33
- console_item = """
34
- ----------------------- start - console pipeline -----------------
35
- 种子详情 \n{seed_detail}
36
- 解析详情 \n{parse_detail}
37
- ----------------------- end - console pipeline ------------------
38
- """
39
-
40
- launcher_air_polling = """
41
- ----------------------- start - 轮训日志: {task} -----------------
42
- 内存队列
43
- 种子数: {doing_len}
44
- 待消费: {todo_len}
45
- 已消费: {done_len}
46
- 存储队列
47
- 待上传: {upload_len}
48
- ----------------------- end - 轮训日志: {task} ------------------
49
- """
50
-
51
- launcher_pro_polling = """
52
- ----------------------- start - 轮训日志: {task} -----------------
53
- 内存队列
54
- 种子数: {doing_len}
55
- 待消费: {todo_len}
56
- 已消费: {done_len}
57
- redis队列
58
- 种子数: {redis_seed_count}
59
- 待消费: {redis_todo_len}
60
- 消费中: {redis_doing_len}
61
- 存储队列
62
- 待上传: {upload_len}
63
- ----------------------- end - 轮训日志: {task} ------------------
64
- """
65
-
66
- download_exception = """
67
- ----------------------- download exception -----------------------
68
- 种子详情 \n{detail}
69
- 种子参数
70
- retry : {retry}
71
- priority : {priority}
72
- seed_version : {seed_version}
73
- identifier : {identifier}
74
- exception
75
- msg : {exception}
76
- ------------------------------------------------------------------
77
- """
78
-
79
- download_info = """
80
- ------------------------ download info ---------------------------
81
- 种子详情 \n{detail}
82
- 种子参数
83
- retry : {retry}
84
- priority : {priority}
85
- seed_version : {seed_version}
86
- identifier : {identifier}
87
- response
88
- status : {status} \n{response}
89
- ------------------------------------------------------------------
90
- """
91
-
92
- @staticmethod
93
- def log_info(item: dict) -> str:
94
- return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
@@ -1 +0,0 @@
1
- from .crawler import Crawler
@@ -1,184 +0,0 @@
1
- import json
2
- import threading
3
- import time
4
- import traceback
5
- from inspect import isgenerator
6
- from typing import Union, Callable, Mapping
7
-
8
- from cobweb.constant import DealModel, LogTemplate
9
- from cobweb.base import (
10
- Queue,
11
- Seed,
12
- BaseItem,
13
- Request,
14
- Response,
15
- ConsoleItem,
16
- logger
17
- )
18
- from cobweb.utils import LoghubDot
19
-
20
-
21
- class Crawler(threading.Thread):
22
-
23
- def __init__(
24
- self,
25
- task: str,
26
- project: str,
27
- stop: threading.Event,
28
- pause: threading.Event,
29
- # launcher_queue: Union[Mapping[str, Queue]],
30
- get_seed: Callable,
31
- set_seed: Callable,
32
- add_seed: Callable,
33
- delete_seed: Callable,
34
- upload_data: Callable,
35
- custom_func: Union[Mapping[str, Callable]],
36
- thread_num: int,
37
- max_retries: int,
38
- time_sleep: int,
39
- ):
40
- super().__init__()
41
- self.task = task
42
- self.project = project
43
- self._stop = stop
44
- self._pause = pause
45
- self._get_seed = get_seed
46
- self._set_seed = set_seed
47
- self._add_seed = add_seed
48
- self._delete_seed = delete_seed
49
- self._upload_data = upload_data
50
-
51
- for func_name, _callable in custom_func.items():
52
- if isinstance(_callable, Callable):
53
- self.__setattr__(func_name, _callable)
54
-
55
- self.thread_num = thread_num
56
- self.time_sleep = time_sleep
57
- self.max_retries = max_retries
58
-
59
- self.loghub_dot = LoghubDot()
60
-
61
- @staticmethod
62
- def request(seed: Seed) -> Union[Request, BaseItem]:
63
- yield Request(seed.url, seed, timeout=5)
64
-
65
- @staticmethod
66
- def download(item: Request) -> Union[Seed, BaseItem, Response, str]:
67
- response = item.download()
68
- yield Response(item.seed, response, **item.to_dict)
69
-
70
- @staticmethod
71
- def parse(item: Response) -> BaseItem:
72
- upload_item = item.to_dict
73
- upload_item["text"] = item.response.text
74
- yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
75
-
76
- # def get_seed(self) -> Seed:
77
- # return self._todo.pop()
78
-
79
- def distribute(self, item, seed):
80
- if isinstance(item, BaseItem):
81
- self._upload_data(item)
82
- elif isinstance(item, Seed):
83
- self._add_seed(item)
84
- elif isinstance(item, str) and item == DealModel.poll:
85
- self._set_seed(seed)
86
- elif isinstance(item, str) and item == DealModel.done:
87
- self._delete_seed(seed)
88
- elif isinstance(item, str) and item == DealModel.fail:
89
- seed.params.seed_status = DealModel.fail
90
- self._delete_seed(seed)
91
- else:
92
- raise TypeError("yield value type error!")
93
-
94
- def spider(self):
95
- while not self._stop.is_set():
96
-
97
- seed = self._get_seed()
98
-
99
- if not seed:
100
- time.sleep(1)
101
- continue
102
-
103
- elif seed.params.retry > self.max_retries:
104
- seed.params.seed_status = DealModel.fail
105
- self._delete_seed(seed)
106
- continue
107
-
108
- seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
109
-
110
- try:
111
- request_iterators = self.request(seed)
112
-
113
- if not isgenerator(request_iterators):
114
- raise TypeError("request function isn't a generator!")
115
-
116
- iterator_status = False
117
-
118
- for request_item in request_iterators:
119
-
120
- iterator_status = True
121
-
122
- if isinstance(request_item, Request):
123
- iterator_status = False
124
- start_time = time.time()
125
- download_iterators = self.download(request_item)
126
- if not isgenerator(download_iterators):
127
- raise TypeError("download function isn't a generator")
128
-
129
- for download_item in download_iterators:
130
- iterator_status = True
131
- if isinstance(download_item, Response):
132
- iterator_status = False
133
- logger.info(LogTemplate.download_info.format(
134
- detail=seed_detail_log_info,
135
- retry=seed.params.retry,
136
- priority=seed.params.priority,
137
- seed_version=seed.params.seed_version,
138
- identifier=seed.identifier or "",
139
- status=download_item.response,
140
- response=LogTemplate.log_info(download_item.to_dict)
141
- ))
142
- if isinstance(download_item, Response):
143
- end_time = time.time()
144
- self.loghub_dot.build(
145
- topic=f"{self.project}:{self.task}",
146
- cost_time=end_time - start_time,
147
- **download_item.to_dict
148
- )
149
- parse_iterators = self.parse(download_item)
150
- if not isgenerator(parse_iterators):
151
- raise TypeError("parse function isn't a generator")
152
- for parse_item in parse_iterators:
153
- iterator_status = True
154
- if isinstance(parse_item, Response):
155
- raise TypeError("upload_item can't be a Response instance")
156
- self.distribute(parse_item, seed)
157
- else:
158
- self.distribute(download_item, seed)
159
- else:
160
- self.distribute(request_item, seed)
161
-
162
- if not iterator_status:
163
- raise ValueError("request/download/parse function yield value error!")
164
- except Exception as e:
165
- logger.info(LogTemplate.download_exception.format(
166
- detail=seed_detail_log_info,
167
- retry=seed.params.retry,
168
- priority=seed.params.priority,
169
- seed_version=seed.params.seed_version,
170
- identifier=seed.identifier or "",
171
- exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
172
- ))
173
- seed.params.retry += 1
174
- # self._todo.push(seed)
175
- self._set_seed(seed)
176
- time.sleep(self.time_sleep * seed.params.retry)
177
- finally:
178
- time.sleep(0.1)
179
- logger.info("spider thread close")
180
-
181
- def run(self):
182
- for index in range(self.thread_num):
183
- threading.Thread(name=f"spider_{index}", target=self.spider).start()
184
-
cobweb_/db/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- from .redis_db import RedisDB
2
- from .api_db import ApiDB