cobweb-launcher 1.2.25__py3-none-any.whl → 3.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. cobweb/__init__.py +4 -1
  2. cobweb/base/__init__.py +3 -3
  3. cobweb/base/common_queue.py +37 -16
  4. cobweb/base/item.py +35 -16
  5. cobweb/base/{log.py → logger.py} +3 -3
  6. cobweb/base/request.py +741 -54
  7. cobweb/base/response.py +380 -13
  8. cobweb/base/seed.py +96 -48
  9. cobweb/base/task_queue.py +180 -0
  10. cobweb/base/test.py +257 -0
  11. cobweb/constant.py +10 -1
  12. cobweb/crawlers/crawler.py +12 -155
  13. cobweb/db/api_db.py +3 -2
  14. cobweb/db/redis_db.py +117 -28
  15. cobweb/launchers/__init__.py +4 -3
  16. cobweb/launchers/distributor.py +141 -0
  17. cobweb/launchers/launcher.py +95 -157
  18. cobweb/launchers/uploader.py +68 -0
  19. cobweb/log_dots/__init__.py +2 -0
  20. cobweb/log_dots/dot.py +258 -0
  21. cobweb/log_dots/loghub_dot.py +53 -0
  22. cobweb/pipelines/__init__.py +1 -1
  23. cobweb/pipelines/pipeline.py +5 -55
  24. cobweb/pipelines/pipeline_csv.py +25 -0
  25. cobweb/pipelines/pipeline_loghub.py +32 -12
  26. cobweb/schedulers/__init__.py +1 -0
  27. cobweb/schedulers/scheduler.py +66 -0
  28. cobweb/schedulers/scheduler_with_redis.py +189 -0
  29. cobweb/setting.py +27 -40
  30. cobweb/utils/__init__.py +5 -3
  31. cobweb/utils/bloom.py +58 -58
  32. cobweb/{base → utils}/decorators.py +14 -12
  33. cobweb/utils/dotting.py +300 -0
  34. cobweb/utils/oss.py +113 -94
  35. cobweb/utils/tools.py +3 -15
  36. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/METADATA +31 -43
  37. cobweb_launcher-3.2.20.dist-info/RECORD +44 -0
  38. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/WHEEL +1 -1
  39. cobweb/crawlers/base_crawler.py +0 -144
  40. cobweb/crawlers/file_crawler.py +0 -98
  41. cobweb/launchers/launcher_air.py +0 -88
  42. cobweb/launchers/launcher_api.py +0 -221
  43. cobweb/launchers/launcher_pro.py +0 -222
  44. cobweb/pipelines/base_pipeline.py +0 -54
  45. cobweb/pipelines/loghub_pipeline.py +0 -34
  46. cobweb/pipelines/pipeline_console.py +0 -22
  47. cobweb_launcher-1.2.25.dist-info/RECORD +0 -40
  48. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/LICENSE +0 -0
  49. {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.20.dist-info}/top_level.txt +0 -0
@@ -1,221 +0,0 @@
1
- import time
2
- import threading
3
-
4
- from cobweb.db import ApiDB
5
- from cobweb.base import Seed, logger
6
- from cobweb.constant import DealModel, LogTemplate
7
- from .launcher import Launcher, check_pause
8
-
9
-
10
class LauncherApi(Launcher):
    """Launcher whose seed bookkeeping goes through an ``ApiDB`` store.

    All seeds of a task live under ``_todo_key``.  As this class uses the
    key, scores in ``[0, 1000)`` are counted as "todo" and negative scores
    as "doing" (see the counters logged by ``_polling``).
    """

    def __init__(self, task, project, custom_setting=None, **kwargs):
        """Build the store keys for ``project``/``task`` and create the events.

        :param task: task name, used in every store key
        :param project: project name, used in every store key
        :param custom_setting: forwarded unchanged to ``Launcher.__init__``
        :param kwargs: forwarded unchanged to ``Launcher.__init__``
        """
        super().__init__(task, project, custom_setting, **kwargs)
        self._db = ApiDB()

        self._todo_key = "{%s:%s}:todo" % (project, task)
        self._done_key = "{%s:%s}:done" % (project, task)
        self._fail_key = "{%s:%s}:fail" % (project, task)
        self._heartbeat_key = "heartbeat:%s_%s" % (project, task)

        self._statistics_done_key = "statistics:%s:%s:done" % (project, task)
        self._statistics_fail_key = "statistics:%s:%s:fail" % (project, task)
        self._speed_control_key = "speed_control:%s_%s" % (project, task)

        self._reset_lock_key = "lock:reset:%s_%s" % (project, task)

        # self._bf_key = "bloom_%s_%s" % (project, task)
        # self._bf = BloomFilter(self._bf_key)

        self._heartbeat_start_event = threading.Event()
        self._redis_queue_empty_event = threading.Event()

    @property
    def heartbeat(self):
        """Result of ``exists`` on the heartbeat key (truthy while it is set)."""
        return self._db.exists(self._heartbeat_key)

    def statistics(self, key, count):
        """Add ``count`` to the statistics counter stored under ``key``.

        When ``_task_model`` is falsy and the key does not exist yet, the
        counter is created with a 30-day expiry; otherwise it is incremented.
        """
        if not self._task_model and not self._db.exists(key):
            self._db.setex(key, 86400 * 30, int(count))
        else:
            self._db.incrby(key, count)

    def _get_seed(self) -> Seed:
        """Pop the next seed from the local todo queue, subject to rate control.

        The window is ``self._time_window`` seconds and the limit is
        ``self._spider_max_count``.  When ``auto_incr`` reports that the limit
        is reached, the call waits and returns ``None``.

        :return: a seed, or ``None`` when throttled (or whatever ``pop``
            yields for an empty queue)
        """
        todo_queue = self.__LAUNCHER_QUEUE__["todo"]
        if todo_queue.length and not self._db.auto_incr(
            self._speed_control_key,
            t=self._time_window,
            limit=self._spider_max_count,
        ):
            expire_time = self._db.ttl(self._speed_control_key)
            logger.info(f"Too fast! Please wait {expire_time} seconds...")
            # A TTL can be negative (key already expired / key without
            # expiry); time.sleep() raises ValueError on negative values, so
            # fall back to a short pause instead of crashing the worker.
            if expire_time and expire_time > 0:
                time.sleep(expire_time / 2)
            else:
                time.sleep(1)
            return None
        return todo_queue.pop()

    @check_pause
    def _execute_heartbeat(self):
        """Re-arm the 5-second heartbeat key once the start event is set."""
        if self._heartbeat_start_event.is_set():
            self._db.setex(self._heartbeat_key, 5)
        time.sleep(3)

    @check_pause
    def _reset(self):
        """Check for expired seeds and put them back into the store queue.

        Only the instance that obtains the reset lock does the work.  With a
        live heartbeat only "doing" seeds older than
        ``self._seed_reset_seconds`` are selected; without one, all of them.
        """
        reset_wait_seconds = 30
        if self._db.lock(self._reset_lock_key, t=120):
            try:
                _min = -int(time.time()) + self._seed_reset_seconds \
                    if self.heartbeat else "-inf"

                # NOTE(review): presumably rescored to 0, i.e. back to "todo"
                # - confirm against ApiDB.members.
                self._db.members(self._todo_key, 0, _min=_min, _max="(0")

                if not self.heartbeat:
                    self._heartbeat_start_event.set()
            finally:
                # Release the lock even if the store call failed, instead of
                # blocking every instance until the 120 s expiry.
                self._db.delete(self._reset_lock_key)

        time.sleep(reset_wait_seconds)

    @check_pause
    def _scheduler(self):
        """Fetch todo seeds from the store into the local todo queue.

        Every fetched seed is also recorded in the ``__DOING__`` dict.
        """
        if not self._db.zcount(self._todo_key, 0, "(1000"):
            time.sleep(self._scheduler_wait_seconds)
        elif self.__LAUNCHER_QUEUE__['todo'].length >= self._todo_queue_size:
            time.sleep(self._todo_queue_full_wait_seconds)
        else:
            members = self._db.members(
                self._todo_key, int(time.time()),
                count=self._todo_queue_size,
                _min=0, _max="(1000"
            )
            for member, priority in members:
                seed = Seed(member, priority=priority)
                self.__LAUNCHER_QUEUE__['todo'].push(seed)
                self.__DOING__[seed.to_string] = seed.params.priority

    @check_pause
    def _insert(self):
        """Move seeds from the local "new" queue into the store queue."""
        seeds = {}
        new_queue = self.__LAUNCHER_QUEUE__['new']
        status = new_queue.length < self._new_queue_max_size
        for _ in range(self._new_queue_max_size):
            seed = new_queue.pop()
            if not seed:
                # Queue drained: stop instead of popping an empty queue for
                # the rest of the batch (same early exit as _delete).
                break
            seeds[seed.to_string] = seed.params.priority
        if seeds:
            self._db.zadd(self._todo_key, seeds, nx=True)
        if status:
            time.sleep(self._new_queue_wait_seconds)

    @check_pause
    def _refresh(self):
        """Refresh the timestamp score of seeds in progress.

        Keeps ``_reset`` from treating them as expired and re-queuing them.
        """
        if self.__DOING__:
            refresh_time = int(time.time())
            # Snapshot first: other loops add to / remove from __DOING__
            # while this one runs.
            doing = list(self.__DOING__.items())
            seeds = {k: -refresh_time - v / 1000 for k, v in doing}
            self._db.zadd(self._todo_key, item=seeds, xx=True)
        time.sleep(15)

    @check_pause
    def _delete(self):
        """Drain the local "done" queue and settle those seeds in the store.

        Failed seeds go to the fail key.  Successful seeds go to the done key
        when ``self._done_model == 1`` and are simply removed otherwise.  All
        of them are then dropped from the ``__DOING__`` dict.
        """
        failed, succeed, common = [], [], []
        done_queue = self.__LAUNCHER_QUEUE__['done']
        status = done_queue.length < self._done_queue_max_size

        for _ in range(self._done_queue_max_size):
            seed = done_queue.pop()
            if not seed:
                break
            if seed.params.seed_status == DealModel.fail:
                failed.append(seed.to_string)
            elif self._done_model == 1:
                succeed.append(seed.to_string)
            else:
                common.append(seed.to_string)

        if failed or succeed or common:
            # Only call the store for non-empty groups; a call without
            # members is at best a wasted round trip.
            succeed_count = 0
            failed_count = 0
            if common:
                succeed_count += int(self._db.zrem(self._todo_key, *common) or 0)
            if succeed:
                succeed_count += int(
                    self._db.done([self._todo_key, self._done_key], *succeed) or 0
                )
            if failed:
                failed_count = int(
                    self._db.done([self._todo_key, self._fail_key], *failed) or 0
                )

            if failed_count:
                self.statistics(self._statistics_fail_key, failed_count)
            if succeed_count:
                self.statistics(self._statistics_done_key, succeed_count)

            self._remove_doing_seeds(common + succeed + failed)

        if status:
            time.sleep(self._done_queue_wait_seconds)

    def _polling(self):
        """Supervise the task until ``self._stop`` is set.

        While any local queue holds data, a status line is logged every 30 s.
        Once all are empty the loop polls every 3 s: after three idle checks
        it clears ``__DOING__`` and pauses if the store has no todo or doing
        seeds left; while paused it either stops (non-persistent task),
        resumes when new todo seeds show up, or keeps waiting.
        """
        wait_scheduler_execute = True
        check_empty_times = 0
        while not self._stop.is_set():
            queue_not_empty_count = 0
            polling_wait_seconds = 30

            for q in self.__LAUNCHER_QUEUE__.values():
                if q.length != 0:
                    queue_not_empty_count += 1
                    wait_scheduler_execute = False

            if queue_not_empty_count == 0:
                polling_wait_seconds = 3
                if self._pause.is_set():
                    check_empty_times = 0
                    if not self._task_model and (
                        not wait_scheduler_execute or
                        int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
                    ):
                        logger.info("Done! ready to close thread...")
                        self._stop.set()

                    elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
                        logger.info(f"Recovery {self.task} task run!")
                        self._pause.clear()
                        self._execute()
                    else:
                        logger.info("pause! waiting for resume...")
                elif check_empty_times > 2:
                    self.__DOING__ = {}
                    if not self._db.zcount(self._todo_key, _min="-inf", _max="(1000"):
                        self._pause.set()
                else:
                    logger.info(
                        "check whether the task is complete, "
                        f"reset times {3 - check_empty_times}"
                    )
                    check_empty_times += 1
            else:
                logger.info(LogTemplate.launcher_pro_polling.format(
                    task=self.task,
                    doing_len=len(self.__DOING__.keys()),
                    todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
                    done_len=self.__LAUNCHER_QUEUE__['done'].length,
                    redis_seed_count=self._db.zcount(self._todo_key, "-inf", "+inf"),
                    redis_todo_len=self._db.zcount(self._todo_key, 0, "(1000"),
                    redis_doing_len=self._db.zcount(self._todo_key, "-inf", "(0"),
                    upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
                ))

            time.sleep(polling_wait_seconds)

        logger.info("Done! Ready to close thread...")
221
-
@@ -1,222 +0,0 @@
1
- import time
2
- import threading
3
-
4
- from cobweb.db import RedisDB
5
- from cobweb.base import Seed, logger
6
- from cobweb.utils import BloomFilter
7
- from cobweb.constant import DealModel, LogTemplate
8
- from .launcher import Launcher, check_pause
9
-
10
-
11
class LauncherPro(Launcher):
    """Launcher whose seed bookkeeping goes through a ``RedisDB`` store.

    All seeds of a task live under ``_todo_key``.  As this class uses the
    key, scores in ``[0, 1000)`` are counted as "todo" and negative scores
    as "doing" (see the counters logged by ``_polling``).
    """

    def __init__(self, task, project, custom_setting=None, **kwargs):
        """Build the store keys for ``project``/``task`` and create the events.

        :param task: task name, used in every store key
        :param project: project name, used in every store key
        :param custom_setting: forwarded unchanged to ``Launcher.__init__``
        :param kwargs: forwarded unchanged to ``Launcher.__init__``
        """
        super().__init__(task, project, custom_setting, **kwargs)
        self._todo_key = "{%s:%s}:todo" % (project, task)
        self._done_key = "{%s:%s}:done" % (project, task)
        self._fail_key = "{%s:%s}:fail" % (project, task)
        self._heartbeat_key = "heartbeat:%s_%s" % (project, task)

        self._statistics_done_key = "statistics:%s:%s:done" % (project, task)
        self._statistics_fail_key = "statistics:%s:%s:fail" % (project, task)
        self._speed_control_key = "speed_control:%s_%s" % (project, task)

        self._reset_lock_key = "lock:reset:%s_%s" % (project, task)

        self._bf_key = "bloom_%s_%s" % (project, task)

        self._db = RedisDB()

        self._bf = BloomFilter(self._bf_key)

        self._heartbeat_start_event = threading.Event()
        self._redis_queue_empty_event = threading.Event()

    @property
    def heartbeat(self):
        """Result of ``exists`` on the heartbeat key (truthy while it is set)."""
        return self._db.exists(self._heartbeat_key)

    def statistics(self, key, count):
        """Add ``count`` to the statistics counter stored under ``key``.

        When ``_task_model`` is falsy and the key does not exist yet, the
        counter is created with a 30-day expiry; otherwise it is incremented.
        """
        if not self._task_model and not self._db.exists(key):
            self._db.setex(key, 86400 * 30, int(count))
        else:
            self._db._client.incrby(key, count)

    def _get_seed(self) -> Seed:
        """Pop the next seed from the local todo queue, subject to rate control.

        The number of seeds handed out is counted under
        ``_speed_control_key``.  Once it exceeds ``self._spider_max_count``
        the call waits half of the key's remaining lifetime and returns
        ``None``.  A counter that has no expiry (TTL ``-1``) is deleted.

        :return: a seed, or ``None`` when throttled (or whatever ``pop``
            yields for an empty queue)
        """
        spider_speed = self._db._client.get(self._speed_control_key)
        if int(spider_speed or 0) > self._spider_max_count:
            expire_time = self._db.ttl(self._speed_control_key)
            if expire_time == -1:
                self._db.delete(self._speed_control_key)
            else:
                logger.info(f"Too fast! Please wait {expire_time} seconds...")
                # The key may expire between the GET and the TTL call, which
                # yields a negative TTL; time.sleep() raises ValueError on
                # negative values, so clamp at zero.
                time.sleep(max(expire_time or 0, 0) / 2)
                return None
        seed = self.__LAUNCHER_QUEUE__["todo"].pop()
        # lock() failing means the counter already exists for this window,
        # so count this seed against it.
        if seed and not self._db.lock(self._speed_control_key, t=self._time_window):
            self._db._client.incrby(self._speed_control_key, 1)
        return seed

    @check_pause
    def _execute_heartbeat(self):
        """Re-arm the 5-second heartbeat key once the start event is set."""
        if self._heartbeat_start_event.is_set():
            self._db.setex(self._heartbeat_key, 5)
        time.sleep(3)

    @check_pause
    def _reset(self):
        """Check for expired seeds and put them back into the redis queue.

        Only the instance that obtains the reset lock does the work.  With a
        live heartbeat only "doing" seeds older than
        ``self._seed_reset_seconds`` are selected; without one, all of them.
        """
        reset_wait_seconds = 30
        if self._db.lock(self._reset_lock_key, t=120):
            try:
                _min = -int(time.time()) + self._seed_reset_seconds \
                    if self.heartbeat else "-inf"

                # NOTE(review): presumably rescored to 0, i.e. back to "todo"
                # - confirm against RedisDB.members.
                self._db.members(self._todo_key, 0, _min=_min, _max="(0")
            finally:
                # Release the lock even if the store call failed, instead of
                # blocking every instance until the 120 s expiry.
                self._db.delete(self._reset_lock_key)

            if not self.heartbeat:
                self._heartbeat_start_event.set()

        time.sleep(reset_wait_seconds)

    @check_pause
    def _scheduler(self):
        """Fetch todo seeds from redis into the local todo queue.

        Every fetched seed is also recorded in the ``__DOING__`` dict.
        """
        if not self._db.zcount(self._todo_key, 0, "(1000"):
            time.sleep(self._scheduler_wait_seconds)
        elif self.__LAUNCHER_QUEUE__['todo'].length >= self._todo_queue_size:
            time.sleep(self._todo_queue_full_wait_seconds)
        else:
            members = self._db.members(
                self._todo_key, int(time.time()),
                count=self._todo_queue_size,
                _min=0, _max="(1000"
            )
            for member, priority in members:
                seed = Seed(member, priority=priority)
                self.__LAUNCHER_QUEUE__['todo'].push(seed)
                self.__DOING__[seed.to_string] = seed.params.priority

    @check_pause
    def _insert(self):
        """Move seeds from the local "new" queue into the redis queue."""
        seeds = {}
        new_queue = self.__LAUNCHER_QUEUE__['new']
        status = new_queue.length < self._new_queue_max_size
        for _ in range(self._new_queue_max_size):
            seed = new_queue.pop()
            if not seed:
                # Queue drained: stop instead of popping an empty queue for
                # the rest of the batch (same early exit as _delete).
                break
            seeds[seed.to_string] = seed.params.priority
        if seeds:
            self._db.zadd(self._todo_key, seeds, nx=True)
        if status:
            time.sleep(self._new_queue_wait_seconds)

    @check_pause
    def _refresh(self):
        """Refresh the timestamp score of seeds in progress.

        Keeps ``_reset`` from treating them as expired and re-queuing them.
        """
        if self.__DOING__:
            refresh_time = int(time.time())
            # Snapshot first: other loops add to / remove from __DOING__
            # while this one runs.
            doing = list(self.__DOING__.items())
            seeds = {k: -refresh_time - v / 1000 for k, v in doing}
            self._db.zadd(self._todo_key, item=seeds, xx=True)
        time.sleep(15)

    @check_pause
    def _delete(self):
        """Drain the local "done" queue and settle those seeds in redis.

        Failed seeds go to the fail key.  Successful seeds go to the done key
        when ``self._done_model == 1`` and are simply removed otherwise.  All
        of them are then dropped from the ``__DOING__`` dict.
        """
        failed, succeed, common = [], [], []
        done_queue = self.__LAUNCHER_QUEUE__['done']
        status = done_queue.length < self._done_queue_max_size

        for _ in range(self._done_queue_max_size):
            seed = done_queue.pop()
            if not seed:
                break
            if seed.params.seed_status == DealModel.fail:
                failed.append(seed.to_string)
            elif self._done_model == 1:
                succeed.append(seed.to_string)
            else:
                common.append(seed.to_string)

        if failed or succeed or common:
            # Only call redis for non-empty groups; ZREM needs at least one
            # member, and an empty call is at best a wasted round trip.
            succeed_count = 0
            failed_count = 0
            if common:
                succeed_count += int(self._db.zrem(self._todo_key, *common) or 0)
            if succeed:
                succeed_count += int(
                    self._db.done([self._todo_key, self._done_key], *succeed) or 0
                )
            if failed:
                failed_count = int(
                    self._db.done([self._todo_key, self._fail_key], *failed) or 0
                )

            if failed_count:
                self.statistics(self._statistics_fail_key, failed_count)
            if succeed_count:
                self.statistics(self._statistics_done_key, succeed_count)

            self._remove_doing_seeds(common + succeed + failed)

        if status:
            time.sleep(self._done_queue_wait_seconds)

    def _polling(self):
        """Supervise the task until ``self._stop`` is set.

        While any local queue holds data, a status line is logged every 30 s.
        Once all are empty the loop polls every 3 s: after three idle checks
        it clears ``__DOING__`` and pauses if redis has no todo or doing
        seeds left; while paused it either stops (non-persistent task),
        resumes when new todo seeds show up, or keeps waiting.
        """
        wait_scheduler_execute = True
        check_empty_times = 0
        while not self._stop.is_set():
            queue_not_empty_count = 0
            polling_wait_seconds = 30

            for q in self.__LAUNCHER_QUEUE__.values():
                if q.length != 0:
                    queue_not_empty_count += 1
                    wait_scheduler_execute = False

            if queue_not_empty_count == 0:
                polling_wait_seconds = 3
                if self._pause.is_set():
                    check_empty_times = 0
                    if not self._task_model and (
                        not wait_scheduler_execute or
                        int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
                    ):
                        logger.info("Done! ready to close thread...")
                        self._stop.set()

                    elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
                        logger.info(f"Recovery {self.task} task run!")
                        self._pause.clear()
                        self._execute()
                    else:
                        logger.info("pause! waiting for resume...")
                elif check_empty_times > 2:
                    self.__DOING__ = {}
                    if not self._db.zcount(self._todo_key, _min="-inf", _max="(1000"):
                        self._pause.set()
                else:
                    logger.info(
                        "check whether the task is complete, "
                        f"reset times {3 - check_empty_times}"
                    )
                    check_empty_times += 1
            else:
                logger.info(LogTemplate.launcher_pro_polling.format(
                    task=self.task,
                    doing_len=len(self.__DOING__.keys()),
                    todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
                    done_len=self.__LAUNCHER_QUEUE__['done'].length,
                    redis_seed_count=self._db.zcount(self._todo_key, "-inf", "+inf"),
                    redis_todo_len=self._db.zcount(self._todo_key, 0, "(1000"),
                    redis_doing_len=self._db.zcount(self._todo_key, "-inf", "(0"),
                    upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
                ))

            time.sleep(polling_wait_seconds)

        logger.info("Done! Ready to close thread...")
222
-
@@ -1,54 +0,0 @@
1
- import time
2
- import threading
3
-
4
- from abc import ABC, abstractmethod
5
- from cobweb.base import BaseItem, Queue, logger
6
-
7
-
8
class Pipeline(threading.Thread, ABC):
    """Background thread that uploads queued items in per-table batches.

    Subclasses implement ``build`` (item -> upload record) and ``upload``
    (send one table's batch).  Seeds whose batch uploaded without raising
    are pushed to ``done_queue``.
    """

    def __init__(
            self,
            done_queue: Queue,
            upload_queue: Queue,
            upload_queue_size: int,
            upload_wait_seconds: int
    ):
        """Store the queues and batching parameters.

        :param done_queue: queue that receives the seeds of uploaded items
        :param upload_queue: queue the items to upload are popped from
        :param upload_queue_size: maximum number of items taken per round
        :param upload_wait_seconds: pause before a round when the upload
            queue holds fewer than ``upload_queue_size`` items
        """
        super().__init__()
        self.done_queue = done_queue
        self.upload_queue = upload_queue
        self.upload_queue_size = upload_queue_size
        self.upload_wait_seconds = upload_wait_seconds

    @abstractmethod
    def build(self, item: BaseItem) -> dict:
        """Convert ``item`` into the record handed to ``upload``."""
        pass

    @abstractmethod
    def upload(self, table: str, data: list) -> bool:
        """Send the records in ``data`` to ``table``; raise on failure."""
        pass

    def run(self):
        """Loop forever: collect a batch, upload it per table, report seeds."""
        while True:
            # Give the queue time to fill up when it is below the batch size.
            if self.upload_queue.length < self.upload_queue_size:
                time.sleep(self.upload_wait_seconds)

            # table -> (records, seeds), so each seed stays tied to the
            # table whose upload decides its fate.
            batches = {}
            for _ in range(self.upload_queue_size):
                item = self.upload_queue.pop()
                if not item:
                    break
                data = self.build(item)
                records, seeds = batches.setdefault(item.table, ([], []))
                records.append(data)
                seeds.append(item.seed)

            uploaded_seeds = []
            for table, (records, seeds) in batches.items():
                try:
                    self.upload(table, records)
                except Exception as e:
                    # Best effort: a failing table must not stop the thread
                    # or the other tables; its seeds are just not reported.
                    logger.info(e)
                else:
                    uploaded_seeds.extend(seeds)

            # Report only seeds that were really uploaded, and never push an
            # empty batch on idle rounds.
            if uploaded_seeds:
                self.done_queue.push(uploaded_seeds)
53
-
54
-
@@ -1,34 +0,0 @@
1
- import json
2
-
3
- from cobweb import setting
4
- from cobweb.base import BaseItem
5
- from cobweb.pipelines import Pipeline
6
- from aliyun.log import LogClient, LogItem, PutLogsRequest
7
-
8
-
9
class LoghubPipeline(Pipeline):
    """Pipeline that writes items to Aliyun Log Service (Loghub)."""

    def __init__(self, *args, **kwargs):
        """Create the pipeline and a ``LogClient`` from ``setting.LOGHUB_CONFIG``."""
        super().__init__(*args, **kwargs)
        self.client = LogClient(**setting.LOGHUB_CONFIG)

    def build(self, item: BaseItem):
        """Convert ``item`` into a ``LogItem``.

        Non-string values are JSON-encoded (non-ASCII kept as is) and the
        fields are written in sorted key order.

        :param item: the item to convert; its ``to_dict`` mapping is read
        :return: the populated ``LogItem``
        """
        # Build a fresh mapping rather than rewriting the dict returned by
        # ``item.to_dict`` in place while iterating over it.
        fields = {
            key: value if isinstance(value, str)
            else json.dumps(value, ensure_ascii=False)
            for key, value in item.to_dict.items()
        }
        log_item = LogItem()
        log_item.set_contents(sorted(fields.items()))
        return log_item

    def upload(self, table, datas):
        """Send ``datas`` to the logstore named ``table`` in one request.

        :param table: logstore name
        :param datas: list of ``LogItem`` objects produced by ``build``
        """
        request = PutLogsRequest(
            project=setting.LOGHUB_PROJECT,
            logstore=table,
            topic=setting.LOGHUB_TOPIC,
            source=setting.LOGHUB_SOURCE,
            logitems=datas,
            compress=True
        )
        self.client.put_logs(request=request)
@@ -1,22 +0,0 @@
1
- from cobweb.base import ConsoleItem, logger
2
- from cobweb.constant import LogTemplate
3
- from cobweb.pipelines import Pipeline
4
-
5
-
6
class Console(Pipeline):
    """Pipeline that logs items instead of uploading them."""

    def build(self, item: ConsoleItem):
        """Return the seed and the parsed data of ``item`` as plain dicts."""
        seed_dict = item.seed.to_dict
        data_dict = item.to_dict
        return {"seed": seed_dict, "data": data_dict}

    def upload(self, table, datas):
        """Log every record in ``datas``; ``table`` is not used.

        Parsed details longer than 500 characters are cut and followed by an
        omission notice.
        """
        for record in datas:
            detail = LogTemplate.log_info(record["data"])
            if len(detail) > 500:
                detail = "".join((
                    detail[:500],
                    " ...\n",
                    " " * 12,
                    "-- Text is too long and details are omitted!",
                ))
            seed_detail = LogTemplate.log_info(record["seed"])
            message = LogTemplate.console_item.format(
                seed_detail=seed_detail,
                parse_detail=detail,
            )
            logger.info(message)
@@ -1,40 +0,0 @@
1
- cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
- cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
4
- cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
- cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
6
- cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
7
- cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
8
- cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
9
- cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
10
- cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
11
- cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
12
- cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
13
- cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
14
- cobweb/crawlers/crawler.py,sha256=xiFNM0t69f5xlm59hPbO2MpqtdirVAUhD84-CLpyHPM,6349
15
- cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
16
- cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
17
- cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
18
- cobweb/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
19
- cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
20
- cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
21
- cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
22
- cobweb/launchers/launcher.py,sha256=SK4f3Fpuv-QMMriHruXGQ1sh1lxT1DZ2PdG0p2wAzNw,6978
23
- cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
24
- cobweb/launchers/launcher_api.py,sha256=Ih8f5xDcFlGBn6VSnlrpxcchMB48ugsj2NTWYgGYWfY,8669
25
- cobweb/launchers/launcher_pro.py,sha256=NBJstQuB0o_jMiySJ14lk0Y3WAxxiScaQvXa1qtTSo4,8683
26
- cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
27
- cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
28
- cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
29
- cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
30
- cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
31
- cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
32
- cobweb/utils/__init__.py,sha256=vBtZTy3EfRE0MmH43URhmr7nw6_oOWTEbGOM9xR_9o8,78
33
- cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
34
- cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
35
- cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
36
- cobweb_launcher-1.2.25.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
37
- cobweb_launcher-1.2.25.dist-info/METADATA,sha256=I2DC0dBkKiIKUNmGWA2JNMaQhhydniKDJT3gteawVOg,6510
38
- cobweb_launcher-1.2.25.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
39
- cobweb_launcher-1.2.25.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
40
- cobweb_launcher-1.2.25.dist-info/RECORD,,