cobweb-launcher 1.2.49__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/base/__init__.py +141 -4
- cobweb/base/basic.py +28 -82
- cobweb/base/common_queue.py +13 -0
- cobweb/base/dotting.py +1 -1
- cobweb/base/request.py +14 -2
- cobweb/base/seed.py +10 -6
- cobweb/constant.py +16 -0
- cobweb/crawlers/crawler.py +51 -181
- cobweb/db/redis_db.py +28 -0
- cobweb/launchers/__init__.py +2 -2
- cobweb/launchers/launcher.py +110 -141
- cobweb/launchers/launcher_api.py +66 -114
- cobweb/launchers/launcher_pro.py +76 -194
- cobweb/pipelines/base_pipeline.py +54 -0
- cobweb/pipelines/loghub_pipeline.py +34 -0
- cobweb/pipelines/pipeline.py +25 -49
- cobweb/schedulers/__init__.py +0 -2
- cobweb/schedulers/scheduler_redis.py +5 -8
- cobweb/setting.py +29 -6
- cobweb/utils/dotting.py +10 -42
- cobweb_/__init__.py +2 -0
- cobweb_/base/__init__.py +9 -0
- cobweb_/base/common_queue.py +30 -0
- cobweb_/base/decorators.py +40 -0
- cobweb_/base/item.py +46 -0
- cobweb_/base/log.py +94 -0
- cobweb_/base/request.py +82 -0
- cobweb_/base/response.py +23 -0
- cobweb_/base/seed.py +114 -0
- cobweb_/constant.py +94 -0
- cobweb_/crawlers/__init__.py +1 -0
- cobweb_/crawlers/crawler.py +184 -0
- cobweb_/db/__init__.py +2 -0
- cobweb_/db/api_db.py +82 -0
- cobweb_/db/redis_db.py +130 -0
- cobweb_/exceptions/__init__.py +1 -0
- cobweb_/exceptions/oss_db_exception.py +28 -0
- cobweb_/launchers/__init__.py +3 -0
- cobweb_/launchers/launcher.py +235 -0
- cobweb_/launchers/launcher_air.py +88 -0
- cobweb_/launchers/launcher_api.py +221 -0
- cobweb_/launchers/launcher_pro.py +222 -0
- cobweb_/pipelines/__init__.py +3 -0
- cobweb_/pipelines/pipeline.py +69 -0
- cobweb_/pipelines/pipeline_console.py +22 -0
- cobweb_/pipelines/pipeline_loghub.py +34 -0
- cobweb_/setting.py +74 -0
- cobweb_/utils/__init__.py +5 -0
- cobweb_/utils/bloom.py +58 -0
- cobweb_/utils/dotting.py +32 -0
- cobweb_/utils/oss.py +94 -0
- cobweb_/utils/tools.py +42 -0
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/METADATA +1 -1
- cobweb_launcher-1.3.2.dist-info/RECORD +110 -0
- cobweb_launcher-1.3.2.dist-info/top_level.txt +2 -0
- cobweb_new/__init__.py +2 -0
- cobweb_new/base/__init__.py +72 -0
- cobweb_new/base/common_queue.py +53 -0
- cobweb_new/base/decorators.py +72 -0
- cobweb_new/base/item.py +46 -0
- cobweb_new/base/log.py +94 -0
- cobweb_new/base/request.py +82 -0
- cobweb_new/base/response.py +23 -0
- cobweb_new/base/seed.py +118 -0
- cobweb_new/constant.py +105 -0
- cobweb_new/crawlers/__init__.py +1 -0
- cobweb_new/crawlers/crawler-new.py +85 -0
- cobweb_new/crawlers/crawler.py +170 -0
- cobweb_new/db/__init__.py +2 -0
- cobweb_new/db/api_db.py +82 -0
- cobweb_new/db/redis_db.py +158 -0
- cobweb_new/exceptions/__init__.py +1 -0
- cobweb_new/exceptions/oss_db_exception.py +28 -0
- cobweb_new/launchers/__init__.py +3 -0
- cobweb_new/launchers/launcher.py +237 -0
- cobweb_new/launchers/launcher_air.py +88 -0
- cobweb_new/launchers/launcher_api.py +161 -0
- cobweb_new/launchers/launcher_pro.py +96 -0
- cobweb_new/launchers/tesss.py +47 -0
- cobweb_new/pipelines/__init__.py +3 -0
- cobweb_new/pipelines/pipeline.py +68 -0
- cobweb_new/pipelines/pipeline_console.py +22 -0
- cobweb_new/pipelines/pipeline_loghub.py +34 -0
- cobweb_new/setting.py +95 -0
- cobweb_new/utils/__init__.py +5 -0
- cobweb_new/utils/bloom.py +58 -0
- cobweb_new/utils/oss.py +94 -0
- cobweb_new/utils/tools.py +42 -0
- cobweb/schedulers/scheduler_api.py +0 -72
- cobweb_launcher-1.2.49.dist-info/RECORD +0 -44
- cobweb_launcher-1.2.49.dist-info/top_level.txt +0 -1
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.49.dist-info → cobweb_launcher-1.3.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
from cobweb.base import ConsoleItem, logger
|
2
|
+
from cobweb.constant import LogTemplate
|
3
|
+
from cobweb.pipelines import Pipeline
|
4
|
+
|
5
|
+
|
6
|
+
class Console(Pipeline):
    """Pipeline that prints parsed items to the log instead of persisting them."""

    def build(self, item: ConsoleItem):
        """Package the seed and the parsed payload into a plain dict."""
        return {
            "seed": item.seed.to_dict,
            "data": item.to_dict,
        }

    def upload(self, table, datas):
        """Log every item; overly long parse details are truncated for readability."""
        for data in datas:
            parse_detail = LogTemplate.log_info(data["data"])
            if len(parse_detail) > 500:
                parse_detail = (
                    parse_detail[:500]
                    + " ...\n" + " " * 12
                    + "-- Text is too long and details are omitted!"
                )
            logger.info(LogTemplate.console_item.format(
                seed_detail=LogTemplate.log_info(data["seed"]),
                parse_detail=parse_detail,
            ))
|
@@ -0,0 +1,34 @@
|
|
1
|
+
import json
|
2
|
+
|
3
|
+
from cobweb import setting
|
4
|
+
from cobweb.base import BaseItem
|
5
|
+
from cobweb.pipelines import Pipeline
|
6
|
+
from aliyun.log import LogClient, LogItem, PutLogsRequest
|
7
|
+
|
8
|
+
|
9
|
+
class Loghub(Pipeline):
    """Pipeline that ships items to an Aliyun Loghub logstore."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Single shared client; connection settings come from project settings.
        self.client = LogClient(**setting.LOGHUB_CONFIG)

    def build(self, item: BaseItem):
        """Convert an item into a LogItem; non-string values are JSON-encoded."""
        payload = item.to_dict
        for field, value in payload.items():
            if not isinstance(value, str):
                payload[field] = json.dumps(value, ensure_ascii=False)
        log_item = LogItem()
        log_item.set_contents(sorted(payload.items()))
        return log_item

    def upload(self, table, datas):
        """Push a batch of LogItems to the `table` logstore in one request."""
        self.client.put_logs(request=PutLogsRequest(
            project=setting.LOGHUB_PROJECT,
            logstore=table,
            topic=setting.LOGHUB_TOPIC,
            source=setting.LOGHUB_SOURCE,
            logitems=datas,
            compress=True,
        ))
|
cobweb_new/setting.py
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
import os

# Redis connection settings, read from the environment.
REDIS_CONFIG = {
    "host": os.getenv("REDIS_HOST"),
    "password": os.getenv("REDIS_PASSWORD"),
    "port": int(os.getenv("REDIS_PORT", 6379)),
    "db": int(os.getenv("REDIS_DB", 0)),
}

# Aliyun Loghub settings.
LOGHUB_TOPIC = os.getenv("LOGHUB_TOPIC")
LOGHUB_SOURCE = os.getenv("LOGHUB_SOURCE")
LOGHUB_PROJECT = os.getenv("LOGHUB_PROJECT")
LOGHUB_CONFIG = {
    "endpoint": os.getenv("LOGHUB_ENDPOINT"),
    "accessKeyId": os.getenv("LOGHUB_ACCESS_KEY"),
    "accessKey": os.getenv("LOGHUB_SECRET_KEY")
}

# OSS utility settings.
OSS_BUCKET = os.getenv("OSS_BUCKET")
OSS_ENDPOINT = os.getenv("OSS_ENDPOINT")
OSS_ACCESS_KEY = os.getenv("OSS_ACCESS_KEY")
OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
OSS_CHUNK_SIZE = 10 * 1024 ** 2  # 10 MB multipart chunk
OSS_MIN_UPLOAD_SIZE = 1024       # minimum size before an upload is attempted


# Crawler implementation (dotted path, resolved dynamically).
CRAWLER = "cobweb.crawlers.Crawler"

# Storage pipeline implementation (dotted path, resolved dynamically).
PIPELINE = "cobweb.pipelines.pipeline_console.Console"


# Launcher wait times (seconds).

BEFORE_SCHEDULER_WAIT_SECONDS = 60  # wait before scheduling; applies to one-shot tasks only

SCHEDULING_WAIT_TIME = 15  # wait between SCHEDULER item rounds
INSERTING_WAIT_TIME = 30   # wait between INSERT item rounds
REMOVING_WAIT_TIME = 5     # wait between REMOVE item rounds

TODO_QUEUE_FULL_WAIT_SECONDS = 5  # wait when the todo queue is full
NEW_QUEUE_WAIT_SECONDS = 30       # wait on the new queue
DONE_QUEUE_WAIT_SECONDS = 5       # wait on the done queue
UPLOAD_QUEUE_WAIT_SECONDS = 15    # wait on the upload queue
SEED_RESET_SECONDS = 30           # seed reset interval


# Launcher batch sizes.
SCHEDULING_SIZE = 100  # scheduling batch size
INSERTING_SIZE = 100   # INSERT batch size
REMOVING_SIZE = 100    # REMOVE batch size

# In-memory queue capacities.
SEED_QUEUE_SIZE = 100      # seed queue length
TODO_QUEUE_SIZE = 100      # todo queue length
REQUEST_QUEUE_SIZE = 100   # request queue length
DOWNLOAD_QUEUE_SIZE = 100  # download queue length
RESPONSE_QUEUE_SIZE = 100  # response queue length
DONE_QUEUE_SIZE = 100      # done queue length
UPLOAD_QUEUE_SIZE = 100    # upload queue length
DELETE_QUEUE_SIZE = 100    # delete queue length

# Seed completion mode, DONE_MODEL in (0, 1):
# 0: a successfully consumed seed is removed from the queue; a failed one goes
#    to the fail queue.
# 1: a successfully consumed seed goes to the success queue; a failed one goes
#    to the fail queue.
DONE_MODEL = 0

# Spider settings.
SPIDER_THREAD_NUM = 10
SPIDER_MAX_RETRIES = 5
SPIDER_TIME_SLEEP = 10

SPIDER_MAX_COUNT = 1000  # max fetches inside one rate-limit window
TIME_WINDOW = 60         # fixed rate-limit window (seconds)

# Task mode: 0 = one-shot, 1 = long-running.
TASK_MODEL = 0


# Bloom filter settings.
CAPACITY = 100000000
ERROR_RATE = 0.001
FILTER_FIELD = "url"
# Response content types treated as downloadable files.
# FILE_FILTER_CONTENT_TYPE = ["text/html", "application/xhtml+xml"]
|
@@ -0,0 +1,58 @@
|
|
1
|
+
import math
|
2
|
+
import time
|
3
|
+
|
4
|
+
import mmh3
|
5
|
+
import redis
|
6
|
+
from cobweb import setting
|
7
|
+
|
8
|
+
|
9
|
+
class BloomFilter:
    """Bloom filter stored as a Redis bit string, probed with murmur3 hashes."""

    def __init__(self, key, redis_config=None, capacity=None, error_rate=None):
        redis_config = redis_config or setting.REDIS_CONFIG
        capacity = capacity or setting.CAPACITY
        error_rate = error_rate or setting.ERROR_RATE
        # The filter always lives in redis db 3, regardless of the passed config.
        redis_config['db'] = 3

        self.key = key

        self._client = redis.Redis(
            connection_pool=redis.ConnectionPool(**redis_config)
        )
        self.bit_size = self.get_bit_size(capacity, error_rate)
        self.hash_count = self.get_hash_count(self.bit_size, capacity)
        self._init_bloom_key()

    def add(self, value):
        """Set the bit for every hash seed; always reports success."""
        for seed_no in range(self.hash_count):
            offset = mmh3.hash(value, seed_no) % self.bit_size
            self._client.setbit(self.key, offset, 1)
        return True

    def exists(self, value):
        """Return False if any probed bit is clear (value definitely absent)."""
        if not self._client.exists(self.key):
            return False
        return all(
            self._client.getbit(self.key, mmh3.hash(value, seed_no) % self.bit_size)
            for seed_no in range(self.hash_count)
        )

    def _init_bloom_key(self):
        # Atomically create the key (by touching the highest bit) and give it
        # a 7-day TTL; skipped when the key already exists.
        lua_script = """
        redis.call("SETBIT", KEYS[1], ARGV[1], ARGV[2])
        redis.call("EXPIRE", KEYS[1], 604800)
        """
        if self._client.exists(self.key):
            return True
        execute = self._client.register_script(lua_script)
        execute(keys=[self.key], args=[self.bit_size - 1, 1])

    @classmethod
    def get_bit_size(cls, n, p):
        """Optimal bit-array size m = -n*ln(p) / (ln 2)^2."""
        return int(-(n * math.log(p)) / (math.log(2) ** 2))

    @classmethod
    def get_hash_count(cls, m, n):
        """Optimal number of hash functions k = (m/n) * ln 2."""
        return int((m / n) * math.log(2))
|
57
|
+
|
58
|
+
|
cobweb_new/utils/oss.py
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
from typing import List
|
2
|
+
from cobweb import setting
|
3
|
+
from requests import Response
|
4
|
+
from oss2 import Auth, Bucket, models, PartIterator
|
5
|
+
from cobweb.exceptions import oss_db_exception
|
6
|
+
from cobweb.base.decorators import decorator_oss_db
|
7
|
+
|
8
|
+
|
9
|
+
class OssUtil:
    """Helper around an oss2 Bucket: existence checks plus simple, multipart
    and append uploads with chunked buffering."""

    def __init__(
            self,
            bucket=None,
            endpoint=None,
            access_key=None,
            secret_key=None,
            chunk_size=None,
            min_upload_size=None,
            **kwargs
    ):
        self.bucket = bucket or setting.OSS_BUCKET
        self.endpoint = endpoint or setting.OSS_ENDPOINT
        self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
        self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)

        self._auth = Auth(
            access_key_id=access_key or setting.OSS_ACCESS_KEY,
            access_key_secret=secret_key or setting.OSS_SECRET_KEY
        )
        self._client = Bucket(
            auth=self._auth,
            endpoint=self.endpoint,
            bucket_name=self.bucket,
            **kwargs
        )

    def exists(self, key: str) -> bool:
        """True when the object exists in the bucket."""
        return self._client.object_exists(key)

    def head(self, key: str) -> models.HeadObjectResult:
        """Fetch object metadata without downloading the body."""
        return self._client.head_object(key)

    @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
    def init_part(self, key) -> models.InitMultipartUploadResult:
        """Start a multipart upload session."""
        return self._client.init_multipart_upload(key)

    @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
    def put(self, key, data) -> models.PutObjectResult:
        """Upload a whole object in one request."""
        return self._client.put_object(key, data)

    @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
    def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
        """Upload one part of a multipart session."""
        return self._client.upload_part(key, upload_id, position, data)

    def list_part(self, key, upload_id):
        """List the parts uploaded so far for a multipart session."""
        return list(PartIterator(self._client, key, upload_id))

    @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
    def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
        """Complete a multipart upload; without explicit parts, OSS merges all."""
        headers = {"x-oss-complete-all": "yes"} if not parts else None
        return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)

    @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
    def append(self, key, position, data) -> models.AppendObjectResult:
        """Append `data` to an appendable object at `position`."""
        return self._client.append_object(key, position, data)

    def iter_data(self, data, chunk_size=None):
        """Yield `data` in chunks; accepts a requests Response or raw bytes."""
        size = chunk_size or self.chunk_size
        if isinstance(data, Response):
            yield from data.iter_content(size)
        if isinstance(data, bytes):
            for offset in range(0, len(data), size):
                yield data[offset:offset + size]

    def assemble(self, ready_data, data, chunk_size=None):
        """Append `data` to the pending buffer and split off one full chunk.

        Returns (remaining_buffer, chunk_to_upload); the chunk is b"" while
        the buffer is still smaller than `chunk_size`.
        """
        size = chunk_size or self.chunk_size
        buffer = ready_data + data
        if len(buffer) < size:
            return buffer, b""
        return buffer[size:], buffer[:size]

    def content_length(self, key: str) -> int:
        """Size in bytes of the stored object, via a HEAD request."""
        return self.head(key).content_length
|
94
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import re
|
2
|
+
import hashlib
|
3
|
+
from typing import Union
|
4
|
+
from importlib import import_module
|
5
|
+
|
6
|
+
|
7
|
+
def md5(text: Union[str, bytes]) -> str:
    """Hex MD5 digest of `text`; str input is UTF-8 encoded first."""
    data = text.encode('utf-8') if isinstance(text, str) else text
    return hashlib.md5(data).hexdigest()
|
11
|
+
|
12
|
+
|
13
|
+
def build_path(site, url, file_type):
    """Build the object key "<site>/<md5(url)>.<file_type>"."""
    raw = url.encode('utf-8') if isinstance(url, str) else url
    digest = hashlib.md5(raw).hexdigest()
    return f"{site}/{digest}.{file_type}"
|
15
|
+
|
16
|
+
|
17
|
+
def format_size(content_length: int) -> str:
    """Render a byte count as a human-readable string in KB..TB.

    Fix: sizes of 1024 TB and above previously fell through the loop and
    returned an implicit None; they are now reported in TB.
    """
    units = ["KB", "MB", "GB", "TB"]
    num = content_length / 1024
    for unit in units:
        if num < 1024:
            return f"{round(num, 2)} {unit}"
        num /= 1024
    # Fallback for >= 1024 TB: keep the largest unit instead of returning None.
    return f"{round(num * 1024, 2)} TB"
|
23
|
+
|
24
|
+
|
25
|
+
def dynamic_load_class(model_info):
    """Resolve a class from a string spec.

    Accepts either a dotted path ("pkg.module.Class") or an import statement
    ("from pkg.module import Class").

    Fixes: the two branches no longer duplicate the import/getattr logic, and
    the TypeError now carries a message instead of being raised bare.

    Raises:
        TypeError: if `model_info` is not a string.
    """
    if not isinstance(model_info, str):
        raise TypeError(f"model_info must be a str, got {type(model_info).__name__}")
    if "import" in model_info:
        model_path, class_name = re.search(
            r"from (.*?) import (.*?)$", model_info
        ).groups()
    else:
        model_path, class_name = model_info.rsplit(".", 1)
    return getattr(import_module(model_path), class_name)
|
39
|
+
|
40
|
+
|
41
|
+
# def download_log_info(item:dict) -> str:
|
42
|
+
# return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
|
@@ -1,72 +0,0 @@
|
|
1
|
-
import threading
|
2
|
-
import time
|
3
|
-
|
4
|
-
# from cobweb.base import Seed
|
5
|
-
from cobweb.db import ApiDB
|
6
|
-
|
7
|
-
|
8
|
-
class ApiScheduler:
    """Seed scheduler backed by ApiDB sorted sets, one per project/task pair."""

    def __init__(self, task, project, scheduler_wait_seconds=30):
        self._todo_key = "{%s:%s}:todo" % (project, task)
        self._download_key = "{%s:%s}:download" % (project, task)
        self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
        self._speed_control_key = "speed_control:%s_%s" % (project, task)
        self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
        self._db = ApiDB()

        self.scheduler_wait_seconds = scheduler_wait_seconds
        self.working = threading.Event()

    @property
    def heartbeat(self):
        """True while the launcher heartbeat key is alive."""
        return self._db.exists(self._heartbeat_key)

    def set_heartbeat(self):
        """Refresh this launcher's heartbeat with a 5-second TTL."""
        return self._db.setex(self._heartbeat_key, 5)

    def schedule(self, key, count):
        """Yield up to `count` (member, priority) pairs with priority in [0, 1000).

        Sleeps instead of yielding when nothing is schedulable.
        """
        if not self._db.zcount(key, 0, "(1000"):
            time.sleep(self.scheduler_wait_seconds)
            return
        stamp = int(time.time())
        for member, priority in self._db.members(
                key, stamp, count=count, _min=0, _max="(1000"):
            yield member, priority

    def insert(self, key, items):
        """Add seeds that are not already present (NX semantics)."""
        if items:
            self._db.zadd(key, items, nx=True)

    def reset(self, keys, reset_time=30):
        """Requeue stale seeds; guarded by a distributed reset lock."""
        if not self._db.lock(self._reset_lock_key, t=120):
            return

        if isinstance(keys, str):
            keys = [keys]

        # With a live heartbeat only reset seeds older than `reset_time`;
        # otherwise reset everything (-inf lower bound).
        _min = reset_time - int(time.time()) if self.heartbeat else "-inf"

        for key in keys:
            if self._db.exists(key):
                self._db.members(key, 0, _min=_min, _max="(0")

        if not self.heartbeat:
            self.working.set()
            time.sleep(10)

        self._db.delete(self._reset_lock_key)

    def refresh(self, key, items: dict[str, int]):
        """Re-stamp in-flight seeds (XX semantics) so they are not seen as stale."""
        refresh_time = int(time.time())
        payload = {
            member: -refresh_time - priority / 1000
            for member, priority in items.items()
        }
        if payload:
            self._db.zadd(key, item=payload, xx=True)

    def delete(self, key, values):
        """Remove finished seeds from the sorted set."""
        if values:
            self._db.zrem(key, *values)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
@@ -1,44 +0,0 @@
|
|
1
|
-
cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
|
2
|
-
cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
|
3
|
-
cobweb/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
|
4
|
-
cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
|
5
|
-
cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
|
6
|
-
cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
|
7
|
-
cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
|
8
|
-
cobweb/base/dotting.py,sha256=lfFXXqnVP__hxlW3qH5Bnuq69KtnFaQLbcz1M8e2Ajg,1239
|
9
|
-
cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
|
10
|
-
cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
|
11
|
-
cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
|
12
|
-
cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
13
|
-
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
14
|
-
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
|
-
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=dKyQJOVh1cnuazSryb9vA-IGJJnhw-gxAAlIqsEn6-M,8832
|
17
|
-
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
|
-
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
|
-
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
20
|
-
cobweb/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
|
21
|
-
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
22
|
-
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
23
|
-
cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
|
24
|
-
cobweb/launchers/launcher.py,sha256=sPts-xlgxoeIfl1fn1XR2XVZxLzt7He9xrYDfTHRAGo,7029
|
25
|
-
cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
|
26
|
-
cobweb/launchers/launcher_api.py,sha256=YFqCTRvKn6icBLWTR1VxkU0WEIte2F7fv_LgPkifqdo,7885
|
27
|
-
cobweb/launchers/launcher_pro.py,sha256=B5FdxvuENRL3XrMl74ENdP1uNgnZOaYCUUfBfM0t3io,7842
|
28
|
-
cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
|
29
|
-
cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
|
30
|
-
cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
|
31
|
-
cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
|
32
|
-
cobweb/schedulers/__init__.py,sha256=y7Lv_7b0zfTl0OhIONb_8u1K1C9gVlBA-xz_XG_kI9g,85
|
33
|
-
cobweb/schedulers/scheduler_api.py,sha256=pFEdS1H4zuzxwMhCV-G7CoLz-rEOPv4EVo3xZUXTyDo,2199
|
34
|
-
cobweb/schedulers/scheduler_redis.py,sha256=E5fjc3nNld8GbUhUGT7uY4smRejj2J2ZIzp2g6lhxFM,2205
|
35
|
-
cobweb/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
|
36
|
-
cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
37
|
-
cobweb/utils/dotting.py,sha256=_QsC-LCSh1QYk9XmYIWBRlRTs-E9RfqKSkkRkedV2Eo,2082
|
38
|
-
cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
|
39
|
-
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
40
|
-
cobweb_launcher-1.2.49.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
-
cobweb_launcher-1.2.49.dist-info/METADATA,sha256=VmuY5V3Hhjw9bZ5JqQfwJi8GfPWq5g6VDb6G39q36IM,6510
|
42
|
-
cobweb_launcher-1.2.49.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
-
cobweb_launcher-1.2.49.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
-
cobweb_launcher-1.2.49.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
cobweb
|
File without changes
|
File without changes
|