cobweb-launcher 1.3.6__py3-none-any.whl → 1.3.7__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (77)
  1. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.7.dist-info}/METADATA +1 -1
  2. cobweb_launcher-1.3.7.dist-info/RECORD +40 -0
  3. cobweb/base/decorators.py +0 -40
  4. cobweb/crawlers/base_crawler.py +0 -144
  5. cobweb/crawlers/file_crawler.py +0 -98
  6. cobweb/pipelines/base_pipeline.py +0 -54
  7. cobweb/pipelines/loghub_pipeline.py +0 -34
  8. cobweb/utils/dotting.py +0 -32
  9. cobweb_/__init__.py +0 -2
  10. cobweb_/base/__init__.py +0 -9
  11. cobweb_/base/common_queue.py +0 -30
  12. cobweb_/base/decorators.py +0 -40
  13. cobweb_/base/item.py +0 -46
  14. cobweb_/base/log.py +0 -94
  15. cobweb_/base/request.py +0 -82
  16. cobweb_/base/response.py +0 -23
  17. cobweb_/base/seed.py +0 -114
  18. cobweb_/constant.py +0 -94
  19. cobweb_/crawlers/__init__.py +0 -1
  20. cobweb_/crawlers/crawler.py +0 -184
  21. cobweb_/db/__init__.py +0 -2
  22. cobweb_/db/api_db.py +0 -82
  23. cobweb_/db/redis_db.py +0 -130
  24. cobweb_/exceptions/__init__.py +0 -1
  25. cobweb_/exceptions/oss_db_exception.py +0 -28
  26. cobweb_/launchers/__init__.py +0 -3
  27. cobweb_/launchers/launcher.py +0 -235
  28. cobweb_/launchers/launcher_air.py +0 -88
  29. cobweb_/launchers/launcher_api.py +0 -221
  30. cobweb_/launchers/launcher_pro.py +0 -222
  31. cobweb_/pipelines/__init__.py +0 -3
  32. cobweb_/pipelines/pipeline.py +0 -69
  33. cobweb_/pipelines/pipeline_console.py +0 -22
  34. cobweb_/pipelines/pipeline_loghub.py +0 -34
  35. cobweb_/setting.py +0 -74
  36. cobweb_/utils/__init__.py +0 -5
  37. cobweb_/utils/bloom.py +0 -58
  38. cobweb_/utils/dotting.py +0 -32
  39. cobweb_/utils/oss.py +0 -94
  40. cobweb_/utils/tools.py +0 -42
  41. cobweb_launcher-1.3.6.dist-info/RECORD +0 -111
  42. cobweb_new/__init__.py +0 -2
  43. cobweb_new/base/__init__.py +0 -72
  44. cobweb_new/base/common_queue.py +0 -53
  45. cobweb_new/base/decorators.py +0 -72
  46. cobweb_new/base/item.py +0 -46
  47. cobweb_new/base/log.py +0 -94
  48. cobweb_new/base/request.py +0 -82
  49. cobweb_new/base/response.py +0 -23
  50. cobweb_new/base/seed.py +0 -118
  51. cobweb_new/constant.py +0 -105
  52. cobweb_new/crawlers/__init__.py +0 -1
  53. cobweb_new/crawlers/crawler-new.py +0 -85
  54. cobweb_new/crawlers/crawler.py +0 -170
  55. cobweb_new/db/__init__.py +0 -2
  56. cobweb_new/db/api_db.py +0 -82
  57. cobweb_new/db/redis_db.py +0 -158
  58. cobweb_new/exceptions/__init__.py +0 -1
  59. cobweb_new/exceptions/oss_db_exception.py +0 -28
  60. cobweb_new/launchers/__init__.py +0 -3
  61. cobweb_new/launchers/launcher.py +0 -237
  62. cobweb_new/launchers/launcher_air.py +0 -88
  63. cobweb_new/launchers/launcher_api.py +0 -161
  64. cobweb_new/launchers/launcher_pro.py +0 -96
  65. cobweb_new/launchers/tesss.py +0 -47
  66. cobweb_new/pipelines/__init__.py +0 -3
  67. cobweb_new/pipelines/pipeline.py +0 -68
  68. cobweb_new/pipelines/pipeline_console.py +0 -22
  69. cobweb_new/pipelines/pipeline_loghub.py +0 -34
  70. cobweb_new/setting.py +0 -95
  71. cobweb_new/utils/__init__.py +0 -5
  72. cobweb_new/utils/bloom.py +0 -58
  73. cobweb_new/utils/oss.py +0 -94
  74. cobweb_new/utils/tools.py +0 -42
  75. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.7.dist-info}/LICENSE +0 -0
  76. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.7.dist-info}/WHEEL +0 -0
  77. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.7.dist-info}/top_level.txt +0 -0
cobweb_new/launchers/launcher_pro.py DELETED
@@ -1,96 +0,0 @@
- import time
-
- from base import TaskQueue
- from cobweb.base import decorators
- from schedulers.scheduler_redis import RedisScheduler
- from .launcher import Launcher
-
-
- class LauncherPro(Launcher):
-
-     def __init__(self, task, project, custom_setting=None, **kwargs):
-         super().__init__(task, project, custom_setting, **kwargs)
-         self._redis_download = "{%s:%s}:download" % (project, task)
-         self._redis_todo = "{%s:%s}:todo" % (project, task)
-         self._scheduler = RedisScheduler(task, project)
-
-     # @decorators.add_thread()
-     @decorators.stop
-     def _schedule(self):
-         thread_sleep = self.scheduling_wait_time
-         for q, key, size in [
-             (TaskQueue.TODO, self._redis_todo, self.todo_queue_size),
-             (TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size),
-         ]:
-             if q.length < size:
-                 for item in self._scheduler.schedule(
-                     key, self.scheduling_size
-                 ):
-                     q.push(item)
-                 thread_sleep = 0.1
-         time.sleep(thread_sleep)
-
-     # @decorators.add_thread()
-     @decorators.pause
-     def _heartbeat(self):
-         if self._scheduler.working.is_set():
-             self._scheduler.set_heartbeat()
-         time.sleep(3)
-
-     # @decorators.add_thread()
-     @decorators.pause
-     def _reset(self):
-         self._scheduler.reset(
-             keys=[self._redis_todo, self._redis_download],
-             reset_time=self.seed_reset_seconds
-         )
-         time.sleep(15)
-
-     # @decorators.add_thread()
-     @decorators.pause
-     def _insert(self):
-         thread_sleep = 0.1
-         for q, key, size in [
-             (TaskQueue.SEED, self._redis_todo, self.seed_queue_size),
-             (TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
-         ]:
-             items = {}
-             while item := q.pop() and len(items.keys()) < self.inserting_size:
-                 items[item.to_string] = item.params.priority
-             if q.length >= size:
-                 thread_sleep = self.inserting_wait_time
-             self._scheduler.insert(key, items)
-         time.sleep(thread_sleep)
-
-     # @decorators.add_thread()
-     @decorators.pause
-     def _refresh(self):
-         self._scheduler.refresh(self._redis_todo, self._task_info["todo"])
-         self._scheduler.refresh(self._redis_download, self._task_info["download"])
-         time.sleep(3)
-
-     # @decorators.add_thread()
-     @decorators.pause
-     def _remove(self):
-         thread_sleep = self.removing_wait_time
-         for q, key, size in [
-             (TaskQueue.DELETE, self._redis_todo, self.delete_queue_size),
-             (TaskQueue.DONE, self._redis_download, self.done_queue_size),
-         ]:
-             items = []
-             while item := q.pop() and len(items) < self.removing_size:
-                 items.append(item)
-             self._scheduler.delete(key, *items)
-             self.remove_working_items(key.split(":")[-1], items)
-             if q.length >= size:
-                 thread_sleep = 0.1
-         time.sleep(thread_sleep)
-
-     def _init_schedule_thread(self):
-         self._add_thread(func=self._heartbeat)
-         self._add_thread(func=self._reset)
-         self._add_thread(func=self._refresh)
-         self._add_thread(func=self._schedule)
-         self._add_thread(func=self._insert)
-         self._add_thread(func=self._remove)
-         self._add_thread(func=self._polling)
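
Note on `_insert` and `_remove` above: `while item := q.pop() and len(items) < ...` parses as `item := (q.pop() and len(items) < ...)`, because `and` binds tighter than the walrus operator, so `item` receives a boolean rather than the popped object. A corrected sketch of the intended loop (a hypothetical fix, not code from either release):

    # parenthesize the assignment so `item` is the popped object itself
    while (item := q.pop()) and len(items) < self.inserting_size:
        items[item.to_string] = item.params.priority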
cobweb_new/launchers/tesss.py DELETED
@@ -1,47 +0,0 @@
- import threading
- import time
- from functools import wraps
-
-
- def add_thread(num=1):
-     def decorator(func):
-         @wraps(func)
-         def wrapper(self, *args):
-             for i in range(num):
-                 name = func.__name__ + "_" + str(i) if num > 1 else func.__name__
-                 self._threads.append(threading.Thread(name=name, target=func, args=(self,) + args))
-         return wrapper
-
-     return decorator
-
-
- def pause(func):
-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         while True:
-             try:
-                 func(*args, **kwargs)
-             except Exception as e:
-                 print(str(e))
-             finally:
-                 time.sleep(0.1)
-
-     return wrapper
-
-
- class TTT:
-     _threads = []
-
-     @add_thread()
-     @pause
-     def tt(self):
-         print("hello")
-         time.sleep(1)
-
- tttt = TTT()
- tttt.tt()
- print(TTT._threads)
-
-
- for _ in TTT._threads:
-     _.start()
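
Decorator order in this scratch file matters: `@pause` first wraps `tt` in an endless try/except loop, and `@add_thread()` then replaces the method with a registrar that only appends a `threading.Thread` targeting that loop, so calling `tttt.tt()` registers a worker without running it; the explicit `start()` loop at the bottom is what launches the threads. A rough equivalent of one registered worker (a sketch built from the definitions above; `tt_body` is a stand-in for the undecorated method):

    def tt_body(self):
        print("hello")
        time.sleep(1)

    worker = threading.Thread(name="tt", target=pause(tt_body), args=(tttt,))
    worker.start()  # prints "hello" about once a second and swallows exceptions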
cobweb_new/pipelines/__init__.py DELETED
@@ -1,3 +0,0 @@
- from .pipeline import Pipeline
- from .pipeline_console import Console
- from .pipeline_loghub import Loghub
cobweb_new/pipelines/pipeline.py DELETED
@@ -1,68 +0,0 @@
- import time
- import threading
-
- from abc import ABC, abstractmethod
-
- from cobweb.utils import TaskQueue
- from cobweb.base import BaseItem, logger
-
-
- class Pipeline(threading.Thread, ABC):
-
-     def __init__(
-         self,
-         stop: threading.Event,
-         pause: threading.Event,
-         upload_size: int,
-         wait_seconds: int
-     ):
-         super().__init__()
-         self._stop = stop
-         self._pause = pause
-
-         self.upload_size = upload_size
-         self.wait_seconds = wait_seconds
-
-     @abstractmethod
-     def build(self, item: BaseItem) -> dict:
-         pass
-
-     @abstractmethod
-     def upload(self, table: str, data: list) -> bool:
-         pass
-
-     def run(self):
-         while not self._stop.is_set():
-             if not TaskQueue.UPLOAD.length:
-                 time.sleep(self.wait_seconds)
-                 continue
-             if TaskQueue.UPLOAD.length < self.upload_size:
-                 time.sleep(self.wait_seconds)
-             status = True
-             data_info, seeds = {}, []
-             try:
-                 for _ in range(self.upload_size):
-                     item = TaskQueue.UPLOAD.pop()
-                     if not item:
-                         break
-                     seeds.append(item.seed)
-                     data = self.build(item)
-                     data_info.setdefault(item.table, []).append(data)
-                 for table, datas in data_info.items():
-                     try:
-                         self.upload(table, datas)
-                     except Exception as e:
-                         logger.info(e)
-                         status = False
-             except Exception as e:
-                 logger.info(e)
-                 status = False
-             if not status:
-                 for seed in seeds:
-                     seed.params.seed_status = "deal model: fail"
-             if seeds:
-                 TaskQueue.DONE.push(seeds)
-
-         logger.info("upload pipeline close!")
-
-
cobweb_new/pipelines/pipeline_console.py DELETED
@@ -1,22 +0,0 @@
- from cobweb.base import ConsoleItem, logger
- from cobweb.constant import LogTemplate
- from cobweb.pipelines import Pipeline
-
-
- class Console(Pipeline):
-
-     def build(self, item: ConsoleItem):
-         return {
-             "seed": item.seed.to_dict,
-             "data": item.to_dict
-         }
-
-     def upload(self, table, datas):
-         for data in datas:
-             parse_detail = LogTemplate.log_info(data["data"])
-             if len(parse_detail) > 500:
-                 parse_detail = parse_detail[:500] + " ...\n" + " " * 12 + "-- Text is too long and details are omitted!"
-             logger.info(LogTemplate.console_item.format(
-                 seed_detail=LogTemplate.log_info(data["seed"]),
-                 parse_detail=parse_detail
-             ))
cobweb_new/pipelines/pipeline_loghub.py DELETED
@@ -1,34 +0,0 @@
- import json
-
- from cobweb import setting
- from cobweb.base import BaseItem
- from cobweb.pipelines import Pipeline
- from aliyun.log import LogClient, LogItem, PutLogsRequest
-
-
- class Loghub(Pipeline):
-
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self.client = LogClient(**setting.LOGHUB_CONFIG)
-
-     def build(self, item: BaseItem):
-         log_item = LogItem()
-         temp = item.to_dict
-         for key, value in temp.items():
-             if not isinstance(value, str):
-                 temp[key] = json.dumps(value, ensure_ascii=False)
-         contents = sorted(temp.items())
-         log_item.set_contents(contents)
-         return log_item
-
-     def upload(self, table, datas):
-         request = PutLogsRequest(
-             project=setting.LOGHUB_PROJECT,
-             logstore=table,
-             topic=setting.LOGHUB_TOPIC,
-             source=setting.LOGHUB_SOURCE,
-             logitems=datas,
-             compress=True
-         )
-         self.client.put_logs(request=request)
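
`Loghub` inherits the threaded batching loop from `Pipeline`; a minimal wiring sketch (assumed usage — in practice the launcher constructs pipelines with values from setting.py):

    import threading

    stop_event, pause_event = threading.Event(), threading.Event()
    pipeline = Loghub(stop_event, pause_event, upload_size=100, wait_seconds=15)
    pipeline.start()   # run() drains TaskQueue.UPLOAD in batches of upload_size
    # ... crawl ...
    stop_event.set()   # lets run() exit and log "upload pipeline close!"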
cobweb_new/setting.py DELETED
@@ -1,95 +0,0 @@
- import os
-
- # redis db config
- REDIS_CONFIG = {
-     "host": os.getenv("REDIS_HOST"),
-     "password": os.getenv("REDIS_PASSWORD"),
-     "port": int(os.getenv("REDIS_PORT", 6379)),
-     "db": int(os.getenv("REDIS_DB", 0)),
- }
-
- # loghub db config
- LOGHUB_TOPIC = os.getenv("LOGHUB_TOPIC")
- LOGHUB_SOURCE = os.getenv("LOGHUB_SOURCE")
- LOGHUB_PROJECT = os.getenv("LOGHUB_PROJECT")
- LOGHUB_CONFIG = {
-     "endpoint": os.getenv("LOGHUB_ENDPOINT"),
-     "accessKeyId": os.getenv("LOGHUB_ACCESS_KEY"),
-     "accessKey": os.getenv("LOGHUB_SECRET_KEY")
- }
-
- # oss util config
- OSS_BUCKET = os.getenv("OSS_BUCKET")
- OSS_ENDPOINT = os.getenv("OSS_ENDPOINT")
- OSS_ACCESS_KEY = os.getenv("OSS_ACCESS_KEY")
- OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
- OSS_CHUNK_SIZE = 10 * 1024 ** 2
- OSS_MIN_UPLOAD_SIZE = 1024
-
-
- # crawler selection
- CRAWLER = "cobweb.crawlers.Crawler"
-
- # data storage pipeline
- PIPELINE = "cobweb.pipelines.pipeline_console.Console"
-
-
- # Launcher wait times
-
- BEFORE_SCHEDULER_WAIT_SECONDS = 60  # wait before scheduling starts; only applies to one-off tasks
-
- SCHEDULING_WAIT_TIME = 15  # wait between scheduler item rounds
- INSERTING_WAIT_TIME = 30  # wait between insert rounds
- REMOVING_WAIT_TIME = 5  # wait between remove rounds
-
- TODO_QUEUE_FULL_WAIT_SECONDS = 5  # wait while the todo queue is full
- NEW_QUEUE_WAIT_SECONDS = 30  # wait for the new queue
- DONE_QUEUE_WAIT_SECONDS = 5  # wait for the done queue
- UPLOAD_QUEUE_WAIT_SECONDS = 15  # wait for the upload queue
- SEED_RESET_SECONDS = 30  # seed reset interval
-
-
- # Launcher queue sizes
- SCHEDULING_SIZE = 100  # scheduling batch size
- INSERTING_SIZE = 100  # insert batch size
- REMOVING_SIZE = 100  # remove batch size
-
- # SEED = Queue()  # newly added task seeds
- # TODO = Queue()  # task seed queue
- # REQUEST = Queue()  # request queue
- # DOWNLOAD = Queue()  # download task queue
- # RESPONSE = Queue()  # response queue
- # DONE = Queue()  # download-finished queue
- # UPLOAD = Queue()  # upload queue
- # DELETE = Queue()  # delete queue
-
- SEED_QUEUE_SIZE = 100  # SEED queue size
- TODO_QUEUE_SIZE = 100  # TODO queue size
- REQUEST_QUEUE_SIZE = 100  # REQUEST queue size
- DOWNLOAD_QUEUE_SIZE = 100  # DOWNLOAD queue size
- RESPONSE_QUEUE_SIZE = 100  # RESPONSE queue size
- DONE_QUEUE_SIZE = 100  # DONE queue size
- UPLOAD_QUEUE_SIZE = 100  # UPLOAD queue size
- DELETE_QUEUE_SIZE = 100  # DELETE queue size
-
- # DONE_MODEL in (0, 1): seed completion mode
- DONE_MODEL = 0  # 0: a successfully consumed seed is removed from the queue, a failed one goes to the failure queue; 1: success goes to the success queue, failure to the failure queue
-
- # spider
- SPIDER_THREAD_NUM = 10
- SPIDER_MAX_RETRIES = 5
- SPIDER_TIME_SLEEP = 10
-
- SPIDER_MAX_COUNT = 1000  # max fetches within the time window
- TIME_WINDOW = 60  # fixed rate-limit window (seconds)
-
- # task mode
- TASK_MODEL = 0  # 0: one-off, 1: resident (long-running)
-
-
- # Bloom filter
- CAPACITY = 100000000
- ERROR_RATE = 0.001
- FILTER_FIELD = "url"
- # content-type filter for file-download responses
- # FILE_FILTER_CONTENT_TYPE = ["text/html", "application/xhtml+xml"]
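
Every value above is read from the process environment at import time, so configuration happens through environment variables (or the `custom_setting` argument accepted by the launchers) rather than code edits. A minimal sketch with hypothetical values:

    import os

    os.environ.setdefault("REDIS_HOST", "127.0.0.1")  # hypothetical host
    os.environ.setdefault("REDIS_PORT", "6380")       # hypothetical port

    from cobweb_new import setting  # the os.getenv() calls execute here
    assert setting.REDIS_CONFIG["port"] == 6380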
cobweb_new/utils/__init__.py DELETED
@@ -1,5 +0,0 @@
- from .oss import OssUtil
- from .tools import *
- from .bloom import BloomFilter
- from .task_queue import TaskQueue
-
cobweb_new/utils/bloom.py DELETED
@@ -1,58 +0,0 @@
- import math
- import time
-
- import mmh3
- import redis
- from cobweb import setting
-
-
- class BloomFilter:
-
-     def __init__(self, key, redis_config=None, capacity=None, error_rate=None):
-         redis_config = redis_config or setting.REDIS_CONFIG
-         capacity = capacity or setting.CAPACITY
-         error_rate = error_rate or setting.ERROR_RATE
-         redis_config['db'] = 3
-
-         self.key = key
-
-         pool = redis.ConnectionPool(**redis_config)
-         self._client = redis.Redis(connection_pool=pool)
-         self.bit_size = self.get_bit_size(capacity, error_rate)
-         self.hash_count = self.get_hash_count(self.bit_size, capacity)
-         self._init_bloom_key()
-
-     def add(self, value):
-         for seed in range(self.hash_count):
-             result = mmh3.hash(value, seed) % self.bit_size
-             self._client.setbit(self.key, result, 1)
-         return True
-
-     def exists(self, value):
-         if not self._client.exists(self.key):
-             return False
-         for seed in range(self.hash_count):
-             result = mmh3.hash(value, seed) % self.bit_size
-             if not self._client.getbit(self.key, result):
-                 return False
-         return True
-
-     def _init_bloom_key(self):
-         lua_script = """
-         redis.call("SETBIT", KEYS[1], ARGV[1], ARGV[2])
-         redis.call("EXPIRE", KEYS[1], 604800)
-         """
-         if self._client.exists(self.key):
-             return True
-         execute = self._client.register_script(lua_script)
-         execute(keys=[self.key], args=[self.bit_size-1, 1])
-
-     @classmethod
-     def get_bit_size(cls, n, p):
-         return int(-(n * math.log(p)) / (math.log(2) ** 2))
-
-     @classmethod
-     def get_hash_count(cls, m, n):
-         return int((m / n) * math.log(2))
-
-
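
With the defaults from setting.py (`CAPACITY = 100000000`, `ERROR_RATE = 0.001`), the standard Bloom filter formulas m = -n·ln(p)/(ln 2)² and k = (m/n)·ln 2 implemented by the two classmethods give roughly a 1.44-billion-bit bitmap (about 171 MB in Redis) and 9 hash rounds per value:

    m = BloomFilter.get_bit_size(100_000_000, 0.001)  # ≈ 1.44e9 bits (~171 MB)
    k = BloomFilter.get_hash_count(m, 100_000_000)    # 9 mmh3 rounds per add/exists

Note that `__init__` also forces `redis_config['db'] = 3`, mutating the dict that was passed in (or the shared `setting.REDIS_CONFIG`).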
cobweb_new/utils/oss.py DELETED
@@ -1,94 +0,0 @@
- from typing import List
- from cobweb import setting
- from requests import Response
- from oss2 import Auth, Bucket, models, PartIterator
- from cobweb.exceptions import oss_db_exception
- from cobweb.base.decorators import decorator_oss_db
-
-
- class OssUtil:
-
-     def __init__(
-         self,
-         bucket=None,
-         endpoint=None,
-         access_key=None,
-         secret_key=None,
-         chunk_size=None,
-         min_upload_size=None,
-         **kwargs
-     ):
-         self.bucket = bucket or setting.OSS_BUCKET
-         self.endpoint = endpoint or setting.OSS_ENDPOINT
-         self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
-         self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
-
-         self._auth = Auth(
-             access_key_id=access_key or setting.OSS_ACCESS_KEY,
-             access_key_secret=secret_key or setting.OSS_SECRET_KEY
-         )
-         self._client = Bucket(
-             auth=self._auth,
-             endpoint=self.endpoint,
-             bucket_name=self.bucket,
-             **kwargs
-         )
-
-     def exists(self, key: str) -> bool:
-         return self._client.object_exists(key)
-
-     def head(self, key: str) -> models.HeadObjectResult:
-         return self._client.head_object(key)
-
-     @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
-     def init_part(self, key) -> models.InitMultipartUploadResult:
-         """Initialize a multipart upload."""
-         return self._client.init_multipart_upload(key)
-
-     @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
-     def put(self, key, data) -> models.PutObjectResult:
-         """Upload a whole object."""
-         return self._client.put_object(key, data)
-
-     @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
-     def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
-         """Upload a single part."""
-         return self._client.upload_part(key, upload_id, position, data)
-
-     def list_part(self, key, upload_id):  # -> List[models.ListPartsResult]:
-         """List the uploaded parts."""
-         return [part_info for part_info in PartIterator(self._client, key, upload_id)]
-
-     @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
-     def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
-         """Complete a multipart upload by merging its parts."""
-         headers = None if parts else {"x-oss-complete-all": "yes"}
-         return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
-
-     @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
-     def append(self, key, position, data) -> models.AppendObjectResult:
-         """Append data to an appendable object."""
-         return self._client.append_object(key, position, data)
-
-     def iter_data(self, data, chunk_size=None):
-         chunk_size = chunk_size or self.chunk_size
-         if isinstance(data, Response):
-             for part_data in data.iter_content(chunk_size):
-                 yield part_data
-         if isinstance(data, bytes):
-             for i in range(0, len(data), chunk_size):
-                 yield data[i:i + chunk_size]
-
-     def assemble(self, ready_data, data, chunk_size=None):
-         upload_data = b""
-         ready_data = ready_data + data
-         chunk_size = chunk_size or self.chunk_size
-         if len(ready_data) >= chunk_size:
-             upload_data = ready_data[:chunk_size]
-             ready_data = ready_data[chunk_size:]
-         return ready_data, upload_data
-
-     def content_length(self, key: str) -> int:
-         head = self.head(key)
-         return head.content_length
-
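
A sketch of how the multipart helpers compose (assumed flow; the key and payload are placeholders):

    oss = OssUtil()
    key = "site/abc.html"                       # hypothetical object key
    upload = oss.init_part(key)                 # start the multipart upload
    ready, part_no = b"", 1
    for chunk in oss.iter_data(payload):        # payload: bytes or requests.Response
        ready, upload_data = oss.assemble(ready, chunk)
        if upload_data:
            oss.put_part(key, upload.upload_id, part_no, upload_data)
            part_no += 1
    if ready:                                   # flush the sub-chunk_size tail
        oss.put_part(key, upload.upload_id, part_no, ready)
    oss.merge(key, upload.upload_id)            # parts=None sends "x-oss-complete-all: yes"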
cobweb_new/utils/tools.py DELETED
@@ -1,42 +0,0 @@
- import re
- import hashlib
- from typing import Union
- from importlib import import_module
-
-
- def md5(text: Union[str, bytes]) -> str:
-     if isinstance(text, str):
-         text = text.encode('utf-8')
-     return hashlib.md5(text).hexdigest()
-
-
- def build_path(site, url, file_type):
-     return f"{site}/{md5(url)}.{file_type}"
-
-
- def format_size(content_length: int) -> str:
-     units = ["KB", "MB", "GB", "TB"]
-     for i in range(4):
-         num = content_length / (1024 ** (i + 1))
-         if num < 1024:
-             return f"{round(num, 2)} {units[i]}"
-
-
- def dynamic_load_class(model_info):
-     if isinstance(model_info, str):
-         if "import" in model_info:
-             model_path, class_name = re.search(
-                 r"from (.*?) import (.*?)$", model_info
-             ).groups()
-             model = import_module(model_path)
-             class_object = getattr(model, class_name)
-         else:
-             model_path, class_name = model_info.rsplit(".", 1)
-             model = import_module(model_path)
-             class_object = getattr(model, class_name)
-         return class_object
-     raise TypeError()
-
-
- # def download_log_info(item:dict) -> str:
- #     return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
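
`dynamic_load_class` accepts either of two string forms, as used by the `CRAWLER` and `PIPELINE` settings above; both resolve to the same class object:

    Crawler = dynamic_load_class("cobweb.crawlers.Crawler")
    Console = dynamic_load_class("from cobweb.pipelines.pipeline_console import Console")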