cobweb-launcher 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. cobweb/base/__init__.py +9 -9
  2. cobweb/base/dotting.py +1 -1
  3. cobweb/utils/oss.py +7 -7
  4. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/METADATA +1 -1
  5. cobweb_launcher-1.3.8.dist-info/RECORD +40 -0
  6. cobweb/base/decorators.py +0 -40
  7. cobweb/crawlers/base_crawler.py +0 -144
  8. cobweb/crawlers/file_crawler.py +0 -98
  9. cobweb/pipelines/base_pipeline.py +0 -54
  10. cobweb/pipelines/loghub_pipeline.py +0 -34
  11. cobweb/utils/dotting.py +0 -32
  12. cobweb_/__init__.py +0 -2
  13. cobweb_/base/__init__.py +0 -9
  14. cobweb_/base/common_queue.py +0 -30
  15. cobweb_/base/decorators.py +0 -40
  16. cobweb_/base/item.py +0 -46
  17. cobweb_/base/log.py +0 -94
  18. cobweb_/base/request.py +0 -82
  19. cobweb_/base/response.py +0 -23
  20. cobweb_/base/seed.py +0 -114
  21. cobweb_/constant.py +0 -94
  22. cobweb_/crawlers/__init__.py +0 -1
  23. cobweb_/crawlers/crawler.py +0 -184
  24. cobweb_/db/__init__.py +0 -2
  25. cobweb_/db/api_db.py +0 -82
  26. cobweb_/db/redis_db.py +0 -130
  27. cobweb_/exceptions/__init__.py +0 -1
  28. cobweb_/exceptions/oss_db_exception.py +0 -28
  29. cobweb_/launchers/__init__.py +0 -3
  30. cobweb_/launchers/launcher.py +0 -235
  31. cobweb_/launchers/launcher_air.py +0 -88
  32. cobweb_/launchers/launcher_api.py +0 -221
  33. cobweb_/launchers/launcher_pro.py +0 -222
  34. cobweb_/pipelines/__init__.py +0 -3
  35. cobweb_/pipelines/pipeline.py +0 -69
  36. cobweb_/pipelines/pipeline_console.py +0 -22
  37. cobweb_/pipelines/pipeline_loghub.py +0 -34
  38. cobweb_/setting.py +0 -74
  39. cobweb_/utils/__init__.py +0 -5
  40. cobweb_/utils/bloom.py +0 -58
  41. cobweb_/utils/dotting.py +0 -32
  42. cobweb_/utils/oss.py +0 -94
  43. cobweb_/utils/tools.py +0 -42
  44. cobweb_launcher-1.3.6.dist-info/RECORD +0 -111
  45. cobweb_new/__init__.py +0 -2
  46. cobweb_new/base/__init__.py +0 -72
  47. cobweb_new/base/common_queue.py +0 -53
  48. cobweb_new/base/decorators.py +0 -72
  49. cobweb_new/base/item.py +0 -46
  50. cobweb_new/base/log.py +0 -94
  51. cobweb_new/base/request.py +0 -82
  52. cobweb_new/base/response.py +0 -23
  53. cobweb_new/base/seed.py +0 -118
  54. cobweb_new/constant.py +0 -105
  55. cobweb_new/crawlers/__init__.py +0 -1
  56. cobweb_new/crawlers/crawler-new.py +0 -85
  57. cobweb_new/crawlers/crawler.py +0 -170
  58. cobweb_new/db/__init__.py +0 -2
  59. cobweb_new/db/api_db.py +0 -82
  60. cobweb_new/db/redis_db.py +0 -158
  61. cobweb_new/exceptions/__init__.py +0 -1
  62. cobweb_new/exceptions/oss_db_exception.py +0 -28
  63. cobweb_new/launchers/__init__.py +0 -3
  64. cobweb_new/launchers/launcher.py +0 -237
  65. cobweb_new/launchers/launcher_air.py +0 -88
  66. cobweb_new/launchers/launcher_api.py +0 -161
  67. cobweb_new/launchers/launcher_pro.py +0 -96
  68. cobweb_new/launchers/tesss.py +0 -47
  69. cobweb_new/pipelines/__init__.py +0 -3
  70. cobweb_new/pipelines/pipeline.py +0 -68
  71. cobweb_new/pipelines/pipeline_console.py +0 -22
  72. cobweb_new/pipelines/pipeline_loghub.py +0 -34
  73. cobweb_new/setting.py +0 -95
  74. cobweb_new/utils/__init__.py +0 -5
  75. cobweb_new/utils/bloom.py +0 -58
  76. cobweb_new/utils/oss.py +0 -94
  77. cobweb_new/utils/tools.py +0 -42
  78. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/LICENSE +0 -0
  79. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/WHEEL +0 -0
  80. {cobweb_launcher-1.3.6.dist-info → cobweb_launcher-1.3.8.dist-info}/top_level.txt +0 -0
cobweb_/utils/bloom.py DELETED
@@ -1,58 +0,0 @@
1
- import math
2
- import time
3
-
4
- import mmh3
5
- import redis
6
- from cobweb import setting
7
-
8
-
9
class BloomFilter:
    """Bloom filter whose bit array lives in a single Redis string key.

    The bit-array size and the number of hash functions are derived from the
    requested capacity and error rate. Hashing uses ``mmh3.hash`` with the
    hash index as the seed.
    """

    def __init__(self, key, redis_config=None, capacity=None, error_rate=None):
        """Connect to Redis and make sure the filter key exists.

        Args:
            key: Redis key that stores the bit array.
            redis_config: keyword arguments for ``redis.ConnectionPool``;
                falls back to ``setting.REDIS_CONFIG``.
            capacity: expected number of elements; falls back to
                ``setting.CAPACITY``.
            error_rate: target false-positive rate; falls back to
                ``setting.ERROR_RATE``.
        """
        # Work on a copy: forcing the db index below must not leak into the
        # caller's dict or into the shared setting.REDIS_CONFIG.
        redis_config = dict(redis_config or setting.REDIS_CONFIG)
        capacity = capacity or setting.CAPACITY
        error_rate = error_rate or setting.ERROR_RATE
        redis_config['db'] = 3

        self.key = key

        pool = redis.ConnectionPool(**redis_config)
        self._client = redis.Redis(connection_pool=pool)
        self.bit_size = self.get_bit_size(capacity, error_rate)
        self.hash_count = self.get_hash_count(self.bit_size, capacity)
        self._init_bloom_key()

    def add(self, value):
        """Set every bit selected by the hash functions for *value*; return True."""
        for seed in range(self.hash_count):
            result = mmh3.hash(value, seed) % self.bit_size
            self._client.setbit(self.key, result, 1)
        return True

    def exists(self, value):
        """Return True when every bit for *value* is set, False otherwise.

        A missing Redis key is reported as "not present".
        """
        if not self._client.exists(self.key):
            return False
        for seed in range(self.hash_count):
            result = mmh3.hash(value, seed) % self.bit_size
            if not self._client.getbit(self.key, result):
                return False
        return True

    def _init_bloom_key(self):
        """Create the filter key if it does not exist yet.

        Sets the last bit (index ``bit_size - 1``) and a 604800-second expiry
        in one Lua script. Returns True when the key already existed,
        otherwise None.
        """
        lua_script = """
            redis.call("SETBIT", KEYS[1], ARGV[1], ARGV[2])
            redis.call("EXPIRE", KEYS[1], 604800)
        """
        if self._client.exists(self.key):
            return True
        execute = self._client.register_script(lua_script)
        execute(keys=[self.key], args=[self.bit_size - 1, 1])

    @classmethod
    def get_bit_size(cls, n, p):
        """Return the bit-array size for *n* elements at false-positive rate *p*."""
        return int(-(n * math.log(p)) / (math.log(2) ** 2))

    @classmethod
    def get_hash_count(cls, m, n):
        """Return the number of hash functions for *m* bits and *n* elements.

        Never returns less than 1: with zero hash functions ``add`` would be a
        no-op and ``exists`` would report every value as present.
        """
        return max(1, int((m / n) * math.log(2)))
57
-
58
-
cobweb_/utils/dotting.py DELETED
@@ -1,32 +0,0 @@
1
- import json
2
-
3
- from aliyun.log import LogClient, LogItem, PutLogsRequest
4
- from cobweb import setting
5
-
6
-
7
class LoghubDot:
    """Writes one log record per call through an Aliyun ``LogClient``."""

    def __init__(self):
        # Client arguments come from the project settings.
        self.client = LogClient(**setting.LOGHUB_CONFIG)

    def build(self, topic, **kwargs):
        """Send *kwargs* as a single log item under *topic*.

        String values are sent unchanged; any other value is JSON-encoded
        first. Fields are sorted by key before being attached to the item.
        """
        entry = LogItem()
        fields = {
            name: raw if isinstance(raw, str) else json.dumps(raw, ensure_ascii=False)
            for name, raw in kwargs.items()
        }
        entry.set_contents(sorted(fields.items()))
        put_request = PutLogsRequest(
            project="databee-download-log",
            logstore="cobweb_log",
            topic=topic,
            logitems=[entry],
            compress=True
        )
        self.client.put_logs(request=put_request)
cobweb_/utils/oss.py DELETED
@@ -1,94 +0,0 @@
1
- from typing import List
2
- from cobweb import setting
3
- from requests import Response
4
- from oss2 import Auth, Bucket, models, PartIterator
5
- from cobweb.exceptions import oss_db_exception
6
- from cobweb.base.decorators import decorator_oss_db
7
-
8
-
9
class OssUtil:
    """Helper around an ``oss2.Bucket`` for plain, multipart and append uploads."""

    def __init__(
            self,
            bucket=None,
            endpoint=None,
            access_key=None,
            secret_key=None,
            chunk_size=None,
            min_upload_size=None,
            **kwargs
    ):
        """Build the bucket client; every omitted argument falls back to ``setting``.

        Extra keyword arguments are forwarded to ``Bucket``.
        """
        self.bucket = bucket or setting.OSS_BUCKET
        self.endpoint = endpoint or setting.OSS_ENDPOINT
        self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
        self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)

        self._auth = Auth(
            access_key_id=access_key or setting.OSS_ACCESS_KEY,
            access_key_secret=secret_key or setting.OSS_SECRET_KEY
        )
        self._client = Bucket(
            auth=self._auth,
            endpoint=self.endpoint,
            bucket_name=self.bucket,
            **kwargs
        )

    def exists(self, key: str) -> bool:
        """Return whether an object named *key* exists in the bucket."""
        return self._client.object_exists(key)

    def head(self, key: str) -> models.HeadObjectResult:
        """Return the head-object result for *key*."""
        return self._client.head_object(key)

    @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
    def init_part(self, key) -> models.InitMultipartUploadResult:
        """Initialise a multipart upload."""
        return self._client.init_multipart_upload(key)

    @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
    def put(self, key, data) -> models.PutObjectResult:
        """Upload a file."""
        return self._client.put_object(key, data)

    @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
    def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
        """Upload one part of a multipart upload."""
        return self._client.upload_part(key, upload_id, position, data)

    def list_part(self, key, upload_id):  # -> List[models.ListPartsResult]:
        """Return the list of uploaded parts."""
        return list(PartIterator(self._client, key, upload_id))

    @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
    def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
        """Merge the uploaded parts.

        When no explicit *parts* are given, the ``x-oss-complete-all`` header
        is sent instead.
        """
        if parts:
            headers = None
        else:
            headers = {"x-oss-complete-all": "yes"}
        return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)

    @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
    def append(self, key, position, data) -> models.AppendObjectResult:
        """Append-upload *data* at *position*."""
        return self._client.append_object(key, position, data)

    def iter_data(self, data, chunk_size=None):
        """Yield *data* in chunks of *chunk_size* (default ``self.chunk_size``).

        Accepts a ``requests.Response`` (streamed via ``iter_content``) or a
        ``bytes`` object (sliced); any other type yields nothing.
        """
        step = chunk_size or self.chunk_size
        if isinstance(data, Response):
            yield from data.iter_content(step)
        if isinstance(data, bytes):
            for offset in range(0, len(data), step):
                yield data[offset:offset + step]

    def assemble(self, ready_data, data, chunk_size=None):
        """Append *data* to the buffer and split off one full chunk if available.

        Returns ``(remaining_buffer, upload_data)``; ``upload_data`` is
        ``b""`` while the buffer is still shorter than the chunk size.
        """
        buffered = ready_data + data
        limit = chunk_size or self.chunk_size
        if len(buffered) < limit:
            return buffered, b""
        return buffered[limit:], buffered[:limit]

    def content_length(self, key: str) -> int:
        """Return the ``content_length`` reported by ``head`` for *key*."""
        return self.head(key).content_length
94
-
cobweb_/utils/tools.py DELETED
@@ -1,42 +0,0 @@
1
- import re
2
- import hashlib
3
- from typing import Union
4
- from importlib import import_module
5
-
6
-
7
def md5(text: Union[str, bytes]) -> str:
    """Return the hexadecimal MD5 digest of *text*.

    A ``str`` is encoded as UTF-8 first; ``bytes`` are hashed as given.
    """
    payload = text.encode('utf-8') if isinstance(text, str) else text
    return hashlib.md5(payload).hexdigest()
11
-
12
-
13
def build_path(site, url, file_type):
    """Return ``<site>/<md5 of url>.<file_type>``."""
    digest = md5(url)
    return f"{site}/{digest}.{file_type}"
15
-
16
-
17
def format_size(content_length: int) -> str:
    """Format a byte count as a string in KB, MB, GB or TB.

    The smallest unit in which the value is below 1024 is used, rounded to two
    decimals. Values of 1024 TB or more are reported in TB instead of falling
    through and returning ``None``.
    """
    units = ("KB", "MB", "GB", "TB")
    for power, unit in enumerate(units[:-1], start=1):
        num = content_length / (1024 ** power)
        if num < 1024:
            return f"{round(num, 2)} {unit}"
    # Largest unit: always return a value, however big the number is.
    num = content_length / (1024 ** len(units))
    return f"{round(num, 2)} {units[-1]}"
23
-
24
-
25
def dynamic_load_class(model_info):
    """Import and return the object described by *model_info*.

    Two spellings are accepted:

    * ``"from package.module import Name"``
    * ``"package.module.Name"``

    The dotted form is used whenever the string does not match the
    ``from ... import ...`` pattern, so dotted paths that merely contain the
    word "import" (e.g. ``"importlib.import_module"``) resolve correctly.

    Raises:
        TypeError: if *model_info* is not a string.
    """
    if not isinstance(model_info, str):
        raise TypeError(
            f"model_info must be a str, got {type(model_info).__name__}"
        )
    matched = re.search(r"from (.*?) import (.*?)$", model_info)
    if matched:
        model_path, class_name = matched.groups()
    else:
        model_path, class_name = model_info.rsplit(".", 1)
    model = import_module(model_path)
    return getattr(model, class_name)
39
-
40
-
41
- # def download_log_info(item:dict) -> str:
42
- # return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
@@ -1,111 +0,0 @@
1
- cobweb/__init__.py,sha256=oaEfsGUuGP0s39UbFRwrnsjMUeuB6QvQIAwStKFyUTk,83
2
- cobweb/constant.py,sha256=eofONAntk9O6S-cb4KbYGYHL_u7nBlOqqFOw_HzJHAU,3588
3
- cobweb/setting.py,sha256=pY6LKsgWI3164GiGA1z_y26LVf5-3mpiEgmm86mKRdY,3135
4
- cobweb/base/__init__.py,sha256=Na385Hhl9l2S8aPhcdJVPjmb02wkVM969bWQ84bCSQs,5095
5
- cobweb/base/basic.py,sha256=s5G4LBZiLUfoymV-gLSIqeH-OJ7q7-L35sBa6xEH3EI,7666
6
- cobweb/base/common_queue.py,sha256=Gor7sR3h1hlZWaI0XcNAbf0S15Ftjr3DFRWNTGL13uU,1137
7
- cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
8
- cobweb/base/dotting.py,sha256=0SH8F2uAGWZjfODpTAXngYHz8JgfCm-RqpmQbfQ3NCY,1233
9
- cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
10
- cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
11
- cobweb/base/request.py,sha256=acGm3OzxsPed5VUTk7D9eeHZPMh7KUNQRUv44G5znZg,2659
12
- cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
- cobweb/base/seed.py,sha256=PN5J4gKPEXylwyQeSGOBfauxHktxFr7RJe8nVX1hBw4,2987
14
- cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
- cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
- cobweb/crawlers/crawler.py,sha256=ZQ6yVA1EaQRdKJEY3DNqShzp9HPMwlSXapnsRW9E5Wc,2987
17
- cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
- cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
- cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
20
- cobweb/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
21
- cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
22
- cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
23
- cobweb/launchers/__init__.py,sha256=m_XNG2bWuMbirPt3d0_s-Ezl1xycfUxeqZnwq_kkfuo,116
24
- cobweb/launchers/launcher.py,sha256=NFwpc_0Um0hbDm1A8glWA4fcW6mNYL1eon4t3JAQUlw,7411
25
- cobweb/launchers/launcher_air.py,sha256=yPr395HVIIHAq6lqRcYJu7c0KkfO9V8O-2sn0hC96p0,2990
26
- cobweb/launchers/launcher_api.py,sha256=TfLrLXazFWsOJLI7caMGfZozCttL1WTwTo3uUpN_FV0,3370
27
- cobweb/launchers/launcher_pro.py,sha256=2H-TcvQx-ga78GLNTa-GXMLYAj9nEeCJSWf8xl-1ISQ,3374
28
- cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
29
- cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
30
- cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
31
- cobweb/pipelines/pipeline.py,sha256=Pycm22bHId9a3gdP81D5y7SsuMndYooTb5n4zQxP7dM,1321
32
- cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
33
- cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
34
- cobweb/schedulers/__init__.py,sha256=y7Lv_7b0zfTl0OhIONb_8u1K1C9gVlBA-xz_XG_kI9g,85
35
- cobweb/schedulers/scheduler_api.py,sha256=mC54QOS0PEu4SFvxfD5Qr9239hAxwMrKTg-33rirANE,2112
36
- cobweb/schedulers/scheduler_redis.py,sha256=Aw7de0sXigRAxJgqUhHWu30hMBzgEWjkj-3OXXqmldg,2118
37
- cobweb/utils/__init__.py,sha256=YvD4mIDBd9jmGA6WJBcwkgDU2jRFNBCEbarZCSUBAHE,114
38
- cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
39
- cobweb/utils/dotting.py,sha256=fZ-16TAf51dovGIKaHsQthgTkbSEMAteaJPluZhniJI,879
40
- cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
41
- cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
42
- cobweb_/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
43
- cobweb_/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
44
- cobweb_/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
45
- cobweb_/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
46
- cobweb_/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
47
- cobweb_/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
48
- cobweb_/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
49
- cobweb_/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
50
- cobweb_/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
51
- cobweb_/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
52
- cobweb_/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
53
- cobweb_/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
54
- cobweb_/crawlers/crawler.py,sha256=mPRc9GBfWi5AoSxB1jlARxvG_AzsPVRFil5O8RnOxCY,7018
55
- cobweb_/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
56
- cobweb_/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
57
- cobweb_/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
58
- cobweb_/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
59
- cobweb_/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
60
- cobweb_/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
61
- cobweb_/launchers/launcher.py,sha256=sPts-xlgxoeIfl1fn1XR2XVZxLzt7He9xrYDfTHRAGo,7029
62
- cobweb_/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
63
- cobweb_/launchers/launcher_api.py,sha256=Ih8f5xDcFlGBn6VSnlrpxcchMB48ugsj2NTWYgGYWfY,8669
64
- cobweb_/launchers/launcher_pro.py,sha256=NBJstQuB0o_jMiySJ14lk0Y3WAxxiScaQvXa1qtTSo4,8683
65
- cobweb_/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
66
- cobweb_/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
67
- cobweb_/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
68
- cobweb_/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
69
- cobweb_/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
70
- cobweb_/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
71
- cobweb_/utils/dotting.py,sha256=fZ-16TAf51dovGIKaHsQthgTkbSEMAteaJPluZhniJI,879
72
- cobweb_/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
73
- cobweb_/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
74
- cobweb_new/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
75
- cobweb_new/constant.py,sha256=wy2bLpGZrl1MtgVv-Z1Tmtj5uWh-KGzDrrGKdVStxV4,3236
76
- cobweb_new/setting.py,sha256=Ya3X4HbvDfSmMF2kSJwaaP1naxrWETTFW88T11agP7k,3035
77
- cobweb_new/base/__init__.py,sha256=L74KN3qZn6s33EXyxQ_vB3FF8mA7pZJd_ekkWiUKd5Y,2229
78
- cobweb_new/base/common_queue.py,sha256=Po6yY8HqpC6Wt6csd3Co3lBd7ygN2vmEECczgyc_sM8,1292
79
- cobweb_new/base/decorators.py,sha256=8VDpANSIhxhrFnwgQzAxM_8ZyDXKdn3zTH0oZIXqRPE,1801
80
- cobweb_new/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
81
- cobweb_new/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
82
- cobweb_new/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
83
- cobweb_new/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
84
- cobweb_new/base/seed.py,sha256=KBVxVU4jMB6oiw8HPtu-nDUVUZ6jiTjzR917jTYGCZs,2977
85
- cobweb_new/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
86
- cobweb_new/crawlers/crawler-new.py,sha256=TAYMH2E3BTkjU6bFLlIMVfsR3cV2ggjA0moUpaXOe1Y,2762
87
- cobweb_new/crawlers/crawler.py,sha256=xiFNM0t69f5xlm59hPbO2MpqtdirVAUhD84-CLpyHPM,6349
88
- cobweb_new/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
89
- cobweb_new/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
90
- cobweb_new/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
91
- cobweb_new/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
92
- cobweb_new/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
93
- cobweb_new/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
94
- cobweb_new/launchers/launcher.py,sha256=87P_2rRjzqyQXcG_EJ5Y6lMAk7saM8k1WBJcl9ANX6k,8309
95
- cobweb_new/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
96
- cobweb_new/launchers/launcher_api.py,sha256=qPazoC7U-UmgebbiTkhl6f4yQmN34XMl6HawekhAhEo,5789
97
- cobweb_new/launchers/launcher_pro.py,sha256=QLjAiN8qMk4NklSY7ldBAR5OEEUB8sECuCCwRrFEC68,3414
98
- cobweb_new/launchers/tesss.py,sha256=pDe0wwhXbdjjmtfc7JLPfVOvs9yuc7Y8wLT1b1ueeEs,912
99
- cobweb_new/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
100
- cobweb_new/pipelines/pipeline.py,sha256=3IRHHqrHblZ_18Cps2bGK6iugDjs-dde7p3AbarfiN8,1958
101
- cobweb_new/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
102
- cobweb_new/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
103
- cobweb_new/utils/__init__.py,sha256=c9macpjc15hrCUCdzO5RR_sgK_B9kvJKreSGprZ1ld4,112
104
- cobweb_new/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
105
- cobweb_new/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
106
- cobweb_new/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
107
- cobweb_launcher-1.3.6.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
108
- cobweb_launcher-1.3.6.dist-info/METADATA,sha256=4q-fHNZ6nArBE6t6q0aTy8WoQYXFUYMW-ZnLZzO51yI,6509
109
- cobweb_launcher-1.3.6.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
110
- cobweb_launcher-1.3.6.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
111
- cobweb_launcher-1.3.6.dist-info/RECORD,,
cobweb_new/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- from .launchers import LauncherAir, LauncherPro, LauncherApi
2
- from .constant import CrawlerModel
@@ -1,72 +0,0 @@
1
- import time
2
- from inspect import isgenerator
3
- from typing import Callable, Union
4
-
5
- from .common_queue import Queue
6
- from .response import Response
7
- from .request import Request
8
- from .item import BaseItem, ConsoleItem
9
- from .seed import Seed
10
-
11
- from .log import logger
12
- # from .decorators import decorator_oss_db, stop, pause
13
- import decorators
14
-
15
-
16
class TaskQueue:
    """Class-level in-memory queues shared by all crawl stages."""

    SEED = Queue()      # queue for adding task seeds
    TODO = Queue()      # task seed queue
    REQUEST = Queue()   # request queue

    DOWNLOAD = Queue()  # download task queue
    RESPONSE = Queue()  # response queue
    DONE = Queue()      # download-completed queue

    UPLOAD = Queue()    # task upload queue

    DELETE = Queue()    # task deletion queue

    def __init__(self, db):
        self.db = db

    @staticmethod
    def is_empty():
        """Return True when every stage queue is empty."""
        stages = (
            TaskQueue.SEED,
            TaskQueue.TODO,
            TaskQueue.REQUEST,
            TaskQueue.DOWNLOAD,
            TaskQueue.RESPONSE,
            TaskQueue.UPLOAD,
            TaskQueue.DONE,
            TaskQueue.DELETE,
        )
        return not any(stage.length for stage in stages)

    @staticmethod
    def process_task(it: Union[Seed, Request, Response, BaseItem], crawler_func: Callable):
        """Run *crawler_func* on *it* and route everything it yields.

        *crawler_func* must return a generator. Each yielded ``Request`` goes
        to DOWNLOAD, ``Response`` to RESPONSE, ``BaseItem`` to UPLOAD and
        ``Seed`` to SEED; other values are ignored.

        Failures are logged rather than silently dropped. For anything other
        than a ``BaseItem`` the retry counter at ``it.seed.params.retry`` is
        incremented before backing off.
        """
        try:
            iterators = crawler_func(it)
            if not isgenerator(iterators):
                raise TypeError(f"{crawler_func.__name__} function isn't a generator")
            for tk in iterators:
                if isinstance(tk, Request):
                    TaskQueue.DOWNLOAD.push(tk)
                elif isinstance(tk, Response):
                    TaskQueue.RESPONSE.push(tk)
                elif isinstance(tk, BaseItem):
                    TaskQueue.UPLOAD.push(tk)
                elif isinstance(tk, Seed):
                    TaskQueue.SEED.push(tk)
        except Exception:
            logger.exception(
                f"process_task failed in {getattr(crawler_func, '__name__', crawler_func)}"
            )
            if not isinstance(it, BaseItem):
                it.seed.params.retry += 1

            # NOTE(review): the available copy of this code has lost its
            # indentation; the back-off sleep is assumed to belong to the
            # failure path - confirm against the original file.
            time.sleep(5)
67
-
68
-
69
class Distribute:
    """Data dispatcher that routes data into the individual queues."""
@@ -1,53 +0,0 @@
1
- import time
2
- from collections import deque
3
-
4
-
5
class Queue:
    """Small wrapper around ``collections.deque`` with forgiving push/pop."""

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of items currently queued."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add *data* to the queue; falsy data is ignored.

        A list or tuple is unpacked item by item unless *direct_insertion* is
        true, in which case it is stored as one element. *left* selects the
        left end of the deque. ``AttributeError`` is suppressed.
        """
        try:
            if not data:
                return None
            unpack = isinstance(data, (list, tuple)) and not direct_insertion
            if unpack and left:
                self._queue.extendleft(data)
            elif unpack:
                self._queue.extend(data)
            elif left:
                self._queue.appendleft(data)
            else:
                self._queue.append(data)
        except AttributeError:
            pass

    def pop(self, left: bool = True):
        """Remove and return one item (left end by default); None when empty."""
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except (IndexError, AttributeError):
            return None

    def clear(self):
        """Drop every queued item."""
        self._queue.clear()

    def get(self):
        """Generator yielding a single value: the leftmost item, or None.

        When the queue is empty it sleeps one second before yielding None.
        """
        try:
            yield self._queue.popleft()
        except IndexError:
            time.sleep(1)
            yield None
        except AttributeError:
            yield None
44
-
45
-
46
class RedisQueue(Queue):
    """Queue subclass that additionally keeps a ``db`` handle."""

    def __init__(self, db):
        super().__init__()
        self.db = db

    def pop(self, left: bool = True):
        """Placeholder override: does nothing and returns None."""
        return None
@@ -1,72 +0,0 @@
1
- import time
2
- import threading
3
- from functools import wraps
4
-
5
-
6
def add_thread(num=1):
    """Decorator factory: calling the method registers threads instead of running it.

    Each call of the decorated method appends *num* unstarted
    ``threading.Thread`` objects targeting the original function to
    ``self._threads``. Threads are named after the function, with an
    ``_<index>`` suffix when *num* is greater than one.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(self, *args):
            call_args = (self,) + args
            for index in range(num):
                if num > 1:
                    label = f"{func.__name__}_{index}"
                else:
                    label = func.__name__
                worker = threading.Thread(name=label, target=func, args=call_args)
                self._threads.append(worker)
        return wrapper

    return decorator
16
-
17
-
18
def pause(func):
    """Decorator: call *func* repeatedly until ``self.pause`` is set.

    Exceptions raised by *func* are deliberately ignored, and every iteration
    ends with a 0.1 second sleep. The wrapper returns None.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            if self.pause.is_set():
                break
            try:
                func(self, *args, **kwargs)
            except Exception:
                # Best effort: a failing iteration must not end the loop.
                pass
            finally:
                time.sleep(0.1)

    return wrapper
31
-
32
-
33
def stop(func):
    """Decorator: call *func* repeatedly until ``self.stop`` is set.

    Exceptions raised by *func* are deliberately ignored, and every iteration
    ends with a 0.1 second sleep. The wrapper returns None.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            if self.stop.is_set():
                break
            try:
                func(self, *args, **kwargs)
            except Exception:
                # Best effort: a failing iteration must not end the loop.
                pass
            finally:
                time.sleep(0.1)

    return wrapper
46
-
47
-
48
def decorator_oss_db(exception, retries=3):
    """Decorator factory that retries a call and wraps the final failure.

    The decorated callable is attempted up to *retries* times. The first
    successful result is returned immediately. If the last attempt fails, the
    error is re-raised as *exception*, chained to the original error.

    Args:
        exception: exception class raised once every attempt has failed.
        retries: maximum number of attempts. The final-attempt check follows
            this value instead of a fixed third attempt.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            for attempt in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as err:
                    # Only the final attempt surfaces the failure.
                    if attempt >= retries - 1:
                        raise exception(err) from err
            return None

        return wrapper

    return decorator
70
-
71
-
72
-
cobweb_new/base/item.py DELETED
@@ -1,46 +0,0 @@
1
- from .seed import Seed
2
- from collections import namedtuple
3
-
4
-
5
class Item(type):
    """Metaclass that attaches a ``Data`` namedtuple to every item class.

    For each class except the one named ``"BaseItem"``, ``Data`` is built
    from the class attributes ``__TABLE__`` (type name) and ``__FIELDS__``
    (field names).
    """

    def __new__(cls, name, bases, dct):
        item_cls = super().__new__(cls, name, bases, dct)
        if name == "BaseItem":
            return item_cls
        table_name = getattr(item_cls, "__TABLE__")
        field_spec = getattr(item_cls, "__FIELDS__")
        item_cls.Data = namedtuple(table_name, field_spec)
        return item_cls
14
-
15
-
16
class BaseItem(metaclass=Item):
    """Base class for items: a seed plus a ``Data`` namedtuple of field values.

    Subclasses set ``__TABLE__`` and ``__FIELDS__``; the ``Item`` metaclass
    builds the matching ``Data`` namedtuple for them.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: Seed, **kwargs):
        """Store *seed* and split *kwargs* into field data and plain attributes.

        Keyword arguments whose name is a declared field populate
        ``self.data``; all others are set as instance attributes.
        """
        self.seed = seed

        # Compare against the parsed field names. Testing membership in the
        # raw __FIELDS__ string would be a substring match ("dat" in "data").
        field_names = self.Data._fields

        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        self.data = self.Data(**data)

    @property
    def to_dict(self):
        """Field values as a dict."""
        return self.data._asdict()

    @property
    def table(self):
        """Name of the ``Data`` namedtuple, i.e. the value of ``__TABLE__``."""
        return self.Data.__name__
40
-
41
-
42
class ConsoleItem(BaseItem):
    """Item with table name ``console`` and a single ``data`` field."""

    __TABLE__ = "console"
    __FIELDS__ = "data"
46
-
cobweb_new/base/log.py DELETED
@@ -1,94 +0,0 @@
1
- import logging
2
-
3
-
4
class ColorCodes:
    """ANSI escape sequences for coloured and styled terminal text."""

    # Reset all attributes
    RESET = "\033[0m"

    # Regular foreground colours (30-37 range)
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Bright foreground colours (90-97 range)
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_PURPLE = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    # Regular background colours (40-47 range)
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_PURPLE = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright background colours (100-107 range)
    BG_BRIGHT_RED = "\033[101m"
    BG_BRIGHT_GREEN = "\033[102m"
    BG_BRIGHT_YELLOW = "\033[103m"
    BG_BRIGHT_BLUE = "\033[104m"
    BG_BRIGHT_PURPLE = "\033[105m"
    BG_BRIGHT_CYAN = "\033[106m"
    BG_BRIGHT_WHITE = "\033[107m"

    # Text styles
    BOLD = "\033[1m"
    DIM = "\033[2m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    REVERSE = "\033[7m"
    HIDDEN = "\033[8m"
52
-
53
-
54
class Log:
    """Facade exposing the methods of one class-wide ``logging.Logger``.

    Defining the class configures logging: the ``oss2.api`` logger is raised
    to WARNING and ``logging.basicConfig`` is called with INFO level and the
    format below. The active logger is stored on the class, so renaming it
    through one instance affects all instances.
    """

    logging.getLogger('oss2.api').setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
               ' %(levelname)s -> %(message)s'
    )
    log = logging.getLogger()

    def set_log_name(self, name):
        """Replace the class-wide logger with ``logging.getLogger(name)``."""
        type(self).log = logging.getLogger(name)

    @property
    def debug(self):
        """The current logger's ``debug`` method."""
        return type(self).log.debug

    @property
    def info(self):
        """The current logger's ``info`` method."""
        return type(self).log.info

    @property
    def warning(self):
        """The current logger's ``warning`` method."""
        return type(self).log.warning

    @property
    def exception(self):
        """The current logger's ``exception`` method."""
        return type(self).log.exception

    @property
    def error(self):
        """The current logger's ``error`` method."""
        return type(self).log.error

    @property
    def critical(self):
        """The current logger's ``critical`` method."""
        return type(self).log.critical


# Shared module-level instance.
logger = Log()
92
-
93
-
94
-