cobweb-launcher 0.1.8__py3-none-any.whl → 1.2.41__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. cobweb/__init__.py +2 -11
  2. cobweb/base/__init__.py +9 -0
  3. cobweb/base/basic.py +297 -0
  4. cobweb/base/common_queue.py +30 -0
  5. cobweb/base/decorators.py +40 -0
  6. cobweb/base/dotting.py +35 -0
  7. cobweb/base/item.py +46 -0
  8. cobweb/{log.py → base/log.py} +4 -6
  9. cobweb/base/request.py +82 -0
  10. cobweb/base/response.py +23 -0
  11. cobweb/base/seed.py +114 -0
  12. cobweb/constant.py +94 -0
  13. cobweb/crawlers/__init__.py +1 -0
  14. cobweb/crawlers/base_crawler.py +144 -0
  15. cobweb/crawlers/crawler.py +209 -0
  16. cobweb/crawlers/file_crawler.py +98 -0
  17. cobweb/db/__init__.py +2 -2
  18. cobweb/db/api_db.py +82 -0
  19. cobweb/db/redis_db.py +125 -218
  20. cobweb/exceptions/__init__.py +1 -0
  21. cobweb/exceptions/oss_db_exception.py +28 -0
  22. cobweb/launchers/__init__.py +3 -0
  23. cobweb/launchers/launcher.py +235 -0
  24. cobweb/launchers/launcher_air.py +88 -0
  25. cobweb/launchers/launcher_api.py +209 -0
  26. cobweb/launchers/launcher_pro.py +208 -0
  27. cobweb/pipelines/__init__.py +3 -0
  28. cobweb/pipelines/pipeline.py +69 -0
  29. cobweb/pipelines/pipeline_console.py +22 -0
  30. cobweb/pipelines/pipeline_loghub.py +34 -0
  31. cobweb/schedulers/__init__.py +3 -0
  32. cobweb/schedulers/scheduler_api.py +72 -0
  33. cobweb/schedulers/scheduler_redis.py +72 -0
  34. cobweb/setting.py +67 -6
  35. cobweb/utils/__init__.py +5 -0
  36. cobweb/utils/bloom.py +58 -0
  37. cobweb/utils/dotting.py +32 -0
  38. cobweb/utils/oss.py +94 -0
  39. cobweb/utils/tools.py +42 -0
  40. cobweb_launcher-1.2.41.dist-info/METADATA +205 -0
  41. cobweb_launcher-1.2.41.dist-info/RECORD +44 -0
  42. {cobweb_launcher-0.1.8.dist-info → cobweb_launcher-1.2.41.dist-info}/WHEEL +1 -1
  43. cobweb/bbb.py +0 -191
  44. cobweb/db/oss_db.py +0 -127
  45. cobweb/db/scheduler/__init__.py +0 -0
  46. cobweb/db/scheduler/default.py +0 -8
  47. cobweb/db/scheduler/textfile.py +0 -27
  48. cobweb/db/storer/__init__.py +0 -0
  49. cobweb/db/storer/console.py +0 -9
  50. cobweb/db/storer/loghub.py +0 -54
  51. cobweb/db/storer/redis.py +0 -15
  52. cobweb/db/storer/textfile.py +0 -15
  53. cobweb/decorators.py +0 -16
  54. cobweb/distributed/__init__.py +0 -0
  55. cobweb/distributed/launcher.py +0 -243
  56. cobweb/distributed/models.py +0 -143
  57. cobweb/interface.py +0 -34
  58. cobweb/single/__init__.py +0 -0
  59. cobweb/single/launcher.py +0 -231
  60. cobweb/single/models.py +0 -134
  61. cobweb/single/nest.py +0 -153
  62. cobweb/task.py +0 -50
  63. cobweb/utils.py +0 -90
  64. cobweb_launcher-0.1.8.dist-info/METADATA +0 -45
  65. cobweb_launcher-0.1.8.dist-info/RECORD +0 -31
  66. {cobweb_launcher-0.1.8.dist-info → cobweb_launcher-1.2.41.dist-info}/LICENSE +0 -0
  67. {cobweb_launcher-0.1.8.dist-info → cobweb_launcher-1.2.41.dist-info}/top_level.txt +0 -0
cobweb/utils/oss.py ADDED
@@ -0,0 +1,94 @@
1
+ from typing import List
2
+ from cobweb import setting
3
+ from requests import Response
4
+ from oss2 import Auth, Bucket, models, PartIterator
5
+ from cobweb.exceptions import oss_db_exception
6
+ from cobweb.base.decorators import decorator_oss_db
7
+
8
+
9
class OssUtil:
    """Convenience wrapper around an ``oss2.Bucket`` client.

    Every constructor argument that is left falsy falls back to the matching
    ``OSS_*`` value in ``cobweb.setting``. Extra keyword arguments are
    forwarded unchanged to ``oss2.Bucket``.
    """

    def __init__(
            self,
            bucket=None,
            endpoint=None,
            access_key=None,
            secret_key=None,
            chunk_size=None,
            min_upload_size=None,
            **kwargs
    ):
        self.bucket = bucket if bucket else setting.OSS_BUCKET
        self.endpoint = endpoint if endpoint else setting.OSS_ENDPOINT
        # Sizes may arrive as strings (NOTE(review): presumably from
        # environment-driven settings — confirm), hence the int() coercion.
        self.chunk_size = int(chunk_size if chunk_size else setting.OSS_CHUNK_SIZE)
        self.min_upload_size = int(
            min_upload_size if min_upload_size else setting.OSS_MIN_UPLOAD_SIZE
        )

        key_id = access_key if access_key else setting.OSS_ACCESS_KEY
        key_secret = secret_key if secret_key else setting.OSS_SECRET_KEY
        self._auth = Auth(access_key_id=key_id, access_key_secret=key_secret)
        self._client = Bucket(
            auth=self._auth,
            endpoint=self.endpoint,
            bucket_name=self.bucket,
            **kwargs
        )

    def exists(self, key: str) -> bool:
        """Return the client's ``object_exists`` answer for ``key``."""
        found = self._client.object_exists(key)
        return found

    def head(self, key: str) -> models.HeadObjectResult:
        """Return the ``head_object`` result for ``key``."""
        meta = self._client.head_object(key)
        return meta

    # NOTE(review): decorator_oss_db presumably converts failures into the
    # given exception class — confirm in cobweb.base.decorators.
    @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
    def init_part(self, key) -> models.InitMultipartUploadResult:
        """Initialise a multipart upload for ``key``."""
        result = self._client.init_multipart_upload(key)
        return result

    @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
    def put(self, key, data) -> models.PutObjectResult:
        """Upload ``data`` as the object ``key`` in a single request."""
        result = self._client.put_object(key, data)
        return result

    @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
    def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
        """Upload one part of the multipart upload ``upload_id``."""
        result = self._client.upload_part(key, upload_id, position, data)
        return result

    def list_part(self, key, upload_id):
        """Return the parts ``PartIterator`` yields for ``upload_id`` as a list."""
        return list(PartIterator(self._client, key, upload_id))

    @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
    def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
        """Complete (merge) the multipart upload ``upload_id``.

        When ``parts`` is falsy the ``x-oss-complete-all: yes`` header is sent
        along with the request; otherwise no extra headers are passed.
        """
        if parts:
            headers = None
        else:
            headers = {"x-oss-complete-all": "yes"}
        return self._client.complete_multipart_upload(
            key, upload_id, parts, headers=headers
        )

    @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
    def append(self, key, position, data) -> models.AppendObjectResult:
        """Append ``data`` to the object ``key`` at ``position``."""
        result = self._client.append_object(key, position, data)
        return result

    def iter_data(self, data, chunk_size=None):
        """Yield ``data`` in pieces of at most ``chunk_size`` bytes.

        ``requests.Response`` objects are streamed through ``iter_content``;
        ``bytes`` are sliced. Any other type yields nothing. ``chunk_size``
        defaults to ``self.chunk_size``.
        """
        size = chunk_size or self.chunk_size
        if isinstance(data, Response):
            yield from data.iter_content(size)
        if isinstance(data, bytes):
            yield from (
                data[start:start + size] for start in range(0, len(data), size)
            )

    def assemble(self, ready_data, data, chunk_size=None):
        """Add ``data`` to the buffer and split off at most one full chunk.

        Returns ``(remaining_buffer, upload_data)``. ``upload_data`` is
        ``b""`` while the buffer is still shorter than ``chunk_size``
        (default ``self.chunk_size``).
        """
        buffered = ready_data + data
        size = chunk_size or self.chunk_size
        if len(buffered) < size:
            return buffered, b""
        return buffered[size:], buffered[:size]

    def content_length(self, key: str) -> int:
        """Return ``content_length`` from the object's head result."""
        return self.head(key).content_length
94
+
cobweb/utils/tools.py ADDED
@@ -0,0 +1,42 @@
1
+ import re
2
+ import hashlib
3
+ from typing import Union
4
+ from importlib import import_module
5
+
6
+
7
def md5(text: Union[str, bytes]) -> str:
    """Return the hexadecimal MD5 digest of ``text``.

    ``str`` input is encoded as UTF-8 before hashing; any other value is
    passed to ``hashlib.md5`` unchanged.
    """
    payload = text.encode('utf-8') if isinstance(text, str) else text
    digest = hashlib.md5(payload)
    return digest.hexdigest()
11
+
12
+
13
def build_path(site, url, file_type):
    """Return ``<site>/<md5 of url>.<file_type>``."""
    file_name = f"{md5(url)}.{file_type}"
    return f"{site}/{file_name}"
15
+
16
+
17
def format_size(content_length: int) -> str:
    """Format a byte count as a human-readable size string.

    The value is expressed in the smallest of KB, MB, GB and TB for which
    the number is below 1024, rounded to two decimals (so sizes under 1 KB
    come out as a fraction of a KB, e.g. ``"0.5 KB"``).

    Sizes of 1024 TB and above are reported in TB. Previously the loop fell
    through for such values and the function returned ``None``.

    :param content_length: size in bytes.
    :return: text such as ``"1.5 KB"`` or ``"3072.0 TB"``.
    """
    units = ("KB", "MB", "GB", "TB")
    *smaller_units, largest_unit = units
    for exponent, unit in enumerate(smaller_units, start=1):
        num = content_length / (1024 ** exponent)
        if num < 1024:
            return f"{round(num, 2)} {unit}"
    # Nothing larger than TB is defined, so the last unit takes the rest.
    num = content_length / (1024 ** len(units))
    return f"{round(num, 2)} {largest_unit}"
23
+
24
+
25
def dynamic_load_class(model_info):
    """Import and return the object named by an import string.

    Two spellings are accepted:

    * an import statement: ``"from package.module import Name"``
    * a dotted path: ``"package.module.Name"``

    The statement form is detected by matching the whole string. The old
    ``"import" in model_info`` substring test sent dotted paths such as
    ``"importlib.import_module"`` into the regex branch, where the failed
    match raised ``AttributeError``.

    :param model_info: import string in one of the forms above.
    :return: the attribute ``Name`` of the imported module.
    :raises TypeError: if ``model_info`` is not a ``str``.
    :raises ValueError: if a dotted path has no module part or no name part.
    """
    if not isinstance(model_info, str):
        raise TypeError(
            f"model_info must be an import string, "
            f"got {type(model_info).__name__}"
        )

    statement = re.match(r"\s*from\s+(\S+)\s+import\s+(\w+)\s*$", model_info)
    if statement:
        model_path, class_name = statement.groups()
    else:
        model_path, _, class_name = model_info.strip().rpartition(".")
        if not model_path or not class_name:
            raise ValueError(
                f"cannot split {model_info!r} into a module path and a name"
            )

    model = import_module(model_path)
    return getattr(model, class_name)
39
+
40
+
41
+ # def download_log_info(item:dict) -> str:
42
+ # return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
@@ -0,0 +1,205 @@
1
+ Metadata-Version: 2.1
2
+ Name: cobweb-launcher
3
+ Version: 1.2.41
4
+ Summary: spider_hole
5
+ Home-page: https://github.com/Juannie-PP/cobweb
6
+ Author: Juannie-PP
7
+ Author-email: 2604868278@qq.com
8
+ License: MIT
9
+ Keywords: cobweb-launcher, cobweb
10
+ Platform: UNKNOWN
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: requests (>=2.19.1)
16
+ Requires-Dist: oss2 (>=2.18.1)
17
+ Requires-Dist: redis (>=4.4.4)
18
+ Requires-Dist: aliyun-log-python-sdk
19
+ Requires-Dist: mmh3
20
+
21
+ # cobweb
22
+ cobweb是一个基于python的分布式爬虫调度框架,目前支持分布式爬虫,单机爬虫,支持自定义数据库,支持自定义数据存储,支持自定义数据处理等操作。
23
+
24
+ cobweb主要由3个模块和一个配置文件组成:Launcher启动器、Crawler采集器、Pipeline存储和setting配置文件。
25
+ 1. Launcher启动器:用于启动爬虫任务,控制爬虫任务的执行流程,以及数据存储和数据处理。
26
+ 框架提供两种启动器模式:LauncherAir、LauncherPro,分别对应单机爬虫模式和分布式调度模式。
27
+ 2. Crawler采集器:用于控制采集流程、数据下载和数据处理。
28
+ 框架提供了基础的采集器,用于控制采集流程、数据下载和数据处理,用户也可在创建任务时自定义请求、下载和解析方法,具体看使用方法介绍。
29
+ 3. Pipeline存储:用于存储采集到的数据,支持自定义数据存储和数据处理。框架提供了Console和Loghub两种存储方式,用户也可继承Pipeline抽象类自定义存储方式。
30
+ 4. setting配置文件:用于配置采集器、存储器、队列长度、采集线程数等参数,框架提供了默认配置,用户也可自定义配置。
31
+ ## 安装
32
+ ```
33
+ pip3 install --upgrade cobweb-launcher
34
+ ```
35
+ ## 使用方法介绍
36
+ ### 1. 任务创建
37
+ - LauncherAir任务创建
38
+ ```python
39
+ from cobweb import LauncherAir
40
+
41
+ # 创建启动器
42
+ app = LauncherAir(task="test", project="test")
43
+
44
+ # 设置采集种子
45
+ app.SEEDS = [{
46
+ "url": "https://www.baidu.com"
47
+ }]
48
+ ...
49
+ # 启动任务
50
+ app.start()
51
+ ```
52
+ - LauncherPro任务创建
53
+ LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
54
+ ```python
55
+ from cobweb import LauncherPro
56
+
57
+ # 创建启动器
58
+ app = LauncherPro(
59
+ task="test",
60
+ project="test"
61
+ )
62
+ ...
63
+ # 启动任务
64
+ app.start()
65
+ ```
66
+ ### 2. 自定义配置文件参数
67
+ - 通过自定义setting文件,配置文件导入字符串方式
68
+ > 默认配置文件:import cobweb.setting
69
+ > 不推荐!!!目前有bug,随缘使用...
70
+ 例如:同级目录下自定义创建了setting.py文件。
71
+ ```python
72
+ from cobweb import LauncherAir
73
+
74
+ app = LauncherAir(
75
+ task="test",
76
+ project="test",
77
+ setting="import setting"
78
+ )
79
+
80
+ ...
81
+
82
+ app.start()
83
+ ```
84
+ - 自定义修改setting中对象值
85
+ ```python
86
+ from cobweb import LauncherPro
87
+
88
+ # 创建启动器
89
+ app = LauncherPro(
90
+ task="test",
91
+ project="test",
92
+ REDIS_CONFIG = {
93
+ "host": ...,
94
+ "password":...,
95
+ "port": ...,
96
+ "db": ...
97
+ }
98
+ )
99
+ ...
100
+ # 启动任务
101
+ app.start()
102
+ ```
103
+ ### 3. 自定义请求
104
+ `@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
105
+ ```python
106
+ from typing import Union
107
+ from cobweb import LauncherAir
108
+ from cobweb.base import Seed, Request, BaseItem
109
+
110
+ app = LauncherAir(
111
+ task="test",
112
+ project="test"
113
+ )
114
+
115
+ ...
116
+
117
+ @app.request
118
+ def request(seed: Seed) -> Union[Request, BaseItem]:
119
+ # 可自定义headers,代理,构造请求参数等操作
120
+ proxies = {"http": ..., "https": ...}
121
+ yield Request(seed.url, seed, ..., proxies=proxies, timeout=15)
122
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
123
+
124
+ ...
125
+
126
+ app.start()
127
+ ```
128
+ > 默认请求方法
129
+ > def request(seed: Seed) -> Union[Request, BaseItem]:
130
+ > &nbsp;&nbsp;&nbsp;&nbsp;yield Request(seed.url, seed, timeout=5)
131
+ ### 4. 自定义下载
132
+ `@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制下载流程。
133
+ ```python
134
+ from typing import Union
135
+ from cobweb import LauncherAir
136
+ from cobweb.base import Request, Response, BaseItem
137
+
138
+ app = LauncherAir(
139
+ task="test",
140
+ project="test"
141
+ )
142
+
143
+ ...
144
+
145
+ @app.download
146
+ def download(item: Request) -> Union[BaseItem, Response]:
147
+ ...
148
+ response = ...
149
+ ...
150
+ yield Response(item.seed, response, ...) # 返回Response对象,进行解析
151
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
152
+
153
+ ...
154
+
155
+ app.start()
156
+ ```
157
+ > 默认下载方法
158
+ > def download(item: Request) -> Union[Seed, BaseItem, Response, str]:
159
+ > &nbsp;&nbsp;&nbsp;&nbsp;response = item.download()
160
+ > &nbsp;&nbsp;&nbsp;&nbsp;yield Response(item.seed, response, **item.to_dict)
161
+ ### 5. 自定义解析
162
+ 自定义解析需要由一个存储数据类和解析方法组成。存储数据类继承于BaseItem的对象,规定存储表名及字段,
163
+ 解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
164
+ ```python
165
+ from typing import Union
166
+ from cobweb import LauncherAir
167
+ from cobweb.base import Seed, Response, BaseItem
168
+
169
+ class TestItem(BaseItem):
170
+ __TABLE__ = "test_data" # 表名
171
+ __FIELDS__ = "field1, field2, field3" # 字段名
172
+
173
+ app = LauncherAir(
174
+ task="test",
175
+ project="test"
176
+ )
177
+
178
+ ...
179
+
180
+ @app.parse
181
+ def parse(item: Response) -> Union[Seed, BaseItem]:
182
+ ...
183
+ yield TestItem(item.seed, field1=..., field2=..., field3=...)
184
+ # yield Seed(...) # 构造新种子推送至消费队列
185
+
186
+ ...
187
+
188
+ app.start()
189
+ ```
190
+ > 默认解析方法
191
+ > def parse(item: Response) -> Union[Seed, BaseItem]:
192
+ > &nbsp;&nbsp;&nbsp;&nbsp;upload_item = item.to_dict
193
+ > &nbsp;&nbsp;&nbsp;&nbsp;upload_item["text"] = item.response.text
194
+ > &nbsp;&nbsp;&nbsp;&nbsp;yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
195
+ ## 待办事项(TODO)
196
+ - 队列优化完善,使用queue的机制wait()同步各模块执行?
197
+ - 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
198
+ - 去重过滤(布隆过滤器等)
199
+ - 单机防丢失
200
+ - excel、mysql、redis数据完善
201
+
202
+ > 未更新流程图!!!
203
+ ![img.png](https://image-luyuan.oss-cn-hangzhou.aliyuncs.com/image/D2388CDC-B9E5-4CE4-9F2C-7D173763B6A8.png)
204
+
205
+
@@ -0,0 +1,44 @@
1
+ cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
+ cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
+ cobweb/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
4
+ cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
+ cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
6
+ cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
7
+ cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
8
+ cobweb/base/dotting.py,sha256=lfFXXqnVP__hxlW3qH5Bnuq69KtnFaQLbcz1M8e2Ajg,1239
9
+ cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
10
+ cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
11
+ cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
12
+ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
+ cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
14
+ cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
+ cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
+ cobweb/crawlers/crawler.py,sha256=UojWdymPCwit0MOkqHsYRoe4hXyHdZhgh7-MBPfrhQo,8373
17
+ cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
+ cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
+ cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
20
+ cobweb/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
21
+ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
22
+ cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
23
+ cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
24
+ cobweb/launchers/launcher.py,sha256=sPts-xlgxoeIfl1fn1XR2XVZxLzt7He9xrYDfTHRAGo,7029
25
+ cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
26
+ cobweb/launchers/launcher_api.py,sha256=YFqCTRvKn6icBLWTR1VxkU0WEIte2F7fv_LgPkifqdo,7885
27
+ cobweb/launchers/launcher_pro.py,sha256=B5FdxvuENRL3XrMl74ENdP1uNgnZOaYCUUfBfM0t3io,7842
28
+ cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
29
+ cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
30
+ cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
31
+ cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
32
+ cobweb/schedulers/__init__.py,sha256=y7Lv_7b0zfTl0OhIONb_8u1K1C9gVlBA-xz_XG_kI9g,85
33
+ cobweb/schedulers/scheduler_api.py,sha256=pFEdS1H4zuzxwMhCV-G7CoLz-rEOPv4EVo3xZUXTyDo,2199
34
+ cobweb/schedulers/scheduler_redis.py,sha256=E5fjc3nNld8GbUhUGT7uY4smRejj2J2ZIzp2g6lhxFM,2205
35
+ cobweb/utils/__init__.py,sha256=Ev2LZZ1-S56iQYDqFZrqadizEv4Gk8Of-DraH-_WnKY,109
36
+ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
37
+ cobweb/utils/dotting.py,sha256=PgsWdM-724Jy-MZWUsaygNWV-huqLMmdLgop7gaBxlo,872
38
+ cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
39
+ cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
40
+ cobweb_launcher-1.2.41.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
+ cobweb_launcher-1.2.41.dist-info/METADATA,sha256=ZuTN2RXJGQB6qWfjgTtcvwoVrjxvS6-ho0z7V9BTR8A,6510
42
+ cobweb_launcher-1.2.41.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ cobweb_launcher-1.2.41.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
+ cobweb_launcher-1.2.41.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: bdist_wheel (0.37.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
cobweb/bbb.py DELETED
@@ -1,191 +0,0 @@
1
- # from typing import Iterable
2
- import json
3
- import time
4
- import hashlib
5
- from .log import log
6
- from .utils import struct_queue_name
7
- from collections import deque, namedtuple
8
-
9
-
10
class Queue:
    """Small double-ended buffer built on ``collections.deque``."""

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of entries currently buffered."""
        size = len(self._queue)
        return size

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add ``data`` to the buffer; falsy ``data`` is ignored.

        A ``Seed``, or any value when ``direct_insertion`` is true, is stored
        as one entry. A ``list`` or ``tuple`` is unpacked so each element
        becomes its own entry. Any other value is dropped. ``left`` selects
        the left end of the deque instead of the right end.
        """
        try:
            if not data:
                return None
            if direct_insertion or isinstance(data, Seed):
                if left:
                    self._queue.appendleft(data)
                else:
                    self._queue.append(data)
            elif isinstance(data, (list, tuple)):
                if left:
                    self._queue.extendleft(data)
                else:
                    self._queue.extend(data)
        except AttributeError as e:
            log.exception(e)

    def pop(self, left: bool = True):
        """Remove and return one entry, or ``None`` when the buffer is empty.

        Entries come from the left end by default and from the right end
        when ``left`` is false.
        """
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except IndexError:
            # Empty deque: report "nothing available" rather than raising.
            return None
        except AttributeError as e:
            log.exception(e)
            return None
55
-
56
-
57
class Seed:
    """A crawl seed: arbitrary attributes plus scheduling bookkeeping.

    ``seed_info`` may be a ``dict``, a JSON object encoded as ``str`` or
    ``bytes``, or a plain ``str``/``bytes`` value, which is stored as the
    ``url`` attribute. Keyword arguments are applied afterwards and override
    keys of the same name.

    The bookkeeping attributes ``_priority`` (clamped to 1..999), ``_version``
    (defaults to the current Unix time) and ``_retry`` are filled in when not
    already set. ``sid`` is the MD5 of the JSON-serialised payload.

    Reading an attribute that was never set returns ``None`` instead of
    raising ``AttributeError``.

    :raises TypeError: if ``seed_info`` is truthy and not str/bytes/dict.
    """

    def __init__(
            self,
            seed_info=None,
            priority=300,
            version=0,
            retry=0,
            **kwargs
    ):
        if seed_info:
            for k, v in self._parse_seed_info(seed_info).items():
                setattr(self, k, v)
        for k, v in kwargs.items():
            setattr(self, k, v)
        # __getattr__ yields None for anything unset, so each check below only
        # fills in a value the seed data did not already provide.
        if not getattr(self, "_priority"):
            self._priority = min(max(1, int(priority)), 999)
        if not getattr(self, "_version"):
            self._version = int(version) or int(time.time())
        if not getattr(self, "_retry"):
            self._retry = retry
        if not getattr(self, "sid"):
            self.init_id()

    @staticmethod
    def _parse_seed_info(seed_info):
        """Turn ``seed_info`` into a dict of attributes to set on the seed."""
        if isinstance(seed_info, dict):
            return seed_info
        if not isinstance(seed_info, (str, bytes)):
            raise TypeError(
                f"seed type error, "
                f"must be str or dict! "
                f"seed_info: {seed_info}"
            )
        try:
            item = json.loads(seed_info)
        except ValueError:
            # JSONDecodeError, or UnicodeDecodeError for undecodable bytes.
            item = None
        if isinstance(item, dict):
            return item
        # Not a JSON object (plain URL, or JSON scalar/array): keep the raw
        # text as the url. Bytes are decoded so the seed stays serialisable.
        if isinstance(seed_info, bytes):
            seed_info = seed_info.decode("utf-8")
        return {"url": seed_info}

    def init_id(self):
        """Set ``sid`` to the MD5 hex digest of the serialised payload."""
        item_string = self.format_seed
        seed_id = hashlib.md5(item_string.encode()).hexdigest()
        setattr(self, "sid", seed_id)

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def __getitem__(self, item):
        return getattr(self, item)

    def __getattr__(self, name):
        # Only reached when normal lookup fails: unset attributes read as None.
        return None

    def __str__(self):
        return json.dumps(self.__dict__, ensure_ascii=False)

    def __repr__(self):
        chars = [f"{k}={v}" for k, v in self.__dict__.items()]
        return f'{self.__class__.__name__}({", ".join(chars)})'

    @property
    def dict_seed(self):
        """Copy of the seed's attributes without the bookkeeping fields."""
        seed = self.__dict__.copy()
        del seed["_priority"]
        del seed["_version"]
        del seed["_retry"]
        return seed

    @property
    def format_seed(self):
        """``dict_seed`` serialised as JSON text."""
        return json.dumps(self.dict_seed, ensure_ascii=False)
127
-
128
-
129
class DBItem:
    """A record bound to one of the namedtuple tables registered on its class.

    Tables are registered with :meth:`init_item` and kept in the class
    attribute ``__table__``. On construction the keyword names are compared
    with each table's fields; the first table whose field set matches exactly
    holds the data as ``struct_data``.

    Instance attributes set at construction: ``_index`` (position of the
    matched table), ``struct_data``, ``db_name`` (the class name) and
    ``table_name`` (the namedtuple's name).
    """

    def __init__(self, **kwargs):
        field_names = set(kwargs)
        # A local index is used for the search: assigning self._index inside
        # the loop goes through the custom __setattr__, which raises until
        # struct_data exists.
        for index, table in enumerate(self.__class__.__table__):
            if field_names == set(table._fields):
                break
        else:
            raise Exception(
                f"{self.__class__.__name__}: no registered table matches "
                f"fields {sorted(field_names)}"
            )

        self.__setattr__("_index", index, True)
        self.__setattr__("struct_data", table(**kwargs), True)
        self.__setattr__("db_name", self.__class__.__name__, True)
        self.__setattr__("table_name", self.struct_data.__class__.__name__, True)

    @classmethod
    def init_item(cls, table_name, fields):
        """Register a table (namedtuple ``table_name`` with ``fields``).

        Also creates the class-level ``Queue`` named by
        ``struct_queue_name(cls.__name__, table_name)`` if it does not exist.
        """
        queue_name = struct_queue_name(cls.__name__, table_name)
        if getattr(cls, queue_name, None) is None:
            setattr(cls, queue_name, Queue())

        if getattr(cls, "__table__", None) is None:
            cls.__table__ = []

        table = namedtuple(table_name, fields)

        # NOTE(review): namedtuple() builds a new class on every call and
        # classes compare by identity, so this duplicate check cannot fire.
        # Left as-is so repeated registration keeps working for callers.
        if table in getattr(cls, "__table__"):
            raise Exception()
        getattr(cls, "__table__").append(table)

    def queue(self):
        """Return the class-level queue belonging to this item's table."""
        queue_name = struct_queue_name(self.db_name, self.table_name)
        return getattr(self.__class__, queue_name)

    def __setitem__(self, key, value):
        self.__setattr__(key, value)

    def __getitem__(self, item):
        """Read a field by name, or by position as before.

        :raises KeyError: if ``item`` is a string that is not a field name.
        """
        if isinstance(item, str):
            if item not in self.struct_data._fields:
                raise KeyError(item)
            return getattr(self.struct_data, item)
        return self.struct_data[item]

    def __getattr__(self, name):
        # Only reached when normal lookup fails: unset attributes read as None.
        return None

    def __setattr__(self, key, value, init=None):
        """Set an instance attribute (``init`` truthy) or a table field.

        Without ``init`` the assignment replaces field ``key`` in
        ``struct_data``. It raises if ``struct_data`` has not been set yet.
        """
        if init:
            super().__setattr__(key, value)
        elif not getattr(self, "struct_data"):
            raise Exception(f"no struct_data")
        else:
            self.__setattr__(
                "struct_data",
                self.struct_data._replace(**{key: value}),
                init=True
            )

    def __str__(self):
        return json.dumps(self.struct_data._asdict(), ensure_ascii=False)

    def __repr__(self):
        return f'{self.__class__.__name__}:{self.struct_data}'
191
-