cobweb-launcher 0.1.9__tar.gz → 1.2.42__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. cobweb-launcher-1.2.42/PKG-INFO +200 -0
  2. cobweb-launcher-1.2.42/README.md +183 -0
  3. cobweb-launcher-1.2.42/cobweb/__init__.py +2 -0
  4. cobweb-launcher-1.2.42/cobweb/base/__init__.py +9 -0
  5. cobweb-launcher-1.2.42/cobweb/base/common_queue.py +30 -0
  6. cobweb-launcher-1.2.42/cobweb/base/decorators.py +40 -0
  7. cobweb-launcher-1.2.42/cobweb/base/item.py +46 -0
  8. {cobweb-launcher-0.1.9/cobweb → cobweb-launcher-1.2.42/cobweb/base}/log.py +4 -6
  9. cobweb-launcher-1.2.42/cobweb/base/request.py +82 -0
  10. cobweb-launcher-1.2.42/cobweb/base/response.py +23 -0
  11. cobweb-launcher-1.2.42/cobweb/base/seed.py +114 -0
  12. cobweb-launcher-1.2.42/cobweb/constant.py +94 -0
  13. cobweb-launcher-1.2.42/cobweb/crawlers/__init__.py +1 -0
  14. cobweb-launcher-1.2.42/cobweb/crawlers/base_crawler.py +144 -0
  15. cobweb-launcher-1.2.42/cobweb/crawlers/crawler.py +212 -0
  16. cobweb-launcher-1.2.42/cobweb/crawlers/file_crawler.py +98 -0
  17. cobweb-launcher-1.2.42/cobweb/db/__init__.py +2 -0
  18. cobweb-launcher-1.2.42/cobweb/db/api_db.py +82 -0
  19. cobweb-launcher-1.2.42/cobweb/db/redis_db.py +130 -0
  20. cobweb-launcher-1.2.42/cobweb/exceptions/__init__.py +1 -0
  21. cobweb-launcher-1.2.42/cobweb/exceptions/oss_db_exception.py +28 -0
  22. cobweb-launcher-1.2.42/cobweb/launchers/__init__.py +3 -0
  23. cobweb-launcher-1.2.42/cobweb/launchers/launcher.py +235 -0
  24. cobweb-launcher-1.2.42/cobweb/launchers/launcher_air.py +88 -0
  25. cobweb-launcher-1.2.42/cobweb/launchers/launcher_api.py +209 -0
  26. cobweb-launcher-1.2.42/cobweb/launchers/launcher_pro.py +208 -0
  27. cobweb-launcher-1.2.42/cobweb/pipelines/__init__.py +3 -0
  28. cobweb-launcher-1.2.42/cobweb/pipelines/pipeline.py +69 -0
  29. cobweb-launcher-1.2.42/cobweb/pipelines/pipeline_console.py +22 -0
  30. cobweb-launcher-1.2.42/cobweb/pipelines/pipeline_loghub.py +34 -0
  31. cobweb-launcher-1.2.42/cobweb/setting.py +74 -0
  32. cobweb-launcher-1.2.42/cobweb/utils/__init__.py +5 -0
  33. cobweb-launcher-1.2.42/cobweb/utils/bloom.py +58 -0
  34. cobweb-launcher-1.2.42/cobweb/utils/dotting.py +32 -0
  35. cobweb-launcher-1.2.42/cobweb/utils/oss.py +94 -0
  36. cobweb-launcher-1.2.42/cobweb/utils/tools.py +42 -0
  37. cobweb-launcher-1.2.42/cobweb_launcher.egg-info/PKG-INFO +200 -0
  38. cobweb-launcher-1.2.42/cobweb_launcher.egg-info/SOURCES.txt +43 -0
  39. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/cobweb_launcher.egg-info/requires.txt +1 -0
  40. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/setup.py +3 -3
  41. cobweb-launcher-1.2.42/test/test.py +29 -0
  42. cobweb-launcher-0.1.9/PKG-INFO +0 -45
  43. cobweb-launcher-0.1.9/README.md +0 -27
  44. cobweb-launcher-0.1.9/cobweb/__init__.py +0 -7
  45. cobweb-launcher-0.1.9/cobweb/bbb.py +0 -191
  46. cobweb-launcher-0.1.9/cobweb/constant.py +0 -24
  47. cobweb-launcher-0.1.9/cobweb/db/__init__.py +0 -3
  48. cobweb-launcher-0.1.9/cobweb/db/oss_db.py +0 -127
  49. cobweb-launcher-0.1.9/cobweb/db/redis_db.py +0 -213
  50. cobweb-launcher-0.1.9/cobweb/db/scheduler/__init__.py +0 -1
  51. cobweb-launcher-0.1.9/cobweb/db/scheduler/default.py +0 -8
  52. cobweb-launcher-0.1.9/cobweb/db/scheduler/textfile.py +0 -27
  53. cobweb-launcher-0.1.9/cobweb/db/storer/__init__.py +0 -1
  54. cobweb-launcher-0.1.9/cobweb/db/storer/console.py +0 -9
  55. cobweb-launcher-0.1.9/cobweb/db/storer/loghub.py +0 -54
  56. cobweb-launcher-0.1.9/cobweb/db/storer/textfile.py +0 -15
  57. cobweb-launcher-0.1.9/cobweb/decorators.py +0 -16
  58. cobweb-launcher-0.1.9/cobweb/equip/__init__.py +0 -8
  59. cobweb-launcher-0.1.9/cobweb/equip/distributed/__init__.py +0 -0
  60. cobweb-launcher-0.1.9/cobweb/equip/distributed/launcher.py +0 -219
  61. cobweb-launcher-0.1.9/cobweb/equip/distributed/models.py +0 -152
  62. cobweb-launcher-0.1.9/cobweb/equip/single/__init__.py +0 -0
  63. cobweb-launcher-0.1.9/cobweb/equip/single/launcher.py +0 -200
  64. cobweb-launcher-0.1.9/cobweb/equip/single/models.py +0 -144
  65. cobweb-launcher-0.1.9/cobweb/interface.py +0 -34
  66. cobweb-launcher-0.1.9/cobweb/task.py +0 -57
  67. cobweb-launcher-0.1.9/cobweb/utils.py +0 -90
  68. cobweb-launcher-0.1.9/cobweb_launcher.egg-info/PKG-INFO +0 -45
  69. cobweb-launcher-0.1.9/cobweb_launcher.egg-info/SOURCES.txt +0 -33
  70. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/LICENSE +0 -0
  71. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  72. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/cobweb_launcher.egg-info/top_level.txt +0 -0
  73. {cobweb-launcher-0.1.9 → cobweb-launcher-1.2.42}/setup.cfg +0 -0
@@ -0,0 +1,200 @@
1
+ Metadata-Version: 2.1
2
+ Name: cobweb-launcher
3
+ Version: 1.2.42
4
+ Summary: spider_hole
5
+ Home-page: https://github.com/Juannie-PP/cobweb
6
+ Author: Juannie-PP
7
+ Author-email: 2604868278@qq.com
8
+ License: MIT
9
+ Keywords: cobweb-launcher, cobweb
10
+ Platform: UNKNOWN
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+
16
+ # cobweb
17
+ cobweb是一个基于python的分布式爬虫调度框架,目前支持分布式爬虫,单机爬虫,支持自定义数据库,支持自定义数据存储,支持自定义数据处理等操作。
18
+
19
+ cobweb主要由3个模块和一个配置文件组成:Launcher启动器、Crawler采集器、Pipeline存储和setting配置文件。
20
+ 1. Launcher启动器:用于启动爬虫任务,控制爬虫任务的执行流程,以及数据存储和数据处理。
21
+ 框架提供两种启动器模式:LauncherAir、LauncherPro,分别对应单机爬虫模式和分布式调度模式。
22
+ 2. Crawler采集器:用于控制采集流程、数据下载和数据处理。
23
+ 框架提供了基础的采集器,用于控制采集流程、数据下载和数据处理,用户也可在创建任务时自定义请求、下载和解析方法,具体看使用方法介绍。
24
+ 3. Pipeline存储:用于存储采集到的数据,支持自定义数据存储和数据处理。框架提供了Console和Loghub两种存储方式,用户也可继承Pipeline抽象类自定义存储方式。
25
+ 4. setting配置文件:用于配置采集器、存储器、队列长度、采集线程数等参数,框架提供了默认配置,用户也可自定义配置。
26
+ ## 安装
27
+ ```
28
+ pip3 install --upgrade cobweb-launcher
29
+ ```
30
+ ## 使用方法介绍
31
+ ### 1. 任务创建
32
+ - LauncherAir任务创建
33
+ ```python
34
+ from cobweb import LauncherAir
35
+
36
+ # 创建启动器
37
+ app = LauncherAir(task="test", project="test")
38
+
39
+ # 设置采集种子
40
+ app.SEEDS = [{
41
+ "url": "https://www.baidu.com"
42
+ }]
43
+ ...
44
+ # 启动任务
45
+ app.start()
46
+ ```
47
+ - LauncherPro任务创建
48
+ LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
49
+ ```python
50
+ from cobweb import LauncherPro
51
+
52
+ # 创建启动器
53
+ app = LauncherPro(
54
+ task="test",
55
+ project="test"
56
+ )
57
+ ...
58
+ # 启动任务
59
+ app.start()
60
+ ```
61
+ ### 2. 自定义配置文件参数
62
+ - 通过自定义setting文件,配置文件导入字符串方式
63
+ > 默认配置文件:import cobweb.setting
64
+ > 不推荐!!!目前有bug,随缘使用...
65
+ 例如:同级目录下自定义创建了setting.py文件。
66
+ ```python
67
+ from cobweb import LauncherAir
68
+
69
+ app = LauncherAir(
70
+ task="test",
71
+ project="test",
72
+ setting="import setting"
73
+ )
74
+
75
+ ...
76
+
77
+ app.start()
78
+ ```
79
+ - 自定义修改setting中对象值
80
+ ```python
81
+ from cobweb import LauncherPro
82
+
83
+ # 创建启动器
84
+ app = LauncherPro(
85
+ task="test",
86
+ project="test",
87
+ REDIS_CONFIG = {
88
+ "host": ...,
89
+ "password":...,
90
+ "port": ...,
91
+ "db": ...
92
+ }
93
+ )
94
+ ...
95
+ # 启动任务
96
+ app.start()
97
+ ```
98
+ ### 3. 自定义请求
99
+ `@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
100
+ ```python
101
+ from typing import Union
102
+ from cobweb import LauncherAir
103
+ from cobweb.base import Seed, Request, BaseItem
104
+
105
+ app = LauncherAir(
106
+ task="test",
107
+ project="test"
108
+ )
109
+
110
+ ...
111
+
112
+ @app.request
113
+ def request(seed: Seed) -> Union[Request, BaseItem]:
114
+ # 可自定义headers,代理,构造请求参数等操作
115
+ proxies = {"http": ..., "https": ...}
116
+ yield Request(seed.url, seed, ..., proxies=proxies, timeout=15)
117
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
118
+
119
+ ...
120
+
121
+ app.start()
122
+ ```
123
+ > 默认请求方法
124
+ > def request(seed: Seed) -> Union[Request, BaseItem]:
125
+ >     yield Request(seed.url, seed, timeout=5)
126
+ ### 4. 自定义下载
127
+ `@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制请求参数。
128
+ ```python
129
+ from typing import Union
130
+ from cobweb import LauncherAir
131
+ from cobweb.base import Request, Response, BaseItem
132
+
133
+ app = LauncherAir(
134
+ task="test",
135
+ project="test"
136
+ )
137
+
138
+ ...
139
+
140
+ @app.download
141
+ def download(item: Request) -> Union[BaseItem, Response]:
142
+ ...
143
+ response = ...
144
+ ...
145
+ yield Response(item.seed, response, ...) # 返回Response对象,进行解析
146
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
147
+
148
+ ...
149
+
150
+ app.start()
151
+ ```
152
+ > 默认下载方法
153
+ > def download(item: Request) -> Union[Seed, BaseItem, Response, str]:
154
+ >     response = item.download()
155
+ >     yield Response(item.seed, response, **item.to_dict)
156
+ ### 5. 自定义解析
157
+ 自定义解析需要由一个存储数据类和解析方法组成。存储数据类继承于BaseItem的对象,规定存储表名及字段,
158
+ 解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
159
+ ```python
160
+ from typing import Union
161
+ from cobweb import LauncherAir
162
+ from cobweb.base import Seed, Response, BaseItem
163
+
164
+ class TestItem(BaseItem):
165
+ __TABLE__ = "test_data" # 表名
166
+ __FIELDS__ = "field1, field2, field3" # 字段名
167
+
168
+ app = LauncherAir(
169
+ task="test",
170
+ project="test"
171
+ )
172
+
173
+ ...
174
+
175
+ @app.parse
176
+ def parse(item: Response) -> Union[Seed, BaseItem]:
177
+ ...
178
+ yield TestItem(item.seed, field1=..., field2=..., field3=...)
179
+ # yield Seed(...) # 构造新种子推送至消费队列
180
+
181
+ ...
182
+
183
+ app.start()
184
+ ```
185
+ > 默认解析方法
186
+ > def parse(item: Response) -> Union[Seed, BaseItem]:
187
+ >     upload_item = item.to_dict
188
+ >     upload_item["text"] = item.response.text
189
+ >     yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
190
+ ## need deal
191
+ - 队列优化完善,使用queue的机制wait()同步各模块执行?
192
+ - 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
193
+ - 去重过滤(布隆过滤器等)
194
+ - 单机防丢失
195
+ - excel、mysql、redis数据完善
196
+
197
+ > 未更新流程图!!!
198
+ ![img.png](https://image-luyuan.oss-cn-hangzhou.aliyuncs.com/image/D2388CDC-B9E5-4CE4-9F2C-7D173763B6A8.png)
199
+
200
+
@@ -0,0 +1,183 @@
1
+ # cobweb
2
+ cobweb是一个基于python的分布式爬虫调度框架,目前支持分布式爬虫,单机爬虫,支持自定义数据库,支持自定义数据存储,支持自定义数据处理等操作。
3
+
4
+ cobweb主要由3个模块和一个配置文件组成:Launcher启动器、Crawler采集器、Pipeline存储和setting配置文件。
5
+ 1. Launcher启动器:用于启动爬虫任务,控制爬虫任务的执行流程,以及数据存储和数据处理。
6
+ 框架提供两种启动器模式:LauncherAir、LauncherPro,分别对应单机爬虫模式和分布式调度模式。
7
+ 2. Crawler采集器:用于控制采集流程、数据下载和数据处理。
8
+ 框架提供了基础的采集器,用于控制采集流程、数据下载和数据处理,用户也可在创建任务时自定义请求、下载和解析方法,具体看使用方法介绍。
9
+ 3. Pipeline存储:用于存储采集到的数据,支持自定义数据存储和数据处理。框架提供了Console和Loghub两种存储方式,用户也可继承Pipeline抽象类自定义存储方式。
10
+ 4. setting配置文件:用于配置采集器、存储器、队列长度、采集线程数等参数,框架提供了默认配置,用户也可自定义配置。
11
+ ## 安装
12
+ ```
13
+ pip3 install --upgrade cobweb-launcher
14
+ ```
15
+ ## 使用方法介绍
16
+ ### 1. 任务创建
17
+ - LauncherAir任务创建
18
+ ```python
19
+ from cobweb import LauncherAir
20
+
21
+ # 创建启动器
22
+ app = LauncherAir(task="test", project="test")
23
+
24
+ # 设置采集种子
25
+ app.SEEDS = [{
26
+ "url": "https://www.baidu.com"
27
+ }]
28
+ ...
29
+ # 启动任务
30
+ app.start()
31
+ ```
32
+ - LauncherPro任务创建
33
+ LauncherPro依赖redis实现分布式调度,使用LauncherPro启动器需要完成环境变量的配置或自定义setting文件中的redis配置,如何配置查看`2. 自定义配置文件参数`
34
+ ```python
35
+ from cobweb import LauncherPro
36
+
37
+ # 创建启动器
38
+ app = LauncherPro(
39
+ task="test",
40
+ project="test"
41
+ )
42
+ ...
43
+ # 启动任务
44
+ app.start()
45
+ ```
46
+ ### 2. 自定义配置文件参数
47
+ - 通过自定义setting文件,配置文件导入字符串方式
48
+ > 默认配置文件:import cobweb.setting
49
+ > 不推荐!!!目前有bug,随缘使用...
50
+ 例如:同级目录下自定义创建了setting.py文件。
51
+ ```python
52
+ from cobweb import LauncherAir
53
+
54
+ app = LauncherAir(
55
+ task="test",
56
+ project="test",
57
+ setting="import setting"
58
+ )
59
+
60
+ ...
61
+
62
+ app.start()
63
+ ```
64
+ - 自定义修改setting中对象值
65
+ ```python
66
+ from cobweb import LauncherPro
67
+
68
+ # 创建启动器
69
+ app = LauncherPro(
70
+ task="test",
71
+ project="test",
72
+ REDIS_CONFIG = {
73
+ "host": ...,
74
+ "password":...,
75
+ "port": ...,
76
+ "db": ...
77
+ }
78
+ )
79
+ ...
80
+ # 启动任务
81
+ app.start()
82
+ ```
83
+ ### 3. 自定义请求
84
+ `@app.request`使用装饰器封装自定义请求方法,作用于发生请求前的操作,返回Request对象或继承于BaseItem对象,用于控制请求参数。
85
+ ```python
86
+ from typing import Union
87
+ from cobweb import LauncherAir
88
+ from cobweb.base import Seed, Request, BaseItem
89
+
90
+ app = LauncherAir(
91
+ task="test",
92
+ project="test"
93
+ )
94
+
95
+ ...
96
+
97
+ @app.request
98
+ def request(seed: Seed) -> Union[Request, BaseItem]:
99
+ # 可自定义headers,代理,构造请求参数等操作
100
+ proxies = {"http": ..., "https": ...}
101
+ yield Request(seed.url, seed, ..., proxies=proxies, timeout=15)
102
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
103
+
104
+ ...
105
+
106
+ app.start()
107
+ ```
108
+ > 默认请求方法
109
+ > def request(seed: Seed) -> Union[Request, BaseItem]:
110
+ >     yield Request(seed.url, seed, timeout=5)
111
+ ### 4. 自定义下载
112
+ `@app.download`使用装饰器封装自定义下载方法,作用于发生请求时的操作,返回Response对象或继承于BaseItem对象,用于控制请求参数。
113
+ ```python
114
+ from typing import Union
115
+ from cobweb import LauncherAir
116
+ from cobweb.base import Request, Response, BaseItem
117
+
118
+ app = LauncherAir(
119
+ task="test",
120
+ project="test"
121
+ )
122
+
123
+ ...
124
+
125
+ @app.download
126
+ def download(item: Request) -> Union[BaseItem, Response]:
127
+ ...
128
+ response = ...
129
+ ...
130
+ yield Response(item.seed, response, ...) # 返回Response对象,进行解析
131
+ # yield xxxItem(seed, ...) # 跳过请求和解析直接进入数据存储流程
132
+
133
+ ...
134
+
135
+ app.start()
136
+ ```
137
+ > 默认下载方法
138
+ > def download(item: Request) -> Union[Seed, BaseItem, Response, str]:
139
+ >     response = item.download()
140
+ >     yield Response(item.seed, response, **item.to_dict)
141
+ ### 5. 自定义解析
142
+ 自定义解析需要由一个存储数据类和解析方法组成。存储数据类继承于BaseItem的对象,规定存储表名及字段,
143
+ 解析方法返回继承于BaseItem的对象,yield返回进行控制数据存储流程。
144
+ ```python
145
+ from typing import Union
146
+ from cobweb import LauncherAir
147
+ from cobweb.base import Seed, Response, BaseItem
148
+
149
+ class TestItem(BaseItem):
150
+ __TABLE__ = "test_data" # 表名
151
+ __FIELDS__ = "field1, field2, field3" # 字段名
152
+
153
+ app = LauncherAir(
154
+ task="test",
155
+ project="test"
156
+ )
157
+
158
+ ...
159
+
160
+ @app.parse
161
+ def parse(item: Response) -> Union[Seed, BaseItem]:
162
+ ...
163
+ yield TestItem(item.seed, field1=..., field2=..., field3=...)
164
+ # yield Seed(...) # 构造新种子推送至消费队列
165
+
166
+ ...
167
+
168
+ app.start()
169
+ ```
170
+ > 默认解析方法
171
+ > def parse(item: Response) -> Union[Seed, BaseItem]:
172
+ >     upload_item = item.to_dict
173
+ >     upload_item["text"] = item.response.text
174
+ >     yield ConsoleItem(item.seed, data=json.dumps(upload_item, ensure_ascii=False))
175
+ ## need deal
176
+ - 队列优化完善,使用queue的机制wait()同步各模块执行?
177
+ - 日志功能完善,单机模式调度和保存数据写入文件,结构化输出各任务日志
178
+ - 去重过滤(布隆过滤器等)
179
+ - 单机防丢失
180
+ - excel、mysql、redis数据完善
181
+
182
+ > 未更新流程图!!!
183
+ ![img.png](https://image-luyuan.oss-cn-hangzhou.aliyuncs.com/image/D2388CDC-B9E5-4CE4-9F2C-7D173763B6A8.png)
@@ -0,0 +1,2 @@
1
+ from .launchers import LauncherAir, LauncherPro, LauncherApi
2
+ from .constant import CrawlerModel
@@ -0,0 +1,9 @@
1
+ from .common_queue import Queue
2
+ from .response import Response
3
+ from .request import Request
4
+ from .item import BaseItem, ConsoleItem
5
+ from .seed import Seed
6
+
7
+ from .log import logger
8
+ from .decorators import decorator_oss_db
9
+
@@ -0,0 +1,30 @@
1
+ from collections import deque
2
+
3
+
4
class Queue:
    """Small double-ended queue built on ``collections.deque``.

    ``push`` and ``pop`` never raise for an empty queue or falsy input;
    they report "nothing to do" by returning ``None``.
    """

    def __init__(self):
        self._queue = deque()

    @property
    def length(self) -> int:
        """Number of items currently stored in the queue."""
        return len(self._queue)

    def push(self, data, left: bool = False, direct_insertion: bool = False):
        """Add ``data`` to the queue.

        :param data: item to store; falsy values (``None``, ``[]``, ``0`` ...)
            are ignored.
        :param left: insert at the left end instead of the right end.
        :param direct_insertion: when true, a list/tuple is stored as one
            single item instead of being unpacked element by element.
        :return: always ``None``.

        Note: unpacking with ``left=True`` uses ``deque.extendleft``, which
        inserts the elements one by one and therefore reverses their order.
        """
        try:
            if not data:
                return None
            unpack = not direct_insertion and isinstance(data, (list, tuple))
            if unpack and left:
                self._queue.extendleft(data)
            elif unpack:
                self._queue.extend(data)
            elif left:
                self._queue.appendleft(data)
            else:
                self._queue.append(data)
        except AttributeError:
            # Best-effort by design: an AttributeError raised while pushing
            # is ignored rather than propagated to the caller.
            pass
        return None

    def pop(self, left: bool = True):
        """Remove and return one item, or ``None`` when nothing can be popped.

        :param left: take the item from the left end (default) or, when
            false, from the right end.
        """
        try:
            if left:
                return self._queue.popleft()
            return self._queue.pop()
        except (IndexError, AttributeError):
            # IndexError: the deque is empty.
            return None
@@ -0,0 +1,40 @@
1
+ from functools import wraps
2
+
3
+
4
+ # def check_redis_status(func):
5
+ # @wraps(func)
6
+ # def wrapper(*args, **kwargs):
7
+ # try:
8
+ # result = func(*args, **kwargs)
9
+ # except Exception:
10
+ # result = False
11
+ # return result
12
+ #
13
+ # return wrapper
14
+
15
+
16
def decorator_oss_db(exception, retries=3):
    """Build a decorator that retries a call and wraps the final failure.

    The wrapped callable is invoked up to ``retries`` times. The first
    successful call returns its result immediately. If every attempt raises,
    the last error is re-raised wrapped in ``exception``.

    :param exception: exception class (or callable) used to wrap the last
        error; it is called with the caught exception as its only argument.
    :param retries: maximum number of attempts. With ``retries <= 0`` the
        wrapped callable is never invoked and ``None`` is returned.
    :return: a decorator for callables whose first positional argument is
        ``callback_func``.
    :raises exception: when all ``retries`` attempts have failed.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(callback_func, *args, **kwargs):
            last_error = None
            for _ in range(retries):
                try:
                    return func(callback_func, *args, **kwargs)
                except Exception as e:
                    # Remember the failure and try again; only the last one
                    # is reported once the attempts are exhausted.
                    last_error = e

            if last_error is not None:
                # The retry budget comes from ``retries`` rather than a fixed
                # attempt count, so every configured attempt is actually used.
                raise exception(last_error) from last_error
            return None

        return wrapper

    return decorator
38
+
39
+
40
+
@@ -0,0 +1,46 @@
1
+ from .seed import Seed
2
+ from collections import namedtuple
3
+
4
+
5
class Item(type):
    """Metaclass that gives every item class its own ``Data`` namedtuple.

    For each class other than the one named ``BaseItem``, a namedtuple type
    is built from the class attributes ``__TABLE__`` (type name) and
    ``__FIELDS__`` (field names) and stored on the class as ``Data``.
    """

    def __new__(cls, name, bases, dct):
        new_class_instance = type.__new__(cls, name, bases, dct)
        if name != "BaseItem":
            table = getattr(new_class_instance, "__TABLE__")
            fields = getattr(new_class_instance, "__FIELDS__")
            new_class_instance.Data = namedtuple(table, fields)
        return new_class_instance


class BaseItem(metaclass=Item):
    """Base class for storable items; subclasses declare table and fields.

    Subclasses set ``__TABLE__`` to the table name and ``__FIELDS__`` to the
    field names in any form ``collections.namedtuple`` accepts.
    ``BaseItem`` itself has no ``Data`` type and is not meant to be
    instantiated directly.
    """

    __TABLE__ = ""
    __FIELDS__ = ""

    def __init__(self, seed: "Seed", **kwargs):
        """Split ``kwargs`` into declared fields and extra attributes.

        :param seed: the seed this item was produced from.
        :param kwargs: keys that exactly match a declared field are stored in
            ``self.data``; every other key becomes an instance attribute.
        :raises TypeError: from the namedtuple constructor when a declared
            field is missing from ``kwargs``.
        """
        self.seed = seed

        # Compare against the parsed field names, not the raw __FIELDS__
        # string: a substring test would mistake e.g. "field" for a column
        # of "field1, field2" and break the namedtuple construction.
        field_names = self.Data._fields

        data = {}
        for key, value in kwargs.items():
            if key in field_names:
                data[key] = value
            else:
                setattr(self, key, value)

        self.data = self.Data(**data)

    @property
    def to_dict(self):
        """The declared field values as a mapping of field name to value."""
        return self.data._asdict()

    @property
    def table(self):
        """The table name, i.e. the name of the generated ``Data`` type."""
        return self.Data.__name__


class ConsoleItem(BaseItem):
    """Item with a single ``data`` field stored under the ``console`` table."""

    __TABLE__ = "console"
    __FIELDS__ = "data"
46
+
@@ -52,6 +52,7 @@ class ColorCodes:
52
52
 
53
53
 
54
54
  class Log:
55
+ logging.getLogger('oss2.api').setLevel(logging.WARNING)
55
56
  logging.basicConfig(
56
57
  level=logging.INFO,
57
58
  format=f'%(asctime)s %(name)s [%(filename)s:%(lineno)d %(funcName)s]'
@@ -87,10 +88,7 @@ class Log:
87
88
  return self.__class__.log.critical
88
89
 
89
90
 
90
- log = Log()
91
- # log.info("This text will be bold!")
92
- # print(ColorCodes.BOLD + "This text will be bold!" + ColorCodes.RESET)
93
- # print(ColorCodes.UNDERLINE + ColorCodes.BLUE + "This text will be underlined and blue!" + ColorCodes.RESET)
94
- # print(ColorCodes.BG_YELLOW + ColorCodes.RED + "This text will have a yellow background and red text!" + ColorCodes.RESET)
95
- # print(ColorCodes.BLINK + "This text will blink (if supported by the terminal)!" + ColorCodes.RESET)
91
+ logger = Log()
92
+
93
+
96
94
 
@@ -0,0 +1,82 @@
1
+ import random
2
+ import requests
3
+
4
+
5
class Request:
    """Description of one HTTP request, executed through ``requests.request``.

    Keyword arguments whose names appear in ``__REQUEST_ATTRS__`` are
    collected in ``request_setting`` and forwarded to ``requests.request``;
    all other keyword arguments become plain instance attributes.
    """

    # Keyword names that are passed through to ``requests.request``.
    __REQUEST_ATTRS__ = {
        "params",
        "headers",
        "cookies",
        "data",
        "json",
        "files",
        "auth",
        "timeout",
        "proxies",
        "hooks",
        "stream",
        "verify",
        "cert",
        "allow_redirects",
    }

    def __init__(
            self,
            url,
            seed,
            random_ua=True,
            check_status_code=True,
            **kwargs
    ):
        """Store the request parameters.

        :param url: target URL.
        :param seed: the seed this request belongs to.
        :param random_ua: when true, add a random ``user-agent`` header
            unless the given headers already contain one.
        :param check_status_code: when true, ``download`` calls
            ``raise_for_status`` on the response.
        :param kwargs: request options (see ``__REQUEST_ATTRS__``) and any
            additional attributes, e.g. ``method``.
        """
        self.url = url
        self.seed = seed
        self.check_status_code = check_status_code
        self.request_setting = {}

        request_attrs = self.__class__.__REQUEST_ATTRS__
        for k, v in kwargs.items():
            if k in request_attrs:
                self.request_setting[k] = v
            else:
                setattr(self, k, v)

        # Without an explicit method, a request carrying a body is a POST.
        if not getattr(self, "method", None):
            has_body = self.request_setting.get("data") or self.request_setting.get("json")
            self.method = "POST" if has_body else "GET"

        if random_ua:
            self._build_header()

    @property
    def _random_ua(self) -> str:
        """A freshly randomised desktop browser user-agent string."""
        v1 = random.randint(4, 15)
        v2 = random.randint(3, 11)
        v3 = random.randint(1, 16)
        v4 = random.randint(533, 605)
        v5 = random.randint(1000, 6000)
        v6 = random.randint(10, 80)
        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
        return user_agent

    def _build_header(self) -> None:
        """Make sure ``request_setting["headers"]`` carries a user-agent.

        Headers supplied by the caller are copied before the user-agent is
        added, so a headers dict shared between several requests is never
        modified and each request gets its own random user-agent.
        """
        headers = self.request_setting.get("headers")
        if not headers:
            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
        elif "user-agent" not in {key.lower() for key in headers.keys()}:
            own_headers = dict(headers)
            own_headers["user-agent"] = self._random_ua
            self.request_setting["headers"] = own_headers

    def download(self) -> "requests.Response":
        """Send the request and return the ``requests`` response.

        :raises requests.HTTPError: via ``raise_for_status`` when
            ``check_status_code`` is true and the status is an error.
        """
        response = requests.request(self.method, self.url, **self.request_setting)
        if self.check_status_code:
            response.raise_for_status()
        return response

    @property
    def to_dict(self):
        """Instance attributes except url, seed, check_status_code and request_setting."""
        _dict = self.__dict__.copy()
        _dict.pop('url')
        _dict.pop('seed')
        _dict.pop('check_status_code')
        _dict.pop('request_setting')
        return _dict
81
+
82
+
@@ -0,0 +1,23 @@
1
+
2
+
3
class Response:
    """Pairs a seed with the response obtained for it, plus extra attributes."""

    def __init__(
            self,
            seed,
            response,
            **kwargs
    ):
        """Store the seed and response; every keyword becomes an attribute.

        :param seed: the seed the response belongs to.
        :param response: the downloaded response object.
        :param kwargs: additional attributes to attach to the instance.
        """
        self.seed = seed
        self.response = response

        for k, v in kwargs.items():
            setattr(self, k, v)

    @property
    def to_dict(self):
        """Instance attributes without ``seed`` and ``response``, as a new dict."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("seed", "response")
        }
23
+