cobweb-launcher 1.2.25__py3-none-any.whl → 3.2.18__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries; it is provided for informational purposes only.
- cobweb/__init__.py +4 -1
- cobweb/base/__init__.py +3 -3
- cobweb/base/common_queue.py +37 -16
- cobweb/base/item.py +35 -16
- cobweb/base/{log.py → logger.py} +3 -3
- cobweb/base/request.py +741 -54
- cobweb/base/response.py +380 -13
- cobweb/base/seed.py +96 -48
- cobweb/base/task_queue.py +180 -0
- cobweb/base/test.py +257 -0
- cobweb/constant.py +10 -1
- cobweb/crawlers/crawler.py +12 -155
- cobweb/db/api_db.py +3 -2
- cobweb/db/redis_db.py +117 -28
- cobweb/launchers/__init__.py +4 -3
- cobweb/launchers/distributor.py +141 -0
- cobweb/launchers/launcher.py +95 -157
- cobweb/launchers/uploader.py +68 -0
- cobweb/log_dots/__init__.py +2 -0
- cobweb/log_dots/dot.py +258 -0
- cobweb/log_dots/loghub_dot.py +53 -0
- cobweb/pipelines/__init__.py +1 -1
- cobweb/pipelines/pipeline.py +5 -55
- cobweb/pipelines/pipeline_csv.py +25 -0
- cobweb/pipelines/pipeline_loghub.py +32 -12
- cobweb/schedulers/__init__.py +1 -0
- cobweb/schedulers/scheduler.py +66 -0
- cobweb/schedulers/scheduler_with_redis.py +189 -0
- cobweb/setting.py +27 -40
- cobweb/utils/__init__.py +5 -3
- cobweb/utils/bloom.py +58 -58
- cobweb/{base → utils}/decorators.py +14 -12
- cobweb/utils/dotting.py +300 -0
- cobweb/utils/oss.py +113 -94
- cobweb/utils/tools.py +3 -15
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.18.dist-info}/METADATA +31 -43
- cobweb_launcher-3.2.18.dist-info/RECORD +44 -0
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.18.dist-info}/WHEEL +1 -1
- cobweb/crawlers/base_crawler.py +0 -144
- cobweb/crawlers/file_crawler.py +0 -98
- cobweb/launchers/launcher_air.py +0 -88
- cobweb/launchers/launcher_api.py +0 -221
- cobweb/launchers/launcher_pro.py +0 -222
- cobweb/pipelines/base_pipeline.py +0 -54
- cobweb/pipelines/loghub_pipeline.py +0 -34
- cobweb/pipelines/pipeline_console.py +0 -22
- cobweb_launcher-1.2.25.dist-info/RECORD +0 -40
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.18.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.25.dist-info → cobweb_launcher-3.2.18.dist-info}/top_level.txt +0 -0
cobweb/utils/dotting.py
ADDED
@@ -0,0 +1,300 @@
+import os
+import json
+import time
+from threading import Event
+from requests import RequestException, Response as requests_Response
+
+from cobweb.base import Queue, Request, Seed, Response, BaseItem, logger
+from aliyun.log import LogClient, LogItem, PutLogsRequest
+
+
+class LoghubDot:
+
+    def __init__(self, stop: Event, project: str, task: str) -> None:
+        self._stop = stop
+        self._queue = Queue()
+        self._client = LogClient(
+            endpoint=os.getenv("LOGHUB_ENDPOINT"),
+            accessKeyId=os.getenv("LOGHUB_ACCESS_KEY"),
+            accessKey=os.getenv("LOGHUB_SECRET_KEY")
+        )
+        self.project = project
+        self.task = task
+
+    def logging(self, topic, msg):
+        log_item = LogItem()
+        log_data = {
+            "stage": topic,
+            "message": msg,
+            "project": self.project,
+            "task": self.task,
+        }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_request_log(self, request_item: Request):
+        log_item = LogItem()
+
+        seed: Seed = request_item.seed
+        get_time = seed.params.get_time
+        start_time = seed.params.start_time
+        request_time = seed.params.request_time
+        stage_cost = request_time - start_time
+        cost = request_time - start_time
+
+        request_settings = json.dumps(
+            request_item.request_settings,
+            ensure_ascii=False, separators=(',', ':')
+        )
+
+        log_data = {
+            "stage": "request",
+            "project": self.project,
+            "task": self.task,
+            "seed": seed.to_string,
+            "request": repr(request_item),
+            "request_settings": request_settings,
+            "get_time": get_time,
+            "start_time": start_time,
+            "stage_cost": stage_cost,
+            "cost": cost,
+            "time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(request_time)),
+        }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_download_log(self, response_item: Response):
+        """
+        Build the log item for the download stage.
+
+        Args:
+            response_item: the response object
+        """
+        log_item = LogItem()
+
+        seed: Seed = response_item.seed
+        get_time = seed.params.get_time
+        start_time = seed.params.start_time
+        request_time = seed.params.request_time
+        download_time = seed.params.download_time
+        stage_cost = download_time - request_time
+        cost = download_time - start_time
+
+        log_data = {
+            "stage": "download",
+            "project": self.project,
+            "task": self.task,
+            "seed": seed.to_string,
+            "response": repr(response_item),
+            "get_time": get_time,
+            "start_time": start_time,
+            "request_time": request_time,
+            "download_time": download_time,
+            "stage_cost": stage_cost,
+            "cost": cost,
+            "proxy": seed.params.proxy or '-',
+            "time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(download_time)),
+        }
+
+        response = response_item.response
+        if isinstance(response, requests_Response):
+            log_data['request_info'] = {
+                'method': response.request.method,
+                'url': response.request.url,
+                'headers': dict(response.request.headers),
+                'body': response.request.body or "-",
+            }
+            log_data['response_info'] = {
+                "status_code": response.status_code,
+                "reason": response.reason,
+                "headers": dict(response.headers),
+                "content": response.text[:500],  # truncate the body
+                "content_type": response.headers.get('content-type', '-'),
+                "content_length": response.headers.get('content-length', '-'),
+                "server": response.headers.get('server', '-'),
+                "date": response.headers.get('date', '-'),
+            }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_parse_log(self, parse_item: BaseItem):
+        log_item = LogItem()
+
+        seed: Seed = parse_item.seed
+        get_time = seed.params.get_time
+        start_time = seed.params.start_time
+        request_time = seed.params.request_time
+        response_time = seed.params.response_time
+        parse_time = seed.params.parse_time
+
+        pre_time = request_time or response_time
+        stage_cost = parse_time - pre_time
+        cost = parse_time - start_time
+
+        log_data = {
+            "stage": "parse",
+            "project": self.project,
+            "task": self.task,
+            "seed": seed.to_string,
+            "parse": repr(parse_item),
+            "get_time": get_time,
+            "start_time": start_time,
+            "parse_time": parse_time,
+            "stage_cost": stage_cost,
+            "cost": cost,
+            "time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(parse_time)),
+        }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_http_error_log(self, seed: Seed, e: RequestException):
+        log_item = LogItem()
+
+        status_code = getattr(e.response, 'status_code', '-')
+
+        request_info = {
+            'method': getattr(e.request, 'method', '-'),
+            'url': getattr(e.request, 'url', '-'),
+            'headers': dict(getattr(e.request, 'headers', {})),
+            'body': getattr(e.request, 'body', '-'),
+        }
+
+        response_info = {
+            'status_code': getattr(e.response, 'status_code', '-'),
+            'reason': getattr(e.response, 'reason', '-'),
+            'headers': dict(getattr(e.response, 'headers', {})),
+            'content': getattr(e.response, 'text', '')[:500],
+            'content_type': e.response.headers.get('content-type', '-') if e.response else '-',
+            'content_length': e.response.headers.get('content-length', '-') if e.response else '-',
+            'server': e.response.headers.get('server', '-') if e.response else '-',
+            'date': e.response.headers.get('date', '-') if e.response else '-',
+        }
+        retry = seed.params.retry
+        get_time = seed.params.get_time
+        start_time = seed.params.start_time
+        failed_time = seed.params.failed_time
+        cost = failed_time - start_time
+
+        log_data = {
+            "stage": "http_error",
+            "project": self.project,
+            "task": self.task,
+            "seed": seed.to_string,
+            "status_code": status_code,
+            "request_info": request_info,
+            "response_info": response_info,
+            "retry": retry,
+            "proxy": seed.params.proxy or '-',
+            "exception_type": type(e).__name__,
+            "exception_message": str(e),
+            "traceback": seed.params.traceback or '-',
+            "get_time": get_time,
+            "start_time": start_time,
+            "error_time": failed_time,
+            "stage_cost": cost,
+            "cost": cost,
+            "time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(failed_time)),
+        }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_exception_log(self, seed: Seed, e: Exception):
+        log_item = LogItem()
+
+        retry = seed.params.retry
+        get_time = seed.params.get_time
+        start_time = seed.params.start_time
+        failed_time = seed.params.failed_time
+        cost = failed_time - start_time
+
+        log_data = {
+            "stage": "exception",
+            "project": self.project,
+            "task": self.task,
+            "seed": seed.to_string,
+            "retry": retry,
+            "exception_type": type(e).__name__,
+            "exception_message": str(e),
+            "traceback": seed.params.traceback or '-',
+            "proxy": seed.params.proxy or '-',
+            "get_time": get_time,
+            "start_time": start_time,
+            "error_time": failed_time,
+            "stage_cost": cost,
+            "cost": cost,
+            "time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(failed_time)),
+        }
+
+        for key, value in log_data.items():
+            if not isinstance(value, str):
+                log_data[key] = json.dumps(value, ensure_ascii=False)
+            else:
+                log_data[key] = value
+
+        contents = sorted(log_data.items())
+        log_item.set_contents(contents)
+        self._queue.push(log_item)
+
+    def _build_run(self):
+        while not self._stop.is_set():
+            try:
+                items = []
+                start_time = int(time.time())
+
+                while len(items) < 1000:
+                    log_item = self._queue.pop()
+                    if not log_item or (int(time.time()) - start_time > 10):
+                        break
+                    items.append(log_item)
+
+                if items:
+                    request = PutLogsRequest(
+                        project="databee-download-log",
+                        logstore="log",
+                        topic="cobweb",
+                        logitems=items,
+                        compress=True
+                    )
+                    self._client.put_logs(request=request)
+            except Exception as e:
+                logger.info(str(e))
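The new `LoghubDot` batches log writes: each `_build_*` method serializes one pipeline stage (request, download, parse, HTTP error, generic exception) into an `aliyun.log` `LogItem` and pushes it onto an in-process queue, and `_build_run` drains that queue, shipping up to 1,000 items per `put_logs` call or whatever accumulated within about 10 seconds. A minimal usage sketch, not taken from the package: the project/task names are placeholders, and it assumes the `LOGHUB_ENDPOINT`, `LOGHUB_ACCESS_KEY`, and `LOGHUB_SECRET_KEY` environment variables are set, since `__init__` reads its credentials from `os.getenv()`.

```python
import time
from threading import Event, Thread

from cobweb.utils.dotting import LoghubDot

stop = Event()
dot = LoghubDot(stop=stop, project="my-project", task="my-task")

# _build_run blocks until `stop` is set, so it belongs on its own thread;
# it drains the queue in batches of up to 1,000 items (or ~10 s of data).
flusher = Thread(target=dot._build_run, daemon=True)
flusher.start()

# Ad-hoc dots go through the public logging() helper; non-string values
# are JSON-encoded before being pushed onto the internal queue.
dot.logging("custom-stage", {"url": "https://example.com", "status": 200})

time.sleep(1)   # give the flush loop a chance to pick up the item
stop.set()      # lets _build_run exit after its current iteration
```

Note that `put_logs` always targets the hard-coded `databee-download-log` project and `log` logstore; the `project` and `task` constructor arguments only tag the log contents.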
cobweb/utils/oss.py
CHANGED
@@ -1,94 +1,113 @@
-
-from cobweb import setting
-from requests import Response
-from oss2 import Auth, Bucket, models, PartIterator
-from cobweb.exceptions import oss_db_exception
-from cobweb.
+#
+# from cobweb import setting
+# from requests import Response
+# from oss2 import Auth, Bucket, models, PartIterator
+# from cobweb.exceptions import oss_db_exception
+# from cobweb.utils.decorators import decorator_oss_db
+#
+#
+# class OssUtil:
+#
+#     def __init__(
+#             self,
+#             bucket=None,
+#             endpoint=None,
+#             access_key=None,
+#             secret_key=None,
+#             chunk_size=None,
+#             min_upload_size=None,
+#             **kwargs
+#     ):
+#         self.bucket = bucket or setting.OSS_BUCKET
+#         self.endpoint = endpoint or setting.OSS_ENDPOINT
+#         self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
+#         self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
+#
+#         self.failed_count = 0
+#         self._kw = kwargs
+#
+#         self._auth = Auth(
+#             access_key_id=access_key or setting.OSS_ACCESS_KEY,
+#             access_key_secret=secret_key or setting.OSS_SECRET_KEY
+#         )
+#         self._client = Bucket(
+#             auth=self._auth,
+#             endpoint=self.endpoint,
+#             bucket_name=self.bucket,
+#             **self._kw
+#         )
+#
+#     def failed(self):
+#         self.failed_count += 1
+#         if self.failed_count >= 5:
+#             self._client = Bucket(
+#                 auth=self._auth,
+#                 endpoint=self.endpoint,
+#                 bucket_name=self.bucket,
+#                 **self._kw
+#             )
+#
+#     def exists(self, key: str) -> bool:
+#         try:
+#             result = self._client.object_exists(key)
+#             self.failed_count = 0
+#             return result
+#         except Exception as e:
+#             self.failed()
+#             raise e
+#
+#     def head(self, key: str) -> models.HeadObjectResult:
+#         return self._client.head_object(key)
+#
+#     @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
+#     def init_part(self, key) -> models.InitMultipartUploadResult:
+#         """Initialize a multipart upload."""
+#         return self._client.init_multipart_upload(key)
+#
+#     @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
+#     def put(self, key, data) -> models.PutObjectResult:
+#         """Upload a whole object."""
+#         return self._client.put_object(key, data)
+#
+#     @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
+#     def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
+#         """Upload a single part."""
+#         return self._client.upload_part(key, upload_id, position, data)
+#
+#     def list_part(self, key, upload_id):  # -> List[models.ListPartsResult]:
+#         """List the uploaded parts."""
+#         return [part_info for part_info in PartIterator(self._client, key, upload_id)]
+#
+#     @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
+#     def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
+#         """Complete (merge) the multipart upload."""
+#         headers = None if parts else {"x-oss-complete-all": "yes"}
+#         return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
+#
+#     @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
+#     def append(self, key, position, data) -> models.AppendObjectResult:
+#         """Append data to an object."""
+#         return self._client.append_object(key, position, data)
+#
+#     def iter_data(self, data, chunk_size=None):
+#         chunk_size = chunk_size or self.chunk_size
+#         if isinstance(data, Response):
+#             for part_data in data.iter_content(chunk_size):
+#                 yield part_data
+#         if isinstance(data, bytes):
+#             for i in range(0, len(data), chunk_size):
+#                 yield data[i:i + chunk_size]
+#
+#     def assemble(self, ready_data, data, chunk_size=None):
+#         upload_data = b""
+#         ready_data = ready_data + data
+#         chunk_size = chunk_size or self.chunk_size
+#         if len(ready_data) >= chunk_size:
+#             upload_data = ready_data[:chunk_size]
+#             ready_data = ready_data[chunk_size:]
+#         return ready_data, upload_data
+#
+#     def content_length(self, key: str) -> int:
+#         head = self.head(key)
+#         return head.content_length
+#
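The entire `OssUtil` wrapper is commented out in 3.2.18, which removes chunked OSS upload support from this module. For reference, the buffering contract of the now-disabled `assemble` helper is easy to state on its own: it appends incoming bytes to a carry-over buffer and releases at most one fixed-size chunk per call. A self-contained sketch of the same logic (the names mirror the commented-out method; this is not part of the package):

```python
def assemble(ready_data: bytes, data: bytes, chunk_size: int):
    """Buffer `data` until at least `chunk_size` bytes have accumulated.

    Returns (remaining_buffer, upload_data); upload_data stays b"" until
    the buffer reaches chunk_size. Mirrors the commented-out
    OssUtil.assemble.
    """
    upload_data = b""
    ready_data = ready_data + data
    if len(ready_data) >= chunk_size:
        upload_data = ready_data[:chunk_size]
        ready_data = ready_data[chunk_size:]
    return ready_data, upload_data


# Feed 3 KB in 1 KB pieces with a 2 KB chunk size:
buf = b""
for piece in (b"a" * 1024, b"b" * 1024, b"c" * 1024):
    buf, out = assemble(buf, piece, chunk_size=2048)
    print(len(buf), len(out))   # -> 1024 0, then 0 2048, then 1024 0
```

Because at most one chunk is released per call, a caller holding more than `chunk_size` buffered bytes has to keep calling with empty input until `upload_data` comes back empty.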
cobweb/utils/tools.py
CHANGED
@@ -1,5 +1,6 @@
 import re
 import hashlib
+import inspect
 from typing import Union
 from importlib import import_module
 
@@ -10,18 +11,6 @@ def md5(text: Union[str, bytes]) -> str:
     return hashlib.md5(text).hexdigest()
 
 
-def build_path(site, url, file_type):
-    return f"{site}/{md5(url)}.{file_type}"
-
-
-def format_size(content_length: int) -> str:
-    units = ["KB", "MB", "GB", "TB"]
-    for i in range(4):
-        num = content_length / (1024 ** (i + 1))
-        if num < 1024:
-            return f"{round(num, 2)} {units[i]}"
-
-
 def dynamic_load_class(model_info):
     if isinstance(model_info, str):
         if "import" in model_info:
@@ -35,8 +24,7 @@ def dynamic_load_class(model_info):
         model = import_module(model_path)
         class_object = getattr(model, class_name)
         return class_object
+    elif inspect.isclass(model_info):
+        return model_info
     raise TypeError()
 
-
-# def download_log_info(item:dict) -> str:
-#     return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])