cobweb-launcher 3.1.29__py3-none-any.whl → 3.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/base/request.py +124 -2
- {cobweb_launcher-3.1.29.dist-info → cobweb_launcher-3.1.31.dist-info}/METADATA +1 -1
- {cobweb_launcher-3.1.29.dist-info → cobweb_launcher-3.1.31.dist-info}/RECORD +6 -6
- {cobweb_launcher-3.1.29.dist-info → cobweb_launcher-3.1.31.dist-info}/LICENSE +0 -0
- {cobweb_launcher-3.1.29.dist-info → cobweb_launcher-3.1.31.dist-info}/WHEEL +0 -0
- {cobweb_launcher-3.1.29.dist-info → cobweb_launcher-3.1.31.dist-info}/top_level.txt +0 -0
cobweb/base/request.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
import random
|
2
2
|
import logging
|
3
|
+
import time
|
4
|
+
|
3
5
|
import requests
|
4
6
|
|
5
7
|
from urllib.parse import urlparse
|
@@ -215,7 +217,7 @@ class FileTypeDetector:
|
|
215
217
|
result['methods_used'].append('extension')
|
216
218
|
|
217
219
|
# 3. 如果前两种方法不确定,使用文件签名检测
|
218
|
-
if result['confidence'] in ['unknown', 'medium']:
|
220
|
+
if data and result['confidence'] in ['unknown', 'medium']:
|
219
221
|
signature_detected = self.detect_by_signature(data)
|
220
222
|
if signature_detected:
|
221
223
|
if not result['detected_type']:
|
@@ -309,6 +311,8 @@ class Request:
|
|
309
311
|
"""
|
310
312
|
self.scheme = None
|
311
313
|
self.netloc = None
|
314
|
+
self.detector_info = None
|
315
|
+
self.content_length = None
|
312
316
|
self._validate_url(url)
|
313
317
|
|
314
318
|
self.url = url
|
@@ -408,6 +412,124 @@ class Request:
|
|
408
412
|
"""下载方法,为了向后兼容性保留"""
|
409
413
|
return self.execute()
|
410
414
|
|
415
|
+
def normal_download(self, file_type_detect: bool = True) -> bytes:
    """Download the whole resource with a single request and return its body.

    Args:
        file_type_detect: When true and ``self.detector_info`` is still
            empty, run ``FileTypeDetector.get_detailed_info`` on the URL,
            the response ``content-type`` header and the first 64 bytes of
            the body, and store the result in ``self.detector_info``.

    Returns:
        The complete response body as bytes.

    Raises:
        requests.HTTPError: If ``self.check_status_code`` is truthy and
            ``raise_for_status()`` rejects the response.
    """
    # Shallow copy so that dropping 'stream' does not alter
    # self.request_settings; the body is always read eagerly here.
    settings = self.request_settings.copy()
    settings.pop('stream', None)

    response = requests.request(
        method=self.method,
        url=self.url,
        **settings
    )
    try:
        if self.check_status_code:
            response.raise_for_status()

        content_type = response.headers.get('content-type')
        result = response.content
    finally:
        # Release the connection even when raise_for_status() raises.
        response.close()

    if file_type_detect and not self.detector_info:
        # Only the leading bytes are handed to the detector.
        detector = FileTypeDetector()
        self.detector_info = detector.get_detailed_info(
            url=self.url, content_type=content_type, data=result[:64]
        )

    return result
|
441
|
+
|
442
|
+
def range_download(self, start: int = 0, chunk_size: int = 1024, file_type_detect: bool = True) -> "Iterator[bytes]":
    """Download the resource in chunks using HTTP Range requests.

    This is a generator: nothing is requested until it is iterated.
    ``self.content_length`` must already be populated (for example by
    ``detect_accept_ranges``), because it bounds the download loop.

    Args:
        start: Byte offset at which to begin downloading.
        chunk_size: Maximum number of bytes requested per Range request.
        file_type_detect: When true and ``self.detector_info`` is still
            empty, first request bytes 0-63 and store the result of
            ``FileTypeDetector.get_detailed_info`` in ``self.detector_info``.

    Yields:
        The body of each successful (206) Range response, in order.

    Raises:
        ValueError: If ``chunk_size`` is not positive or
            ``self.content_length`` has not been set.
        requests.exceptions.HTTPError: If a chunk still fails after the
            retry budget is exhausted.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size 必须为正整数")
    if self.content_length is None:
        raise ValueError("content_length 未知,请先调用 detect_accept_ranges()")

    total_size = self.content_length
    downloaded = start
    retry_count = 0
    max_retries = 3

    settings = self.request_settings.copy()
    settings.pop('stream', None)
    # The copy above is shallow: give this call its own headers dict so the
    # Range header never leaks into self.request_settings["headers"].
    headers = dict(settings.get("headers") or {})
    settings["headers"] = headers

    if file_type_detect and not self.detector_info:
        headers['Range'] = "bytes=0-63"
        probe = requests.request(
            method=self.method,
            url=self.url,
            **settings
        )
        try:
            content_type = probe.headers.get("Content-Type")
            # A server that ignores Range returns the whole body; keep only
            # the leading bytes for the detector.
            head_data = probe.content[:64]
        finally:
            probe.close()

        detector = FileTypeDetector()
        self.detector_info = detector.get_detailed_info(
            url=self.url, content_type=content_type, data=head_data
        )

    while downloaded < total_size:
        range_end = min(downloaded + chunk_size - 1, total_size - 1)
        headers['Range'] = f"bytes={downloaded}-{range_end}"

        response = requests.request(
            method=self.method,
            url=self.url,
            **settings
        )
        try:
            status_code = response.status_code
            chunk_data = response.content if status_code == 206 else b""
        finally:
            # Close on every path (success, retry, break, raise).
            response.close()

        if status_code == 206 and chunk_data:
            yield chunk_data
            downloaded += len(chunk_data)
            retry_count = 0  # reset the retry budget after a good chunk
            logging.info(f"下载进度: {downloaded}/{total_size} ({downloaded / total_size * 100:.1f}%)")
            continue

        if status_code == 416:  # Range Not Satisfiable
            logging.info("Range请求超出范围")
            break

        # Any other status, or an empty 206 body that would never advance
        # the offset, counts as a failed attempt.
        logging.debug(f"Range请求失败: {status_code}")
        if retry_count >= max_retries:
            # Stop instead of re-requesting the same range forever.
            raise requests.exceptions.HTTPError(
                f"Range请求失败: {status_code} (bytes={downloaded}-{range_end})",
                response=response,
            )
        retry_count += 1
        time.sleep(0.5)
|
497
|
+
|
498
|
+
def detect_accept_ranges(self) -> bool:
    """Probe whether the server supports HTTP Range requests for ``self.url``.

    Sends a HEAD request, stores the reported size in
    ``self.content_length`` (0 when the header is missing or malformed),
    then confirms support with a real ``bytes=0-63`` request.

    Returns:
        True only if the size is known, the server does not report
        ``Accept-Ranges: none`` (or omit the header), and the probe request
        answers 206 with the expected number of bytes.

    Raises:
        ValueError: If the HEAD request does not return 200 or 206.
    """
    settings = self.request_settings.copy()
    settings.pop('stream', None)

    # NOTE(review): requests.head() does not follow redirects unless
    # 'allow_redirects' is passed in the settings, so a redirecting URL
    # ends up in the ValueError branch below - confirm this is intended.
    head_response = requests.head(self.url, **settings)
    try:
        status_code = head_response.status_code
        raw_length = head_response.headers.get('content-length', 0)
        # A missing header becomes the string "none", which is treated the
        # same as an explicit "Accept-Ranges: none".
        accept_ranges = str(head_response.headers.get('accept-ranges')).lower()
    finally:
        head_response.close()

    if status_code not in [200, 206]:
        logging.error(f"HEAD请求失败: {status_code}")
        raise ValueError("HTTP状态码错误")

    try:
        self.content_length = int(raw_length)
    except (TypeError, ValueError):
        # Malformed Content-Length: treat the size as unknown.
        self.content_length = 0

    # Without a known size, or with ranges explicitly refused, the caller
    # has to fall back to a normal download.
    if accept_ranges == 'none' or not self.content_length:
        return False

    # 'settings' is a shallow copy: build a separate headers dict so the
    # probe's Range header is not written into self.request_settings.
    probe_settings = settings.copy()
    probe_headers = dict(probe_settings.get("headers") or {})
    probe_headers['Range'] = "bytes=0-63"
    probe_settings["headers"] = probe_headers

    test_response = requests.request(
        method=self.method,
        url=self.url,
        **probe_settings
    )
    try:
        if test_response.status_code != 206:
            return False
        actual_length = len(test_response.content)
    finally:
        test_response.close()

    # A resource shorter than 64 bytes legitimately returns fewer bytes.
    expected_length = min(64, self.content_length)
    if actual_length != expected_length:
        logging.debug(f"⚠️ Range请求返回长度不匹配: 期望{expected_length}, 实际{actual_length}")
        return False

    return True
|
532
|
+
|
411
533
|
def detect_file_type(self) -> Dict[str, Any]:
|
412
534
|
"""
|
413
535
|
检测文件类型。
|
@@ -452,7 +574,7 @@ class Request:
|
|
452
574
|
|
453
575
|
@property
|
454
576
|
def to_dict(self) -> Dict[str, Any]:
|
455
|
-
excluded_keys = {"request_settings"}
|
577
|
+
excluded_keys = {"request_settings", "url", "seed", "method", "check_status_code"}
|
456
578
|
result = {
|
457
579
|
key: value for key, value in self.__dict__.items()
|
458
580
|
if not key.startswith('_') and key not in excluded_keys
|
@@ -5,7 +5,7 @@ cobweb/base/__init__.py,sha256=NanSxJr0WsqjqCNOQAlxlkt-vQEsERHYBzacFC057oI,222
|
|
5
5
|
cobweb/base/common_queue.py,sha256=hYdaM70KrWjvACuLKaGhkI2VqFCnd87NVvWzmnfIg8Q,1423
|
6
6
|
cobweb/base/item.py,sha256=1bS4U_3vzI2jzSSeoEbLoLT_5CfgLPopWiEYtaahbvw,1674
|
7
7
|
cobweb/base/logger.py,sha256=Vsg1bD4LXW91VgY-ANsmaUu-mD88hU_WS83f7jX3qF8,2011
|
8
|
-
cobweb/base/request.py,sha256=
|
8
|
+
cobweb/base/request.py,sha256=WQq35IjRlUO_3bBBvgyuFb_9vyy7WjK-CJBkcYbEFwg,20597
|
9
9
|
cobweb/base/response.py,sha256=L3sX2PskV744uz3BJ8xMuAoAfGCeh20w8h0Cnd9vLo0,11377
|
10
10
|
cobweb/base/seed.py,sha256=ddaWCq_KaWwpmPl1CToJlfCxEEnoJ16kjo6azJs9uls,5000
|
11
11
|
cobweb/base/task_queue.py,sha256=2MqGpHGNmK5B-kqv7z420RWyihzB9zgDHJUiLsmtzOI,6402
|
@@ -34,8 +34,8 @@ cobweb/utils/decorators.py,sha256=ZwVQlz-lYHgXgKf9KRCp15EWPzTDdhoikYUNUCIqNeM,11
|
|
34
34
|
cobweb/utils/dotting.py,sha256=L-jGSApdnFIP4jUWH6p5qIme0aJ1vyDrxAx8wOJWvcs,1960
|
35
35
|
cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
|
36
36
|
cobweb/utils/tools.py,sha256=14TCedqt07m4z6bCnFAsITOFixeGr8V3aOKk--L7Cr0,879
|
37
|
-
cobweb_launcher-3.1.
|
38
|
-
cobweb_launcher-3.1.
|
39
|
-
cobweb_launcher-3.1.
|
40
|
-
cobweb_launcher-3.1.
|
41
|
-
cobweb_launcher-3.1.
|
37
|
+
cobweb_launcher-3.1.31.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
38
|
+
cobweb_launcher-3.1.31.dist-info/METADATA,sha256=m15hyIjwAYFJyAOKXrmTEuWlVPvAySayjWjNHwVO4CY,6051
|
39
|
+
cobweb_launcher-3.1.31.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
40
|
+
cobweb_launcher-3.1.31.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
41
|
+
cobweb_launcher-3.1.31.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|