cobweb-launcher 3.1.30__tar.gz → 3.1.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/PKG-INFO +1 -1
  2. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/request.py +132 -1
  3. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  4. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/setup.py +1 -1
  5. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/LICENSE +0 -0
  6. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/README.md +0 -0
  7. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/__init__.py +0 -0
  8. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/__init__.py +0 -0
  9. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/common_queue.py +0 -0
  10. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/item.py +0 -0
  11. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/logger.py +0 -0
  12. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/response.py +0 -0
  13. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/seed.py +0 -0
  14. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/task_queue.py +0 -0
  15. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/base/test.py +0 -0
  16. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/constant.py +0 -0
  17. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/crawlers/__init__.py +0 -0
  18. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/crawlers/crawler.py +0 -0
  19. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/db/__init__.py +0 -0
  20. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/db/api_db.py +0 -0
  21. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/db/redis_db.py +0 -0
  22. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/exceptions/__init__.py +0 -0
  23. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/exceptions/oss_db_exception.py +0 -0
  24. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/launchers/__init__.py +0 -0
  25. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/launchers/distributor.py +0 -0
  26. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/launchers/launcher.py +0 -0
  27. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/launchers/uploader.py +0 -0
  28. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/pipelines/__init__.py +0 -0
  29. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/pipelines/pipeline.py +0 -0
  30. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/pipelines/pipeline_csv.py +0 -0
  31. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/pipelines/pipeline_loghub.py +0 -0
  32. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/schedulers/__init__.py +0 -0
  33. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/schedulers/scheduler.py +0 -0
  34. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/schedulers/scheduler_with_redis.py +0 -0
  35. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/setting.py +0 -0
  36. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/__init__.py +0 -0
  37. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/bloom.py +0 -0
  38. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/decorators.py +0 -0
  39. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/dotting.py +0 -0
  40. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/oss.py +0 -0
  41. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb/utils/tools.py +0 -0
  42. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  43. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  44. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb_launcher.egg-info/requires.txt +0 -0
  45. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/cobweb_launcher.egg-info/top_level.txt +0 -0
  46. {cobweb-launcher-3.1.30 → cobweb-launcher-3.1.32}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.30
3
+ Version: 3.1.32
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,5 +1,7 @@
1
1
  import random
2
2
  import logging
3
+ import time
4
+
3
5
  import requests
4
6
 
5
7
  from urllib.parse import urlparse
@@ -215,7 +217,7 @@ class FileTypeDetector:
215
217
  result['methods_used'].append('extension')
216
218
 
217
219
  # 3. 如果前两种方法不确定,使用文件签名检测
218
- if result['confidence'] in ['unknown', 'medium']:
220
+ if data and result['confidence'] in ['unknown', 'medium']:
219
221
  signature_detected = self.detect_by_signature(data)
220
222
  if signature_detected:
221
223
  if not result['detected_type']:
@@ -309,6 +311,8 @@ class Request:
309
311
  """
310
312
  self.scheme = None
311
313
  self.netloc = None
314
+ self.detector_info = None
315
+ self.content_length = None
312
316
  self._validate_url(url)
313
317
 
314
318
  self.url = url
@@ -408,6 +412,133 @@ class Request:
408
412
  """下载方法,为了向后兼容性保留"""
409
413
  return self.execute()
410
414
 
415
def normal_download(self, file_type_detect: bool = True) -> bytes:
    """Download the whole resource with one non-streamed request.

    The request is issued with ``self.method`` / ``self.url`` and a copy of
    ``self.request_settings`` from which any ``stream`` entry is removed.

    Args:
        file_type_detect: When true and ``self.detector_info`` is still
            empty, the first 64 bytes of the body are handed to
            ``FileTypeDetector.get_detailed_info`` (together with the URL and
            the response ``content-type`` header) and the result is stored
            in ``self.detector_info``.

    Returns:
        The complete response body.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` when
            ``self.check_status_code`` is truthy and the status is an error.
    """
    settings = self.request_settings.copy()
    # The body is read eagerly below, so a caller-supplied ``stream`` flag is dropped.
    settings.pop('stream', None)

    response = requests.request(method=self.method, url=self.url, **settings)
    if self.check_status_code:
        response.raise_for_status()

    declared_type = response.headers.get('content-type')
    body = response.content
    response.close()

    # Skip detection when it is disabled or a result is already cached.
    if not file_type_detect or self.detector_info:
        return body

    self.detector_info = FileTypeDetector().get_detailed_info(
        url=self.url, content_type=declared_type, data=body[:64]
    )
    return body
441
+
442
def range_download(self, start: int = 0, chunk_size: int = 1024, file_type_detect: bool = True) -> "Iterator[bytes]":
    """Download the resource piece by piece using HTTP ``Range`` requests.

    This is a generator: nothing is requested until it is iterated.
    ``self.content_length`` must already be known (``detect_accept_ranges()``
    sets it) because it bounds the byte ranges that are requested.

    Args:
        start: Byte offset at which to begin downloading.
        chunk_size: Maximum number of bytes requested per range.
        file_type_detect: When true and ``self.detector_info`` is still
            empty, bytes 0-63 are fetched first and passed to
            ``FileTypeDetector.get_detailed_info``; the result is stored in
            ``self.detector_info``.

    Yields:
        The body of each ``206 Partial Content`` response, in order.

    Raises:
        ValueError: If ``self.content_length`` is ``None``, if ``chunk_size``
            is not positive, or if the same range still fails after the
            retry budget is used up.
    """
    # NOTE: the return annotation is a string on purpose. The previous
    # ``iter(bytes)`` was evaluated when the function was defined and raised
    # ``TypeError``; quoting it avoids needing a new module-level import.
    if self.content_length is None:
        raise ValueError(
            "content_length is unknown; call detect_accept_ranges() before range_download()"
        )
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    total_size = self.content_length
    downloaded = start
    retry_count = 0
    max_retries = 3

    detect_settings = self.request_settings.copy()
    detect_settings.pop('stream', None)
    # ``dict.copy()`` is shallow: copy the headers too, otherwise the Range
    # header below would be written into self.request_settings["headers"].
    base_headers = dict(detect_settings.get("headers") or {})

    if file_type_detect and not self.detector_info:
        detect_settings["headers"] = {**base_headers, 'Range': "bytes=0-63"}
        test_response = requests.request(
            method=self.method,
            url=self.url,
            **detect_settings
        )
        try:
            content_type = test_response.headers.get("Content-Type")
            head_data = test_response.content
        finally:
            test_response.close()

        detector = FileTypeDetector()
        self.detector_info = detector.get_detailed_info(
            url=self.url, content_type=content_type, data=head_data
        )

    while downloaded < total_size:
        range_end = min(downloaded + chunk_size - 1, total_size - 1)
        detect_settings["headers"] = {
            **base_headers, 'Range': f"bytes={downloaded}-{range_end}"
        }

        response = requests.request(
            method=self.method,
            url=self.url,
            **detect_settings
        )
        try:
            status_code = response.status_code
            chunk_data = response.content if status_code == 206 else b""
        finally:
            # Always release the response, whichever branch runs below.
            response.close()

        if status_code == 206 and chunk_data:
            yield chunk_data
            downloaded += len(chunk_data)
            retry_count = 0  # a successful chunk resets the retry budget
            logging.info(f"下载进度: {downloaded}/{total_size} ({downloaded / total_size * 100:.1f}%)")
            continue

        if status_code == 416:  # Range Not Satisfiable
            logging.info("Range请求超出范围")
            break

        # Any other status, or an empty 206 body that makes no progress,
        # counts as a failed attempt for this range.
        logging.debug(f"Range请求失败: {status_code}")
        if retry_count >= max_retries:
            # Fail loudly rather than re-requesting the same range forever.
            raise ValueError(
                f"Range request bytes={downloaded}-{range_end} failed after "
                f"{max_retries} retries (HTTP {status_code})"
            )
        retry_count += 1
        time.sleep(0.5)
497
+
498
def detect_accept_ranges(self) -> bool:
    """Check whether the server supports HTTP ``Range`` requests for ``self.url``.

    A ``HEAD`` request records ``Content-Length`` in ``self.content_length``.
    Unless range support is already ruled out, bytes 0-63 are then requested
    with ``self.method``. When that probe returns ``206`` with exactly 64
    bytes, those bytes are also passed to
    ``FileTypeDetector.get_detailed_info`` and the result is stored in
    ``self.detector_info``.

    Returns:
        ``True`` only when the 64-byte probe succeeded; ``False`` when the
        server reports ``Accept-Ranges: none``, sends no ``Accept-Ranges``
        header, reports no (or zero) content length, or the probe did not
        return ``206`` with 64 bytes.

    Raises:
        ValueError: If the ``HEAD`` response status is neither 200 nor 206.
    """
    detect_settings = self.request_settings.copy()
    detect_settings.pop('stream', None)

    head_response = requests.head(self.url, **detect_settings)
    try:
        if head_response.status_code not in [200, 206]:
            logging.error(f"HEAD请求失败: {head_response.status_code}")
            raise ValueError("HTTP状态码错误")

        self.content_length = int(head_response.headers.get('content-length', 0))
        # str(None).lower() == 'none', so a missing header is treated the
        # same as an explicit "Accept-Ranges: none".
        accept_ranges = str(head_response.headers.get('accept-ranges')).lower()
    finally:
        head_response.close()

    if accept_ranges == 'none' or not self.content_length:
        return False

    test_range_settings = detect_settings.copy()
    # ``dict.copy()`` is shallow: build a fresh headers dict so the probe's
    # Range header is not written into self.request_settings["headers"].
    probe_headers = dict(test_range_settings.get("headers") or {})
    probe_headers['Range'] = "bytes=0-63"
    test_range_settings["headers"] = probe_headers

    test_response = requests.request(
        method=self.method,
        url=self.url,
        **test_range_settings
    )
    try:
        head_data = test_response.content
        content_type = test_response.headers.get("Content-Type")
        status_code = test_response.status_code
    finally:
        test_response.close()

    if status_code != 206:
        return False

    if len(head_data) != 64:
        logging.debug(f"⚠️ Range请求返回长度不匹配: 期望64, 实际{len(head_data)}")
        return False

    detector = FileTypeDetector()
    self.detector_info = detector.get_detailed_info(
        url=self.url, content_type=content_type, data=head_data
    )
    return True
541
+
411
542
  def detect_file_type(self) -> Dict[str, Any]:
412
543
  """
413
544
  检测文件类型。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.30
3
+ Version: 3.1.32
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="3.1.30",
8
+ version="3.1.32",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",