cobweb-launcher 3.1.36__tar.gz → 3.1.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/PKG-INFO +1 -1
  2. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/request.py +60 -38
  3. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  4. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/setup.py +1 -1
  5. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/LICENSE +0 -0
  6. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/README.md +0 -0
  7. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/__init__.py +0 -0
  8. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/__init__.py +0 -0
  9. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/common_queue.py +0 -0
  10. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/item.py +0 -0
  11. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/logger.py +0 -0
  12. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/response.py +0 -0
  13. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/seed.py +0 -0
  14. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/task_queue.py +0 -0
  15. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/base/test.py +0 -0
  16. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/constant.py +0 -0
  17. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/crawlers/__init__.py +0 -0
  18. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/crawlers/crawler.py +0 -0
  19. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/db/__init__.py +0 -0
  20. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/db/api_db.py +0 -0
  21. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/db/redis_db.py +0 -0
  22. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/exceptions/__init__.py +0 -0
  23. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/exceptions/oss_db_exception.py +0 -0
  24. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/launchers/__init__.py +0 -0
  25. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/launchers/distributor.py +0 -0
  26. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/launchers/launcher.py +0 -0
  27. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/launchers/uploader.py +0 -0
  28. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/pipelines/__init__.py +0 -0
  29. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/pipelines/pipeline.py +0 -0
  30. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/pipelines/pipeline_csv.py +0 -0
  31. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/pipelines/pipeline_loghub.py +0 -0
  32. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/schedulers/__init__.py +0 -0
  33. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/schedulers/scheduler.py +0 -0
  34. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/schedulers/scheduler_with_redis.py +0 -0
  35. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/setting.py +0 -0
  36. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/__init__.py +0 -0
  37. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/bloom.py +0 -0
  38. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/decorators.py +0 -0
  39. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/dotting.py +0 -0
  40. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/oss.py +0 -0
  41. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb/utils/tools.py +0 -0
  42. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  43. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  44. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb_launcher.egg-info/requires.txt +0 -0
  45. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/cobweb_launcher.egg-info/top_level.txt +0 -0
  46. {cobweb-launcher-3.1.36 → cobweb-launcher-3.1.38}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.36
3
+ Version: 3.1.38
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -440,12 +440,31 @@ class Request:
440
440
 
441
441
  return result
442
442
 
443
+ def _log_download_progress(self, start_time, downloaded):
444
+ try:
445
+ elapsed_time = time.time() - start_time
446
+ elapsed_time_str = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
447
+ progress = downloaded / self.content_length
448
+ downloaded_mb = downloaded / (1024 * 1024)
449
+ total_mb = self.content_length / (1024 * 1024)
450
+ speed = downloaded / elapsed_time / 1024
451
+ filled_length = int(50 * progress)
452
+ bar = '█' * filled_length + '-' * (50 - filled_length)
453
+ logging.info(
454
+ f"\n\r\rDownloading {self.url}: |{bar}| {progress * 100:.1f}% "
455
+ f"{downloaded_mb:.1f}/{total_mb:.1f} MB [Time:{elapsed_time_str}, Speed {speed:.2f} KB/s]"
456
+ )
457
+ except Exception:
458
+ pass
459
+
443
460
  def range_download(self, start: int = 0, chunk_size: int = 1024, file_type_detect: bool = True):
444
461
  # 分块下载
445
462
  downloaded = start
446
463
  retry_count = 0
447
464
  max_retries = 3
448
465
 
466
+ start_time = time.time()
467
+
449
468
  detect_settings = self.request_settings.copy()
450
469
  detect_settings.pop('stream', None)
451
470
 
@@ -469,33 +488,38 @@ class Request:
469
488
  _start = downloaded
470
489
  _end = min(downloaded + chunk_size - 1, self.content_length - 1)
471
490
  detect_settings.setdefault("headers", {})['Range'] = f"bytes={_start}-{_end}"
472
-
473
- self.response = requests.request(
474
- method=self.method,
475
- url=self.url,
476
- **detect_settings
477
- )
478
-
479
- if self.response.status_code == 206:
480
- chunk_data = self.response.content
481
- yield chunk_data
482
- downloaded += len(chunk_data)
483
- retry_count = 0 # 重置重试计数
484
- logging.info(f"下载进度: {downloaded}/{self.content_length} ({downloaded / self.content_length * 100:.1f}%)")
485
-
486
- elif self.response.status_code == 416: # Range Not Satisfiable
487
- logging.info("Range请求超出范围")
488
- break
489
-
490
- else:
491
- logging.debug(f"Range请求失败: {self.response.status_code}")
491
+ try:
492
+
493
+ self.response = requests.request(
494
+ method=self.method,
495
+ url=self.url,
496
+ **detect_settings
497
+ )
498
+
499
+ if self.response.status_code == 206:
500
+ chunk_data = self.response.content
501
+ yield chunk_data
502
+ downloaded += len(chunk_data)
503
+ retry_count = 0 # 重置重试计数
504
+ self._log_download_progress(
505
+ start_time=start_time,
506
+ downloaded=downloaded
507
+ )
508
+ elif self.response.status_code == 416: # Range Not Satisfiable
509
+ logging.info("Range请求超出范围")
510
+ break
511
+
512
+ except Exception as e:
513
+ logging.exception(f"请求失败 - URL: {self.url}, 错误: {e}, 当前重试次数: {retry_count}")
514
+ finally:
515
+ self.response.close()
516
+ self.response = None
492
517
  if retry_count < max_retries:
518
+ time.sleep(0.5 * retry_count)
493
519
  retry_count += 1
494
- time.sleep(0.5)
495
520
  continue
496
-
497
- self.response.close()
498
- self.response = None
521
+ else:
522
+ raise ValueError(f"超过当前最大重试次数,请求失败!当前重试次数: {retry_count}")
499
523
 
500
524
  def detect_accept_ranges(self) -> bool:
501
525
  detect_settings = self.request_settings.copy()
@@ -507,13 +531,7 @@ class Request:
507
531
  raise ValueError("HTTP状态码错误")
508
532
 
509
533
  self.content_length = int(head_response.headers.get('content-length', 0))
510
- # accept_ranges = str(head_response.headers.get('accept-ranges')).lower()
511
- supports_range = True
512
534
 
513
- # 根据检测结果使用不同下载方式
514
- # if accept_ranges == 'none' or not self.content_length:
515
- # supports_range = False
516
- # else:
517
535
  test_range_settings = detect_settings.copy()
518
536
  test_range_settings.setdefault("headers", {})['Range'] = "bytes=0-63"
519
537
  test_response = requests.request(
@@ -524,19 +542,23 @@ class Request:
524
542
  head_data = test_response.content
525
543
  content_type = test_response.headers.get("Content-Type")
526
544
 
527
- if test_response.status_code == 206:
528
- if len(head_data) != 64:
529
- supports_range = False
530
- self.response = test_response
531
- logging.debug(f"⚠️ Range请求返回长度不匹配: 期望64, 实际{len(head_data)}")
532
- head_data = head_data[:64]
545
+ if test_response.status_code == 206 and len(head_data) == 64:
546
+ supports_range = True
547
+ elif test_response.status_code == 200:
548
+ supports_range = False
549
+ self.response = test_response
550
+ head_data = head_data[:64]
551
+ logging.debug(f"Range请求方式不支持, 实际{len(head_data)}")
552
+ else:
553
+ supports_range = False
554
+ logging.error(f"Range请求失败: {test_response.status_code}")
533
555
 
556
+ if not self.detector_info:
557
+ self.response = test_response
534
558
  detector = FileTypeDetector()
535
559
  self.detector_info = detector.get_detailed_info(
536
560
  url=self.url, content_type=content_type, data=head_data
537
561
  )
538
- else:
539
- supports_range = False
540
562
 
541
563
  test_response.close()
542
564
  return supports_range
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.36
3
+ Version: 3.1.38
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="3.1.36",
8
+ version="3.1.38",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",