oafuncs 0.0.98.5__py3-none-any.whl → 0.0.98.7__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
@@ -19,11 +19,11 @@ import logging
 import os
 import random
 import re
-import time
 import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
+from oafuncs.oa_tool import pbar
 
 import httpx
 import matplotlib.pyplot as plt
@@ -32,7 +32,6 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 from rich import print
-from rich.progress import Progress
 
 from oafuncs.oa_down.idm import downloader as idm_downloader
 from oafuncs.oa_down.user_agent import get_ua
@@ -668,17 +667,19 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)
 
 
+def setup_logger(level=logging.INFO):
+    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
+
+
 class _HycomDownloader:
     def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
-        """
-        :param tasks: List of (url, save_path)
-        """
         self.tasks = tasks
         self.delay_range = delay_range
         self.timeout_factor = timeout_factor
         self.max_var_count = max_var_count
         self.max_retries = max_retries
         self.count = {"success": 0, "fail": 0}
+        setup_logger()
 
     def user_agent(self):
         return get_ua()
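
The new `setup_logger()` helper routes the downloader's status messages through the standard `logging` module instead of rich-colored `print` calls, and `_HycomDownloader.__init__` now calls it so logging is configured as soon as a downloader is created. A minimal stdlib-only sketch of what that configuration produces (the file name and messages below are illustrative, not the package's actual output):

```python
import logging


def setup_logger(level=logging.INFO):
    # Same format string as the helper added in this release.
    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)


setup_logger()
logging.info("Requesting HYCOM_u_2024010100.nc (Attempt 1) ...")  # hypothetical file name
logging.warning("Retrying in 2.0s ...")
# Typical output:
# 2024-01-01 08:00:00,000 - INFO - Requesting HYCOM_u_2024010100.nc (Attempt 1) ...
# 2024-01-01 08:00:01,000 - WARNING - Retrying in 2.0s ...
```

Note that `logging.basicConfig` only configures the root logger on its first effective call, so invoking `setup_logger()` from every `__init__` amounts to a one-time setup.
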
@@ -688,61 +689,84 @@ class _HycomDownloader:
         headers = {"User-Agent": self.user_agent()}
         var_count = min(max(url.count("var="), 1), self.max_var_count)
         timeout_max = self.timeout_factor * var_count
-        timeout = random.randint(timeout_max // 2, timeout_max)
 
         retry = 0
         while retry <= self.max_retries:
+            timeout = random.randint(timeout_max // 2, timeout_max)
             try:
                 await asyncio.sleep(random.uniform(*self.delay_range))
                 start = datetime.datetime.now()
+
                 async with httpx.AsyncClient(
                     timeout=httpx.Timeout(timeout),
                     limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
                     transport=httpx.AsyncHTTPTransport(retries=2),
                 ) as client:
-                    print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
+                    logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
                     response = await client.get(url, headers=headers, follow_redirects=True)
                     response.raise_for_status()
                     if not response.content:
                         raise ValueError("Empty response received")
 
-                    print(f"[bold #96cbd7]Downloading {file_name} ...")
+                    logging.info(f"Downloading {file_name} ...")
                     with open(save_path, "wb") as f:
+                        total = int(response.headers.get("Content-Length", 0))
+                        downloaded = 0
+                        last_percent = -1
+
                         async for chunk in response.aiter_bytes(32 * 1024):
                             f.write(chunk)
+                            downloaded += len(chunk)
+
+                            if total > 0:
+                                percent = int(downloaded * 100 / total)
+                                if percent != last_percent:
+                                    logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
+                                    last_percent = percent
+
 
                     elapsed = datetime.datetime.now() - start
-                    print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
+                    # logging.info(f"File {file_name} downloaded, Time: {elapsed}")
+                    logging.info(f"Saving {file_name} ...")
+                    logging.info(f"Timing {elapsed} ...")
                     self.count["success"] += 1
                     count_dict["success"] += 1
+                    # Print a green success message
+                    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
+                    # print(f"{timestamp} - INFO - ", end="")  # Output log prefix without newline
+                    # print("[bold #3dfc40]Success")
+                    print(f"{timestamp} - RESULT - [bold #3dfc40]Success")
                     return
 
             except Exception as e:
-                print(f"[bold red]Failed ({type(e).__name__}): {e}")
+                logging.error(f"Failed ({type(e).__name__}): {e}")
                 if retry < self.max_retries:
                     backoff = 2**retry
-                    print(f"[yellow]Retrying in {backoff:.1f}s ...")
+                    logging.warning(f"Retrying in {backoff:.1f}s ...")
                     await asyncio.sleep(backoff)
                     retry += 1
                 else:
-                    print(f"[red]Giving up on {file_name}")
+                    logging.error(f"Giving up on {file_name}")
                     self.count["fail"] += 1
                     count_dict["fail"] += 1
+                    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
+                    # print(f"{timestamp} - ERROR - ", end="")
+                    # print("[bold red]Failed")
+                    print(f"{timestamp} - RESULT - [bold red]Failure")
                     return
 
     async def run(self):
-        print(f"📥 Starting download of {len(self.tasks)} files ...")
+        logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
         for url, save_path in self.tasks:
             await self._download_one(url, save_path)
 
-        print("\n✅ All tasks completed.")
-        print(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
+        logging.info("✅ All tasks completed.")
+        logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
 
 
 def _download_file(target_url, store_path, file_name, cover=False):
     save_path = Path(store_path) / file_name
-    file_name_split = file_name.split("_")
-    file_name_split = file_name_split[:-1]
+    file_name_split = file_name.split("_")[:-1]
     same_file = "_".join(file_name_split) + "*nc"
 
     if match_time is not None:
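
The retry loop now draws a fresh random timeout on every attempt, backs off exponentially (`backoff = 2**retry`), and reports per-file progress by comparing bytes written against the `Content-Length` header, logging one line per percent step. A standalone sketch of that progress pattern, assuming `httpx` is installed and using a placeholder URL and file name:

```python
import asyncio
import logging

import httpx

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)


async def fetch_with_progress(url, save_path):
    # Mirrors the pattern in _download_one: write in 32 KiB chunks and log
    # a line each time the integer percentage advances.
    async with httpx.AsyncClient(timeout=httpx.Timeout(120), follow_redirects=True) as client:
        response = await client.get(url)
        response.raise_for_status()
        total = int(response.headers.get("Content-Length", 0))
        downloaded, last_percent = 0, -1
        with open(save_path, "wb") as f:
            async for chunk in response.aiter_bytes(32 * 1024):
                f.write(chunk)
                downloaded += len(chunk)
                if total > 0:
                    percent = int(downloaded * 100 / total)
                    if percent != last_percent:
                        logging.info(f"{save_path}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
                        last_percent = percent


# asyncio.run(fetch_with_progress("https://example.com/sample.nc", "sample.nc"))  # placeholder URL
```

Worth noting: because the body is fetched with `client.get(...)` rather than `client.stream(...)`, `aiter_bytes` iterates over content httpx has already read into memory, so the percent lines track the write to disk rather than the network transfer; the same holds for the package code above.
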
@@ -759,10 +783,12 @@ def _download_file(target_url, store_path, file_name, cover=False):
                 print(f"[bold green]{file_name} is correct")
         return
 
-    if not cover and os.path.exists(save_path):
-        print(f"[bold #FFA54F]{save_path} exists, skipping ...")
-        count_dict["skip"] += 1
-        return
+    # if not cover and os.path.exists(save_path):
+    #     print(f"[bold #FFA54F]{save_path} exists, skipping ...")
+    #     count_dict["skip"] += 1
+    #     return
+    if cover and os.path.exists(save_path):
+        _clear_existing_file(save_path)
 
     if same_file not in fsize_dict.keys():
         check_nc(save_path, delete_if_invalid=True, print_messages=False)
@@ -770,6 +796,7 @@ def _download_file(target_url, store_path, file_name, cover=False):
     get_mean_size = _get_mean_size_move(same_file, save_path)
 
     if _check_existing_file(save_path, get_mean_size):
+        print(f"[bold #FFA54F]{save_path} exists, skipping ...")
         count_dict["skip"] += 1
         return
 
@@ -778,12 +805,12 @@ def _download_file(target_url, store_path, file_name, cover=False):
     if not use_idm:
         python_downloader = _HycomDownloader([(target_url, save_path)])
         asyncio.run(python_downloader.run())
-        time.sleep(3 + random.uniform(0, 10))
+        # time.sleep(3 + random.uniform(0, 10))
     else:
         idm_downloader(target_url, store_path, file_name, given_idm_engine)
         idm_download_list.append(save_path)
         # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
-        time.sleep(3 + random.uniform(0, 10))
+        # time.sleep(3 + random.uniform(0, 10))
 
 
 def _check_hour_is_valid(ymdh_str):
@@ -874,7 +901,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
     else:
         if download_time < "2024081012":
             varlist = [_ for _ in var]
-            for key, value in var_group.items():
+            for key, value in pbar(var_group.items(), description=f"Var Group {download_time} ->", total=len(var_group), color="#d7feb9", next_line=True):
                 current_group = []
                 for v in varlist:
                     if v in value:
@@ -896,7 +923,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
                     file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
                 _download_file(submit_url, store_path, file_name, cover)
         else:
-            for v in var:
+            for v in pbar(var, description=f"Var {download_time} ->", total=len(var), color="#d7feb9", next_line=True):
                 submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                 file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                 if download_time_end is not None:
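
Both branches of `_prepare_url_to_download` now wrap their variable loops in `pbar` from `oafuncs.oa_tool`, which was previously imported locally inside `download()` and is now imported at module level. A usage sketch based only on the call sites in this diff (the `description`, `total`, `color`, and `next_line` arguments are taken from these calls; their exact semantics are assumed from context):

```python
from oafuncs.oa_tool import pbar

# Hypothetical variable list; the real loop submits one HYCOM request per entry.
variables = ["u", "v", "temp", "salt"]
for v in pbar(variables, description="Var 2024010100 ->", total=len(variables), color="#d7feb9", next_line=True):
    ...  # build submit_url and call _download_file(...) for each variable
```
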
@@ -930,7 +957,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
     if num_workers is not None and num_workers > 1:
         global parallel_counter
         parallel_counter = 0
-        counter_lock = Lock()
+        counter_lock = Lock()  # noqa: F841
     if ymdh_time_s == ymdh_time_e:
         _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
     elif int(ymdh_time_s) < int(ymdh_time_e):
@@ -938,17 +965,19 @@
         print("*" * mark_len)
         print("Downloading a series of files...")
         time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
-        with Progress() as progress:
-            task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
-            if num_workers is None or num_workers <= 1:
-                for i, time_str in enumerate(time_list):
-                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
-                    progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
-            else:
-                with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
-                    for feature in as_completed(futures):
-                        _done_callback(feature, progress, task, len(time_list), counter_lock)
+        # with Progress() as progress:
+        #     task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
+        if num_workers is None or num_workers <= 1:
+            for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
+                _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
+                # progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
+        else:
+            with ThreadPoolExecutor(max_workers=num_workers) as executor:
+                futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
+                """ for feature in as_completed(futures):
+                    _done_callback(feature, progress, task, len(time_list), counter_lock) """
+                for _ in pbar(as_completed(futures), description=f"{bar_desc}", total=len(futures), cmap='colorful_1', next_line=True):
+                    pass
     else:
         print("[bold red]Please ensure the time_s is no more than time_e")
 
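
In the multi-worker branch, the rich `Progress`/`_done_callback` pair is replaced by iterating `as_completed(futures)` inside `pbar`, so the bar advances once per finished task. A stdlib-only sketch of the same completion-driven pattern (`fake_download` and the sleep are placeholders):

```python
import time
from concurrent.futures import ThreadPoolExecutor, as_completed


def fake_download(time_str):
    time.sleep(0.1)  # stand-in for _prepare_url_to_download(...)
    return time_str


time_list = ["2024010100", "2024010103", "2024010106"]
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(fake_download, t) for t in time_list]
    for done, future in enumerate(as_completed(futures), start=1):
        print(f"Downloading -> {done}/{len(futures)}: {future.result()}")
```

One thing worth flagging for a future release: the new `for _ in pbar(as_completed(futures), ...): pass` loop never calls `result()` on the futures, so any exception raised inside `_download_task` stays stored on its future and is silently discarded.
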
@@ -1032,7 +1061,6 @@ def download(
         interval_hours=3,
     )
     """
-    from oafuncs.oa_tool import pbar
 
     _get_initial_data()
 
@@ -1093,10 +1121,10 @@ def download(
         workers = 1
         given_idm_engine = idm_path
         idm_download_list = []
-        bar_desc = "Submitting to IDM ..."
+        bar_desc = "Submitting to IDM ->"
     else:
         use_idm = False
-        bar_desc = "Downloading ..."
+        bar_desc = "Downloading ->"
 
     global match_time
     match_time = validate_time
@@ -1108,7 +1136,7 @@ def download(
         workers = 1
         print("*" * mark_len)
         print("[bold red]Only checking the time of existing files.")
-        bar_desc = "Checking time ..."
+        bar_desc = "Checking time ->"
 
     _download_hourly_func(
         variables,
@@ -1134,7 +1162,7 @@ def download(
     print("[bold #ecdbfe]*" * mark_len)
     if idm_download_list:
         remain_list = idm_download_list.copy()
-        for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
+        for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
             success = False
             while not success:
                 for f in remain_list:
@@ -1190,6 +1218,7 @@ if __name__ == "__main__":
         "validate_time": None,
         # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
         "interval_hours": 24,
+        "proxy_txt": None,
     }
 
     if single_var: