oafuncs 0.0.98.5__py3-none-any.whl → 0.0.98.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ import warnings
24
24
  from concurrent.futures import ThreadPoolExecutor, as_completed
25
25
  from pathlib import Path
26
26
  from threading import Lock
27
+ from oafuncs.oa_tool import pbar
27
28
 
28
29
  import httpx
29
30
  import matplotlib.pyplot as plt
@@ -32,7 +33,6 @@ import numpy as np
32
33
  import pandas as pd
33
34
  import xarray as xr
34
35
  from rich import print
35
- from rich.progress import Progress
36
36
 
37
37
  from oafuncs.oa_down.idm import downloader as idm_downloader
38
38
  from oafuncs.oa_down.user_agent import get_ua
@@ -668,17 +668,19 @@ def _correct_time(nc_file):
668
668
  modify_nc(nc_file, "time", None, time_difference)
669
669
 
670
670
 
671
+ def setup_logger(level=logging.INFO):
672
+ logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
673
+
674
+
671
675
  class _HycomDownloader:
672
676
  def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
673
- """
674
- :param tasks: List of (url, save_path)
675
- """
676
677
  self.tasks = tasks
677
678
  self.delay_range = delay_range
678
679
  self.timeout_factor = timeout_factor
679
680
  self.max_var_count = max_var_count
680
681
  self.max_retries = max_retries
681
682
  self.count = {"success": 0, "fail": 0}
683
+ setup_logger()
682
684
 
683
685
  def user_agent(self):
684
686
  return get_ua()
@@ -688,55 +690,69 @@ class _HycomDownloader:
688
690
  headers = {"User-Agent": self.user_agent()}
689
691
  var_count = min(max(url.count("var="), 1), self.max_var_count)
690
692
  timeout_max = self.timeout_factor * var_count
691
- timeout = random.randint(timeout_max // 2, timeout_max)
692
693
 
693
694
  retry = 0
694
695
  while retry <= self.max_retries:
696
+ timeout = random.randint(timeout_max // 2, timeout_max)
695
697
  try:
696
698
  await asyncio.sleep(random.uniform(*self.delay_range))
697
699
  start = datetime.datetime.now()
700
+
698
701
  async with httpx.AsyncClient(
699
702
  timeout=httpx.Timeout(timeout),
700
703
  limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
701
704
  transport=httpx.AsyncHTTPTransport(retries=2),
702
705
  ) as client:
703
- print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
706
+ logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
704
707
  response = await client.get(url, headers=headers, follow_redirects=True)
705
708
  response.raise_for_status()
706
709
  if not response.content:
707
710
  raise ValueError("Empty response received")
708
711
 
709
- print(f"[bold #96cbd7]Downloading {file_name} ...")
712
+ logging.info(f"Downloading {file_name} ...")
710
713
  with open(save_path, "wb") as f:
714
+ total = int(response.headers.get("Content-Length", 0))
715
+ downloaded = 0
716
+ last_percent = -1
717
+
711
718
  async for chunk in response.aiter_bytes(32 * 1024):
712
719
  f.write(chunk)
720
+ downloaded += len(chunk)
721
+
722
+ if total > 0:
723
+ percent = int(downloaded * 100 / total)
724
+ if percent != last_percent:
725
+ logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
726
+ last_percent = percent
727
+
713
728
 
714
729
  elapsed = datetime.datetime.now() - start
715
- print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
730
+ # logging.info(f"File {file_name} downloaded, Time: {elapsed}")
731
+ logging.info(f"Saving {file_name}, Time: {elapsed}")
716
732
  self.count["success"] += 1
717
733
  count_dict["success"] += 1
718
734
  return
719
735
 
720
736
  except Exception as e:
721
- print(f"[bold red]Failed ({type(e).__name__}): {e}")
737
+ logging.error(f"Failed ({type(e).__name__}): {e}")
722
738
  if retry < self.max_retries:
723
739
  backoff = 2**retry
724
- print(f"[yellow]Retrying in {backoff:.1f}s ...")
740
+ logging.warning(f"Retrying in {backoff:.1f}s ...")
725
741
  await asyncio.sleep(backoff)
726
742
  retry += 1
727
743
  else:
728
- print(f"[red]Giving up on {file_name}")
744
+ logging.error(f"Giving up on {file_name}")
729
745
  self.count["fail"] += 1
730
746
  count_dict["fail"] += 1
731
747
  return
732
748
 
733
749
  async def run(self):
734
- print(f"📥 Starting download of {len(self.tasks)} files ...")
750
+ logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
735
751
  for url, save_path in self.tasks:
736
752
  await self._download_one(url, save_path)
737
753
 
738
- print("\n✅ All tasks completed.")
739
- print(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
754
+ logging.info("✅ All tasks completed.")
755
+ logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
740
756
 
741
757
 
742
758
  def _download_file(target_url, store_path, file_name, cover=False):
@@ -874,7 +890,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
874
890
  else:
875
891
  if download_time < "2024081012":
876
892
  varlist = [_ for _ in var]
877
- for key, value in var_group.items():
893
+ for key, value in pbar(var_group.items(), description=f"Var_group {download_time} ->", total=len(var_group), cmap="bwr", next_line=True):
878
894
  current_group = []
879
895
  for v in varlist:
880
896
  if v in value:
@@ -896,7 +912,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
896
912
  file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
897
913
  _download_file(submit_url, store_path, file_name, cover)
898
914
  else:
899
- for v in var:
915
+ for v in pbar(var,description=f'Var {download_time} ->', total=len(var), cmap='bwr', next_line=True):
900
916
  submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
901
917
  file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
902
918
  if download_time_end is not None:
@@ -930,7 +946,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
930
946
  if num_workers is not None and num_workers > 1:
931
947
  global parallel_counter
932
948
  parallel_counter = 0
933
- counter_lock = Lock()
949
+ counter_lock = Lock() # noqa: F841
934
950
  if ymdh_time_s == ymdh_time_e:
935
951
  _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
936
952
  elif int(ymdh_time_s) < int(ymdh_time_e):
@@ -938,17 +954,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
938
954
  print("*" * mark_len)
939
955
  print("Downloading a series of files...")
940
956
  time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
941
- with Progress() as progress:
942
- task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
943
- if num_workers is None or num_workers <= 1:
944
- for i, time_str in enumerate(time_list):
945
- _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
946
- progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
947
- else:
948
- with ThreadPoolExecutor(max_workers=num_workers) as executor:
949
- futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
950
- for feature in as_completed(futures):
951
- _done_callback(feature, progress, task, len(time_list), counter_lock)
957
+ # with Progress() as progress:
958
+ # task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
959
+ if num_workers is None or num_workers <= 1:
960
+ for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
961
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
962
+ # progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
963
+ else:
964
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
965
+ futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
966
+ """ for feature in as_completed(futures):
967
+ _done_callback(feature, progress, task, len(time_list), counter_lock) """
968
+ for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
969
+ pass
952
970
  else:
953
971
  print("[bold red]Please ensure the time_s is no more than time_e")
954
972
 
@@ -1032,7 +1050,6 @@ def download(
1032
1050
  interval_hours=3,
1033
1051
  )
1034
1052
  """
1035
- from oafuncs.oa_tool import pbar
1036
1053
 
1037
1054
  _get_initial_data()
1038
1055
 
@@ -1093,10 +1110,10 @@ def download(
1093
1110
  workers = 1
1094
1111
  given_idm_engine = idm_path
1095
1112
  idm_download_list = []
1096
- bar_desc = "Submitting to IDM ..."
1113
+ bar_desc = "Submitting to IDM ->"
1097
1114
  else:
1098
1115
  use_idm = False
1099
- bar_desc = "Downloading ..."
1116
+ bar_desc = "Downloading ->"
1100
1117
 
1101
1118
  global match_time
1102
1119
  match_time = validate_time
@@ -1108,7 +1125,7 @@ def download(
1108
1125
  workers = 1
1109
1126
  print("*" * mark_len)
1110
1127
  print("[bold red]Only checking the time of existing files.")
1111
- bar_desc = "Checking time ..."
1128
+ bar_desc = "Checking time ->"
1112
1129
 
1113
1130
  _download_hourly_func(
1114
1131
  variables,
@@ -1134,7 +1151,7 @@ def download(
1134
1151
  print("[bold #ecdbfe]*" * mark_len)
1135
1152
  if idm_download_list:
1136
1153
  remain_list = idm_download_list.copy()
1137
- for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
1154
+ for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
1138
1155
  success = False
1139
1156
  while not success:
1140
1157
  for f in remain_list:
@@ -1190,6 +1207,7 @@ if __name__ == "__main__":
1190
1207
  "validate_time": None,
1191
1208
  # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
1192
1209
  "interval_hours": 24,
1210
+ "proxy_txt": None,
1193
1211
  }
1194
1212
 
1195
1213
  if single_var: