oafuncs 0.0.98.3__py3-none-any.whl → 0.0.98.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,8 @@ import netCDF4 as nc
 import numpy as np
 import pandas as pd
 import requests
+from requests.adapters import HTTPAdapter
+import httpx
 import xarray as xr
 from rich import print
 from rich.progress import Progress
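
Note: the two new imports back the reworked download path later in this diff. HTTPAdapter is used to mount a connection pool on a requests.Session, and httpx provides the synchronous client that replaces the old inline retry loop. A minimal standalone sketch of the two client setups (illustration only, not code copied from the package):

    import requests
    from requests.adapters import HTTPAdapter
    import httpx

    # requests: pooled session; retries are handled manually by the caller
    session = requests.Session()
    session.mount("https://", HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0))

    # httpx: pooled client with connection-level retries built into the transport
    client = httpx.Client(
        limits=httpx.Limits(max_connections=10, max_keepalive_connections=10),
        transport=httpx.HTTPTransport(retries=3),
        timeout=None,
    )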
@@ -419,8 +421,10 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
     print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
         if match_time is None:
+            dv_num = 1
             for d, v, trange in zip(d_list, v_list, trange_list):
-                print(f"[bold blue]{d} {v} {trange}")
+                print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
+                dv_num += 1
             if is_single_time:
                 return True
             else:
@@ -611,7 +615,7 @@ def _get_mean_size_move(same_file, current_file):
     size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
 
     if abs(size_difference_ratio) > tolerance_ratio:
-        if check_nc(current_file,print_messages=False):
+        if check_nc(current_file, print_messages=False):
            fsize_dict[same_file]["size_list"] = [current_file_size]
            fsize_dict[same_file]["mean_size"] = current_file_size
        else:
@@ -662,6 +666,98 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)
 
 
+def _download_within_python_requests(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Session configuration
+    session = requests.Session()
+    adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+
+    # Timeout and retry config
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    order_terms = ["1st", "2nd", "3rd"]
+    download_start = datetime.datetime.now()
+    max_attempts = 5
+
+    for attempt in range(max_attempts):
+        if attempt > 0:
+            retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
+            print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
+            time.sleep(2 + random.uniform(0, 2))
+
+        timeout = random.randint(max_timeout // 5, max_timeout)
+        print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+
+        try:
+            headers = {"User-Agent": get_ua()}
+            with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
+                response.raise_for_status()
+                print(f"[bold #96cbd7]Downloading {file_name} ...")
+                with open(fname, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=32 * 1024):
+                        if chunk:
+                            f.write(chunk)
+            elapsed = datetime.datetime.now() - download_start
+            print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+            count_dict["success"] += 1
+            return
+        except Exception as e:
+            if hasattr(e, "response") and getattr(e.response, "status_code", None):
+                err_msg = f"HTTP {e.response.status_code} Error"
+            elif isinstance(e, requests.exceptions.Timeout):
+                err_msg = "Timeout Error"
+            elif isinstance(e, requests.exceptions.ConnectionError):
+                err_msg = "Connection Error"
+            elif isinstance(e, requests.exceptions.RequestException):
+                err_msg = "Request Error"
+            else:
+                err_msg = "Unexpected Error"
+            print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
+
+    print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
+    count_dict["fail"] += 1
+
+
+
+def _download_within_python(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Create an httpx synchronous client
+    limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
+    transport = httpx.HTTPTransport(retries=3)
+    client = httpx.Client(limits=limits, transport=transport, timeout=None)
+
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    timeout = random.randint(max_timeout // 2, max_timeout)
+    download_start = datetime.datetime.now()
+
+    print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+    headers = {"User-Agent": get_ua()}
+
+    try:
+        response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
+        response.raise_for_status()
+        print(f"[bold #96cbd7]Downloading {file_name} ...")
+        with open(fname, "wb") as f:
+            for chunk in response.iter_bytes(32 * 1024):
+                if chunk:
+                    f.write(chunk)
+        elapsed = datetime.datetime.now() - download_start
+        print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+        count_dict["success"] += 1
+    except Exception as e:
+        err_type = type(e).__name__
+        print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
+        print(f"[bold #ffe5c0]Target URL: \n{target_url}")
+        count_dict["fail"] += 1
+    finally:
+        client.close()
+
+
 def _download_file(target_url, store_path, file_name, cover=False):
     fname = Path(store_path) / file_name
     file_name_split = file_name.split("_")
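
Note: both new helpers derive their timeout budget from the number of var= parameters in the request URL. A worked example of that heuristic, using a hypothetical URL for illustration only:

    # Hypothetical THREDDS-style request URL carrying three variables
    target_url = "https://example.com/ncss/grid?var=water_u&var=water_v&var=water_temp"
    num_var = max(target_url.count("var="), 1)   # 3
    max_timeout = 5 * 30 * num_var               # 450 seconds
    # _download_within_python_requests draws a random timeout from [90, 450] on each attempt;
    # _download_within_python draws one from [225, 450] for its single attempt.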
@@ -699,69 +795,7 @@ def _download_file(target_url, store_path, file_name, cover=False):
     _clear_existing_file(fname)
 
     if not use_idm:
-        print(f"[bold #f0f6d0]Requesting {file_name} ...")
-        s = requests.Session()
-        download_success = False
-        request_times = 0
-
-        def calculate_wait_time(time_str, target_url):
-            time_pattern = r"\d{10}"
-            times_in_str = re.findall(time_pattern, time_str)
-            num_times_str = len(times_in_str)
-
-            if num_times_str > 1:
-                delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-                delta_t = delta_t.total_seconds() / 3600
-                delta_t = delta_t / 3 + 1
-            else:
-                delta_t = 1
-            num_var = int(target_url.count("var="))
-            if num_var <= 0:
-                num_var = 1
-            return int(delta_t * 5 * 60 * num_var)
-
-        max_timeout = calculate_wait_time(file_name, target_url)
-        print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-
-        download_time_s = datetime.datetime.now()
-        order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-        while not download_success:
-            if request_times >= 10:
-                print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-                count_dict["fail"] += 1
-                break
-            if request_times > 0:
-                print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
-            try:
-                headers = {"User-Agent": get_ua()}
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
-                response.raise_for_status()
-                with open(fname, "wb") as f:
-                    print(f"[bold #96cbd7]Downloading {file_name} ...")
-                    for chunk in response.iter_content(chunk_size=1024):
-                        if chunk:
-                            f.write(chunk)
-
-                f.close()
-
-                if os.path.exists(fname):
-                    download_success = True
-                    download_time_e = datetime.datetime.now()
-                    download_delta = download_time_e - download_time_s
-                    print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                    count_dict["success"] += 1
-
-            except requests.exceptions.HTTPError as errh:
-                print(f"Http Error: {errh}")
-            except requests.exceptions.ConnectionError as errc:
-                print(f"Error Connecting: {errc}")
-            except requests.exceptions.Timeout as errt:
-                print(f"Timeout Error: {errt}")
-            except requests.exceptions.RequestException as err:
-                print(f"OOps: Something Else: {err}")
-
-            time.sleep(3)
-            request_times += 1
+        _download_within_python(file_name, target_url, fname)
     else:
         idm_downloader(target_url, store_path, file_name, given_idm_engine)
         idm_download_list.append(fname)
@@ -992,7 +1026,7 @@ def download(
 
     Returns:
         None
-    
+
    Example:
        >>> download(
            variables='u',
@@ -1088,7 +1122,7 @@ def download(
 
     if validate_time is not None:
         workers = 1
-        print('*' * mark_len)
+        print("*" * mark_len)
        print("[bold red]Only checking the time of existing files.")
        bar_desc = "Checking time ..."
 
@@ -1158,20 +1192,20 @@ if __name__ == "__main__":
 
     options = {
         "variables": var_list,
-        "start_time": "2025010300",
-        "end_time": "2025010309",
-        "output_dir": r"I:\Data\HYCOM\3hourly_test",
+        "start_time": "2018010100",
+        "end_time": "2021010100",
+        "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
         "lon_min": 105,
-        "lon_max": 130,
-        "lat_min": 15,
+        "lon_max": 135,
+        "lat_min": 10,
         "lat_max": 45,
         "workers": 1,
        "overwrite": False,
        "depth": None,
        "level": None,
-        "validate_time": True,
-        "idm_path": r'D:\Programs\Internet Download Manager\IDMan.exe',
-        "interval_hours": 3,
+        "validate_time": None,
+        # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
+        "interval_hours": 24,
     }
 
     if single_var:
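
Note: the revised __main__ options switch to a 24-hour interval over a wider box, disable time validation, and comment out the IDM path so the pure-Python download path is used. A hedged sketch of how such an options dict is typically consumed, assuming download() accepts these keys as keyword arguments (as the docstring Example above suggests); the actual logic around single_var in the script may differ:

    # Illustrative only: unpack the options into the download() call
    if single_var:
        for var in var_list:
            options["variables"] = var   # one variable per call (assumed behavior)
            download(**options)
    else:
        download(**options)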