oafuncs 0.0.98.5__py3-none-any.whl → 0.0.98.6__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +51 -33
- oafuncs/oa_down/hycom_3hourly_proxy.py +1230 -0
- oafuncs/oa_down/read_proxy.py +108 -0
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.6.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.6.dist-info}/RECORD +8 -6
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.6.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.6.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.6.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -24,6 +24,7 @@ import warnings
|
|
24
24
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
25
25
|
from pathlib import Path
|
26
26
|
from threading import Lock
|
27
|
+
from oafuncs.oa_tool import pbar
|
27
28
|
|
28
29
|
import httpx
|
29
30
|
import matplotlib.pyplot as plt
|
@@ -32,7 +33,6 @@ import numpy as np
|
|
32
33
|
import pandas as pd
|
33
34
|
import xarray as xr
|
34
35
|
from rich import print
|
35
|
-
from rich.progress import Progress
|
36
36
|
|
37
37
|
from oafuncs.oa_down.idm import downloader as idm_downloader
|
38
38
|
from oafuncs.oa_down.user_agent import get_ua
|
@@ -668,17 +668,19 @@ def _correct_time(nc_file):
|
|
668
668
|
modify_nc(nc_file, "time", None, time_difference)
|
669
669
|
|
670
670
|
|
671
|
+
def setup_logger(level=logging.INFO):
|
672
|
+
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
|
673
|
+
|
674
|
+
|
671
675
|
class _HycomDownloader:
|
672
676
|
def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
|
673
|
-
"""
|
674
|
-
:param tasks: List of (url, save_path)
|
675
|
-
"""
|
676
677
|
self.tasks = tasks
|
677
678
|
self.delay_range = delay_range
|
678
679
|
self.timeout_factor = timeout_factor
|
679
680
|
self.max_var_count = max_var_count
|
680
681
|
self.max_retries = max_retries
|
681
682
|
self.count = {"success": 0, "fail": 0}
|
683
|
+
setup_logger()
|
682
684
|
|
683
685
|
def user_agent(self):
|
684
686
|
return get_ua()
|
@@ -688,55 +690,69 @@ class _HycomDownloader:
|
|
688
690
|
headers = {"User-Agent": self.user_agent()}
|
689
691
|
var_count = min(max(url.count("var="), 1), self.max_var_count)
|
690
692
|
timeout_max = self.timeout_factor * var_count
|
691
|
-
timeout = random.randint(timeout_max // 2, timeout_max)
|
692
693
|
|
693
694
|
retry = 0
|
694
695
|
while retry <= self.max_retries:
|
696
|
+
timeout = random.randint(timeout_max // 2, timeout_max)
|
695
697
|
try:
|
696
698
|
await asyncio.sleep(random.uniform(*self.delay_range))
|
697
699
|
start = datetime.datetime.now()
|
700
|
+
|
698
701
|
async with httpx.AsyncClient(
|
699
702
|
timeout=httpx.Timeout(timeout),
|
700
703
|
limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
|
701
704
|
transport=httpx.AsyncHTTPTransport(retries=2),
|
702
705
|
) as client:
|
703
|
-
|
706
|
+
logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
|
704
707
|
response = await client.get(url, headers=headers, follow_redirects=True)
|
705
708
|
response.raise_for_status()
|
706
709
|
if not response.content:
|
707
710
|
raise ValueError("Empty response received")
|
708
711
|
|
709
|
-
|
712
|
+
logging.info(f"Downloading {file_name} ...")
|
710
713
|
with open(save_path, "wb") as f:
|
714
|
+
total = int(response.headers.get("Content-Length", 0))
|
715
|
+
downloaded = 0
|
716
|
+
last_percent = -1
|
717
|
+
|
711
718
|
async for chunk in response.aiter_bytes(32 * 1024):
|
712
719
|
f.write(chunk)
|
720
|
+
downloaded += len(chunk)
|
721
|
+
|
722
|
+
if total > 0:
|
723
|
+
percent = int(downloaded * 100 / total)
|
724
|
+
if percent != last_percent:
|
725
|
+
logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
|
726
|
+
last_percent = percent
|
727
|
+
|
713
728
|
|
714
729
|
elapsed = datetime.datetime.now() - start
|
715
|
-
|
730
|
+
# logging.info(f"File {file_name} downloaded, Time: {elapsed}")
|
731
|
+
logging.info(f"Saving {file_name}, Time: {elapsed}")
|
716
732
|
self.count["success"] += 1
|
717
733
|
count_dict["success"] += 1
|
718
734
|
return
|
719
735
|
|
720
736
|
except Exception as e:
|
721
|
-
|
737
|
+
logging.error(f"Failed ({type(e).__name__}): {e}")
|
722
738
|
if retry < self.max_retries:
|
723
739
|
backoff = 2**retry
|
724
|
-
|
740
|
+
logging.warning(f"Retrying in {backoff:.1f}s ...")
|
725
741
|
await asyncio.sleep(backoff)
|
726
742
|
retry += 1
|
727
743
|
else:
|
728
|
-
|
744
|
+
logging.error(f"Giving up on {file_name}")
|
729
745
|
self.count["fail"] += 1
|
730
746
|
count_dict["fail"] += 1
|
731
747
|
return
|
732
748
|
|
733
749
|
async def run(self):
|
734
|
-
|
750
|
+
logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
|
735
751
|
for url, save_path in self.tasks:
|
736
752
|
await self._download_one(url, save_path)
|
737
753
|
|
738
|
-
|
739
|
-
|
754
|
+
logging.info("✅ All tasks completed.")
|
755
|
+
logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
|
740
756
|
|
741
757
|
|
742
758
|
def _download_file(target_url, store_path, file_name, cover=False):
|
@@ -874,7 +890,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
874
890
|
else:
|
875
891
|
if download_time < "2024081012":
|
876
892
|
varlist = [_ for _ in var]
|
877
|
-
for key, value in var_group.items():
|
893
|
+
for key, value in pbar(var_group.items(), description=f"Var_group {download_time} ->", total=len(var_group), cmap="bwr", next_line=True):
|
878
894
|
current_group = []
|
879
895
|
for v in varlist:
|
880
896
|
if v in value:
|
@@ -896,7 +912,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
896
912
|
file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
|
897
913
|
_download_file(submit_url, store_path, file_name, cover)
|
898
914
|
else:
|
899
|
-
for v in var:
|
915
|
+
for v in pbar(var,description=f'Var {download_time} ->', total=len(var), cmap='bwr', next_line=True):
|
900
916
|
submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
|
901
917
|
file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
|
902
918
|
if download_time_end is not None:
|
@@ -930,7 +946,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
930
946
|
if num_workers is not None and num_workers > 1:
|
931
947
|
global parallel_counter
|
932
948
|
parallel_counter = 0
|
933
|
-
counter_lock = Lock()
|
949
|
+
counter_lock = Lock() # noqa: F841
|
934
950
|
if ymdh_time_s == ymdh_time_e:
|
935
951
|
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
|
936
952
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
@@ -938,17 +954,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
938
954
|
print("*" * mark_len)
|
939
955
|
print("Downloading a series of files...")
|
940
956
|
time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
|
941
|
-
with Progress() as progress:
|
942
|
-
task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
957
|
+
# with Progress() as progress:
|
958
|
+
# task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
959
|
+
if num_workers is None or num_workers <= 1:
|
960
|
+
for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
|
961
|
+
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
|
962
|
+
# progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
|
963
|
+
else:
|
964
|
+
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
965
|
+
futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
|
966
|
+
""" for feature in as_completed(futures):
|
967
|
+
_done_callback(feature, progress, task, len(time_list), counter_lock) """
|
968
|
+
for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
|
969
|
+
pass
|
952
970
|
else:
|
953
971
|
print("[bold red]Please ensure the time_s is no more than time_e")
|
954
972
|
|
@@ -1032,7 +1050,6 @@ def download(
|
|
1032
1050
|
interval_hours=3,
|
1033
1051
|
)
|
1034
1052
|
"""
|
1035
|
-
from oafuncs.oa_tool import pbar
|
1036
1053
|
|
1037
1054
|
_get_initial_data()
|
1038
1055
|
|
@@ -1093,10 +1110,10 @@ def download(
|
|
1093
1110
|
workers = 1
|
1094
1111
|
given_idm_engine = idm_path
|
1095
1112
|
idm_download_list = []
|
1096
|
-
bar_desc = "Submitting to IDM
|
1113
|
+
bar_desc = "Submitting to IDM ->"
|
1097
1114
|
else:
|
1098
1115
|
use_idm = False
|
1099
|
-
bar_desc = "Downloading
|
1116
|
+
bar_desc = "Downloading ->"
|
1100
1117
|
|
1101
1118
|
global match_time
|
1102
1119
|
match_time = validate_time
|
@@ -1108,7 +1125,7 @@ def download(
|
|
1108
1125
|
workers = 1
|
1109
1126
|
print("*" * mark_len)
|
1110
1127
|
print("[bold red]Only checking the time of existing files.")
|
1111
|
-
bar_desc = "Checking time
|
1128
|
+
bar_desc = "Checking time ->"
|
1112
1129
|
|
1113
1130
|
_download_hourly_func(
|
1114
1131
|
variables,
|
@@ -1134,7 +1151,7 @@ def download(
|
|
1134
1151
|
print("[bold #ecdbfe]*" * mark_len)
|
1135
1152
|
if idm_download_list:
|
1136
1153
|
remain_list = idm_download_list.copy()
|
1137
|
-
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading
|
1154
|
+
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
|
1138
1155
|
success = False
|
1139
1156
|
while not success:
|
1140
1157
|
for f in remain_list:
|
@@ -1190,6 +1207,7 @@ if __name__ == "__main__":
|
|
1190
1207
|
"validate_time": None,
|
1191
1208
|
# "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
|
1192
1209
|
"interval_hours": 24,
|
1210
|
+
"proxy_txt": None,
|
1193
1211
|
}
|
1194
1212
|
|
1195
1213
|
if single_var:
|