oafuncs 0.0.98.5__py3-none-any.whl → 0.0.98.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +71 -42
- oafuncs/oa_down/hycom_3hourly_proxy.py +1230 -0
- oafuncs/oa_down/read_proxy.py +108 -0
- oafuncs/oa_draw.py +65 -0
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.7.dist-info}/METADATA +2 -1
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.7.dist-info}/RECORD +9 -7
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.7.dist-info}/WHEEL +1 -1
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.7.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.5.dist-info → oafuncs-0.0.98.7.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -19,11 +19,11 @@ import logging
|
|
19
19
|
import os
|
20
20
|
import random
|
21
21
|
import re
|
22
|
-
import time
|
23
22
|
import warnings
|
24
23
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
25
24
|
from pathlib import Path
|
26
25
|
from threading import Lock
|
26
|
+
from oafuncs.oa_tool import pbar
|
27
27
|
|
28
28
|
import httpx
|
29
29
|
import matplotlib.pyplot as plt
|
@@ -32,7 +32,6 @@ import numpy as np
|
|
32
32
|
import pandas as pd
|
33
33
|
import xarray as xr
|
34
34
|
from rich import print
|
35
|
-
from rich.progress import Progress
|
36
35
|
|
37
36
|
from oafuncs.oa_down.idm import downloader as idm_downloader
|
38
37
|
from oafuncs.oa_down.user_agent import get_ua
|
@@ -668,17 +667,19 @@ def _correct_time(nc_file):
|
|
668
667
|
modify_nc(nc_file, "time", None, time_difference)
|
669
668
|
|
670
669
|
|
670
|
+
def setup_logger(level=logging.INFO):
|
671
|
+
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
|
672
|
+
|
673
|
+
|
671
674
|
class _HycomDownloader:
|
672
675
|
def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
|
673
|
-
"""
|
674
|
-
:param tasks: List of (url, save_path)
|
675
|
-
"""
|
676
676
|
self.tasks = tasks
|
677
677
|
self.delay_range = delay_range
|
678
678
|
self.timeout_factor = timeout_factor
|
679
679
|
self.max_var_count = max_var_count
|
680
680
|
self.max_retries = max_retries
|
681
681
|
self.count = {"success": 0, "fail": 0}
|
682
|
+
setup_logger()
|
682
683
|
|
683
684
|
def user_agent(self):
|
684
685
|
return get_ua()
|
@@ -688,61 +689,84 @@ class _HycomDownloader:
|
|
688
689
|
headers = {"User-Agent": self.user_agent()}
|
689
690
|
var_count = min(max(url.count("var="), 1), self.max_var_count)
|
690
691
|
timeout_max = self.timeout_factor * var_count
|
691
|
-
timeout = random.randint(timeout_max // 2, timeout_max)
|
692
692
|
|
693
693
|
retry = 0
|
694
694
|
while retry <= self.max_retries:
|
695
|
+
timeout = random.randint(timeout_max // 2, timeout_max)
|
695
696
|
try:
|
696
697
|
await asyncio.sleep(random.uniform(*self.delay_range))
|
697
698
|
start = datetime.datetime.now()
|
699
|
+
|
698
700
|
async with httpx.AsyncClient(
|
699
701
|
timeout=httpx.Timeout(timeout),
|
700
702
|
limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
|
701
703
|
transport=httpx.AsyncHTTPTransport(retries=2),
|
702
704
|
) as client:
|
703
|
-
|
705
|
+
logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
|
704
706
|
response = await client.get(url, headers=headers, follow_redirects=True)
|
705
707
|
response.raise_for_status()
|
706
708
|
if not response.content:
|
707
709
|
raise ValueError("Empty response received")
|
708
710
|
|
709
|
-
|
711
|
+
logging.info(f"Downloading {file_name} ...")
|
710
712
|
with open(save_path, "wb") as f:
|
713
|
+
total = int(response.headers.get("Content-Length", 0))
|
714
|
+
downloaded = 0
|
715
|
+
last_percent = -1
|
716
|
+
|
711
717
|
async for chunk in response.aiter_bytes(32 * 1024):
|
712
718
|
f.write(chunk)
|
719
|
+
downloaded += len(chunk)
|
720
|
+
|
721
|
+
if total > 0:
|
722
|
+
percent = int(downloaded * 100 / total)
|
723
|
+
if percent != last_percent:
|
724
|
+
logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
|
725
|
+
last_percent = percent
|
726
|
+
|
713
727
|
|
714
728
|
elapsed = datetime.datetime.now() - start
|
715
|
-
|
729
|
+
# logging.info(f"File {file_name} downloaded, Time: {elapsed}")
|
730
|
+
logging.info(f"Saving {file_name} ...")
|
731
|
+
logging.info(f"Timing {elapsed} ...")
|
716
732
|
self.count["success"] += 1
|
717
733
|
count_dict["success"] += 1
|
734
|
+
# 输出一条绿色的成功消息
|
735
|
+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
|
736
|
+
# print(f"{timestamp} - INFO - ", end="") # Output log prefix without newline
|
737
|
+
# print("[bold #3dfc40]Success")
|
738
|
+
print(f"{timestamp} - RESULT - [bold #3dfc40]Success")
|
718
739
|
return
|
719
740
|
|
720
741
|
except Exception as e:
|
721
|
-
|
742
|
+
logging.error(f"Failed ({type(e).__name__}): {e}")
|
722
743
|
if retry < self.max_retries:
|
723
744
|
backoff = 2**retry
|
724
|
-
|
745
|
+
logging.warning(f"Retrying in {backoff:.1f}s ...")
|
725
746
|
await asyncio.sleep(backoff)
|
726
747
|
retry += 1
|
727
748
|
else:
|
728
|
-
|
749
|
+
logging.error(f"Giving up on {file_name}")
|
729
750
|
self.count["fail"] += 1
|
730
751
|
count_dict["fail"] += 1
|
752
|
+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
|
753
|
+
# print(f"{timestamp} - ERROR - ", end="")
|
754
|
+
# print("[bold red]Failed")
|
755
|
+
print(f"{timestamp} - RESULT - [bold red]Failure")
|
731
756
|
return
|
732
757
|
|
733
758
|
async def run(self):
|
734
|
-
|
759
|
+
logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
|
735
760
|
for url, save_path in self.tasks:
|
736
761
|
await self._download_one(url, save_path)
|
737
762
|
|
738
|
-
|
739
|
-
|
763
|
+
logging.info("✅ All tasks completed.")
|
764
|
+
logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
|
740
765
|
|
741
766
|
|
742
767
|
def _download_file(target_url, store_path, file_name, cover=False):
|
743
768
|
save_path = Path(store_path) / file_name
|
744
|
-
file_name_split = file_name.split("_")
|
745
|
-
file_name_split = file_name_split[:-1]
|
769
|
+
file_name_split = file_name.split("_")[:-1]
|
746
770
|
same_file = "_".join(file_name_split) + "*nc"
|
747
771
|
|
748
772
|
if match_time is not None:
|
@@ -759,10 +783,12 @@ def _download_file(target_url, store_path, file_name, cover=False):
|
|
759
783
|
print(f"[bold green]{file_name} is correct")
|
760
784
|
return
|
761
785
|
|
762
|
-
if not cover and os.path.exists(save_path):
|
763
|
-
|
764
|
-
|
765
|
-
|
786
|
+
# if not cover and os.path.exists(save_path):
|
787
|
+
# print(f"[bold #FFA54F]{save_path} exists, skipping ...")
|
788
|
+
# count_dict["skip"] += 1
|
789
|
+
# return
|
790
|
+
if cover and os.path.exists(save_path):
|
791
|
+
_clear_existing_file(save_path)
|
766
792
|
|
767
793
|
if same_file not in fsize_dict.keys():
|
768
794
|
check_nc(save_path, delete_if_invalid=True, print_messages=False)
|
@@ -770,6 +796,7 @@ def _download_file(target_url, store_path, file_name, cover=False):
|
|
770
796
|
get_mean_size = _get_mean_size_move(same_file, save_path)
|
771
797
|
|
772
798
|
if _check_existing_file(save_path, get_mean_size):
|
799
|
+
print(f"[bold #FFA54F]{save_path} exists, skipping ...")
|
773
800
|
count_dict["skip"] += 1
|
774
801
|
return
|
775
802
|
|
@@ -778,12 +805,12 @@ def _download_file(target_url, store_path, file_name, cover=False):
|
|
778
805
|
if not use_idm:
|
779
806
|
python_downloader = _HycomDownloader([(target_url, save_path)])
|
780
807
|
asyncio.run(python_downloader.run())
|
781
|
-
time.sleep(3 + random.uniform(0, 10))
|
808
|
+
# time.sleep(3 + random.uniform(0, 10))
|
782
809
|
else:
|
783
810
|
idm_downloader(target_url, store_path, file_name, given_idm_engine)
|
784
811
|
idm_download_list.append(save_path)
|
785
812
|
# print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
|
786
|
-
time.sleep(3 + random.uniform(0, 10))
|
813
|
+
# time.sleep(3 + random.uniform(0, 10))
|
787
814
|
|
788
815
|
|
789
816
|
def _check_hour_is_valid(ymdh_str):
|
@@ -874,7 +901,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
874
901
|
else:
|
875
902
|
if download_time < "2024081012":
|
876
903
|
varlist = [_ for _ in var]
|
877
|
-
for key, value in var_group.items():
|
904
|
+
for key, value in pbar(var_group.items(), description=f"Var Group {download_time} ->", total=len(var_group), color="#d7feb9", next_line=True):
|
878
905
|
current_group = []
|
879
906
|
for v in varlist:
|
880
907
|
if v in value:
|
@@ -896,7 +923,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
896
923
|
file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
|
897
924
|
_download_file(submit_url, store_path, file_name, cover)
|
898
925
|
else:
|
899
|
-
for v in var:
|
926
|
+
for v in pbar(var, description=f"Var {download_time} ->", total=len(var), color="#d7feb9", next_line=True):
|
900
927
|
submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
|
901
928
|
file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
|
902
929
|
if download_time_end is not None:
|
@@ -930,7 +957,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
930
957
|
if num_workers is not None and num_workers > 1:
|
931
958
|
global parallel_counter
|
932
959
|
parallel_counter = 0
|
933
|
-
counter_lock = Lock()
|
960
|
+
counter_lock = Lock() # noqa: F841
|
934
961
|
if ymdh_time_s == ymdh_time_e:
|
935
962
|
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
|
936
963
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
@@ -938,17 +965,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
938
965
|
print("*" * mark_len)
|
939
966
|
print("Downloading a series of files...")
|
940
967
|
time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
|
941
|
-
with Progress() as progress:
|
942
|
-
task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
968
|
+
# with Progress() as progress:
|
969
|
+
# task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
970
|
+
if num_workers is None or num_workers <= 1:
|
971
|
+
for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
|
972
|
+
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
|
973
|
+
# progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
|
974
|
+
else:
|
975
|
+
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
976
|
+
futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
|
977
|
+
""" for feature in as_completed(futures):
|
978
|
+
_done_callback(feature, progress, task, len(time_list), counter_lock) """
|
979
|
+
for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
|
980
|
+
pass
|
952
981
|
else:
|
953
982
|
print("[bold red]Please ensure the time_s is no more than time_e")
|
954
983
|
|
@@ -1032,7 +1061,6 @@ def download(
|
|
1032
1061
|
interval_hours=3,
|
1033
1062
|
)
|
1034
1063
|
"""
|
1035
|
-
from oafuncs.oa_tool import pbar
|
1036
1064
|
|
1037
1065
|
_get_initial_data()
|
1038
1066
|
|
@@ -1093,10 +1121,10 @@ def download(
|
|
1093
1121
|
workers = 1
|
1094
1122
|
given_idm_engine = idm_path
|
1095
1123
|
idm_download_list = []
|
1096
|
-
bar_desc = "Submitting to IDM
|
1124
|
+
bar_desc = "Submitting to IDM ->"
|
1097
1125
|
else:
|
1098
1126
|
use_idm = False
|
1099
|
-
bar_desc = "Downloading
|
1127
|
+
bar_desc = "Downloading ->"
|
1100
1128
|
|
1101
1129
|
global match_time
|
1102
1130
|
match_time = validate_time
|
@@ -1108,7 +1136,7 @@ def download(
|
|
1108
1136
|
workers = 1
|
1109
1137
|
print("*" * mark_len)
|
1110
1138
|
print("[bold red]Only checking the time of existing files.")
|
1111
|
-
bar_desc = "Checking time
|
1139
|
+
bar_desc = "Checking time ->"
|
1112
1140
|
|
1113
1141
|
_download_hourly_func(
|
1114
1142
|
variables,
|
@@ -1134,7 +1162,7 @@ def download(
|
|
1134
1162
|
print("[bold #ecdbfe]*" * mark_len)
|
1135
1163
|
if idm_download_list:
|
1136
1164
|
remain_list = idm_download_list.copy()
|
1137
|
-
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading
|
1165
|
+
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
|
1138
1166
|
success = False
|
1139
1167
|
while not success:
|
1140
1168
|
for f in remain_list:
|
@@ -1190,6 +1218,7 @@ if __name__ == "__main__":
|
|
1190
1218
|
"validate_time": None,
|
1191
1219
|
# "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
|
1192
1220
|
"interval_hours": 24,
|
1221
|
+
"proxy_txt": None,
|
1193
1222
|
}
|
1194
1223
|
|
1195
1224
|
if single_var:
|