oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +132 -130
- oafuncs/oa_down/{hycom_3hourly_20250416.py → hycom_3hourly_proxy.py} +139 -100
- oafuncs/oa_down/read_proxy.py +108 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.6.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.6.dist-info}/RECORD +8 -8
- oafuncs/oa_down/hycom_3hourly_20250407.py +0 -1295
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.6.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.6.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.6.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
|
|
13
13
|
Python Version: 3.12
|
14
14
|
"""
|
15
15
|
|
16
|
+
import asyncio
|
16
17
|
import datetime
|
18
|
+
import logging
|
17
19
|
import os
|
18
20
|
import random
|
19
21
|
import re
|
@@ -22,17 +24,15 @@ import warnings
|
|
22
24
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
23
25
|
from pathlib import Path
|
24
26
|
from threading import Lock
|
27
|
+
from oafuncs.oa_tool import pbar
|
25
28
|
|
29
|
+
import httpx
|
26
30
|
import matplotlib.pyplot as plt
|
27
31
|
import netCDF4 as nc
|
28
32
|
import numpy as np
|
29
33
|
import pandas as pd
|
30
|
-
import requests
|
31
|
-
from requests.adapters import HTTPAdapter
|
32
|
-
import httpx
|
33
34
|
import xarray as xr
|
34
35
|
from rich import print
|
35
|
-
from rich.progress import Progress
|
36
36
|
|
37
37
|
from oafuncs.oa_down.idm import downloader as idm_downloader
|
38
38
|
from oafuncs.oa_down.user_agent import get_ua
|
@@ -40,6 +40,9 @@ from oafuncs.oa_file import file_size
|
|
40
40
|
from oafuncs.oa_nc import check as check_nc
|
41
41
|
from oafuncs.oa_nc import modify as modify_nc
|
42
42
|
|
43
|
+
logging.getLogger("httpx").setLevel(logging.WARNING) # 关闭 httpx 的 INFO 日志,只显示 WARNING 及以上
|
44
|
+
|
45
|
+
|
43
46
|
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
|
44
47
|
|
45
48
|
__all__ = ["draw_time_range", "download"]
|
@@ -416,11 +419,9 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
|
|
416
419
|
trange_list.append(f"{time_s}-{time_e}")
|
417
420
|
have_data = True
|
418
421
|
|
419
|
-
# 输出结果
|
420
|
-
if match_time is None:
|
421
|
-
print(f"[bold red]{time_input_str} is in the following dataset and version:")
|
422
422
|
if have_data:
|
423
423
|
if match_time is None:
|
424
|
+
print(f"[bold red]Time {time_input_str} included in:")
|
424
425
|
dv_num = 1
|
425
426
|
for d, v, trange in zip(d_list, v_list, trange_list):
|
426
427
|
print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
|
@@ -436,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
|
|
436
437
|
print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
|
437
438
|
return False
|
438
439
|
else:
|
439
|
-
print(f"[bold red]{time_input_str}
|
440
|
+
print(f"[bold red]Time {time_input_str} has no data")
|
440
441
|
return False
|
441
442
|
|
442
443
|
|
@@ -511,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
|
|
511
512
|
|
512
513
|
if dataset_name_out is not None and version_name_out is not None:
|
513
514
|
if match_time is None:
|
514
|
-
print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
|
515
|
+
# print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
|
516
|
+
print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
|
515
517
|
|
516
518
|
# 如果没有找到匹配的数据集和版本,会返回 None
|
517
519
|
return dataset_name_out, version_name_out
|
@@ -666,140 +668,138 @@ def _correct_time(nc_file):
|
|
666
668
|
modify_nc(nc_file, "time", None, time_difference)
|
667
669
|
|
668
670
|
|
669
|
-
def
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
with
|
700
|
-
|
701
|
-
|
671
|
+
def setup_logger(level=logging.INFO):
|
672
|
+
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=level)
|
673
|
+
|
674
|
+
|
675
|
+
class _HycomDownloader:
|
676
|
+
def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
|
677
|
+
self.tasks = tasks
|
678
|
+
self.delay_range = delay_range
|
679
|
+
self.timeout_factor = timeout_factor
|
680
|
+
self.max_var_count = max_var_count
|
681
|
+
self.max_retries = max_retries
|
682
|
+
self.count = {"success": 0, "fail": 0}
|
683
|
+
setup_logger()
|
684
|
+
|
685
|
+
def user_agent(self):
|
686
|
+
return get_ua()
|
687
|
+
|
688
|
+
async def _download_one(self, url, save_path):
|
689
|
+
file_name = os.path.basename(save_path)
|
690
|
+
headers = {"User-Agent": self.user_agent()}
|
691
|
+
var_count = min(max(url.count("var="), 1), self.max_var_count)
|
692
|
+
timeout_max = self.timeout_factor * var_count
|
693
|
+
|
694
|
+
retry = 0
|
695
|
+
while retry <= self.max_retries:
|
696
|
+
timeout = random.randint(timeout_max // 2, timeout_max)
|
697
|
+
try:
|
698
|
+
await asyncio.sleep(random.uniform(*self.delay_range))
|
699
|
+
start = datetime.datetime.now()
|
700
|
+
|
701
|
+
async with httpx.AsyncClient(
|
702
|
+
timeout=httpx.Timeout(timeout),
|
703
|
+
limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
|
704
|
+
transport=httpx.AsyncHTTPTransport(retries=2),
|
705
|
+
) as client:
|
706
|
+
logging.info(f"Requesting {file_name} (Attempt {retry + 1}) ...")
|
707
|
+
response = await client.get(url, headers=headers, follow_redirects=True)
|
708
|
+
response.raise_for_status()
|
709
|
+
if not response.content:
|
710
|
+
raise ValueError("Empty response received")
|
711
|
+
|
712
|
+
logging.info(f"Downloading {file_name} ...")
|
713
|
+
with open(save_path, "wb") as f:
|
714
|
+
total = int(response.headers.get("Content-Length", 0))
|
715
|
+
downloaded = 0
|
716
|
+
last_percent = -1
|
717
|
+
|
718
|
+
async for chunk in response.aiter_bytes(32 * 1024):
|
702
719
|
f.write(chunk)
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
client = httpx.Client(limits=limits, transport=transport, timeout=None)
|
732
|
-
|
733
|
-
num_var = max(target_url.count("var="), 1)
|
734
|
-
max_timeout = 5 * 30 * num_var
|
735
|
-
timeout = random.randint(max_timeout // 2, max_timeout)
|
736
|
-
download_start = datetime.datetime.now()
|
720
|
+
downloaded += len(chunk)
|
721
|
+
|
722
|
+
if total > 0:
|
723
|
+
percent = int(downloaded * 100 / total)
|
724
|
+
if percent != last_percent:
|
725
|
+
logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
|
726
|
+
last_percent = percent
|
727
|
+
|
728
|
+
|
729
|
+
elapsed = datetime.datetime.now() - start
|
730
|
+
# logging.info(f"File {file_name} downloaded, Time: {elapsed}")
|
731
|
+
logging.info(f"Saving {file_name}, Time: {elapsed}")
|
732
|
+
self.count["success"] += 1
|
733
|
+
count_dict["success"] += 1
|
734
|
+
return
|
735
|
+
|
736
|
+
except Exception as e:
|
737
|
+
logging.error(f"Failed ({type(e).__name__}): {e}")
|
738
|
+
if retry < self.max_retries:
|
739
|
+
backoff = 2**retry
|
740
|
+
logging.warning(f"Retrying in {backoff:.1f}s ...")
|
741
|
+
await asyncio.sleep(backoff)
|
742
|
+
retry += 1
|
743
|
+
else:
|
744
|
+
logging.error(f"Giving up on {file_name}")
|
745
|
+
self.count["fail"] += 1
|
746
|
+
count_dict["fail"] += 1
|
747
|
+
return
|
737
748
|
|
738
|
-
|
739
|
-
|
749
|
+
async def run(self):
|
750
|
+
logging.info(f"📥 Starting download of {len(self.tasks)} files ...")
|
751
|
+
for url, save_path in self.tasks:
|
752
|
+
await self._download_one(url, save_path)
|
740
753
|
|
741
|
-
|
742
|
-
|
743
|
-
response.raise_for_status()
|
744
|
-
print(f"[bold #96cbd7]Downloading {file_name} ...")
|
745
|
-
with open(fname, "wb") as f:
|
746
|
-
for chunk in response.iter_bytes(32 * 1024):
|
747
|
-
if chunk:
|
748
|
-
f.write(chunk)
|
749
|
-
elapsed = datetime.datetime.now() - download_start
|
750
|
-
print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
|
751
|
-
count_dict["success"] += 1
|
752
|
-
except Exception as e:
|
753
|
-
err_type = type(e).__name__
|
754
|
-
print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
|
755
|
-
print(f"[bold #ffe5c0]Target URL: \n{target_url}")
|
756
|
-
count_dict["fail"] += 1
|
757
|
-
finally:
|
758
|
-
client.close()
|
754
|
+
logging.info("✅ All tasks completed.")
|
755
|
+
logging.info(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
|
759
756
|
|
760
757
|
|
761
758
|
def _download_file(target_url, store_path, file_name, cover=False):
|
762
|
-
|
759
|
+
save_path = Path(store_path) / file_name
|
763
760
|
file_name_split = file_name.split("_")
|
764
761
|
file_name_split = file_name_split[:-1]
|
765
762
|
same_file = "_".join(file_name_split) + "*nc"
|
766
763
|
|
767
764
|
if match_time is not None:
|
768
|
-
if check_nc(
|
769
|
-
if not _check_ftime(
|
765
|
+
if check_nc(save_path, print_messages=False):
|
766
|
+
if not _check_ftime(save_path, if_print=True):
|
770
767
|
if match_time:
|
771
|
-
_correct_time(
|
768
|
+
_correct_time(save_path)
|
772
769
|
count_dict["skip"] += 1
|
773
770
|
else:
|
774
|
-
_clear_existing_file(
|
771
|
+
_clear_existing_file(save_path)
|
775
772
|
count_dict["no_data"] += 1
|
776
773
|
else:
|
777
774
|
count_dict["skip"] += 1
|
778
775
|
print(f"[bold green]{file_name} is correct")
|
779
776
|
return
|
780
777
|
|
781
|
-
if not cover and os.path.exists(
|
782
|
-
print(f"[bold #FFA54F]{
|
778
|
+
if not cover and os.path.exists(save_path):
|
779
|
+
print(f"[bold #FFA54F]{save_path} exists, skipping ...")
|
783
780
|
count_dict["skip"] += 1
|
784
781
|
return
|
785
782
|
|
786
783
|
if same_file not in fsize_dict.keys():
|
787
|
-
check_nc(
|
784
|
+
check_nc(save_path, delete_if_invalid=True, print_messages=False)
|
788
785
|
|
789
|
-
get_mean_size = _get_mean_size_move(same_file,
|
786
|
+
get_mean_size = _get_mean_size_move(same_file, save_path)
|
790
787
|
|
791
|
-
if _check_existing_file(
|
788
|
+
if _check_existing_file(save_path, get_mean_size):
|
792
789
|
count_dict["skip"] += 1
|
793
790
|
return
|
794
791
|
|
795
|
-
_clear_existing_file(
|
792
|
+
_clear_existing_file(save_path)
|
796
793
|
|
797
794
|
if not use_idm:
|
798
|
-
|
795
|
+
python_downloader = _HycomDownloader([(target_url, save_path)])
|
796
|
+
asyncio.run(python_downloader.run())
|
797
|
+
time.sleep(3 + random.uniform(0, 10))
|
799
798
|
else:
|
800
799
|
idm_downloader(target_url, store_path, file_name, given_idm_engine)
|
801
|
-
idm_download_list.append(
|
802
|
-
print(f"[bold #3dfc40]File [bold #dfff73]{
|
800
|
+
idm_download_list.append(save_path)
|
801
|
+
# print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
|
802
|
+
time.sleep(3 + random.uniform(0, 10))
|
803
803
|
|
804
804
|
|
805
805
|
def _check_hour_is_valid(ymdh_str):
|
@@ -890,7 +890,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
890
890
|
else:
|
891
891
|
if download_time < "2024081012":
|
892
892
|
varlist = [_ for _ in var]
|
893
|
-
for key, value in var_group.items():
|
893
|
+
for key, value in pbar(var_group.items(), description=f"Var_group {download_time} ->", total=len(var_group), cmap="bwr", next_line=True):
|
894
894
|
current_group = []
|
895
895
|
for v in varlist:
|
896
896
|
if v in value:
|
@@ -912,7 +912,7 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
912
912
|
file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
|
913
913
|
_download_file(submit_url, store_path, file_name, cover)
|
914
914
|
else:
|
915
|
-
for v in var:
|
915
|
+
for v in pbar(var,description=f'Var {download_time} ->', total=len(var), cmap='bwr', next_line=True):
|
916
916
|
submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
|
917
917
|
file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
|
918
918
|
if download_time_end is not None:
|
@@ -946,7 +946,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
946
946
|
if num_workers is not None and num_workers > 1:
|
947
947
|
global parallel_counter
|
948
948
|
parallel_counter = 0
|
949
|
-
counter_lock = Lock()
|
949
|
+
counter_lock = Lock() # noqa: F841
|
950
950
|
if ymdh_time_s == ymdh_time_e:
|
951
951
|
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
|
952
952
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
@@ -954,17 +954,19 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
954
954
|
print("*" * mark_len)
|
955
955
|
print("Downloading a series of files...")
|
956
956
|
time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
|
957
|
-
with Progress() as progress:
|
958
|
-
task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
957
|
+
# with Progress() as progress:
|
958
|
+
# task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
|
959
|
+
if num_workers is None or num_workers <= 1:
|
960
|
+
for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), cmap='colorful_1', next_line=True):
|
961
|
+
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
|
962
|
+
# progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
|
963
|
+
else:
|
964
|
+
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
965
|
+
futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
|
966
|
+
""" for feature in as_completed(futures):
|
967
|
+
_done_callback(feature, progress, task, len(time_list), counter_lock) """
|
968
|
+
for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),cmap='colorful_1',next_line=True):
|
969
|
+
pass
|
968
970
|
else:
|
969
971
|
print("[bold red]Please ensure the time_s is no more than time_e")
|
970
972
|
|
@@ -1048,7 +1050,6 @@ def download(
|
|
1048
1050
|
interval_hours=3,
|
1049
1051
|
)
|
1050
1052
|
"""
|
1051
|
-
from oafuncs.oa_tool import pbar
|
1052
1053
|
|
1053
1054
|
_get_initial_data()
|
1054
1055
|
|
@@ -1109,10 +1110,10 @@ def download(
|
|
1109
1110
|
workers = 1
|
1110
1111
|
given_idm_engine = idm_path
|
1111
1112
|
idm_download_list = []
|
1112
|
-
bar_desc = "Submitting to IDM
|
1113
|
+
bar_desc = "Submitting to IDM ->"
|
1113
1114
|
else:
|
1114
1115
|
use_idm = False
|
1115
|
-
bar_desc = "Downloading
|
1116
|
+
bar_desc = "Downloading ->"
|
1116
1117
|
|
1117
1118
|
global match_time
|
1118
1119
|
match_time = validate_time
|
@@ -1124,7 +1125,7 @@ def download(
|
|
1124
1125
|
workers = 1
|
1125
1126
|
print("*" * mark_len)
|
1126
1127
|
print("[bold red]Only checking the time of existing files.")
|
1127
|
-
bar_desc = "Checking time
|
1128
|
+
bar_desc = "Checking time ->"
|
1128
1129
|
|
1129
1130
|
_download_hourly_func(
|
1130
1131
|
variables,
|
@@ -1150,7 +1151,7 @@ def download(
|
|
1150
1151
|
print("[bold #ecdbfe]*" * mark_len)
|
1151
1152
|
if idm_download_list:
|
1152
1153
|
remain_list = idm_download_list.copy()
|
1153
|
-
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading
|
1154
|
+
for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading ->"):
|
1154
1155
|
success = False
|
1155
1156
|
while not success:
|
1156
1157
|
for f in remain_list:
|
@@ -1193,7 +1194,7 @@ if __name__ == "__main__":
|
|
1193
1194
|
options = {
|
1194
1195
|
"variables": var_list,
|
1195
1196
|
"start_time": "2018010100",
|
1196
|
-
"end_time": "
|
1197
|
+
"end_time": "2019063000",
|
1197
1198
|
"output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
|
1198
1199
|
"lon_min": 105,
|
1199
1200
|
"lon_max": 135,
|
@@ -1206,6 +1207,7 @@ if __name__ == "__main__":
|
|
1206
1207
|
"validate_time": None,
|
1207
1208
|
# "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
|
1208
1209
|
"interval_hours": 24,
|
1210
|
+
"proxy_txt": None,
|
1209
1211
|
}
|
1210
1212
|
|
1211
1213
|
if single_var:
|