oafuncs 0.0.98.4__py3-none-any.whl → 0.0.98.5__py3-none-any.whl
- oafuncs/oa_down/hycom_3hourly.py +93 -109
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/RECORD +6 -8
- oafuncs/oa_down/hycom_3hourly_20250407.py +0 -1295
- oafuncs/oa_down/hycom_3hourly_20250416.py +0 -1191
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
```diff
@@ -13,7 +13,9 @@ SystemInfo: Windows 11
 Python Version: 3.12
 """
 
+import asyncio
 import datetime
+import logging
 import os
 import random
 import re
@@ -23,13 +25,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
 
+import httpx
 import matplotlib.pyplot as plt
 import netCDF4 as nc
 import numpy as np
 import pandas as pd
-import requests
-from requests.adapters import HTTPAdapter
-import httpx
 import xarray as xr
 from rich import print
 from rich.progress import Progress
```
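In these import changes, 0.0.98.5 drops the synchronous `requests`/`HTTPAdapter` stack and standardizes on `httpx`, whose async client drives the new downloader further down in this diff. A minimal sketch of that pattern; `fetch()` and the URL are illustrative only, not names from oafuncs:

```python
# Minimal sketch of the requests -> httpx migration reflected in the imports above.
# fetch() and the example URL are placeholders, not part of the package.
import asyncio

import httpx


async def fetch(url: str) -> bytes:
    # Async GET with redirects followed, the same AsyncClient pattern the new
    # downloader class in this diff builds on.
    async with httpx.AsyncClient(timeout=httpx.Timeout(60), follow_redirects=True) as client:
        response = await client.get(url)
        response.raise_for_status()
        return response.content


if __name__ == "__main__":
    print(len(asyncio.run(fetch("https://example.com/"))))
```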
```diff
@@ -40,6 +40,9 @@ from oafuncs.oa_file import file_size
 from oafuncs.oa_nc import check as check_nc
 from oafuncs.oa_nc import modify as modify_nc
 
+logging.getLogger("httpx").setLevel(logging.WARNING)  # Silence httpx INFO logging; only show WARNING and above
+
+
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 
 __all__ = ["draw_time_range", "download"]
```
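The added `setLevel` call keeps httpx's per-request INFO messages out of the rich console output. A quick standalone illustration of the effect, assuming a placeholder URL:

```python
# Standalone illustration of the logging tweak added above: httpx logs each request
# at INFO level on the "httpx" logger, so raising that logger to WARNING hides the
# "HTTP Request: ..." lines while leaving other INFO logging intact.
import logging

import httpx

logging.basicConfig(level=logging.INFO)
logging.getLogger("httpx").setLevel(logging.WARNING)  # same call the module now makes at import time

httpx.get("https://example.com/")  # completes quietly: no per-request INFO line
```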
```diff
@@ -416,11 +419,9 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
             trange_list.append(f"{time_s}-{time_e}")
             have_data = True
 
-    # Print the result
-    if match_time is None:
-        print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
         if match_time is None:
+            print(f"[bold red]Time {time_input_str} included in:")
             dv_num = 1
             for d, v, trange in zip(d_list, v_list, trange_list):
                 print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
@@ -436,7 +437,7 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
             print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
             return False
     else:
-        print(f"[bold red]{time_input_str}
+        print(f"[bold red]Time {time_input_str} has no data")
         return False
 
 
@@ -511,7 +512,8 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
 
     if dataset_name_out is not None and version_name_out is not None:
         if match_time is None:
-            print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
+            # print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
+            print(f"[bold purple]Chosen dataset: {dataset_name_out} - {version_name_out}")
 
     # If no matching dataset and version is found, None is returned
     return dataset_name_out, version_name_out
@@ -666,140 +668,122 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+class _HycomDownloader:
+    def __init__(self, tasks, delay_range=(3, 6), timeout_factor=120, max_var_count=5, max_retries=3):
+        """
+        :param tasks: List of (url, save_path)
+        """
+        self.tasks = tasks
+        self.delay_range = delay_range
+        self.timeout_factor = timeout_factor
+        self.max_var_count = max_var_count
+        self.max_retries = max_retries
+        self.count = {"success": 0, "fail": 0}
+
+    def user_agent(self):
+        return get_ua()
+
+    async def _download_one(self, url, save_path):
+        file_name = os.path.basename(save_path)
+        headers = {"User-Agent": self.user_agent()}
+        var_count = min(max(url.count("var="), 1), self.max_var_count)
+        timeout_max = self.timeout_factor * var_count
+        timeout = random.randint(timeout_max // 2, timeout_max)
+
+        retry = 0
+        while retry <= self.max_retries:
+            try:
+                await asyncio.sleep(random.uniform(*self.delay_range))
+                start = datetime.datetime.now()
+                async with httpx.AsyncClient(
+                    timeout=httpx.Timeout(timeout),
+                    limits=httpx.Limits(max_connections=2, max_keepalive_connections=2),
+                    transport=httpx.AsyncHTTPTransport(retries=2),
+                ) as client:
+                    print(f"[bold #f0f6d0]Requesting {file_name} (Attempt {retry + 1}) ...")
+                    response = await client.get(url, headers=headers, follow_redirects=True)
+                    response.raise_for_status()
+                    if not response.content:
+                        raise ValueError("Empty response received")
+
+                    print(f"[bold #96cbd7]Downloading {file_name} ...")
+                    with open(save_path, "wb") as f:
+                        async for chunk in response.aiter_bytes(32 * 1024):
                             f.write(chunk)
-                elapsed = datetime.datetime.now() - download_start
-                print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
-                count_dict["success"] += 1
-                return
-            except Exception as e:
-                if hasattr(e, "response") and getattr(e.response, "status_code", None):
-                    err_msg = f"HTTP {e.response.status_code} Error"
-                elif isinstance(e, requests.exceptions.Timeout):
-                    err_msg = "Timeout Error"
-                elif isinstance(e, requests.exceptions.ConnectionError):
-                    err_msg = "Connection Error"
-                elif isinstance(e, requests.exceptions.RequestException):
-                    err_msg = "Request Error"
-                else:
-                    err_msg = "Unexpected Error"
-                print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
-
-        print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
-        count_dict["fail"] += 1
-
 
+                elapsed = datetime.datetime.now() - start
+                print(f"[#3dfc40]File [bold #dfff73]{file_name} [#3dfc40]downloaded, Time: [#39cbdd]{elapsed}")
+                self.count["success"] += 1
+                count_dict["success"] += 1
+                return
+
+            except Exception as e:
+                print(f"[bold red]Failed ({type(e).__name__}): {e}")
+                if retry < self.max_retries:
+                    backoff = 2**retry
+                    print(f"[yellow]Retrying in {backoff:.1f}s ...")
+                    await asyncio.sleep(backoff)
+                    retry += 1
+                else:
+                    print(f"[red]Giving up on {file_name}")
+                    self.count["fail"] += 1
+                    count_dict["fail"] += 1
+                    return
 
-    def
-
-
-
-    limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
-    transport = httpx.HTTPTransport(retries=3)
-    client = httpx.Client(limits=limits, transport=transport, timeout=None)
-
-    num_var = max(target_url.count("var="), 1)
-    max_timeout = 5 * 30 * num_var
-    timeout = random.randint(max_timeout // 2, max_timeout)
-    download_start = datetime.datetime.now()
-
-    print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
-    headers = {"User-Agent": get_ua()}
+    async def run(self):
+        print(f"📥 Starting download of {len(self.tasks)} files ...")
+        for url, save_path in self.tasks:
+            await self._download_one(url, save_path)
 
-
-
-        response.raise_for_status()
-        print(f"[bold #96cbd7]Downloading {file_name} ...")
-        with open(fname, "wb") as f:
-            for chunk in response.iter_bytes(32 * 1024):
-                if chunk:
-                    f.write(chunk)
-        elapsed = datetime.datetime.now() - download_start
-        print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
-        count_dict["success"] += 1
-    except Exception as e:
-        err_type = type(e).__name__
-        print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
-        print(f"[bold #ffe5c0]Target URL: \n{target_url}")
-        count_dict["fail"] += 1
-    finally:
-        client.close()
+        print("\n✅ All tasks completed.")
+        print(f"✔️ Success: {self.count['success']} | ❌ Fail: {self.count['fail']}")
 
 
 def _download_file(target_url, store_path, file_name, cover=False):
-
+    save_path = Path(store_path) / file_name
     file_name_split = file_name.split("_")
     file_name_split = file_name_split[:-1]
     same_file = "_".join(file_name_split) + "*nc"
 
     if match_time is not None:
-        if check_nc(
-            if not _check_ftime(
+        if check_nc(save_path, print_messages=False):
+            if not _check_ftime(save_path, if_print=True):
                 if match_time:
-                    _correct_time(
+                    _correct_time(save_path)
                     count_dict["skip"] += 1
                 else:
-                    _clear_existing_file(
+                    _clear_existing_file(save_path)
                     count_dict["no_data"] += 1
             else:
                 count_dict["skip"] += 1
                 print(f"[bold green]{file_name} is correct")
         return
 
-    if not cover and os.path.exists(
-        print(f"[bold #FFA54F]{
+    if not cover and os.path.exists(save_path):
+        print(f"[bold #FFA54F]{save_path} exists, skipping ...")
        count_dict["skip"] += 1
        return
 
    if same_file not in fsize_dict.keys():
-        check_nc(
+        check_nc(save_path, delete_if_invalid=True, print_messages=False)
 
-        get_mean_size = _get_mean_size_move(same_file,
+        get_mean_size = _get_mean_size_move(same_file, save_path)
 
-    if _check_existing_file(
+    if _check_existing_file(save_path, get_mean_size):
        count_dict["skip"] += 1
        return
 
-    _clear_existing_file(
+    _clear_existing_file(save_path)
 
    if not use_idm:
-
+        python_downloader = _HycomDownloader([(target_url, save_path)])
+        asyncio.run(python_downloader.run())
+        time.sleep(3 + random.uniform(0, 10))
    else:
        idm_downloader(target_url, store_path, file_name, given_idm_engine)
-        idm_download_list.append(
-        print(f"[bold #3dfc40]File [bold #dfff73]{
+        idm_download_list.append(save_path)
+        # print(f"[bold #3dfc40]File [bold #dfff73]{save_path} [#3dfc40]has been submit to IDM for downloading")
+        time.sleep(3 + random.uniform(0, 10))
 
 
 def _check_hour_is_valid(ymdh_str):
```
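Taken together, this hunk replaces the old synchronous `requests`/`httpx.Client` download path with the `_HycomDownloader` class: a jittered delay before each request, a timeout budget scaled by the number of `var=` parameters in the URL, and exponential backoff (`2**retry` seconds) for up to `max_retries` attempts before the file is counted as failed. A self-contained sketch of that retry pattern; `download_with_retry()` and the URL are illustrative, not part of the package's API:

```python
# Self-contained sketch of the retry/backoff pattern the new _HycomDownloader uses.
# download_with_retry() and the example URL are placeholders, not oafuncs names.
import asyncio
import random

import httpx


async def download_with_retry(url: str, save_path: str, max_retries: int = 3, timeout_factor: int = 120) -> bool:
    var_count = min(max(url.count("var="), 1), 5)  # more requested variables -> larger timeout budget
    timeout = random.randint(timeout_factor * var_count // 2, timeout_factor * var_count)

    for attempt in range(max_retries + 1):
        try:
            await asyncio.sleep(random.uniform(3, 6))  # polite jitter before every attempt
            async with httpx.AsyncClient(timeout=httpx.Timeout(timeout), follow_redirects=True) as client:
                response = await client.get(url)
                response.raise_for_status()
                if not response.content:
                    raise ValueError("Empty response received")
                with open(save_path, "wb") as f:
                    f.write(response.content)
            return True
        except Exception as exc:
            if attempt < max_retries:
                await asyncio.sleep(2**attempt)  # exponential backoff: 1 s, 2 s, 4 s, ...
            else:
                print(f"Giving up on {url}: {exc}")
    return False


if __name__ == "__main__":
    asyncio.run(download_with_retry("https://example.com/data.nc", "data.nc"))
```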
```diff
@@ -1193,7 +1177,7 @@ if __name__ == "__main__":
     options = {
         "variables": var_list,
         "start_time": "2018010100",
-        "end_time": "
+        "end_time": "2019063000",
         "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
         "lon_min": 105,
         "lon_max": 135,
```
{oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/RECORD
CHANGED
```diff
@@ -21,9 +21,7 @@ oafuncs/_script/plot_dataset.py,sha256=zkSEnO_-biyagorwWXPoihts_cwuvripzEt-l9bHJ
 oafuncs/_script/replace_file_content.py,sha256=eCFZjnZcwyRvy6b4mmIfBna-kylSZTyJRfgXd6DdCjk,5982
 oafuncs/oa_down/User_Agent-list.txt,sha256=pHaMlElMvZ8TG4vf4BqkZYKqe0JIGkr4kCN0lM1Y9FQ,514295
 oafuncs/oa_down/__init__.py,sha256=kRX5eTUCbAiz3zTaQM1501paOYS_3fizDN4Pa0mtNUA,585
-oafuncs/oa_down/hycom_3hourly.py,sha256=
-oafuncs/oa_down/hycom_3hourly_20250407.py,sha256=DQd_NmQgmSqu7jsrfpDB7k23mkUEy9kyWs-dLUg7GDw,64472
-oafuncs/oa_down/hycom_3hourly_20250416.py,sha256=X_fcV_xeJtYD-PB3GRdFWNHMPOVUYgh17MgWOtrdIbc,53493
+oafuncs/oa_down/hycom_3hourly.py,sha256=ERH24OOT62IiOSk5IiSnqad_c13zL7-uD5lAMdbJDiQ,53576
 oafuncs/oa_down/idm.py,sha256=4z5IvgfTyIKEI1kOtqXZwN7Jnfjwp6qDBOIoVyOLp0I,1823
 oafuncs/oa_down/literature.py,sha256=2bF9gSKQbzcci9LcKE81j8JEjIJwON7jbwQB3gDDA3E,11331
 oafuncs/oa_down/test_ua.py,sha256=l8MCD6yU2W75zRPTDKUZTJhCWNF9lfk-MiSFqAqKH1M,1398
@@ -37,8 +35,8 @@ oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,5
 oafuncs/oa_sign/meteorological.py,sha256=8091SHo2L8kl4dCFmmSH5NGVHDku5i5lSiLEG5DLnOQ,6489
 oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
 oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
-oafuncs-0.0.98.
-oafuncs-0.0.98.
-oafuncs-0.0.98.
-oafuncs-0.0.98.
-oafuncs-0.0.98.
+oafuncs-0.0.98.5.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
+oafuncs-0.0.98.5.dist-info/METADATA,sha256=YwSyTAse2NhMIfM_22BNOJCDLh8Mgy3X_G5zvtCKyPc,4242
+oafuncs-0.0.98.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+oafuncs-0.0.98.5.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
+oafuncs-0.0.98.5.dist-info/RECORD,,
```