oafuncs 0.0.98.3-py3-none-any.whl → 0.0.98.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/parallel.py +158 -509
- oafuncs/_script/parallel_test.py +14 -0
- oafuncs/oa_down/User_Agent-list.txt +1 -1611
- oafuncs/oa_down/hycom_3hourly.py +109 -75
- oafuncs/oa_down/hycom_3hourly_20250416.py +1191 -0
- oafuncs/oa_down/test_ua.py +27 -138
- oafuncs/oa_tool.py +118 -30
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/METADATA +2 -1
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/RECORD +12 -11
- oafuncs/_script/parallel_example_usage.py +0 -83
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -28,6 +28,8 @@ import netCDF4 as nc
 import numpy as np
 import pandas as pd
 import requests
+from requests.adapters import HTTPAdapter
+import httpx
 import xarray as xr
 from rich import print
 from rich.progress import Progress
@@ -419,8 +421,10 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
     print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
         if match_time is None:
+            dv_num = 1
             for d, v, trange in zip(d_list, v_list, trange_list):
-                print(f"[bold blue]{d} {v} {trange}")
+                print(f"{dv_num} -> [bold blue]{d} - {v} : {trange}")
+                dv_num += 1
             if is_single_time:
                 return True
             else:
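With the numbering added here, each matching dataset/version pair is printed on its own indexed line, e.g. "1 -> GLBv0.08 - 53.X : 1994010112-2015123109" (illustrative identifiers and time range, not values taken from this diff).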
@@ -611,7 +615,7 @@ def _get_mean_size_move(same_file, current_file):
         size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]

         if abs(size_difference_ratio) > tolerance_ratio:
-            if check_nc(current_file,print_messages=False):
+            if check_nc(current_file, print_messages=False):
                 fsize_dict[same_file]["size_list"] = [current_file_size]
                 fsize_dict[same_file]["mean_size"] = current_file_size
             else:
@@ -662,6 +666,98 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)


+def _download_within_python_requests(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Session configuration
+    session = requests.Session()
+    adapter = HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+
+    # Timeout and retry config
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    order_terms = ["1st", "2nd", "3rd"]
+    download_start = datetime.datetime.now()
+    max_attempts = 5
+
+    for attempt in range(max_attempts):
+        if attempt > 0:
+            retry_desc = order_terms[attempt - 1] if attempt - 1 < len(order_terms) else f"{attempt}th"
+            print(f"[bold #ffe5c0]Retrying the {retry_desc} time...")
+            time.sleep(2 + random.uniform(0, 2))
+
+        timeout = random.randint(max_timeout // 5, max_timeout)
+        print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+
+        try:
+            headers = {"User-Agent": get_ua()}
+            with session.get(target_url, headers=headers, stream=True, timeout=timeout) as response:
+                response.raise_for_status()
+                print(f"[bold #96cbd7]Downloading {file_name} ...")
+                with open(fname, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=32 * 1024):
+                        if chunk:
+                            f.write(chunk)
+            elapsed = datetime.datetime.now() - download_start
+            print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+            count_dict["success"] += 1
+            return
+        except Exception as e:
+            if hasattr(e, "response") and getattr(e.response, "status_code", None):
+                err_msg = f"HTTP {e.response.status_code} Error"
+            elif isinstance(e, requests.exceptions.Timeout):
+                err_msg = "Timeout Error"
+            elif isinstance(e, requests.exceptions.ConnectionError):
+                err_msg = "Connection Error"
+            elif isinstance(e, requests.exceptions.RequestException):
+                err_msg = "Request Error"
+            else:
+                err_msg = "Unexpected Error"
+            print(f"[bold red]Download failed for {file_name}: {err_msg}. Details: {e}")
+
+    print(f"[bold #ffe5c0]Download failed after {max_attempts} attempts. Target URL: \n{target_url}")
+    count_dict["fail"] += 1
+
+
+
+def _download_within_python(file_name, target_url, fname):
+    print(f"[bold #f0f6d0]Requesting {file_name} ...")
+
+    # Create a synchronous httpx client
+    limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
+    transport = httpx.HTTPTransport(retries=3)
+    client = httpx.Client(limits=limits, transport=transport, timeout=None)
+
+    num_var = max(target_url.count("var="), 1)
+    max_timeout = 5 * 30 * num_var
+    timeout = random.randint(max_timeout // 2, max_timeout)
+    download_start = datetime.datetime.now()
+
+    print(f"[bold #ffe5c0]Timeout: {timeout} seconds")
+    headers = {"User-Agent": get_ua()}
+
+    try:
+        response = client.get(target_url, headers=headers, timeout=timeout, follow_redirects=True)
+        response.raise_for_status()
+        print(f"[bold #96cbd7]Downloading {file_name} ...")
+        with open(fname, "wb") as f:
+            for chunk in response.iter_bytes(32 * 1024):
+                if chunk:
+                    f.write(chunk)
+        elapsed = datetime.datetime.now() - download_start
+        print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{elapsed}")
+        count_dict["success"] += 1
+    except Exception as e:
+        err_type = type(e).__name__
+        print(f"[bold red]Download failed for {file_name} ...\n{err_type}. Details: {e}")
+        print(f"[bold #ffe5c0]Target URL: \n{target_url}")
+        count_dict["fail"] += 1
+    finally:
+        client.close()
+
+
 def _download_file(target_url, store_path, file_name, cover=False):
     fname = Path(store_path) / file_name
     file_name_split = file_name.split("_")
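Both helpers added above follow the same pattern: build a pooled HTTP client, derive a timeout ceiling from the number of "var=" parameters in the request URL, write the response body to disk in 32 KiB chunks, and update the module-level count_dict. A minimal standalone sketch of the httpx variant is shown below; the URL, output file name, and User-Agent string are hypothetical placeholders, not values taken from the package.

# Minimal sketch of the httpx download pattern used by _download_within_python.
# target_url, out_file, and the User-Agent are hypothetical placeholders.
import random

import httpx

target_url = "https://example.com/thredds/ncss/dataset?var=water_u&var=water_v"  # placeholder
out_file = "subset.nc"  # placeholder

limits = httpx.Limits(max_connections=10, max_keepalive_connections=10)
transport = httpx.HTTPTransport(retries=3)  # retries apply to connection errors

num_var = max(target_url.count("var="), 1)               # 2 variables requested here
max_timeout = 5 * 30 * num_var                           # 300 seconds
timeout = random.randint(max_timeout // 2, max_timeout)  # randomized per request

with httpx.Client(limits=limits, transport=transport, timeout=timeout) as client:
    response = client.get(target_url, headers={"User-Agent": "Mozilla/5.0"}, follow_redirects=True)
    response.raise_for_status()
    with open(out_file, "wb") as f:
        for chunk in response.iter_bytes(32 * 1024):
            if chunk:
                f.write(chunk)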
@@ -699,69 +795,7 @@ def _download_file(target_url, store_path, file_name, cover=False):
         _clear_existing_file(fname)

     if not use_idm:
-
-        s = requests.Session()
-        download_success = False
-        request_times = 0
-
-        def calculate_wait_time(time_str, target_url):
-            time_pattern = r"\d{10}"
-            times_in_str = re.findall(time_pattern, time_str)
-            num_times_str = len(times_in_str)
-
-            if num_times_str > 1:
-                delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-                delta_t = delta_t.total_seconds() / 3600
-                delta_t = delta_t / 3 + 1
-            else:
-                delta_t = 1
-            num_var = int(target_url.count("var="))
-            if num_var <= 0:
-                num_var = 1
-            return int(delta_t * 5 * 60 * num_var)
-
-        max_timeout = calculate_wait_time(file_name, target_url)
-        print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-
-        download_time_s = datetime.datetime.now()
-        order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-        while not download_success:
-            if request_times >= 10:
-                print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-                count_dict["fail"] += 1
-                break
-            if request_times > 0:
-                print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
-            try:
-                headers = {"User-Agent": get_ua()}
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
-                response.raise_for_status()
-                with open(fname, "wb") as f:
-                    print(f"[bold #96cbd7]Downloading {file_name} ...")
-                    for chunk in response.iter_content(chunk_size=1024):
-                        if chunk:
-                            f.write(chunk)
-
-                f.close()
-
-                if os.path.exists(fname):
-                    download_success = True
-                    download_time_e = datetime.datetime.now()
-                    download_delta = download_time_e - download_time_s
-                    print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                    count_dict["success"] += 1
-
-            except requests.exceptions.HTTPError as errh:
-                print(f"Http Error: {errh}")
-            except requests.exceptions.ConnectionError as errc:
-                print(f"Error Connecting: {errc}")
-            except requests.exceptions.Timeout as errt:
-                print(f"Timeout Error: {errt}")
-            except requests.exceptions.RequestException as err:
-                print(f"OOps: Something Else: {err}")
-
-            time.sleep(3)
-            request_times += 1
+        _download_within_python(file_name, target_url, fname)
     else:
         idm_downloader(target_url, store_path, file_name, given_idm_engine)
         idm_download_list.append(fname)
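The removed calculate_wait_time helper derived the timeout ceiling from the two 10-digit timestamps embedded in the file name, while the new helpers above use only the variable count. A small worked comparison under hypothetical inputs:

# Hypothetical comparison of the old and new timeout ceilings.
import datetime
import re

file_name = "HYCOM_water_u_2021010100-2021010200.nc"  # hypothetical, 24 h span
target_url = "https://example.com/ncss?var=water_u"   # hypothetical, one "var="

# Old logic (removed): the ceiling grows with the time span in the file name.
t0, t1 = re.findall(r"\d{10}", file_name)
hours = (datetime.datetime.strptime(t1, "%Y%m%d%H") - datetime.datetime.strptime(t0, "%Y%m%d%H")).total_seconds() / 3600
delta_t = hours / 3 + 1                              # 24 / 3 + 1 = 9
num_var = max(target_url.count("var="), 1)           # 1
old_max_timeout = int(delta_t * 5 * 60 * num_var)    # 2700 seconds

# New logic: a flat 150 seconds per requested variable.
new_max_timeout = 5 * 30 * num_var                   # 150 seconds

print(old_max_timeout, new_max_timeout)              # 2700 150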
@@ -992,7 +1026,7 @@ def download(

     Returns:
         None
-
+
     Example:
         >>> download(
             variables='u',
@@ -1088,7 +1122,7 @@ def download(

     if validate_time is not None:
         workers = 1
-        print(
+        print("*" * mark_len)
         print("[bold red]Only checking the time of existing files.")
         bar_desc = "Checking time ..."

@@ -1158,20 +1192,20 @@ if __name__ == "__main__":

    options = {
        "variables": var_list,
-        "start_time": "
-        "end_time": "
-        "output_dir": r"
+        "start_time": "2018010100",
+        "end_time": "2021010100",
+        "output_dir": r"G:\Data\HYCOM\china_sea\hourly_24",
        "lon_min": 105,
-        "lon_max": 
-        "lat_min": 
+        "lon_max": 135,
+        "lat_min": 10,
        "lat_max": 45,
        "workers": 1,
        "overwrite": False,
        "depth": None,
        "level": None,
-        "validate_time": 
-        "idm_path": r
-        "interval_hours": 
+        "validate_time": None,
+        # "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
+        "interval_hours": 24,
    }

    if single_var: