PyPI - oafuncs - Versions diffs - 0.0.91__tar.gz → 0.0.92__tar.gz - Mend

oafuncs 0.0.91 (tar.gz) → 0.0.92 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

{oafuncs-0.0.91/oafuncs.egg-info → oafuncs-0.0.92}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: oafuncs
-Version: 0.0.91
+Version: 0.0.92
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
@@ -30,6 +30,16 @@ Requires-Dist: matplotlib
 Requires-Dist: Cartopy
 Requires-Dist: netCDF4
 Requires-Dist: xlrd
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 # oafuncs

{oafuncs-0.0.91 → oafuncs-0.0.92}/oafuncs/oa_down/hycom_3hourly.py RENAMED Viewed

@@ -26,13 +26,17 @@ from threading import Lock
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import xarray as xr
 import requests
 from rich import print
 from rich.progress import Progress
+import netCDF4 as nc
 from oafuncs.oa_down.user_agent import get_ua
 from oafuncs.oa_file import file_size, mean_size
 from oafuncs.oa_nc import check as check_nc
+from oafuncs.oa_nc import modify as modify_nc
+from oafuncs.oa_down.idm import downloader as idm_downloader
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
@@ -571,12 +575,18 @@ def _check_existing_file(file_full_path, avg_size):
         if abs(delta_size_ratio) > 0.025:
             if check_nc(file_full_path):
                 # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
-                return True
+                if not _check_ftime(file_full_path,if_print=True):
+                    return False
+                else:
+                    return True
             else:
                 print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
                 return False
         else:
-            return True
+            if not _check_ftime(file_full_path,if_print=True):
+                return False
+            else:
+                return True
     else:
         return False
@@ -633,6 +643,62 @@ def _get_mean_size_move(same_file, current_file):
     return fsize_dict[same_file]["mean_size"]
+def _check_ftime(nc_file, tname="time", if_print=False):
+    if not os.path.exists(nc_file):
+        return False
+    nc_file = str(nc_file)
+    try:
+        ds = xr.open_dataset(nc_file)
+        real_time = ds[tname].values[0]
+        ds.close()
+        real_time = str(real_time)[:13]
+        real_time = real_time.replace("-", "").replace("T", "")
+        # -----------------------------------------------------
+        f_time = re.findall(r"\d{10}", nc_file)[0]
+        if real_time == f_time:
+            return True
+        else:
+            if if_print:
+                print(f"[bold #daff5c]File time error, file/real time: [bold blue]{f_time}/{real_time}")
+            return False
+    except Exception as e:
+        if if_print:
+            print(f"[bold #daff5c]File time check failed, {nc_file}: {e}")
+        return False
+def _correct_time(nc_file):
+    # 打开NC文件
+    dataset = nc.Dataset(nc_file)
+    # 读取时间单位
+    time_units = dataset.variables["time"].units
+    # 关闭文件
+    dataset.close()
+    # 解析时间单位字符串以获取时间原点
+    origin_str = time_units.split("since")[1].strip()
+    origin_datetime = datetime.datetime.strptime(origin_str, "%Y-%m-%d %H:%M:%S")
+    # 从文件名中提取日期字符串
+    given_date_str = re.findall(r"\d{10}", str(nc_file))[0]
+    # 将提取的日期字符串转换为datetime对象
+    given_datetime = datetime.datetime.strptime(given_date_str, "%Y%m%d%H")
+    # 计算给定日期与时间原点之间的差值（以小时为单位）
+    time_difference = (given_datetime - origin_datetime).total_seconds()
+    if "hours" in time_units:
+        time_difference /= 3600
+    elif "days" in time_units:
+        time_difference /= 3600 * 24
+    # 修改NC文件中的时间变量
+    modify_nc(nc_file, "time", None, time_difference)
 def _download_file(target_url, store_path, file_name, check=False):
     # Check if the file exists
     fname = Path(store_path) / file_name
@@ -641,7 +707,7 @@ def _download_file(target_url, store_path, file_name, check=False):
     # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
     same_file = "_".join(file_name_split) + "*nc"
-    if check:
+    if check:
         if same_file not in fsize_dict.keys(): # 对第一个文件单独进行检查，因为没有大小可以对比
             check_nc(fname,if_delete=True)
@@ -653,95 +719,103 @@ def _download_file(target_url, store_path, file_name, check=False):
             return
     _clear_existing_file(fname)
-    # -----------------------------------------------
-    print(f"[bold #f0f6d0]Requesting {file_name}...")
-    # 创建会话
-    s = requests.Session()
-    download_success = False
-    request_times = 0
+    if not use_idm:
+        # -----------------------------------------------
+        print(f"[bold #f0f6d0]Requesting {file_name} ...")
+        # 创建会话
+        s = requests.Session()
+        download_success = False
+        request_times = 0
-    def calculate_wait_time(time_str, target_url):
-        # 定义正则表达式，匹配YYYYMMDDHH格式的时间
-        time_pattern = r"\d{10}"
+        def calculate_wait_time(time_str, target_url):
+            # 定义正则表达式，匹配YYYYMMDDHH格式的时间
+            time_pattern = r"\d{10}"
-        # 定义两个字符串
-        # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
-        # str2 = 'HYCOM_water_u_2018010100.nc'
+            # 定义两个字符串
+            # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
+            # str2 = 'HYCOM_water_u_2018010100.nc'
-        # 使用正则表达式查找时间
-        times_in_str = re.findall(time_pattern, time_str)
+            # 使用正则表达式查找时间
+            times_in_str = re.findall(time_pattern, time_str)
-        # 计算每个字符串中的时间数量
-        num_times_str = len(times_in_str)
+            # 计算每个字符串中的时间数量
+            num_times_str = len(times_in_str)
-        if num_times_str > 1:
-            delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-            delta_t = delta_t.total_seconds() / 3600
-            delta_t = delta_t / 3 + 1
-        else:
-            delta_t = 1
-        # 单个要素最多等待5分钟，不宜太短，太短可能请求失败；也不宜太长，太长可能会浪费时间
-        num_var = int(target_url.count("var="))
-        if num_var <= 0:
-            num_var = 1
-        return int(delta_t * 5 * 60 * num_var)
-    max_timeout = calculate_wait_time(file_name, target_url)
-    print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-    # print(f'Download_start_time: {datetime.datetime.now()}')
-    download_time_s = datetime.datetime.now()
-    order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-    while not download_success:
-        if request_times >= 10:
-            # print(f'下载失败，已重试 {request_times} 次\n可先跳过，后续再试')
-            print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-            count_dict["fail"] += 1
-            break
-        if request_times > 0:
-            # print(f'\r正在重试第 {request_times} 次', end="")
-            print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
-        # 尝试下载文件
-        try:
-            headers = {"User-Agent": get_ua()}
-            """ response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
-            response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
-            # 保存文件
-            with open(filename, 'wb') as f:
-                f.write(response.content) """
-            response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # 启用流式传输
-            response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
-            # 保存文件
-            with open(fname, "wb") as f:
-                print(f"[bold #96cbd7]Downloading {file_name}...")
-                for chunk in response.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-            f.close()
-            # print(f'\r文件 {fname} 下载成功', end="")
-            if os.path.exists(fname):
-                download_success = True
-                download_time_e = datetime.datetime.now()
-                download_delta = download_time_e - download_time_s
-                print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                count_dict["success"] += 1
-                # print(f'Download_end_time: {datetime.datetime.now()}')
-        except requests.exceptions.HTTPError as errh:
-            print(f"Http Error: {errh}")
-        except requests.exceptions.ConnectionError as errc:
-            print(f"Error Connecting: {errc}")
-        except requests.exceptions.Timeout as errt:
-            print(f"Timeout Error: {errt}")
-        except requests.exceptions.RequestException as err:
-            print(f"OOps: Something Else: {err}")
-        time.sleep(3)
-        request_times += 1
+            if num_times_str > 1:
+                delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
+                delta_t = delta_t.total_seconds() / 3600
+                delta_t = delta_t / 3 + 1
+            else:
+                delta_t = 1
+            # 单个要素最多等待5分钟，不宜太短，太短可能请求失败；也不宜太长，太长可能会浪费时间
+            num_var = int(target_url.count("var="))
+            if num_var <= 0:
+                num_var = 1
+            return int(delta_t * 5 * 60 * num_var)
+        max_timeout = calculate_wait_time(file_name, target_url)
+        print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
+        # print(f'Download_start_time: {datetime.datetime.now()}')
+        download_time_s = datetime.datetime.now()
+        order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
+        while not download_success:
+            if request_times >= 10:
+                # print(f'下载失败，已重试 {request_times} 次\n可先跳过，后续再试')
+                print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
+                count_dict["fail"] += 1
+                break
+            if request_times > 0:
+                # print(f'\r正在重试第 {request_times} 次', end="")
+                print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
+            # 尝试下载文件
+            try:
+                headers = {"User-Agent": get_ua()}
+                """ response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
+                response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
+                # 保存文件
+                with open(filename, 'wb') as f:
+                    f.write(response.content) """
+                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # 启用流式传输
+                response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
+                # 保存文件
+                with open(fname, "wb") as f:
+                    print(f"[bold #96cbd7]Downloading {file_name} ...")
+                    for chunk in response.iter_content(chunk_size=1024):
+                        if chunk:
+                            f.write(chunk)
+                f.close()
+                if not _check_ftime(fname):
+                    _correct_time(fname)
+                # print(f'\r文件 {fname} 下载成功', end="")
+                if os.path.exists(fname):
+                    download_success = True
+                    download_time_e = datetime.datetime.now()
+                    download_delta = download_time_e - download_time_s
+                    print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
+                    count_dict["success"] += 1
+                    # print(f'Download_end_time: {datetime.datetime.now()}')
+            except requests.exceptions.HTTPError as errh:
+                print(f"Http Error: {errh}")
+            except requests.exceptions.ConnectionError as errc:
+                print(f"Error Connecting: {errc}")
+            except requests.exceptions.Timeout as errt:
+                print(f"Timeout Error: {errt}")
+            except requests.exceptions.RequestException as err:
+                print(f"OOps: Something Else: {err}")
+            time.sleep(3)
+            request_times += 1
+    else:
+        idm_downloader(target_url, store_path, file_name, given_idm_engine)
+        idm_download_list.append(fname)
+        print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submit to IDM for downloading")
 def _check_hour_is_valid(ymdh_str):
@@ -980,7 +1054,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
         print("Please ensure the time_s is no more than time_e")
-def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
+def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None):
     """
     Description:
         Download the data of single time or a series of time
@@ -1001,6 +1075,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
         check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
         ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
+        idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
     Returns:
         None
@@ -1071,8 +1146,34 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     global fsize_dict_lock
     fsize_dict_lock = Lock()
+    global use_idm, given_idm_engine, idm_download_list
+    if idm_engine is not None:
+        use_idm = True
+        given_idm_engine = idm_engine
+        idm_download_list = []
+    else:
+        use_idm = False
     _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
+    if idm_download_list:
+        for f in idm_download_list:
+            wait_success = 0
+            success = False
+            while not success:
+                if check_nc(f):
+                    _correct_time(f)
+                    success = True
+                    count_dict["success"] += 1
+                else:
+                    wait_success += 1
+                    time.sleep(3)
+                    if wait_success >= 20:
+                        success = True
+                        # print(f'{f} download failed')
+                        count_dict["fail"] += 1
     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
@@ -1140,9 +1241,9 @@ def how_to_use():
 if __name__ == "__main__":
-    time_s, time_e = "2024101012", "2024101018"
+    time_s, time_e = "2018010800", "2024083121"
     merge_name = f"{time_s}_{time_e}"  # 合并后的文件名
-    root_path = r"G:\Data\HYCOM\3hourly_test"
+    root_path = r"G:\Data\HYCOM\3hourly"
     location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
     download_dict = {
         "water_u": {"simple_name": "u", "download": 1},
@@ -1162,10 +1263,11 @@ if __name__ == "__main__":
     # if you wanna download all depth or level, set both False
     depth = None  # or 0-5000 meters
     level = None  # or 1-40 levels
-    num_workers = 3
+    num_workers = 1
     check = True
     ftimes = 1
+    idm_engine = r"D:\Programs\Internet Download Manager\IDMan.exe"
     download_switch, single_var = True, False
     combine_switch = False
@@ -1176,9 +1278,9 @@ if __name__ == "__main__":
     if download_switch:
         if single_var:
             for var_name in var_list:
-                download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
+                download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes, idm_engine=idm_engine)
         else:
-            download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
+            download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes, idm_engine=idm_engine)
     """ if combine_switch or copy_switch:
         time_list = get_time_list(time_s, time_e, 3, 'hour')

{oafuncs-0.0.91 → oafuncs-0.0.92}/oafuncs/oa_down/idm.py RENAMED Viewed

@@ -38,7 +38,7 @@ def downloader(task_url, folder_path, file_name, idm_engine=r"D:\Programs\Intern
     Return:
         None
     Example:
-        downloader("https://www.test.com/data.nc", r"E:\Data", "test.nc", r"D:\Programs\Internet Download Manager\IDMan.exe")
+        downloader("https://www.test.com/data.nc", "E:\\Data", "test.nc", "D:\\Programs\\Internet Download Manager\\IDMan.exe")
     """
     os.makedirs(folder_path, exist_ok=True)
     # 将任务添加至队列

{oafuncs-0.0.91 → oafuncs-0.0.92}/oafuncs/oa_down/literature.py RENAMED Viewed

@@ -23,6 +23,7 @@ import requests
 from rich import print
 from rich.progress import track
 from oafuncs.oa_down.user_agent import get_ua
+from oafuncs.oa_file import remove
 __all__ = ["download5doi"]
@@ -274,6 +275,7 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
         doi_list = read_txt(txt_file)
     if excel_file:
         doi_list = read_excel(excel_file, col_name)
+    remove(Path(store_path) / "wrong_record.txt")
     print(f"Downloading {len(doi_list)} PDF files...")
     for doi in track(doi_list, description="Downloading..."):
         download = _Downloader(doi, store_path)
@@ -281,7 +283,6 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
 if __name__ == "__main__":
-    store_path = r"F:\AAA-Delete\DOI_Reference\pdf"
-    excel_file = r"F:\AAA-Delete\DOI_Reference\savedrecs.xls"
-    # download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
+    store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
+    excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
     download5doi(store_path, excel_file=excel_file)

{oafuncs-0.0.91 → oafuncs-0.0.92/oafuncs.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: oafuncs
-Version: 0.0.91
+Version: 0.0.92
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
@@ -30,6 +30,16 @@ Requires-Dist: matplotlib
 Requires-Dist: Cartopy
 Requires-Dist: netCDF4
 Requires-Dist: xlrd
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 # oafuncs

{oafuncs-0.0.91 → oafuncs-0.0.92}/setup.py RENAMED Viewed

@@ -18,7 +18,7 @@ URL = 'https://github.com/Industry-Pays/OAFuncs'
 EMAIL = 'liukun0312@stu.ouc.edu.cn'
 AUTHOR = 'Kun Liu'
 REQUIRES_PYTHON = '>=3.9.0' # 2025/01/05
-VERSION = '0.0.91'
+VERSION = '0.0.92'
 # What packages are required for this module to be executed?
 REQUIRED = [