oafuncs 0.0.90__py2.py3-none-any.whl → 0.0.91__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oafuncs/data_store/OAFuncs.png CHANGED
Binary file (contents not shown; per the RECORD entries below, the image grew from 3,261,697 to 3,332,020 bytes)
oafuncs/oa_data.py CHANGED
@@ -15,13 +15,14 @@ Python Version: 3.11
 
 import itertools
 import multiprocessing as mp
-from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
 
 import numpy as np
-from scipy.interpolate import griddata
 from rich import print
+from scipy.interpolate import griddata
 
-__all__ = ["interp_2d","ParallelExecutor"]
+
+__all__ = ["interp_2d"]
 
 
 def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
@@ -91,70 +92,7 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
     return np.array(interpolated_data)
 
 
-class ParallelExecutor:
-    """
-    A general-purpose parallel execution class supporting both multiprocessing and multithreading.
-
-    Usage:
-    1. Choose a mode when creating an instance:
-       - mode="process" uses multiprocessing (suited to CPU-bound tasks).
-       - mode="thread" uses multithreading (suited to IO-bound tasks).
-
-    2. Call the run method:
-       - func: the function to execute in parallel.
-       - param_list: the parameter list; each element is a tuple of arguments passed to func.
-
-    Examples:
-    # Example 1: compute squares
-    def compute_square(x):
-        return x * x
-
-    params = [(i,) for i in range(10)]
-    executor = ParallelExecutor(mode="process", max_workers=4)
-    results = executor.run(compute_square, params)
-    print("Results:", results)
-
-    # Example 2: sum two numbers
-    def compute_sum(a, b):
-        return a + b
-
-    params = [(1, 2), (3, 4), (5, 6)]
-    executor = ParallelExecutor(mode="thread", max_workers=2)
-    results = executor.run(compute_sum, params)
-    print("Results:", results)
-
-    Parameters:
-    mode (str): parallel mode; "process" for multiprocessing, "thread" for multithreading.
-    max_workers (int): maximum number of parallel workers; defaults to CPU count minus 2.
-    """
-
-    def __init__(self, mode="process", max_workers=mp.cpu_count() - 2):
-        self.mode = mode
-        self.max_workers = max_workers
-        self.executor = ProcessPoolExecutor if mode == "process" else ThreadPoolExecutor
-
-    def run(self, func, param_list):
-        """
-        Run the given function in parallel, keeping the results in the same order as the input parameters.
 
-        Parameters:
-        func (callable): the function to execute in parallel.
-        param_list (list): the parameter list; each element is a tuple of arguments passed to func.
-
-        Returns:
-        results (list): results in input order.
-        """
-        results = [None] * len(param_list)  # Preallocate the results list
-
-        with self.executor(max_workers=self.max_workers) as executor:
-            # Submit the tasks and record their indices
-            future_to_index = {executor.submit(func, *params): idx for idx, params in enumerate(param_list)}
-
-            for future in future_to_index:
-                idx = future_to_index[future]  # Original index
-                results[idx] = future.result()  # Store the result at the matching position
-
-        return results
 
 
 # ---------------------------------------------------------------------------------- not used below ----------------------------------------------------------------------------------
@@ -203,7 +141,7 @@ def interp_2d_20241213(target_x, target_y, origin_x, origin_y, data, method="lin
         for i in range(dims[0]):
             dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
             interpolated_data.append(np.reshape(dt, target_y.shape))
-            print(f"Interpolating {i+1}/{dims[0]}...")
+            print(f"Interpolating {i + 1}/{dims[0]}...")
         interpolated_data = np.array(interpolated_data)
     elif len_dims == 4:
         interpolated_data = []
@@ -212,7 +150,7 @@ def interp_2d_20241213(target_x, target_y, origin_x, origin_y, data, method="lin
             for j in range(dims[1]):
                 dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
                 interpolated_data[i].append(np.reshape(dt, target_y.shape))
-                print(f"\rInterpolating {i*dims[1]+j+1}/{dims[0]*dims[1]}...", end="")
+                print(f"\rInterpolating {i * dims[1] + j + 1}/{dims[0] * dims[1]}...", end="")
         print("\n")
         interpolated_data = np.array(interpolated_data)
 
@@ -270,7 +208,7 @@ def interp_2d_parallel_20241213(target_x, target_y, origin_x, origin_y, data, me
 
     # Interpolate using multiple threads
     with ThreadPoolExecutor(max_workers=mp.cpu_count() - 2) as executor:
-        print(f"Using {mp.cpu_count()-2} threads...")
+        print(f"Using {mp.cpu_count() - 2} threads...")
         if len_dims == 2:
             interpolated_data = list(executor.map(interp_single2d, [target_y], [target_x], [origin_y], [origin_x], [data], [method]))
         elif len_dims == 3:
@@ -296,23 +234,12 @@ def interp_2d_parallel_20241213(target_x, target_y, origin_x, origin_y, data, me
     return interpolated_data
 
 
-def _test_sum(a,b):
-    return a+b
+def _test_sum(a, b):
+    return a + b
 
 
 if __name__ == "__main__":
-    # Parameter list: each element is a tuple
-    params_list = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]
-
-    # Create the parallel executor
-    executor = ParallelExecutor()
-
-    # Run in parallel
-    results = executor.run(_test_sum, params_list)
 
-    # Verify the result order
-    print("Params:", params_list)
-    print("Results:", results)
     pass
     """ import time
 
oafuncs/oa_down/__init__.py CHANGED
@@ -19,3 +19,4 @@ Python Version: 3.11
 from .hycom_3hourly import *
 from .literature import *
 from .user_agent import *
+from .idm import *
oafuncs/oa_down/hycom_3hourly.py CHANGED
@@ -32,13 +32,14 @@ from rich.progress import Progress
 
 from oafuncs.oa_down.user_agent import get_ua
 from oafuncs.oa_file import file_size, mean_size
+from oafuncs.oa_nc import check as check_nc
 
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 
 __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list"]
 
 
-def get_initial_data():
+def _get_initial_data():
     global variable_info, data_info, var_group, single_var_group
     # ----------------------------------------------
     # variable
@@ -305,14 +306,14 @@ def get_time_list(time_s, time_e, delta, interval_type="hour"):
     return dt_list
 
 
-def transform_time(time_str):
+def _transform_time(time_str):
     # old_time = '2023080203'
     # time_new = '2023-08-02T03%3A00%3A00Z'
     time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
     return time_new
 
 
-def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
+def _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
     query_dict = {
         "var": variable_info[var]["var_name"],
         "north": lat_max,
@@ -331,11 +332,11 @@ def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
     }
 
     if time_str_end is not None:
-        query_dict["time_start"] = transform_time(time_str_ymdh)
-        query_dict["time_end"] = transform_time(time_str_end)
+        query_dict["time_start"] = _transform_time(time_str_ymdh)
+        query_dict["time_end"] = _transform_time(time_str_end)
         query_dict["timeStride"] = 1
     else:
-        query_dict["time"] = transform_time(time_str_ymdh)
+        query_dict["time"] = _transform_time(time_str_ymdh)
 
     def get_nearest_level_index(depth):
         level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
@@ -360,7 +361,7 @@ def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
     return query_dict
 
 
-def check_time_in_dataset_and_version(time_input, time_end=None):
+def _check_time_in_dataset_and_version(time_input, time_end=None):
     # Decide whether we are handling a single time point or a time range
     is_single_time = time_end is None
 
@@ -417,8 +418,8 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
     if is_single_time:
         return True
     else:
-        base_url_s = get_base_url(d_list[0], v_list[0], "u", str(time_start))
-        base_url_e = get_base_url(d_list[0], v_list[0], "u", str(time_end))
+        base_url_s = _get_base_url(d_list[0], v_list[0], "u", str(time_start))
+        base_url_e = _get_base_url(d_list[0], v_list[0], "u", str(time_end))
         if base_url_s == base_url_e:
             return True
         else:
@@ -429,7 +430,7 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
     return False
 
 
-def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
+def _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
     # Pad the time format according to its length
     if len(str(time_input)) == 8:
         time_input = str(time_input) + "00"
@@ -468,7 +469,7 @@ def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
     return False
 
 
-def direct_choose_dataset_and_version(time_input, time_end=None):
+def _direct_choose_dataset_and_version(time_input, time_end=None):
    # Assume data_info is a dict holding dataset and version information
    # Example structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
 
@@ -507,7 +508,7 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
     return dataset_name_out, version_name_out
 
 
-def get_base_url(dataset_name, version_name, var, ymdh_str):
+def _get_base_url(dataset_name, version_name, var, ymdh_str):
     year_str = int(ymdh_str[:4])
     url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
     classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
@@ -548,67 +549,109 @@ def get_base_url(dataset_name, version_name, var, ymdh_str):
     return base_url
 
 
-def get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
-    base_url = get_base_url(dataset_name, version_name, var, ymdh_str)
+def _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
+    base_url = _get_base_url(dataset_name, version_name, var, ymdh_str)
     if isinstance(query_dict["var"], str):
         query_dict["var"] = [query_dict["var"]]
     target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
     return target_url
 
 
-def clear_existing_file(file_full_path):
+def _clear_existing_file(file_full_path):
     if os.path.exists(file_full_path):
         os.remove(file_full_path)
         print(f"{file_full_path} has been removed")
 
 
-def check_existing_file(file_full_path, min_size):
+def _check_existing_file(file_full_path, avg_size):
     if os.path.exists(file_full_path):
         print(f"[bold #FFA54F]{file_full_path} exists")
         fsize = file_size(file_full_path)
-        if min_size:
-            if fsize < min_size:
-                print(f"[bold #FFA54F]{file_full_path} ({fsize:.2f} KB) may be incomplete")
-                # clear_existing_file(file_full_path)
-                return False
-            else:
+        delta_size_ratio = (fsize - avg_size) / avg_size
+        if abs(delta_size_ratio) > 0.025:
+            if check_nc(file_full_path):
+                # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
                 return True
-        if fsize < 5:
-            print(f"[bold #FFA54F]{file_full_path} ({fsize:.2f} KB) may be incomplete")
-            # clear_existing_file(file_full_path)
-            return False
+            else:
+                print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
+                return False
         else:
             return True
     else:
-        # print(f'{file_full_path} does not exist')
         return False
 
 
-def download_file(target_url, store_path, file_name, check=False):
-    # Check if the file exists
-    fname = Path(store_path) / file_name
-    file_name_split = file_name.split("_")
-    file_name_split = file_name_split[:-1]
-    # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
-    same_file = "_".join(file_name_split) + "*nc"
-
+def _get_mean_size30(store_path, same_file):
     if same_file not in fsize_dict.keys():
-        # print(f'Same file name: {same_file}')
-        fsize_dict[same_file] = {"size": 0, "count": 0}
+        # print(f'Same file name: {same_file}')
+        fsize_dict[same_file] = {"size": 0, "count": 0}
 
     if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
         # Update the minimum size over the first 30 files; after that it is taken as representative of all files and no longer updated, to save time
         fsize_mean = mean_size(store_path, same_file, max_num=30)
-        set_min_size = fsize_mean * 0.8
+        set_min_size = fsize_mean * 0.95
         fsize_dict[same_file]["size"] = set_min_size
         fsize_dict[same_file]["count"] += 1
     else:
         set_min_size = fsize_dict[same_file]["size"]
+    return set_min_size
+
+
+def _get_mean_size_move(same_file, current_file):
+    # Acquire the lock
+    with fsize_dict_lock:  # Global lock: only one thread may access fsize_dict at a time
+        # Initialize the dict entry if the file is not in it yet
+        if same_file not in fsize_dict.keys():
+            fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
+
+        tolerance_ratio = 0.025  # Tolerated deviation ratio
+        current_file_size = file_size(current_file)
+
+        # If the list is not empty, compute the mean; otherwise keep it at 1
+        if fsize_dict[same_file]["size_list"]:
+            fsize_dict[same_file]["mean_size"] = sum(fsize_dict[same_file]["size_list"]) / len(fsize_dict[same_file]["size_list"])
+            fsize_dict[same_file]["mean_size"] = max(fsize_dict[same_file]["mean_size"], 1.0)
+        else:
+            fsize_dict[same_file]["mean_size"] = 1.0
+
+        size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
+
+        if abs(size_difference_ratio) > tolerance_ratio:
+            if check_nc(current_file):
+                # print(f"File size is abnormal but can be opened normally, file size: {current_file_size:.2f} KB")
+                # The file opens fine even though its size is unusual, so keep the current file size
+                fsize_dict[same_file]["size_list"] = [current_file_size]
+                fsize_dict[same_file]["mean_size"] = current_file_size
+            else:
+                _clear_existing_file(current_file)
+                print(f"File size is abnormal, may need to be downloaded again, file size: {current_file_size:.2f} KB")
+        else:
+            # Append the current file size to the list and update the count
+            fsize_dict[same_file]["size_list"].append(current_file_size)
+
+        # Return the adjusted mean; by design this is the mean from before the new value was added
+        return fsize_dict[same_file]["mean_size"]
+
+
+def _download_file(target_url, store_path, file_name, check=False):
+    # Check if the file exists
+    fname = Path(store_path) / file_name
+    file_name_split = file_name.split("_")
+    file_name_split = file_name_split[:-1]
+    # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
+    same_file = "_".join(file_name_split) + "*nc"
+
     if check:
-        if check_existing_file(fname, set_min_size):
+        if same_file not in fsize_dict.keys():  # Check the first file on its own, since there is no size to compare against yet
+            check_nc(fname, if_delete=True)
+
+        # set_min_size = _get_mean_size30(store_path, same_file)  # Original scheme: average over the first 30 files only; if sizes change it cannot adapt
+        get_mean_size = _get_mean_size_move(same_file, fname)
+
+        if _check_existing_file(fname, get_mean_size):
             count_dict["skip"] += 1
             return
-    clear_existing_file(fname)
+    _clear_existing_file(fname)
 
     # -----------------------------------------------
     print(f"[bold #f0f6d0]Requesting {file_name}...")
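The replacement scheme above is worth spelling out: instead of flagging any file below 80% (now 95%) of a one-off 30-file mean, _get_mean_size_move keeps a running mean of sibling file sizes under a global lock, flags any file deviating from that mean by more than 2.5%, and only deletes a flagged file if check_nc cannot open it. A standalone sketch of the tolerance logic (hypothetical names; the real code shares fsize_dict across threads):

    # Sketch of the sliding-mean size check, without the locking and dict bookkeeping.
    def is_size_suspect(size_kb, size_history, tolerance_ratio=0.025):
        # Mean of previously seen sizes, floored at 1.0 as in _get_mean_size_move
        mean_size = max(sum(size_history) / len(size_history), 1.0) if size_history else 1.0
        return abs(size_kb - mean_size) / mean_size > tolerance_ratio

    history = [1024.0, 1030.2, 1018.7]
    print(is_size_suspect(1025.0, history))  # False: within 2.5% of the mean
    print(is_size_suspect(512.0, history))   # True: likely truncated, so check_nc decides its fate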
@@ -701,7 +744,7 @@ def download_file(target_url, store_path, file_name, check=False):
             request_times += 1
 
 
-def check_hour_is_valid(ymdh_str):
+def _check_hour_is_valid(ymdh_str):
     # hour should be 00, 03, 06, 09, 12, 15, 18, 21
     hh = int(str(ymdh_str[-2:]))
     if hh in [0, 3, 6, 9, 12, 15, 18, 21]:
@@ -710,9 +753,9 @@ def check_hour_is_valid(ymdh_str):
         return False
 
 
-def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
+def _check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
     if dataset_name is not None and version_name is not None:
-        just_ensure = ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
+        just_ensure = _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
         if just_ensure:
             return dataset_name, version_name
         else:
@@ -725,7 +768,7 @@ def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
         download_time_str = download_time_str + "00"
 
     # Validate the hour if needed
-    if download_time_end is None and not check_hour_is_valid(download_time_str):
+    if download_time_end is None and not _check_hour_is_valid(download_time_str):
         print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
         raise ValueError("The hour is invalid")
 
@@ -733,18 +776,18 @@ def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
     if download_time_end is not None:
         if len(str(download_time_end)) == 8:
             download_time_end = str(download_time_end) + "21"
-        have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
+        have_data = _check_time_in_dataset_and_version(download_time_str, download_time_end)
         if have_data:
-            return direct_choose_dataset_and_version(download_time_str, download_time_end)
+            return _direct_choose_dataset_and_version(download_time_str, download_time_end)
     else:
-        have_data = check_time_in_dataset_and_version(download_time_str)
+        have_data = _check_time_in_dataset_and_version(download_time_str)
         if have_data:
-            return direct_choose_dataset_and_version(download_time_str)
+            return _direct_choose_dataset_and_version(download_time_str)
 
     return None, None
 
 
-def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
+def _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
     # year_str = str(download_time)[:4]
     ymdh_str = str(download_time)
     if depth is not None and level_num is not None:
@@ -760,19 +803,19 @@ def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
     else:
         # print("Full depth or full level data will be downloaded...")
         which_mode = "full"
-    query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
-    submit_url = get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
+    query_dict = _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
+    submit_url = _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
     return submit_url
 
 
-def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
+def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
     print("[bold #ecdbfe]-" * 160)
     download_time = str(download_time)
     if download_time_end is not None:
         download_time_end = str(download_time_end)
-        dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time, download_time_end)
+        dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time, download_time_end)
     else:
-        dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
+        dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time)
     if dataset_name is None and version_name is None:
         count_dict["no_data"] += 1
         if download_time_end is not None:
@@ -787,11 +830,11 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
     if isinstance(var, list):
         if len(var) == 1:
             var = var[0]
-            submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
+            submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
             if download_time_end is not None:
                 file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"  # No underscore in the time here, or later lookups for files of the same variable will break
-            download_file(submit_url, store_path, file_name, check)
+            _download_file(submit_url, store_path, file_name, check)
         else:
             if download_time < "2024081012":
                 varlist = [_ for _ in var]
@@ -804,7 +847,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
                         continue
 
                     var = current_group[0]
-                    submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
+                    submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                     file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
                     old_str = f'var={variable_info[var]["var_name"]}'
                     new_str = f'var={variable_info[var]["var_name"]}'
@@ -816,17 +859,17 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
                     file_name = f"HYCOM_{key}_{download_time}.nc"
                     if download_time_end is not None:
                         file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"  # No underscore in the time here, or later lookups for files of the same variable will break
-                    download_file(submit_url, store_path, file_name, check)
+                    _download_file(submit_url, store_path, file_name, check)
             else:
                 for v in var:
-                    submit_url = get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
+                    submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                     if download_time_end is not None:
                         file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
-                    download_file(submit_url, store_path, file_name, check)
+                    _download_file(submit_url, store_path, file_name, check)
 
 
-def convert_full_name_to_short_name(full_name):
+def _convert_full_name_to_short_name(full_name):
     for var, info in variable_info.items():
         if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
             return var
@@ -836,7 +879,7 @@ def convert_full_name_to_short_name(full_name):
     return False
 
 
-def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
+def _download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
     """
     # Parallel download task
     # This function exists for parallel downloading and is required; calling direct_download in parallel directly causes problems
@@ -847,10 +890,10 @@ def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
    Therefore, even if multiple tasks run at the same time, the data will not get mixed up.
    """
 
-    prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
+    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
 
 
-def done_callback(future, progress, task, total, counter_lock):
+def _done_callback(future, progress, task, total, counter_lock):
     """
     # Callback function for the parallel download task
     # This function exists for parallel downloading and is required; calling direct_download in parallel directly causes problems
@@ -866,7 +909,7 @@ def done_callback(future, progress, task, total, counter_lock):
         progress.update(task, advance=1, description=f"[cyan]Downloading... {parallel_counter}/{total}")
 
 
-def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
+def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
     """
     Description:
         Download the data of single time or a series of time
@@ -895,7 +938,7 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
     parallel_counter = 0
     counter_lock = Lock()  # Create a lock for a thread-safe counter
     if ymdh_time_s == ymdh_time_e:
-        prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, check)
+        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, check)
     elif int(ymdh_time_s) < int(ymdh_time_e):
         print("Downloading a series of files...")
         time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, "hour")
@@ -905,16 +948,16 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
             if num_workers is None or num_workers <= 1:
                 # Serial
                 for i, time_str in enumerate(time_list):
-                    prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
+                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
                     progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
             else:
                 # Parallel
                 with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
+                    futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
                     """ for i, future in enumerate(futures):
                         future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")) """
                     for feature in as_completed(futures):
-                        done_callback(feature, progress, task, len(time_list), counter_lock)
+                        _done_callback(feature, progress, task, len(time_list), counter_lock)
         else:
             new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
             total_num = len(new_time_list)
@@ -923,16 +966,16 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
                 for i, time_str in enumerate(new_time_list):
                     time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
                     time_str_end = time_list[time_str_end_index]
-                    prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
+                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
                     progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
             else:
                 # Parallel
                 with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
+                    futures = [executor.submit(_download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
                     """ for i, future in enumerate(futures):
                         future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")) """
                     for feature in as_completed(futures):
-                        done_callback(feature, progress, task, len(time_list), counter_lock)
+                        _done_callback(feature, progress, task, len(time_list), counter_lock)
     else:
         print("Please ensure the time_s is no more than time_e")
 
@@ -962,7 +1005,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     Returns:
         None
     """
-    get_initial_data()
+    _get_initial_data()
 
     # Print info and resolve the dataset and version names
     if dataset_name is None and version_name is None:
@@ -980,11 +1023,11 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
 
     if isinstance(var, list):
         if len(var) == 1:
-            var = convert_full_name_to_short_name(var[0])
+            var = _convert_full_name_to_short_name(var[0])
         else:
-            var = [convert_full_name_to_short_name(v) for v in var]
+            var = [_convert_full_name_to_short_name(v) for v in var]
     elif isinstance(var, str):
-        var = convert_full_name_to_short_name(var)
+        var = _convert_full_name_to_short_name(var)
     else:
         raise ValueError("The var is invalid")
     if var is False:
@@ -1005,8 +1048,8 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     os.makedirs(str(store_path), exist_ok=True)
 
     if num_workers is not None:
-        num_workers = max(min(num_workers, 10), 1)
-
+        num_workers = max(min(num_workers, 10), 1)  # Do not cap the maximum for now; a few more threads can be opened for re-checking
+        # num_workers = int(max(num_workers, 1))
     time_s = str(time_s)
     if len(time_s) == 8:
         time_s += "00"
@@ -1025,8 +1068,11 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
 
     global fsize_dict
     fsize_dict = {}
+
+    global fsize_dict_lock
+    fsize_dict_lock = Lock()
 
-    download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
+    _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
 
     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
 
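With every helper now underscore-prefixed, download() is effectively the single public entry point of hycom_3hourly. An illustrative call sketched from the signature visible in the hunk headers (parameter values below are placeholders, not taken from the diff):

    # Illustrative only; paths and times are placeholders.
    from oafuncs.oa_down.hycom_3hourly import download

    download(
        var="u",               # short variable name; full names go through _convert_full_name_to_short_name
        time_s="2024083100",   # yyyymmddhh; the hour must be 00, 03, ..., 21
        time_e="2024083121",
        store_path=r"F:\Data\HYCOM",
        num_workers=4,         # clamped to the 1..10 range seen above
        check=True,            # enables the sliding-mean size check per file
    )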
oafuncs/oa_down/idm.py ADDED
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Author: Liu Kun && 16031215@qq.com
+Date: 2025-01-11 16:19:12
+LastEditors: Liu Kun && 16031215@qq.com
+LastEditTime: 2025-01-11 16:25:47
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\idm.py
+Description:
+EditPlatform: vscode
+ComputerInfo: XPS 15 9510
+SystemInfo: Windows 11
+Python Version: 3.12
+"""
+
+import datetime
+import os
+from subprocess import call
+
+from rich import print
+
+__all__ = ["downloader"]
+
+
+def downloader(task_url, folder_path, file_name, idm_engine=r"D:\Programs\Internet Download Manager\IDMan.exe"):
+    """
+    Description:
+        Use IDM to download files.
+    Parameter:
+        task_url: str
+            The download link of the file.
+        folder_path: str
+            The path of the folder where the file is saved.
+        file_name: str
+            The name of the file to be saved.
+        idm_engine: str
+            The path of the IDM engine. Note: "IDMan.exe"
+    Return:
+        None
+    Example:
+        downloader("https://www.test.com/data.nc", r"E:\Data", "test.nc", r"D:\Programs\Internet Download Manager\IDMan.exe")
+    """
+    os.makedirs(folder_path, exist_ok=True)
+    # Add the task to the queue
+    call([idm_engine, "/d", task_url, "/p", folder_path, "/f", file_name, "/a"])
+    # Start the task queue
+    call([idm_engine, "/s"])
+    # print(f"IDM downloader: the download task for {file_name} has been added to the queue...")
+    print("[purple]-" * 50 + f"\n{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" + "[purple]-" * 50)
+    print(f"[green]IDM Downloader: {file_name} download task has been added to the queue...[/green]")
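The switches passed to call() are IDM's documented command-line interface: /d gives the download URL, /p the local folder, /f the local file name, /a adds the task to the queue without confirmation, and /s starts the queue. Since each downloader() call both queues the task and starts the queue, a batch download is just a loop (URLs and paths below are placeholders):

    # Illustrative batch use of the new downloader(); nothing here is from the diff.
    from oafuncs.oa_down.idm import downloader

    urls = [
        "https://www.test.com/data_2024083100.nc",
        "https://www.test.com/data_2024083103.nc",
    ]
    for url in urls:
        downloader(url, r"E:\Data", url.rsplit("/", 1)[-1])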
oafuncs/oa_down/literature.py CHANGED
@@ -64,7 +64,21 @@ class _Downloader:
             r"https://sci-hub.se",
             r"https://sci-hub.ren",
             r"https://sci-hub.st",
-            r"https://sci-hub.ru",
+            r"https://sci-hub.ru",  # The most reliable of these sites
+            # ------------------------------------- The sites below have not been verified
+            r"https://sci-hub.wf",
+            r"https://sci-hub.yt",
+            r"https://sci-hub.ee",
+            r"https://sci-hub.cat",
+            r"https://sci-hub.in",
+            r"https://www.pismin.com",
+            r"https://sci-hub.vkif.top",
+            r"https://www.bothonce.com",
+            r"https://sci-hub.et-fine.com",
+            r"https://sci-hub.hkvisa.net",
+            # r"https://sci-hub.3800808.com",  # This one can only save files manually
+            r"https://sci-hub.zidianzhan.net",
+            r"https://sci-hub.usualwant.com",
         ]
         self.base_url = None
         self.url = None
@@ -86,33 +100,37 @@ class _Downloader:
         self.try_times = 0
 
     def get_pdf_url(self):
-        print("[bold #E6E6FA]-" * 100)
+        print("[bold #E6E6FA]-" * 120)
         print(f"DOI: {self.doi}")
         print(f"Requesting: {self.url}...")
-        response = requests.get(self.url, headers=self.headers)
-        if response.status_code == 200:
-            self.cookies = response.cookies
-            text = response.text.replace("\\", "")
-            # text = text.replace(' ', '')  # It is important to remove the space
-            # print(text)
-            pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
-            match = pattern.search(text)
-            if match:
-                got_url = match.group(1)
-                if r"http" not in got_url:
-                    if got_url[:2] == "//":
-                        self.pdf_url = "https:" + got_url
+        try:
+            response = requests.get(self.url, headers=self.headers)
+            if response.status_code == 200:
+                self.cookies = response.cookies
+                text = response.text.replace("\\", "")
+                # text = text.replace(' ', '')  # It is important to remove the space
+                # print(text)
+                pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
+                match = pattern.search(text)
+                if match:
+                    got_url = match.group(1)
+                    if r"http" not in got_url:
+                        if got_url[:2] == "//":
+                            self.pdf_url = "https:" + got_url
+                        else:
+                            self.pdf_url = self.base_url + got_url
                     else:
-                        self.pdf_url = self.base_url + got_url
+                        self.pdf_url = got_url
+                    print(f"URL: {self.pdf_url}")
                 else:
-                    self.pdf_url = got_url
-                print(f"URL: {self.pdf_url}")
+                    print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
+                    self.try_times = self.try_times_each_url_max + 1
             else:
+                print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
                 print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
                 self.try_times = self.try_times_each_url_max + 1
-        else:
-            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
-            print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
+        except Exception as e:
+            print(f"Failed to retrieve the webpage. Error: {e}")
             self.try_times = self.try_times_each_url_max + 1
 
     def url_iterate(self):
@@ -129,6 +147,12 @@ class _Downloader:
             # break
 
     def write_wrong_record(self):
+        # First read the contents of the txt; if the DOI is already recorded, do not write it again
+        if self.wrong_record_file.exists():
+            with open(self.wrong_record_file, "r") as f:
+                lines = f.readlines()
+            if self.doi in lines:
+                return
         with open(self.wrong_record_file, "a") as f:
             f.write(self.doi + "\n")
 
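One caveat in this new dedup check: readlines() keeps the trailing newline on each element, so `self.doi in lines` compares 'doi' against 'doi\n' and will normally never match. A sketch of the same intent with the newlines stripped (assuming the surrounding _Downloader attributes):

    # Sketch: strip newlines so the membership test can actually match.
    def write_wrong_record(self):
        if self.wrong_record_file.exists():
            with open(self.wrong_record_file, "r") as f:
                recorded = {line.strip() for line in f}
            if self.doi in recorded:
                return
        with open(self.wrong_record_file, "a") as f:
            f.write(self.doi + "\n")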
@@ -140,7 +164,7 @@ class _Downloader:
                 os.remove(self.fpath)
                 print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
             else:
-                print("[bold #E6E6FA]-" * 100)
+                print("[bold #E6E6FA]-" * 120)
                 print(f"[bold purple]The PDF file {self.fpath} already exists.")
                 return
         self.url_index = 0
@@ -230,11 +254,11 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
 
     Example:
         download5doi(doi_list='10.3389/feart.2021.698876')
-        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list='10.3389/feart.2021.698876')
-        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
-        download5doi(store_path=r'I:\Delete\ref_pdf', txt_file=r'I:\Delete\ref_pdf\wrong_record.txt')
-        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
-        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
+        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list='10.3389/feart.2021.698876')
+        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
+        download5doi(store_path='I:\\Delete\\ref_pdf', txt_file='I:\\Delete\\ref_pdf\\wrong_record.txt')
+        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx')
+        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx', col_name='DOI')
     """
     if not store_path:
         store_path = Path.cwd()
@@ -257,7 +281,7 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
 
 
 if __name__ == "__main__":
-    store_path = r"I:\Delete\ref_pdf"
-    excel_file = r"I:\Delete\Ref_DA_ROMS\savedrecs.xls"
+    store_path = r"F:\AAA-Delete\DOI_Reference\pdf"
+    excel_file = r"F:\AAA-Delete\DOI_Reference\savedrecs.xls"
     # download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
     download5doi(store_path, excel_file=excel_file)
oafuncs/oa_file.py CHANGED
@@ -19,7 +19,7 @@ import re
 import shutil
 from rich import print
 
-__all__ = ["find_file", "link_file", "copy_file", "rename_file", "make_folder", "clear_folder", "remove_empty_folder", "remove", "file_size"]
+__all__ = ["find_file", "link_file", "copy_file", "rename_file", "make_folder", "clear_folder", "remove_empty_folder", "remove", "file_size", "mean_size", "make_dir"]
 
 
 # ** Find files; wildcards supported
@@ -191,7 +191,7 @@ def rename_file(directory, old_str, new_str):
 
 
 # ** Create a subfolder (optionally cleared first)
-def make_folder(rootpath=None, folder_name=None, clear=0) -> str:
+def make_folder(rootpath=None, folder_name=None, clear=False) -> str:
     """
     # Description: create a subfolder (optionally cleared first)
     # Usage example
@@ -210,6 +210,26 @@ def make_folder(rootpath=None, folder_name=None, clear=0) -> str:
     return folder_path
 
 
+# ** Create a directory path
+def make_dir(directory):
+    """
+    Description:
+        Create a directory if it does not exist
+
+    Parameters:
+        directory: The directory path to create
+
+    Returns:
+        None
+
+    Example:
+        make_dir(r"E:\Data\2024\09\17\var1")
+    """
+    directory = str(directory)
+    os.makedirs(directory, exist_ok=True)
+    print(f"Created directory: {directory}")
+
+
 # ** Clear a folder
 def clear_folder(folder_path):
     """
@@ -270,27 +290,45 @@ def remove_empty_folder(path, print_info=1):
 # ** Delete matching files; wildcards may be used
 def remove(pattern):
     """
-    # 描述:删除相关文件,可使用通配符
+    Delete files or directories that match the given wildcard pattern.
+
+    Parameters:
+        pattern : str
+            File path or string containing wildcards. For example:
+            - r'E:\Code\Python\Model\WRF\Radar2\bzip2-radar-0*'
+            - 'bzip2-radar-0*' (assuming you are already in the target directory)
+
+    Usage examples:
         remove(r'E:\Code\Python\Model\WRF\Radar2\bzip2-radar-0*')
-    # or
+        or
         os.chdir(r'E:\Code\Python\Model\WRF\Radar2')
         remove('bzip2-radar-0*')
-    param {*} pattern # 文件路径或通配符
+
+    last updated: 2025-01-10 11:49:13
     """
-    # 使用glob.glob来获取所有匹配的文件
-    # 可以使用通配符*来匹配所有文件
     pattern = str(pattern)
+
+    # Use glob.glob to get all matching files or directories
    file_list = glob.glob(pattern)
+
+    if not file_list:
+        print(f"No files or directories found matching '{pattern}'.")
+        return
+
    for file_path in file_list:
        if os.path.exists(file_path):
            try:
-                shutil.rmtree(file_path)
-                print(f"成功删除文件: {file_path}")
+                if os.path.isdir(file_path):
+                    shutil.rmtree(file_path)
+                    print(f"Successfully deleted directory: {file_path}")
+                else:
+                    os.remove(file_path)
+                    print(f"Successfully deleted file: {file_path}")
            except Exception as e:
-                print(f"删除文件失败: {file_path}")
-                print(e)
+                print(f"Deletion failed: {file_path}")
+                print(f"Error message: {e}")
        else:
-            print(f"文件不存在: {file_path}")
+            print(f"File or directory does not exist: {file_path}")
 
 
 # ** Get file size
@@ -307,7 +345,10 @@ def file_size(file_path, unit="KB"):
     """
     # Check whether the file exists
     if not os.path.exists(file_path):
-        return "文件不存在"
+        # return "文件不存在"
+        # print(f"文件不存在: {file_path}\n返回0.0")
+        print(f'File does not exist: {file_path}\nReturn 0.0')
+        return 0.0
 
     # Get the file size (bytes)
     file_size = os.path.getsize(file_path)
@@ -317,7 +358,10 @@ def file_size(file_path, unit="KB"):
 
     # Check that the given unit is valid
     if unit not in unit_dict:
-        return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
+        # return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
+        # print("单位不合法,请选择PB、TB、GB、MB、KB中的一个\n返回0.0")
+        print("Invalid unit, please choose one of PB, TB, GB, MB, KB\nReturn 0.0")
+        return 0.0
 
     # Convert the file size to the specified unit
     converted_size = file_size / unit_dict[unit]
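Returning 0.0 instead of a Chinese error string is a small but real API change: callers can now feed the result straight into arithmetic, which is exactly what the new size checks in hycom_3hourly rely on. A sketch of the difference (the path is a placeholder):

    from oafuncs.oa_file import file_size

    size = file_size(r"E:\Data\missing.nc")  # prints a notice and returns 0.0
    # In 0.0.90 this returned the string "文件不存在", so the comparison below
    # raised a TypeError; in 0.0.91 it is a plain float comparison.
    if size < 5:
        print("File is missing or suspiciously small.")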
oafuncs/oa_help.py CHANGED
@@ -116,12 +116,18 @@ def log():
     log()
     """
     print("更新日志:")
+    print(
+        """
+        2025-01-15
+        1. Improved the DOI literature-download function: added more download sources and better exception handling
+        """
+    )
     print(
         """
         2025-01-07
         1. Tested the minimum supported Python version: 3.9
         2. Improved the documentation of some functions
-        3. Improved the hycom_3hourly module: update the file size only 30 times, to avoid recomputing it every time and speed up downloads.
+        3. Improved the hycom_3hourly module: use a sliding check to judge whether a file is intact
         """
     )
     print(
oafuncs/oa_nc.py CHANGED
@@ -20,7 +20,7 @@ import numpy as np
 import xarray as xr
 from rich import print
 
-__all__ = ["get_var", "extract", "save", "merge", "modify", "rename", "check_file", "convert_longitude", "isel"]
+__all__ = ["get_var", "extract", "save", "merge", "modify", "rename", "check", "convert_longitude", "isel"]
 
 
 def get_var(file, *vars):
@@ -222,8 +222,10 @@ def merge(file_list, var_name=None, dim_name=None, target_filename=None):
     merged_data = {}
 
     # Iterate over the file list
+    print('Reading file ...')
     for i, file in enumerate(file_list):
-        print(f"\rReading file {i + 1}/{len(file_list)}...", end="")
+        # Progress is now reported via the track description
+        # print(f"\rReading file {i + 1}/{len(file_list)}...", end="")
         ds = xr.open_dataset(file)
         for var_name in var_names:
             var = ds[var_name]
@@ -239,17 +241,17 @@ def merge(file_list, var_name=None, dim_name=None, target_filename=None):
             merged_data[var_name] = var
         ds.close()
 
-    print("\nMerging data...")
+    print("\nMerging data ...")
     for var_name in merged_data:
         if isinstance(merged_data[var_name], list):
             merged_data[var_name] = xr.concat(merged_data[var_name], dim=dim_name)
 
     merged_data = xr.Dataset(merged_data)
 
-    print("Writing data to file...")
+    print("Writing data to file ...")
     if os.path.exists(target_filename):
         print("Warning: The target file already exists.")
-        print("Removing existing file...")
+        print("Removing existing file ...")
         os.remove(target_filename)
     merged_data.to_netcdf(target_filename)
     print(f'File "{target_filename}" has been created.')
@@ -370,38 +372,38 @@ def rename(ncfile_path, old_name, new_name):
         print(f"An error occurred: {e}")
 
 
-def check_file(ncfile, if_delete=False):
-    '''
+def check(ncfile, if_delete=False):
+    """
     Description:
-        Check if the NetCDF file is corrupted.
-
+        Check if the NetCDF file is corrupted using xarray.
+
     Parameters:
         ncfile (str): The path to the NetCDF file.
         if_delete (bool): Whether to delete the file if it is corrupted, default is False.
-
+
     Returns:
         bool: True if the file is not corrupted, False otherwise.
-    '''
+    """
     if not os.path.exists(ncfile):
         return False
 
     try:
-        with nc.Dataset(ncfile, "r") as f:
-            # Make sure f is used; here we check the number of variables in the file
-            if len(f.variables) > 0:
+        with xr.open_dataset(ncfile) as ds:
+            if len(ds.variables) > 0:
                 return True
             else:
-                # With no variables, the file can be considered corrupted
-                raise ValueError("File is empty or corrupted.")
+                print(f"File {ncfile} is empty or corrupted.")
+                if if_delete:
+                    os.remove(ncfile)
+                    print(f"File {ncfile} has been deleted.")
+                return False
     except OSError as e:
-        # Catch the OSError that may occur when opening the file
         print(f"An error occurred while opening the file: {e}")
         if if_delete:
             os.remove(ncfile)
             print(f"File {ncfile} has been deleted.")
         return False
     except Exception as e:
-        # Catch other possible exceptions
         print(f"An unexpected error occurred: {e}")
         if if_delete:
             os.remove(ncfile)
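This renamed check() is what hycom_3hourly imports as check_nc above; switching from netCDF4 to xarray also means it exercises the same reader the rest of the package uses. A minimal usage sketch (the path is a placeholder):

    from oafuncs.oa_nc import check

    if not check(r"E:\Data\HYCOM_u_2024083100.nc", if_delete=True):
        print("File was corrupted (and has been deleted); download it again.")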
oafuncs/oa_tool/__init__.py CHANGED
@@ -1,18 +1,18 @@
 #!/usr/bin/env python
 # coding=utf-8
-'''
+"""
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-11-21 09:48:00
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-11-21 10:18:33
-FilePath: \\Python\\My_Funcs\\OAFuncs\\OAFuncs\\oa_tool\\__init__.py
-Description:
+LastEditTime: 2025-01-11 20:09:09
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_tool\\__init__.py
+Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
 SystemInfo: Windows 11
 Python Version: 3.12
-'''
-
+"""
 
 # Note: this causes OAFuncs to import all functions directly, which does not match the modular design
 from .email import *
+from .parallel import *
oafuncs/oa_tool/parallel.py ADDED
@@ -0,0 +1,90 @@
+import logging
+import multiprocessing as mp
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+__all__ = ["ParallelExecutor"]
+
+
+class ParallelExecutor:
+    """
+    A class for parallel execution of tasks using threads or processes.
+
+    If mode is "process", the tasks are executed in separate processes.
+    If mode is "thread", the tasks are executed in separate threads.
+
+    Parameters:
+        mode (str): The execution mode. Supported values are "process" and "thread".
+            process ~ Must use top function to run, can't use in jupyter notebook
+            thread ~ Function can not be top function, can use in jupyter notebook
+        max_workers (int): The maximum number of workers to use. Defaults to CPU count - 1.
+
+    Note:!!!
+        If Jupyter notebook is used, the mode should be "thread" to avoid hanging issues.
+    """
+
+    def __init__(self, mode="process", max_workers=None):
+        if mode not in {"process", "thread"}:
+            raise ValueError("Invalid mode. Supported values are 'process' and 'thread'.")
+        # process: Must use top function to run, can't use in jupyter notebook
+        # thread: Can use in jupyter notebook
+        self.mode = mode
+        self.max_workers = max_workers or max(1, mp.cpu_count() - 1)
+        self.executor_class = ProcessPoolExecutor if mode == "process" else ThreadPoolExecutor
+
+    def run(self, func, param_list):
+        """
+        Run a function in parallel using the specified executor.
+
+        Args:
+            func (callable): The function to execute.
+            param_list (list): A list of parameter tuples to pass to the function.
+
+        Returns:
+            list: Results of the function execution.
+        """
+        if not callable(func):
+            raise ValueError("func must be callable.")
+        if not isinstance(param_list, list) or not all(isinstance(p, tuple) for p in param_list):
+            raise ValueError("param_list must be a list of tuples.")
+
+        results = [None] * len(param_list)
+        logging.info("Starting parallel execution in %s mode with %d workers.", self.mode, self.max_workers)
+
+        with self.executor_class(max_workers=self.max_workers) as executor:
+            future_to_index = {executor.submit(func, *params): idx for idx, params in enumerate(param_list)}
+
+            for future in as_completed(future_to_index):
+                idx = future_to_index[future]
+                try:
+                    results[idx] = future.result()
+                except Exception as e:
+                    logging.error("Task %d failed with error: %s", idx, e)
+                    results[idx] = e
+
+        logging.info("Parallel execution completed.")
+        return results
+
+
+def _compute_square(x):
+    return x * x
+
+
+def _example():
+    def _compute_sum(a, b):
+        return a + b
+
+    executor1 = ParallelExecutor(mode="process", max_workers=4)
+    params1 = [(i,) for i in range(10)]
+    results1 = executor1.run(_compute_square, params1)
+    print("Results (compute_square):", results1)
+
+    executor2 = ParallelExecutor(mode="thread", max_workers=2)
+    params2 = [(1, 2), (3, 4), (5, 6)]
+    results2 = executor2.run(_compute_sum, params2)
+    print("Results (compute_sum):", results2)
+
+
+if __name__ == "__main__":
+    _example()
+    # The decorator is not required either; running this directly is fine. Just avoid using it in .ipynb; run it from a standalone .py file instead
+    # Or, in Jupyter, use thread rather than process, because process mode will hang Jupyter
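The split between the module-level _compute_square and the nested _compute_sum in the example above is deliberate: ProcessPoolExecutor pickles the callable to ship it to worker processes, and only module-level functions are picklable, which is why the nested function is paired with thread mode only. A sketch of the restriction (independent of oafuncs):

    from concurrent.futures import ProcessPoolExecutor

    def top_level(x):       # picklable: workers can import it by name
        return x + 1

    def main():
        def nested(x):      # not picklable: submitting it to a process pool fails
            return x + 1
        with ProcessPoolExecutor() as ex:
            print(ex.submit(top_level, 1).result())  # works
            # ex.submit(nested, 1).result()          # raises a pickling error

    if __name__ == "__main__":
        main()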
oafuncs-0.0.91.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: oafuncs
-Version: 0.0.90
+Version: 0.0.91
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
oafuncs-0.0.91.dist-info/RECORD ADDED
@@ -0,0 +1,28 @@
+oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
+oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
+oafuncs/oa_data.py,sha256=nENfszcOaALRse70fWFKi2vKH35EhRSCr65oIAKHiS8,12774
+oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
+oafuncs/oa_file.py,sha256=FVffpW3p6C8l1zrDrNr9aQeuCrA1qt4u4YssSwcTkkE,14106
+oafuncs/oa_help.py,sha256=loyzTbjU_0VpSIBvAEUA_tqxG8MVsO0xFE_2hgQ3zMw,4188
+oafuncs/oa_nc.py,sha256=CVZlv2EIehdgzrf1MHXYOUFcNkdOnmE1GYQYLldzrk0,17499
+oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
+oafuncs/data_store/OAFuncs.png,sha256=Cc0TDi9H5mWBporXYw9K0bUWC0oSsI-Qj3FGAXUtGKM,3332020
+oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
+oafuncs/oa_down/__init__.py,sha256=kRX5eTUCbAiz3zTaQM1501paOYS_3fizDN4Pa0mtNUA,585
+oafuncs/oa_down/hycom_3hourly.py,sha256=ZJpsx2D_x-C1Z4R1Wwr2vzUuT6iNPTZVDxusCG_q330,62113
+oafuncs/oa_down/idm.py,sha256=lOiDQ5i5JPhj5ca3uDM9dw5DnHtj1EyJ17owhy7luLg,1666
+oafuncs/oa_down/literature.py,sha256=n9pvL_N7pk-MZHHNIqc8OUYK_9ycASjDq0-D0wLSZ3s,11329
+oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
+oafuncs/oa_down/user_agent.py,sha256=TsPcAxFmMTYAEHRFjurI1bQBJfDhcA70MdHoUPwQmks,785
+oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
+oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
+oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
+oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
+oafuncs/oa_tool/__init__.py,sha256=bNTy9abznDhg3k_Irx0YieXl37r-oDRMtTAxf57Stzs,487
+oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
+oafuncs/oa_tool/parallel.py,sha256=kYbiIFDB7EoxasmXGSomaEDVUsg9Rfvdgbw93lBOY7o,3770
+oafuncs-0.0.91.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
+oafuncs-0.0.91.dist-info/METADATA,sha256=KT2rJ-ZeMPNYiqfyN5tRDdvf7eP5DB5NnesLZVsFG5A,3321
+oafuncs-0.0.91.dist-info/WHEEL,sha256=M1ikteR9eetPNvm1LyQ3rpXxNYuGd90oakQO1a-ohSk,109
+oafuncs-0.0.91.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
+oafuncs-0.0.91.dist-info/RECORD,,
oafuncs-0.0.91.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.7.0)
 Root-Is-Purelib: true
 Tag: py2-none-any
 Tag: py3-none-any
oafuncs-0.0.90.dist-info/RECORD DELETED
@@ -1,26 +0,0 @@
-oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
-oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
-oafuncs/oa_data.py,sha256=21HC_7GVFAtU9AMYKGSSzY9J6_0Ju-5n8dJKwOOx5HI,15641
-oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
-oafuncs/oa_file.py,sha256=9b2uXTOqJqds5IhEqA_702G-qzyCZiguGY5JcT9CZ78,12728
-oafuncs/oa_help.py,sha256=42xvmv6BSTyrKfQtW0bvedyv6ElhFJLMblq5jhziuB4,4076
-oafuncs/oa_nc.py,sha256=m_80xWzoyY2niupfpTSvej1D_k4WvTnDYlnlYbIfqGI,17525
-oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
-oafuncs/data_store/OAFuncs.png,sha256=HZORbnBSRX0MZSLTGAZAPK24RBUTmihguMeG9YiU_So,3261697
-oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
-oafuncs/oa_down/__init__.py,sha256=pKPqxD0z09NEXWCemuemfgTct7Kcu3APPJqqB1FPXRM,565
-oafuncs/oa_down/hycom_3hourly.py,sha256=Bt4MjcshhAyDckfFvdqxjNvzU7JuBVYCwvY8b1OPbPw,59501
-oafuncs/oa_down/literature.py,sha256=Txv1YGSG-Z7m4o7FGHvXOR40EFxYozMsyM0-gy5CMEg,10086
-oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
-oafuncs/oa_down/user_agent.py,sha256=TsPcAxFmMTYAEHRFjurI1bQBJfDhcA70MdHoUPwQmks,785
-oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
-oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
-oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
-oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
-oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
-oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
-oafuncs-0.0.90.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
-oafuncs-0.0.90.dist-info/METADATA,sha256=s3X6lHw6yv20rd2528K-5cOk7zcYRGSIGYEg4SeIqqI,3321
-oafuncs-0.0.90.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
-oafuncs-0.0.90.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
-oafuncs-0.0.90.dist-info/RECORD,,