PyPI - oafuncs - Versions diffs - 0.0.92__tar.gz → 0.0.93__tar.gz - Mend

oafuncs 0.0.92__tar.gz → 0.0.93__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{oafuncs-0.0.92/oafuncs.egg-info → oafuncs-0.0.93}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: oafuncs
-Version: 0.0.92
+Version: 0.0.93
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu

oafuncs-0.0.93/oafuncs/oa_data.py ADDED Viewed

@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Author: Liu Kun && 16031215@qq.com
+Date: 2024-09-17 17:12:47
+LastEditors: Liu Kun && 16031215@qq.com
+LastEditTime: 2024-12-13 19:11:08
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_data.py
+Description:
+EditPlatform: vscode
+ComputerInfo: XPS 15 9510
+SystemInfo: Windows 11
+Python Version: 3.11
+"""
+import itertools
+import multiprocessing as mp
+from concurrent.futures import ThreadPoolExecutor
+import numpy as np
+from scipy.interpolate import griddata
+__all__ = ["interp_2d", "ensure_list"]
+def ensure_list(input_data):
+    """
+    Ensures that the input is converted into a list.
+    If the input is already a list, it returns it directly.
+    If the input is a string, it wraps it in a list and returns.
+    For other types of input, it converts them to a string and then wraps in a list.
+    :param input_data: The input which can be a list, a string, or any other type.
+    :return: A list containing the input or the string representation of the input.
+    """
+    if isinstance(input_data, list):
+        return input_data
+    elif isinstance(input_data, str):
+        return [input_data]
+    else:
+        # For non-list and non-string inputs, convert to string and wrap in a list
+        return [str(input_data)]
+def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
+    """
+    Perform 2D interpolation on the last two dimensions of a multi-dimensional array.
+    Parameters:
+    - target_x (array-like): 1D array of target grid's x-coordinates.
+    - target_y (array-like): 1D array of target grid's y-coordinates.
+    - origin_x (array-like): 1D array of original grid's x-coordinates.
+    - origin_y (array-like): 1D array of original grid's y-coordinates.
+    - data (numpy.ndarray): Multi-dimensional array where the last two dimensions correspond to the original grid.
+    - method (str, optional): Interpolation method, default is 'linear'. Other options include 'nearest', 'cubic', etc.
+    - parallel (bool, optional): Flag to enable parallel processing. Default is True.
+    Returns:
+    - interpolated_data (numpy.ndarray): Interpolated data with the same leading dimensions as the input data, but with the last two dimensions corresponding to the target grid.
+    Raises:
+    - ValueError: If the shape of the data does not match the shape of the origin_x or origin_y grids.
+    Usage:
+    - Interpolate a 2D array:
+        result = interp_2d(target_x, target_y, origin_x, origin_y, data_2d)
+    - Interpolate a 3D array (where the last two dimensions are spatial):
+        result = interp_2d(target_x, target_y, origin_x, origin_y, data_3d)
+    - Interpolate a 4D array (where the last two dimensions are spatial):
+        result = interp_2d(target_x, target_y, origin_x, origin_y, data_4d)
+    """
+    def interp_single(data_slice, target_points, origin_points, method):
+        return griddata(origin_points, data_slice.ravel(), target_points, method=method).reshape(target_y.shape)
+    # 确保目标网格和初始网格都是二维的
+    if len(target_y.shape) == 1:
+        target_x, target_y = np.meshgrid(target_x, target_y)
+    if len(origin_y.shape) == 1:
+        origin_x, origin_y = np.meshgrid(origin_x, origin_y)
+    # 根据经纬度网格判断输入数据的形状是否匹配
+    if origin_x.shape != data.shape[-2:] or origin_y.shape != data.shape[-2:]:
+        raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
+    # 创建网格和展平数据
+    target_points = np.column_stack((target_y.ravel(), target_x.ravel()))
+    origin_points = np.column_stack((origin_y.ravel(), origin_x.ravel()))
+    # 根据是否并行选择不同的执行方式
+    if parallel:
+        with ThreadPoolExecutor(max_workers=mp.cpu_count() - 2) as executor:
+            if len(data.shape) == 2:
+                interpolated_data = list(executor.map(interp_single, [data], [target_points], [origin_points], [method]))
+            elif len(data.shape) == 3:
+                interpolated_data = list(executor.map(interp_single, [data[i] for i in range(data.shape[0])], [target_points] * data.shape[0], [origin_points] * data.shape[0], [method] * data.shape[0]))
+            elif len(data.shape) == 4:
+                index_combinations = list(itertools.product(range(data.shape[0]), range(data.shape[1])))
+                interpolated_data = list(executor.map(interp_single, [data[i, j] for i, j in index_combinations], [target_points] * len(index_combinations), [origin_points] * len(index_combinations), [method] * len(index_combinations)))
+                interpolated_data = np.array(interpolated_data).reshape(data.shape[0], data.shape[1], *target_y.shape)
+    else:
+        if len(data.shape) == 2:
+            interpolated_data = interp_single(data, target_points, origin_points, method)
+        elif len(data.shape) == 3:
+            interpolated_data = np.stack([interp_single(data[i], target_points, origin_points, method) for i in range(data.shape[0])])
+        elif len(data.shape) == 4:
+            interpolated_data = np.stack([np.stack([interp_single(data[i, j], target_points, origin_points, method) for j in range(data.shape[1])]) for i in range(data.shape[0])])
+    return np.array(interpolated_data)
+if __name__ == "__main__":
+    pass
+    """ import time
+    import matplotlib.pyplot as plt
+    # 测试数据
+    origin_x = np.linspace(0, 10, 11)
+    origin_y = np.linspace(0, 10, 11)
+    target_x = np.linspace(0, 10, 101)
+    target_y = np.linspace(0, 10, 101)
+    data = np.random.rand(11, 11)
+    # 高维插值
+    origin_x = np.linspace(0, 10, 11)
+    origin_y = np.linspace(0, 10, 11)
+    target_x = np.linspace(0, 10, 101)
+    target_y = np.linspace(0, 10, 101)
+    data = np.random.rand(10, 10, 11, 11)
+    start = time.time()
+    interpolated_data = interp_2d(target_x, target_y, origin_x, origin_y, data, parallel=False)
+    print(f"Interpolation time: {time.time()-start:.2f}s")
+    print(interpolated_data.shape)
+    # 高维插值多线程
+    start = time.time()
+    interpolated_data = interp_2d(target_x, target_y, origin_x, origin_y, data)
+    print(f"Interpolation time: {time.time()-start:.2f}s")
+    print(interpolated_data.shape)
+    print(interpolated_data[0, 0, :, :].shape)
+    plt.figure()
+    plt.contourf(target_x, target_y, interpolated_data[0, 0, :, :])
+    plt.colorbar()
+    plt.show() """

{oafuncs-0.0.92 → oafuncs-0.0.93}/oafuncs/oa_down/hycom_3hourly.py RENAMED Viewed

@@ -575,7 +575,7 @@ def _check_existing_file(file_full_path, avg_size):
         if abs(delta_size_ratio) > 0.025:
             if check_nc(file_full_path):
                 # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
-                if not _check_ftime(file_full_path,if_print=True):
+                if not _check_ftime(file_full_path, if_print=True):
                     return False
                 else:
                     return True
@@ -583,7 +583,7 @@ def _check_existing_file(file_full_path, avg_size):
                 print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
                 return False
         else:
-            if not _check_ftime(file_full_path,if_print=True):
+            if not _check_ftime(file_full_path, if_print=True):
                 return False
             else:
                 return True
@@ -593,8 +593,8 @@ def _check_existing_file(file_full_path, avg_size):
 def _get_mean_size30(store_path, same_file):
     if same_file not in fsize_dict.keys():
-            # print(f'Same file name: {same_file}')
-            fsize_dict[same_file] = {"size": 0, "count": 0}
+        # print(f'Same file name: {same_file}')
+        fsize_dict[same_file] = {"size": 0, "count": 0}
     if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
         # 更新30次文件最小值，后续认为可以代表所有文件，不再更新占用时间
@@ -609,7 +609,7 @@ def _get_mean_size30(store_path, same_file):
 def _get_mean_size_move(same_file, current_file):
     # 获取锁
-    with fsize_dict_lock: # 全局锁，确保同一时间只能有一个线程访问
+    with fsize_dict_lock:  # 全局锁，确保同一时间只能有一个线程访问
         # 初始化字典中的值，如果文件不在字典中
         if same_file not in fsize_dict.keys():
             fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
@@ -698,7 +698,6 @@ def _correct_time(nc_file):
     modify_nc(nc_file, "time", None, time_difference)
 def _download_file(target_url, store_path, file_name, check=False):
     # Check if the file exists
     fname = Path(store_path) / file_name
@@ -706,14 +705,14 @@ def _download_file(target_url, store_path, file_name, check=False):
     file_name_split = file_name_split[:-1]
     # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
     same_file = "_".join(file_name_split) + "*nc"
-    if check:
-        if same_file not in fsize_dict.keys(): # 对第一个文件单独进行检查，因为没有大小可以对比
-            check_nc(fname,if_delete=True)
+    if check:
+        if same_file not in fsize_dict.keys():  # 对第一个文件单独进行检查，因为没有大小可以对比
+            check_nc(fname, if_delete=True)
         # set_min_size = _get_mean_size30(store_path, same_file) # 原方案，只30次取平均值；若遇变化，无法判断
         get_mean_size = _get_mean_size_move(same_file, fname)
         if _check_existing_file(fname, get_mean_size):
             count_dict["skip"] += 1
             return
@@ -767,7 +766,7 @@ def _download_file(target_url, store_path, file_name, check=False):
                 break
             if request_times > 0:
                 # print(f'\r正在重试第 {request_times} 次', end="")
-                print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
+                print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
             # 尝试下载文件
             try:
                 headers = {"User-Agent": get_ua()}
@@ -788,9 +787,15 @@ def _download_file(target_url, store_path, file_name, check=False):
                             f.write(chunk)
                 f.close()
-                if not _check_ftime(fname):
-                    _correct_time(fname)
+                if not _check_ftime(fname, if_print=True):
+                    if match_time:
+                        _correct_time(fname)
+                    else:
+                        _clear_existing_file(fname)
+                        # print(f"[bold #ffe5c0]File time error, {fname}")
+                        count_dict["no_data"] += 1
+                        break
                 # print(f'\r文件 {fname} 下载成功', end="")
                 if os.path.exists(fname):
@@ -923,11 +928,11 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
                     var = current_group[0]
                     submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                     file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
-                    old_str = f'var={variable_info[var]["var_name"]}'
-                    new_str = f'var={variable_info[var]["var_name"]}'
+                    old_str = f"var={variable_info[var]['var_name']}"
+                    new_str = f"var={variable_info[var]['var_name']}"
                     if len(current_group) > 1:
                         for v in current_group[1:]:
-                            new_str = f'{new_str}&var={variable_info[v]["var_name"]}'
+                            new_str = f"{new_str}&var={variable_info[v]['var_name']}"
                         submit_url = submit_url.replace(old_str, new_str)
                         # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
                         file_name = f"HYCOM_{key}_{download_time}.nc"
@@ -1023,7 +1028,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
                     # 串行方式
                     for i, time_str in enumerate(time_list):
                         _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
-                        progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
+                        progress.update(task, advance=1, description=f"[cyan]Downloading... {i + 1}/{len(time_list)}")
                 else:
                     # 并行方式
                     with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -1041,7 +1046,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
                         time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
                         time_str_end = time_list[time_str_end_index]
                         _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
-                        progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
+                        progress.update(task, advance=1, description=f"[cyan]Downloading... {i + 1}/{total_num}")
                 else:
                     # 并行方式
                     with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -1051,10 +1056,10 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
                         for feature in as_completed(futures):
                             _done_callback(feature, progress, task, len(time_list), counter_lock)
     else:
-        print("Please ensure the time_s is no more than time_e")
+        print("[bold red]Please ensure the time_s is no more than time_e")
-def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None):
+def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=False):
     """
     Description:
         Download the data of single time or a series of time
@@ -1076,6 +1081,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
         ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
         idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
+        fill_time: bool, whether to match the time, default is False, if set to True, the time in the file name will be corrected according to the time in the file; else, the data will be skip if the time is not correct. Because the real time of some data that has been downloaded does not match the time in the file name, eg. the required time is 2024110100, but the time in the file name is 2024110103, so the data will be skip if the fill_time is False. Note: it is not the right time data, so it is not recommended to set fill_time to True
     Returns:
         None
@@ -1123,7 +1129,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         os.makedirs(str(store_path), exist_ok=True)
     if num_workers is not None:
-        num_workers = max(min(num_workers, 10), 1) # 暂时不限制最大值，再检查的时候可以多开一些线程
+        num_workers = max(min(num_workers, 10), 1)  # 暂时不限制最大值，再检查的时候可以多开一些线程
         # num_workers = int(max(num_workers, 1))
     time_s = str(time_s)
     if len(time_s) == 8:
@@ -1143,30 +1149,44 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     global fsize_dict
     fsize_dict = {}
     global fsize_dict_lock
     fsize_dict_lock = Lock()
     global use_idm, given_idm_engine, idm_download_list
     if idm_engine is not None:
         use_idm = True
+        num_workers = 1
         given_idm_engine = idm_engine
         idm_download_list = []
     else:
         use_idm = False
+    global match_time
+    if fill_time:
+        match_time = True
+    else:
+        match_time = False
     _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
     if idm_download_list:
         for f in idm_download_list:
             wait_success = 0
             success = False
             while not success:
                 if check_nc(f):
-                    _correct_time(f)
+                    if match_time:
+                        _correct_time(f)
+                        count_dict["success"] += 1
+                    else:
+                        if not _check_ftime(f):
+                            _clear_existing_file(f)
+                            count_dict["no_data"] += 1
+                            count_dict["no_data_list"].append(str(f).split("_")[-1].split(".")[0])
+                        else:
+                            count_dict["success"] += 1
                     success = True
-                    count_dict["success"] += 1
                 else:
                     wait_success += 1
                     time.sleep(3)
@@ -1241,10 +1261,6 @@ def how_to_use():
 if __name__ == "__main__":
-    time_s, time_e = "2018010800", "2024083121"
-    merge_name = f"{time_s}_{time_e}"  # 合并后的文件名
-    root_path = r"G:\Data\HYCOM\3hourly"
-    location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
     download_dict = {
         "water_u": {"simple_name": "u", "download": 1},
         "water_v": {"simple_name": "v", "download": 1},
@@ -1259,51 +1275,31 @@ if __name__ == "__main__":
     var_list = [var_name for var_name in download_dict.keys() if download_dict[var_name]["download"]]
-    # set depth or level, only one can be True
-    # if you wanna download all depth or level, set both False
-    depth = None  # or 0-5000 meters
-    level = None  # or 1-40 levels
-    num_workers = 1
-    check = True
-    ftimes = 1
-    idm_engine = r"D:\Programs\Internet Download Manager\IDMan.exe"
-    download_switch, single_var = True, False
-    combine_switch = False
-    copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
+    single_var = False
     # draw_time_range(pic_save_folder=r'I:\Delete')
-    if download_switch:
-        if single_var:
-            for var_name in var_list:
-                download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes, idm_engine=idm_engine)
-        else:
-            download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes, idm_engine=idm_engine)
+    options = {
+        "var": var_list,
+        "time_s": "2018010100",
+        "time_e": "2020123121",
+        "store_path": r"F:\Data\HYCOM\3hourly",
+        "lon_min": 105,
+        "lon_max": 130,
+        "lat_min": 15,
+        "lat_max": 45,
+        "num_workers": 3,
+        "check": True,
+        "depth": None, # or 0-5000 meters
+        "level": None, # or 1-40 levels
+        "ftimes": 1,
+        "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe", # 查漏补缺不建议开启
+        "fill_time": False
+    }
-    """ if combine_switch or copy_switch:
-        time_list = get_time_list(time_s, time_e, 3, 'hour')
+    if single_var:
         for var_name in var_list:
-            file_list = []
-            if single_var:
-                for time_str in time_list:
-                    file_list.append(Path(root_path)/f'HYCOM_{var_name}_{time_str}.nc')
-                merge_path_name = Path(root_path)/f'HYCOM_{var_name}_{merge_name}.nc'
-            else:
-                # 如果混合，需要看情况获取文件列表
-                fname = ''
-                if var_name in ['water_u', 'water_v', 'water_u_bottom', 'water_v_bottom']:
-                    fname = 'uv3z'
-                elif var_name in ['water_temp', 'salinity', 'water_temp_bottom', 'salinity_bottom']:
-                    fname = 'ts3z'
-                elif var_name in ['surf_el']:
-                    fname = 'surf_el'
-                for time_str in time_list:
-                    file_list.append(Path(root_path)/f'HYCOM_{fname}_{time_str}.nc')
-                merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
-            if combine_switch:
-                # 这里的var_name必须是官方变量名，不能再是简写了
-                merge(file_list, var_name, 'time', merge_path_name)
-            if copy_switch:
-                copy_file(merge_path_name, copy_dir) """
+            options["var"] = var_name
+            download(**options)
+    else:
+        download(**options)

{oafuncs-0.0.92 → oafuncs-0.0.93}/oafuncs/oa_down/literature.py RENAMED Viewed

@@ -24,6 +24,7 @@ from rich import print
 from rich.progress import track
 from oafuncs.oa_down.user_agent import get_ua
 from oafuncs.oa_file import remove
+from oafuncs.oa_data import ensure_list
 __all__ = ["download5doi"]
@@ -222,7 +223,7 @@ class _Downloader:
                     print("Try another URL...")
-def read_excel(file, col_name=r"DOI"):
+def _read_excel(file, col_name=r"DOI"):
     df = pd.read_excel(file)
     df_list = df[col_name].tolist()
     # 去掉nan
@@ -230,7 +231,7 @@ def read_excel(file, col_name=r"DOI"):
     return df_list
-def read_txt(file):
+def _read_txt(file):
     with open(file, "r") as f:
         lines = f.readlines()
     # 去掉换行符以及空行
@@ -268,13 +269,12 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
     store_path.mkdir(parents=True, exist_ok=True)
     store_path = str(store_path)
-    # 如果doi_list是str，转换为list
-    if isinstance(doi_list, str) and doi_list:
-        doi_list = [doi_list]
+    if doi_list:
+        doi_list = ensure_list(doi_list)
     if txt_file:
-        doi_list = read_txt(txt_file)
+        doi_list = _read_txt(txt_file)
     if excel_file:
-        doi_list = read_excel(excel_file, col_name)
+        doi_list = _read_excel(excel_file, col_name)
     remove(Path(store_path) / "wrong_record.txt")
     print(f"Downloading {len(doi_list)} PDF files...")
     for doi in track(doi_list, description="Downloading..."):

{oafuncs-0.0.92 → oafuncs-0.0.93}/oafuncs/oa_file.py RENAMED Viewed

@@ -226,8 +226,12 @@ def make_dir(directory):
         make_dir(r"E:\Data\2024\09\17\var1")
     """
     directory = str(directory)
-    os.makedirs(directory, exist_ok=True)
-    print(f"Created directory: {directory}")
+    if os.path.exists(directory):
+        print(f"Directory already exists: {directory}")
+        return
+    else:
+        os.makedirs(directory, exist_ok=True)
+        print(f"Created directory: {directory}")
 # ** 清空文件夹

{oafuncs-0.0.92 → oafuncs-0.0.93/oafuncs.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: oafuncs
-Version: 0.0.92
+Version: 0.0.93
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu

{oafuncs-0.0.92 → oafuncs-0.0.93}/setup.py RENAMED Viewed

@@ -18,7 +18,7 @@ URL = 'https://github.com/Industry-Pays/OAFuncs'
 EMAIL = 'liukun0312@stu.ouc.edu.cn'
 AUTHOR = 'Kun Liu'
 REQUIRES_PYTHON = '>=3.9.0' # 2025/01/05
-VERSION = '0.0.92'
+VERSION = '0.0.93'
 # What packages are required for this module to be executed?
 REQUIRED = [

oafuncs-0.0.92/oafuncs/oa_data.py DELETED Viewed

@@ -1,278 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-"""
-Author: Liu Kun && 16031215@qq.com
-Date: 2024-09-17 17:12:47
-LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-12-13 19:11:08
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_data.py
-Description:
-EditPlatform: vscode
-ComputerInfo: XPS 15 9510
-SystemInfo: Windows 11
-Python Version: 3.11
-"""
-import itertools
-import multiprocessing as mp
-from concurrent.futures import ThreadPoolExecutor
-import numpy as np
-from rich import print
-from scipy.interpolate import griddata
-__all__ = ["interp_2d"]
-def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
-    """
-    Perform 2D interpolation on the last two dimensions of a multi-dimensional array.
-    Parameters:
-    - target_x (array-like): 1D array of target grid's x-coordinates.
-    - target_y (array-like): 1D array of target grid's y-coordinates.
-    - origin_x (array-like): 1D array of original grid's x-coordinates.
-    - origin_y (array-like): 1D array of original grid's y-coordinates.
-    - data (numpy.ndarray): Multi-dimensional array where the last two dimensions correspond to the original grid.
-    - method (str, optional): Interpolation method, default is 'linear'. Other options include 'nearest', 'cubic', etc.
-    - parallel (bool, optional): Flag to enable parallel processing. Default is True.
-    Returns:
-    - interpolated_data (numpy.ndarray): Interpolated data with the same leading dimensions as the input data, but with the last two dimensions corresponding to the target grid.
-    Raises:
-    - ValueError: If the shape of the data does not match the shape of the origin_x or origin_y grids.
-    Usage:
-    - Interpolate a 2D array:
-        result = interp_2d(target_x, target_y, origin_x, origin_y, data_2d)
-    - Interpolate a 3D array (where the last two dimensions are spatial):
-        result = interp_2d(target_x, target_y, origin_x, origin_y, data_3d)
-    - Interpolate a 4D array (where the last two dimensions are spatial):
-        result = interp_2d(target_x, target_y, origin_x, origin_y, data_4d)
-    """
-    def interp_single(data_slice, target_points, origin_points, method):
-        return griddata(origin_points, data_slice.ravel(), target_points, method=method).reshape(target_y.shape)
-    # 确保目标网格和初始网格都是二维的
-    if len(target_y.shape) == 1:
-        target_x, target_y = np.meshgrid(target_x, target_y)
-    if len(origin_y.shape) == 1:
-        origin_x, origin_y = np.meshgrid(origin_x, origin_y)
-    # 根据经纬度网格判断输入数据的形状是否匹配
-    if origin_x.shape != data.shape[-2:] or origin_y.shape != data.shape[-2:]:
-        raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
-    # 创建网格和展平数据
-    target_points = np.column_stack((target_y.ravel(), target_x.ravel()))
-    origin_points = np.column_stack((origin_y.ravel(), origin_x.ravel()))
-    # 根据是否并行选择不同的执行方式
-    if parallel:
-        with ThreadPoolExecutor(max_workers=mp.cpu_count() - 2) as executor:
-            if len(data.shape) == 2:
-                interpolated_data = list(executor.map(interp_single, [data], [target_points], [origin_points], [method]))
-            elif len(data.shape) == 3:
-                interpolated_data = list(executor.map(interp_single, [data[i] for i in range(data.shape[0])], [target_points] * data.shape[0], [origin_points] * data.shape[0], [method] * data.shape[0]))
-            elif len(data.shape) == 4:
-                index_combinations = list(itertools.product(range(data.shape[0]), range(data.shape[1])))
-                interpolated_data = list(executor.map(interp_single, [data[i, j] for i, j in index_combinations], [target_points] * len(index_combinations), [origin_points] * len(index_combinations), [method] * len(index_combinations)))
-                interpolated_data = np.array(interpolated_data).reshape(data.shape[0], data.shape[1], *target_y.shape)
-    else:
-        if len(data.shape) == 2:
-            interpolated_data = interp_single(data, target_points, origin_points, method)
-        elif len(data.shape) == 3:
-            interpolated_data = np.stack([interp_single(data[i], target_points, origin_points, method) for i in range(data.shape[0])])
-        elif len(data.shape) == 4:
-            interpolated_data = np.stack([np.stack([interp_single(data[i, j], target_points, origin_points, method) for j in range(data.shape[1])]) for i in range(data.shape[0])])
-    return np.array(interpolated_data)
-# ---------------------------------------------------------------------------------- not used below ----------------------------------------------------------------------------------
-# ** 高维插值函数，插值最后两个维度
-def interp_2d_20241213(target_x, target_y, origin_x, origin_y, data, method="linear"):
-    """
-    高维插值函数，默认插值最后两个维度，传输数据前请确保数据的维度正确
-    参数:
-    target_y (array-like): 目标经度网格 1D 或 2D
-    target_x (array-like): 目标纬度网格 1D 或 2D
-    origin_y (array-like): 初始经度网格 1D 或 2D
-    origin_x (array-like): 初始纬度网格 1D 或 2D
-    data (array-like): 数据 (*, lat, lon) 2D, 3D, 4D
-    method (str, optional): 插值方法，可选 'linear', 'nearest', 'cubic' 等，默认为 'linear'
-    返回:
-    array-like: 插值结果
-    """
-    # 确保目标网格和初始网格都是二维的
-    if len(target_y.shape) == 1:
-        target_x, target_y = np.meshgrid(target_x, target_y)
-    if len(origin_y.shape) == 1:
-        origin_x, origin_y = np.meshgrid(origin_x, origin_y)
-    dims = data.shape
-    len_dims = len(dims)
-    # print(dims[-2:])
-    # 根据经纬度网格判断输入数据的形状是否匹配
-    if origin_x.shape != dims[-2:] or origin_y.shape != dims[-2:]:
-        print(origin_x.shape, dims[-2:])
-        raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
-    # 将目标网格展平成一维数组
-    target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
-    # 将初始网格展平成一维数组
-    origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
-    # 进行插值
-    if len_dims == 2:
-        interpolated_data = griddata(origin_points, np.ravel(data), target_points, method=method)
-        interpolated_data = np.reshape(interpolated_data, target_y.shape)
-    elif len_dims == 3:
-        interpolated_data = []
-        for i in range(dims[0]):
-            dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
-            interpolated_data.append(np.reshape(dt, target_y.shape))
-            print(f"Interpolating {i + 1}/{dims[0]}...")
-        interpolated_data = np.array(interpolated_data)
-    elif len_dims == 4:
-        interpolated_data = []
-        for i in range(dims[0]):
-            interpolated_data.append([])
-            for j in range(dims[1]):
-                dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
-                interpolated_data[i].append(np.reshape(dt, target_y.shape))
-                print(f"\rInterpolating {i * dims[1] + j + 1}/{dims[0] * dims[1]}...", end="")
-        print("\n")
-        interpolated_data = np.array(interpolated_data)
-    return interpolated_data
# High-dimensional interpolation over the last two axes, fanned out across a
# thread pool. The original author reports roughly a 3x speed-up on a local
# machine and no speed-up (so far) on an HPC cluster.
def interp_2d_parallel_20241213(target_x, target_y, origin_x, origin_y, data, method="linear"):
    """Interpolate the last two axes of ``data`` onto a target grid using threads.

    Every 2-D slice ``data[..., :, :]`` is interpolated independently with
    ``scipy.interpolate.griddata``; the slices are distributed over a
    ``ThreadPoolExecutor``.

    Args:
        target_x: Target longitude grid, 1-D or 2-D.
        target_y: Target latitude grid, 1-D or 2-D. When it is 1-D, both
            target axes are expanded with ``np.meshgrid(target_x, target_y)``.
        origin_x: Source longitude grid, 1-D or 2-D.
        origin_y: Source latitude grid, 1-D or 2-D. When it is 1-D, both
            source axes are expanded with ``np.meshgrid(origin_x, origin_y)``.
        data: Array shaped ``(*, lat, lon)``. Any number of leading axes is
            accepted (the original implementation handled 2-D, 3-D and 4-D).
        method: Interpolation method forwarded to ``griddata``
            (e.g. ``'linear'``, ``'nearest'``, ``'cubic'``). Defaults to
            ``'linear'``.

    Returns:
        numpy.ndarray: Interpolated values shaped
        ``data.shape[:-2] + target_grid.shape``. A 2-D input therefore yields
        a 2-D result (earlier revisions returned it with a spurious leading
        axis of length 1).

    Raises:
        ValueError: If the (expanded) source grids do not have the same shape
            as the last two axes of ``data``.

    Example:
        interpolated_data = interp_2d_parallel_20241213(
            target_x, target_y, origin_x, origin_y, data, method="linear"
        )
    """
    target_x, target_y = np.asarray(target_x), np.asarray(target_y)
    origin_x, origin_y = np.asarray(origin_x), np.asarray(origin_y)
    data = np.asarray(data)

    # Promote 1-D coordinate vectors to full 2-D grids.
    if target_y.ndim == 1:
        target_x, target_y = np.meshgrid(target_x, target_y)
    if origin_y.ndim == 1:
        origin_x, origin_y = np.meshgrid(origin_x, origin_y)

    dims = data.shape
    if origin_x.shape != dims[-2:] or origin_y.shape != dims[-2:]:
        raise ValueError("数据形状与 origin_x 或 origin_y 的形状不匹配.")

    # The point clouds are identical for every slice, so build them once.
    # Columns are ordered (y, x), matching the (lat, lon) layout of ``data``.
    target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
    origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))

    target_shape = target_y.shape
    lead_shape = dims[:-2]
    # ``np.prod`` of an empty shape is 1, so plain 2-D input is one slice.
    n_slices = int(np.prod(lead_shape, dtype=int))
    # Row k holds the C-order flattening of the k-th (lat, lon) slice.
    flat_slices = data.reshape(n_slices, dims[-2] * dims[-1])

    def _interp_slice(values):
        return griddata(origin_points, values, target_points, method=method)

    # Leave two cores free, but never ask for fewer than one worker:
    # ThreadPoolExecutor rejects max_workers <= 0.
    n_workers = max(1, mp.cpu_count() - 2)
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        print(f"Using {n_workers} threads...")
        results = list(executor.map(_interp_slice, flat_slices))

    return np.array(results).reshape(lead_shape + target_shape)
def _test_sum(a, b):
    """Return the result of ``a + b`` (private smoke-test helper)."""
    total = a + b
    return total
if __name__ == "__main__":
    # Running this module directly performs no work: the manual benchmark
    # below is parked inside a bare string literal, so it is never evaluated.
    """ import time
    import matplotlib.pyplot as plt
    # 测试数据
    origin_x = np.linspace(0, 10, 11)
    origin_y = np.linspace(0, 10, 11)
    target_x = np.linspace(0, 10, 101)
    target_y = np.linspace(0, 10, 101)
    data = np.random.rand(11, 11)
    # 高维插值
    origin_x = np.linspace(0, 10, 11)
    origin_y = np.linspace(0, 10, 11)
    target_x = np.linspace(0, 10, 101)
    target_y = np.linspace(0, 10, 101)
    data = np.random.rand(10, 10, 11, 11)
    start = time.time()
    interpolated_data = interp_2d(target_x, target_y, origin_x, origin_y, data, parallel=False)
    print(f"Interpolation time: {time.time()-start:.2f}s")
    print(interpolated_data.shape)
    # 高维插值多线程
    start = time.time()
    interpolated_data = interp_2d(target_x, target_y, origin_x, origin_y, data)
    print(f"Interpolation time: {time.time()-start:.2f}s")
    print(interpolated_data.shape)
    print(interpolated_data[0, 0, :, :].shape)
    plt.figure()
    plt.contourf(target_x, target_y, interpolated_data[0, 0, :, :])
    plt.colorbar()
    plt.show() """