PyPI - oafuncs - Versions diffs - 0.0.93__py2.py3-none-any.whl → 0.0.95__py2.py3-none-any.whl - Mend

oafuncs 0.0.93__py2.py3-none-any.whl → 0.0.95__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

oafuncs/data_store/OAFuncs.png +0 -0
oafuncs/oa_data.py +33 -5
oafuncs/oa_down/hycom_3hourly.py +53 -58
oafuncs/oa_down/hycom_3hourly_20250129.py +1307 -0
oafuncs/oa_nc.py +46 -28
{oafuncs-0.0.93.dist-info → oafuncs-0.0.95.dist-info}/METADATA +5 -2
{oafuncs-0.0.93.dist-info → oafuncs-0.0.95.dist-info}/RECORD +10 -9
{oafuncs-0.0.93.dist-info → oafuncs-0.0.95.dist-info}/LICENSE.txt +0 -0
{oafuncs-0.0.93.dist-info → oafuncs-0.0.95.dist-info}/WHEEL +0 -0
{oafuncs-0.0.93.dist-info → oafuncs-0.0.95.dist-info}/top_level.txt +0 -0

oafuncs/data_store/OAFuncs.png CHANGED Viewed

Binary file

oafuncs/oa_data.py CHANGED Viewed

@@ -18,10 +18,11 @@ import multiprocessing as mp
 from concurrent.futures import ThreadPoolExecutor
 import numpy as np
+import xarray as xr
 from scipy.interpolate import griddata
+import salem
-__all__ = ["interp_2d", "ensure_list"]
+__all__ = ["interp_2d", "ensure_list", "mask_shapefile"]
 def ensure_list(input_data):
@@ -44,7 +45,6 @@ def ensure_list(input_data):
         return [str(input_data)]
 def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
     """
     Perform 2D interpolation on the last two dimensions of a multi-dimensional array.
@@ -87,6 +87,8 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", par
         raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
     # 创建网格和展平数据
+    target_x, target_y = np.array(target_x), np.array(target_y)
+    origin_x, origin_y = np.array(origin_x), np.array(origin_y)
     target_points = np.column_stack((target_y.ravel(), target_x.ravel()))
     origin_points = np.column_stack((origin_y.ravel(), origin_x.ravel()))
@@ -109,12 +111,38 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", par
         elif len(data.shape) == 4:
             interpolated_data = np.stack([np.stack([interp_single(data[i, j], target_points, origin_points, method) for j in range(data.shape[1])]) for i in range(data.shape[0])])
-    return np.array(interpolated_data)
+    return np.squeeze(np.array(interpolated_data))
+def mask_shapefile(data: np.ndarray, lons: np.ndarray, lats: np.ndarray, shapefile_path: str) -> xr.DataArray:
+    """
+    Masks a 2D data array using a shapefile.
+    Parameters:
+    - data: 2D numpy array of data to be masked.
+    - lons: 1D numpy array of longitudes.
+    - lats: 1D numpy array of latitudes.
+    - shapefile_path: Path to the shapefile used for masking.
+    Returns:
+    - Masked xarray DataArray.
+    """
+    """
+    https://cloud.tencent.com/developer/article/1701896
+    """
+    try:
+        # import geopandas as gpd
+        # shp_f = gpd.read_file(shapefile_path)
+        shp_f = salem.read_shapefile(shapefile_path)
+        data_da = xr.DataArray(data, coords=[("latitude", lats), ("longitude", lons)])
+        masked_data = data_da.salem.roi(shape=shp_f)
+        return masked_data
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
-if __name__ == "__main__":
+if __name__ == "__main__":
     pass
     """ import time

oafuncs/oa_down/hycom_3hourly.py CHANGED Viewed

@@ -2,10 +2,10 @@
 # coding=utf-8
 """
 Author: Liu Kun && 16031215@qq.com
-Date: 2024-11-02 11:07:49
+Date: 2025-01-29 19:05:09
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2025-01-07 16:31:36
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
+LastEditTime: 2025-01-29 19:05:10
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly_20250129 copy.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
@@ -13,6 +13,9 @@ SystemInfo: Windows 11
 Python Version: 3.12
 """
 import datetime
 import os
 import random
@@ -24,19 +27,19 @@ from pathlib import Path
 from threading import Lock
 import matplotlib.pyplot as plt
+import netCDF4 as nc
 import numpy as np
 import pandas as pd
-import xarray as xr
 import requests
+import xarray as xr
 from rich import print
 from rich.progress import Progress
-import netCDF4 as nc
+from oafuncs.oa_down.idm import downloader as idm_downloader
 from oafuncs.oa_down.user_agent import get_ua
 from oafuncs.oa_file import file_size, mean_size
 from oafuncs.oa_nc import check as check_nc
 from oafuncs.oa_nc import modify as modify_nc
-from oafuncs.oa_down.idm import downloader as idm_downloader
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
@@ -575,18 +578,12 @@ def _check_existing_file(file_full_path, avg_size):
         if abs(delta_size_ratio) > 0.025:
             if check_nc(file_full_path):
                 # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
-                if not _check_ftime(file_full_path, if_print=True):
-                    return False
-                else:
-                    return True
+                return True
             else:
                 print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
                 return False
         else:
-            if not _check_ftime(file_full_path, if_print=True):
-                return False
-            else:
-                return True
+            return True
     else:
         return False
@@ -705,10 +702,25 @@ def _download_file(target_url, store_path, file_name, check=False):
     file_name_split = file_name_split[:-1]
     # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
     same_file = "_".join(file_name_split) + "*nc"
+    if match_time is not None:
+        if check_nc(fname):
+            if not _check_ftime(fname, if_print=True):
+                if match_time:
+                    _correct_time(fname)
+                    count_dict['skip'] += 1
+                else:
+                    _clear_existing_file(fname)
+                    # print(f"[bold #ffe5c0]File time error, {fname}")
+                    count_dict["no_data"] += 1
+            else:
+                count_dict["skip"] += 1
+                print(f"[bold green]{file_name} is correct")
+        return
     if check:
         if same_file not in fsize_dict.keys():  # 对第一个文件单独进行检查，因为没有大小可以对比
-            check_nc(fname, if_delete=True)
+            check_nc(fname, delete_switch=True)
         # set_min_size = _get_mean_size30(store_path, same_file) # 原方案，只30次取平均值；若遇变化，无法判断
         get_mean_size = _get_mean_size_move(same_file, fname)
@@ -788,15 +800,6 @@ def _download_file(target_url, store_path, file_name, check=False):
                 f.close()
-                if not _check_ftime(fname, if_print=True):
-                    if match_time:
-                        _correct_time(fname)
-                    else:
-                        _clear_existing_file(fname)
-                        # print(f"[bold #ffe5c0]File time error, {fname}")
-                        count_dict["no_data"] += 1
-                        break
                 # print(f'\r文件 {fname} 下载成功', end="")
                 if os.path.exists(fname):
                     download_success = True
@@ -1059,7 +1062,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
         print("[bold red]Please ensure the time_s is no more than time_e")
-def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=False):
+def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=None):
     """
     Description:
         Download the data of single time or a series of time
@@ -1081,7 +1084,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
         ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
         idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
-        fill_time: bool, whether to match the time, default is False, if set to True, the time in the file name will be corrected according to the time in the file; else, the data will be skip if the time is not correct. Because the real time of some data that has been downloaded does not match the time in the file name, eg. the required time is 2024110100, but the time in the file name is 2024110103, so the data will be skip if the fill_time is False. Note: it is not the right time data, so it is not recommended to set fill_time to True
+        fill_time: bool or None, the mode to fill the time, default is None. None: only download the data; True: modify the real time of data to the time in the file name; False: check the time in the file name and the real time of data, if not match, delete the file
     Returns:
         None
@@ -1152,6 +1155,9 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     global fsize_dict_lock
     fsize_dict_lock = Lock()
+    if fill_time is not None:
+        num_workers = 1
     global use_idm, given_idm_engine, idm_download_list
     if idm_engine is not None:
@@ -1163,42 +1169,31 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         use_idm = False
     global match_time
-    if fill_time:
-        match_time = True
-    else:
-        match_time = False
+    match_time = fill_time
     _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
-    if idm_download_list:
-        for f in idm_download_list:
-            wait_success = 0
-            success = False
-            while not success:
-                if check_nc(f):
-                    if match_time:
-                        _correct_time(f)
+    if idm_engine is not None:
+        if idm_download_list:
+            for f in idm_download_list:
+                wait_success = 0
+                success = False
+                while not success:
+                    if check_nc(f):
                         count_dict["success"] += 1
-                    else:
-                        if not _check_ftime(f):
-                            _clear_existing_file(f)
-                            count_dict["no_data"] += 1
-                            count_dict["no_data_list"].append(str(f).split("_")[-1].split(".")[0])
-                        else:
-                            count_dict["success"] += 1
-                    success = True
-                else:
-                    wait_success += 1
-                    time.sleep(3)
-                    if wait_success >= 20:
                         success = True
-                        # print(f'{f} download failed')
-                        count_dict["fail"] += 1
+                    else:
+                        wait_success += 1
+                        time.sleep(3)
+                        if wait_success >= 20:
+                            success = True
+                            # print(f'{f} download failed')
+                            count_dict["fail"] += 1
     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
     print("[bold #ecdbfe]-" * 160)
-    print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
+    print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
+    print("[bold #ecdbfe]-" * 160)
     if count_dict["fail"] > 0:
         print("[bold #be5528]Please try again to download the failed data later")
     if count_dict["no_data"] > 0:
@@ -1290,11 +1285,11 @@ if __name__ == "__main__":
         "lat_max": 45,
         "num_workers": 3,
         "check": True,
-        "depth": None, # or 0-5000 meters
-        "level": None, # or 1-40 levels
+        "depth": None,  # or 0-5000 meters
+        "level": None,  # or 1-40 levels
         "ftimes": 1,
-        "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe", # 查漏补缺不建议开启
-        "fill_time": False
+        # "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe",  # 查漏补缺不建议开启
+        "fill_time": False,
     }
     if single_var:

oafuncs 0.0.93__py2.py3-none-any.whl → 0.0.95__py2.py3-none-any.whl

oafuncs 0.0.93__py2.py3-none-any.whl → 0.0.95__py2.py3-none-any.whl