oafuncs 0.0.98.2.tar.gz → 0.0.98.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oafuncs-0.0.98.2/oafuncs.egg-info → oafuncs-0.0.98.4}/PKG-INFO +2 -1
- oafuncs-0.0.98.4/oafuncs/_script/netcdf_write.py +203 -0
- oafuncs-0.0.98.4/oafuncs/_script/parallel.py +214 -0
- oafuncs-0.0.98.4/oafuncs/_script/parallel_test.py +14 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/User_Agent-list.txt +1 -1611
- oafuncs-0.0.98.4/oafuncs/oa_down/hycom_3hourly.py +1216 -0
- oafuncs-0.0.98.2/oafuncs/oa_down/hycom_3hourly.py → oafuncs-0.0.98.4/oafuncs/oa_down/hycom_3hourly_20250416.py +16 -7
- oafuncs-0.0.98.4/oafuncs/oa_down/test_ua.py +40 -0
- oafuncs-0.0.98.4/oafuncs/oa_tool.py +207 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4/oafuncs.egg-info}/PKG-INFO +2 -1
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs.egg-info/SOURCES.txt +2 -1
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs.egg-info/requires.txt +1 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/setup.py +2 -1
- oafuncs-0.0.98.2/oafuncs/_script/netcdf_write.py +0 -116
- oafuncs-0.0.98.2/oafuncs/_script/parallel.py +0 -565
- oafuncs-0.0.98.2/oafuncs/_script/parallel_example_usage.py +0 -83
- oafuncs-0.0.98.2/oafuncs/oa_down/test_ua.py +0 -151
- oafuncs-0.0.98.2/oafuncs/oa_tool.py +0 -119
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/LICENSE.txt +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/MANIFEST.in +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/README.md +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_data/hycom.png +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_data/oafuncs.png +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/cprogressbar.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/email.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/netcdf_merge.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/netcdf_modify.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/plot_dataset.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/_script/replace_file_content.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_cmap.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_data.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_date.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/hycom_3hourly_20250407.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/idm.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/literature.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_down/user_agent.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_draw.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_file.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_help.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_model/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_model/roms/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_model/roms/test.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_model/wrf/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_model/wrf/little_r.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_nc.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_python.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_sign/__init__.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_sign/meteorological.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_sign/ocean.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs/oa_sign/scientific.py +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs.egg-info/dependency_links.txt +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/oafuncs.egg-info/top_level.txt +0 -0
- {oafuncs-0.0.98.2 → oafuncs-0.0.98.4}/setup.cfg +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oafuncs
-Version: 0.0.98.2
+Version: 0.0.98.4
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
@@ -25,6 +25,7 @@ Requires-Dist: rich
 Requires-Dist: pathlib
 Requires-Dist: requests
 Requires-Dist: bs4
+Requires-Dist: httpx
 Requires-Dist: matplotlib
 Requires-Dist: netCDF4
 Requires-Dist: xlrd
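The only dependency change is the added httpx requirement. The diff itself does not show where it is used (the rewritten hycom_3hourly.py is the likely consumer), so the following is a hedged illustration of the kind of download call httpx enables, not code from the package:

import httpx

# Illustrative only; the URL and output filename are placeholders.
url = "https://example.com/data.nc"
with httpx.Client(timeout=30.0) as client:
    resp = client.get(url)
    resp.raise_for_status()
    with open("data.nc", "wb") as f:
        f.write(resp.content)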
oafuncs-0.0.98.4/oafuncs/_script/netcdf_write.py (new file)

@@ -0,0 +1,203 @@
+import os
+
+import netCDF4 as nc
+import numpy as np
+import xarray as xr
+import warnings
+
+warnings.filterwarnings("ignore", category=RuntimeWarning)
+
+
+def _numpy_to_nc_type(numpy_type):
+    """Map a NumPy dtype to the corresponding NetCDF data type."""
+    numpy_to_nc = {
+        "float32": "f4",
+        "float64": "f8",
+        "int8": "i1",
+        "int16": "i2",
+        "int32": "i4",
+        "int64": "i8",
+        "uint8": "u1",
+        "uint16": "u2",
+        "uint32": "u4",
+        "uint64": "u8",
+    }
+    numpy_type_str = str(numpy_type) if not isinstance(numpy_type, str) else numpy_type
+    return numpy_to_nc.get(numpy_type_str, "f4")
+
+
+def _calculate_scale_and_offset(data, n=16):
+    """
+    Compute scale_factor and add_offset for numeric data,
+    mapping the data onto the range [0, 2**n - 1].
+
+    data must be a numeric NumPy array; all-NaN input is not allowed.
+    """
+    if not isinstance(data, np.ndarray):
+        raise ValueError("Input data must be a NumPy array.")
+
+    data_min = np.nanmin(data)
+    data_max = np.nanmax(data)
+
+    if np.isnan(data_min) or np.isnan(data_max):
+        raise ValueError("Input data contains NaN values.")
+
+    if data_max == data_min:
+        scale_factor = 1.0
+        add_offset = data_min
+    else:
+        scale_factor = (data_max - data_min) / (2**n - 1)
+        add_offset = data_min + 2 ** (n - 1) * scale_factor
+    return scale_factor, add_offset
+
+
+def _data_to_scale_offset(data, scale, offset):
+    """
+    Convert data into its scale_factor/add_offset representation.
+    NaN, +inf and -inf are replaced with the fill value -32767
+    so that the converted data can be safely cast to int16.
+    """
+    if not isinstance(data, np.ndarray):
+        raise ValueError("Input data must be a NumPy array.")
+
+    # Compute the converted data first
+    result = np.around((data - offset) / scale)
+    # Replace NaN and +/-inf (posinf, neginf) with -32767
+    result = np.nan_to_num(result, nan=-32767, posinf=-32767, neginf=-32767)
+    result = np.clip(result, -32767, 32767)  # Clamp to the valid int16 range
+    result = np.where(np.isfinite(result), result, -32767)  # Replace any remaining invalid values with -32767
+    new_data = result.astype(np.int16)
+    return new_data
+
+
+def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_switch=True, compile_switch=True):
+    """
+    Save data to a NetCDF file; supports xarray objects (DataArray or Dataset) and numpy arrays.
+
+    Only numeric data variables are compressed (converted to int16 via scale_factor/add_offset);
+    non-numeric data and all coordinate variables are written uncompressed, as-is.
+
+    Parameters:
+    - file: output file path
+    - data: xarray.DataArray, xarray.Dataset or numpy array
+    - varname: variable name (only used for numpy arrays or DataArrays)
+    - coords: coordinate dict (numpy-array branch only); coordinate variables are never compressed
+    - mode: "w" (overwrite) or "a" (append)
+    - scale_offset_switch: whether to apply the scale/offset conversion to numeric data variables
+    - compile_switch: whether to enable NetCDF4 zlib compression (numeric data only)
+    """
+    # Handle xarray objects (DataArray or Dataset)
+    if isinstance(data, (xr.DataArray, xr.Dataset)):
+        encoding = {}  # Encoding info for the data variables
+
+        if isinstance(data, xr.DataArray):
+            if data.name is None:
+                data = data.rename("data")
+            varname = data.name if varname is None else varname
+            # Check whether the data is numeric
+            if np.issubdtype(data.values.dtype, np.number) and scale_offset_switch:
+                scale, offset = _calculate_scale_and_offset(data.values)
+                new_values = _data_to_scale_offset(data.values, scale, offset)
+                # Build a new DataArray, keeping the original coords and attrs, and record the conversion parameters in the attributes
+                new_da = data.copy(data=new_values)
+                new_da.attrs["scale_factor"] = float(scale)
+                new_da.attrs["add_offset"] = float(offset)
+                encoding[varname] = {
+                    "zlib": compile_switch,
+                    "complevel": 4,
+                    "dtype": "int16",
+                    "_FillValue": -32767,
+                }
+                new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding=encoding)
+            else:
+                data.to_dataset(name=varname).to_netcdf(file, mode=mode)
+            return
+
+        else:
+            # Dataset case: only data_vars are converted; coordinate variables stay as-is
+            new_vars = {}
+            encoding = {}
+            for var in data.data_vars:
+                da = data[var]
+                if np.issubdtype(np.asarray(da.values).dtype, np.number) and scale_offset_switch:
+                    scale, offset = _calculate_scale_and_offset(da.values)
+                    new_values = _data_to_scale_offset(da.values, scale, offset)
+                    new_da = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=da.attrs)
+                    new_da.attrs["scale_factor"] = float(scale)
+                    new_da.attrs["add_offset"] = float(offset)
+                    new_vars[var] = new_da
+                    encoding[var] = {
+                        "zlib": compile_switch,
+                        "complevel": 4,
+                        "dtype": "int16",
+                        "_FillValue": -32767,
+                    }
+                else:
+                    new_vars[var] = da
+            new_ds = xr.Dataset(new_vars, coords=data.coords)
+            if encoding:
+                new_ds.to_netcdf(file, mode=mode, encoding=encoding)
+            else:
+                new_ds.to_netcdf(file, mode=mode)
+            return
+
+    # Plain numpy array case
+    if mode == "w" and os.path.exists(file):
+        os.remove(file)
+    elif mode == "a" and not os.path.exists(file):
+        mode = "w"
+    data = np.asarray(data)
+    is_numeric = np.issubdtype(data.dtype, np.number)
+    try:
+        with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
+            # Write coordinate variables directly, without compression
+            if coords is not None:
+                for dim, values in coords.items():
+                    if dim not in ncfile.dimensions:
+                        ncfile.createDimension(dim, len(values))
+                        var_obj = ncfile.createVariable(dim, _numpy_to_nc_type(np.asarray(values).dtype), (dim,))
+                        var_obj[:] = values
+
+            dims = list(coords.keys()) if coords else []
+            if is_numeric and scale_offset_switch:
+                scale, offset = _calculate_scale_and_offset(data)
+                new_data = _data_to_scale_offset(data, scale, offset)
+                var = ncfile.createVariable(varname, "i2", dims, fill_value=-32767, zlib=compile_switch)
+                var.scale_factor = scale
+                var.add_offset = offset
+                # Ensure no invalid values in new_data before assignment
+                var[:] = new_data
+            else:
+                # Non-numeric data: compression disabled
+                dtype = _numpy_to_nc_type(data.dtype)
+                var = ncfile.createVariable(varname, dtype, dims, zlib=False)
+                var[:] = data
+    except Exception as e:
+        raise RuntimeError(f"netCDF4 save failed: {str(e)}") from e
+
+
+# Test cases
+if __name__ == "__main__":
+    # --------------------------------
+    # dataset
+    file = r"F:\roms_rst.nc"
+    ds = xr.open_dataset(file)
+    outfile = r"F:\roms_rst_test.nc"
+    save_to_nc(outfile, ds)
+    ds.close()
+    # --------------------------------
+    # dataarray
+    data = np.random.rand(4, 3, 2)
+    coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
+    varname = "test_var"
+    data = xr.DataArray(data, dims=("x", "y", "z"), coords=coords, name=varname)
+    outfile = r"F:\test_dataarray.nc"
+    save_to_nc(outfile, data)
+    # --------------------------------
+    # numpy array
+    data = np.random.rand(4, 3, 2)
+    coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
+    varname = "test_var"
+    outfile = r"F:\test_numpy.nc"
+    save_to_nc(outfile, data, varname=varname, coords=coords)
+    # --------------------------------
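To make the new packing scheme concrete, here is a minimal round-trip sketch of the scale/offset conversion implemented above (the array values are illustrative, not from the package):

import numpy as np

data = np.array([0.0, 0.5, np.nan, 1.0])

# Same formulas as _calculate_scale_and_offset with n=16
scale = (np.nanmax(data) - np.nanmin(data)) / (2**16 - 1)  # ~1.526e-05
offset = np.nanmin(data) + 2**15 * scale                   # centers the packed range

# Same steps as _data_to_scale_offset
packed = np.around((data - offset) / scale)
packed = np.nan_to_num(packed, nan=-32767, posinf=-32767, neginf=-32767)
packed = np.clip(packed, -32767, 32767).astype(np.int16)   # [-32767, 0, -32767, 32767]

# A CF-aware reader (e.g. xarray) undoes the packing on load:
recovered = packed * scale + offset
# Finite values land within one scale step of the originals; NaN slots carry
# the _FillValue -32767. Note that the data minimum also clips to -32767, so
# it coincides with the fill value.

Packing float64 fields down to int16 this way roughly quarters the variable's size even before zlib compression, at a worst-case precision cost of about (max - min) / 65535.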
oafuncs-0.0.98.4/oafuncs/_script/parallel.py (new file)

@@ -0,0 +1,214 @@
+import atexit
+import logging
+import multiprocessing as mp
+import platform
+import threading
+import time
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import psutil
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+__all__ = ["ParallelExecutor"]
+
+
+class ParallelExecutor:
+    def __init__(
+        self,
+        max_workers: Optional[int] = None,
+        chunk_size: Optional[int] = None,
+        mem_per_process: float = 1.0,  # GB
+        timeout_per_task: int = 3600,
+        max_retries: int = 3,
+    ):
+        self.platform = self._detect_platform()
+        self.mem_per_process = mem_per_process
+        self.timeout_per_task = timeout_per_task
+        self.max_retries = max_retries
+        self.running = True
+        self.task_history = []
+        self._executor = None
+
+        self.mode, default_workers = self._determine_optimal_settings()
+        self.max_workers = max_workers or default_workers
+        self.chunk_size = chunk_size or self._get_default_chunk_size()
+
+        self._init_platform_settings()
+        self._start_resource_monitor()
+        atexit.register(self.shutdown)
+
+        logging.info(f"Initialized {self.__class__.__name__} on {self.platform} (mode={self.mode}, workers={self.max_workers})")
+
+    def _detect_platform(self) -> str:
+        system = platform.system().lower()
+        if system == "linux":
+            return "wsl" if "microsoft" in platform.release().lower() else "linux"
+        return system
+
+    def _init_platform_settings(self):
+        if self.platform in ["linux", "wsl"]:
+            self.mp_context = mp.get_context("fork")
+        elif self.platform == "windows":
+            mp.set_start_method("spawn", force=True)
+            self.mp_context = mp.get_context("spawn")
+        else:
+            self.mp_context = None
+
+    def _determine_optimal_settings(self) -> Tuple[str, int]:
+        logical_cores = psutil.cpu_count(logical=True) or 1
+        available_mem = psutil.virtual_memory().available / 1024**3  # GB
+
+        mem_limit = max(1, int(available_mem / self.mem_per_process))
+        return ("process", min(logical_cores, mem_limit))
+
+    def _get_default_chunk_size(self) -> int:
+        return max(10, 100 // (psutil.cpu_count() or 1))
+
+    def _start_resource_monitor(self):
+        def monitor():
+            threshold = self.mem_per_process * 1024**3
+            while self.running:
+                try:
+                    if psutil.virtual_memory().available < threshold:
+                        self._scale_down_workers()
+                    time.sleep(1)
+                except Exception as e:
+                    logging.error(f"Resource monitor error: {e}")
+
+        threading.Thread(target=monitor, daemon=True).start()
+
+    def _scale_down_workers(self):
+        if self.max_workers > 1:
+            new_count = self.max_workers - 1
+            logging.warning(f"Scaling down workers from {self.max_workers} to {new_count}")
+            self.max_workers = new_count
+            self._restart_executor()
+
+    def _restart_executor(self):
+        if self._executor:
+            self._executor.shutdown(wait=False)
+            self._executor = None
+
+    def _get_executor(self):
+        if not self._executor:
+            Executor = ThreadPoolExecutor if self.mode == "thread" else ProcessPoolExecutor
+            self._executor = Executor(max_workers=self.max_workers, mp_context=self.mp_context if self.mode == "process" else None)
+        return self._executor
+
+    def run(self, func: Callable, params: List[Tuple], chunk_size: Optional[int] = None) -> List[Any]:
+        chunk_size = chunk_size or self.chunk_size
+        for retry in range(self.max_retries + 1):
+            try:
+                start_time = time.monotonic()
+                results = self._execute_batch(func, params, chunk_size)
+                self._update_settings(time.monotonic() - start_time, len(params))
+                return results
+            except Exception as e:
+                logging.error(f"Attempt {retry + 1} failed: {e}")
+                self._handle_failure()
+        raise RuntimeError(f"Failed after {self.max_retries} retries")
+
+    def _execute_batch(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
+        if not params:
+            return []
+
+        if len(params) > chunk_size * 2:
+            return self._chunked_execution(func, params, chunk_size)
+
+        results = [None] * len(params)
+        with self._get_executor() as executor:
+            futures = {executor.submit(func, *args): idx for idx, args in enumerate(params)}
+            for future in as_completed(futures):
+                idx = futures[future]
+                try:
+                    results[idx] = future.result(timeout=self.timeout_per_task)
+                except Exception as e:
+                    results[idx] = self._handle_error(e, func, params[idx])
+        return results
+
+    def _chunked_execution(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
+        results = []
+        with self._get_executor() as executor:
+            futures = []
+            for i in range(0, len(params), chunk_size):
+                chunk = params[i : i + chunk_size]
+                futures.append(executor.submit(self._process_chunk, func, chunk))
+
+            for future in as_completed(futures):
+                try:
+                    results.extend(future.result(timeout=self.timeout_per_task))
+                except Exception as e:
+                    logging.error(f"Chunk failed: {e}")
+                    results.extend([None] * chunk_size)
+        return results
+
+    @staticmethod
+    def _process_chunk(func: Callable, chunk: List[Tuple]) -> List[Any]:
+        return [func(*args) for args in chunk]
+
+    def _update_settings(self, duration: float, task_count: int):
+        self.task_history.append((duration, task_count))
+        self.chunk_size = max(5, min(100, self.chunk_size + (1 if duration < 5 else -1)))
+
+    def _handle_error(self, error: Exception, func: Callable, args: Tuple) -> Any:
+        if isinstance(error, TimeoutError):
+            logging.warning(f"Timeout processing {func.__name__}{args}")
+        elif isinstance(error, MemoryError):
+            logging.warning("Memory error detected")
+            self._scale_down_workers()
+        else:
+            logging.error(f"Error processing {func.__name__}{args}: {str(error)}")
+        return None
+
+    def _handle_failure(self):
+        if self.max_workers > 2:
+            self.max_workers = max(1, self.max_workers // 2)
+            self._restart_executor()
+
+    def shutdown(self):
+        self.running = False
+        if self._executor:
+            try:
+                self._executor.shutdown(wait=False)
+            except Exception as e:
+                logging.error(f"Shutdown error: {e}")
+            finally:
+                self._executor = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        self.shutdown()
+
+    def get_stats(self) -> Dict[str, Any]:
+        stats = {
+            "platform": self.platform,
+            "mode": self.mode,
+            "workers": self.max_workers,
+            "chunk_size": self.chunk_size,
+            "total_tasks": sum(count for _, count in self.task_history),
+        }
+        if self.task_history:
+            total_time = sum(time for time, _ in self.task_history)
+            stats["avg_task_throughput"] = stats["total_tasks"] / total_time if total_time else 0
+        return stats
+
+
+def _test_func(a, b):
+    time.sleep(0.01)
+    return a + b
+
+
+if __name__ == "__main__":
+    params = [(i, i * 2) for i in range(1000)]
+
+    with ParallelExecutor() as executor:
+        results = executor.run(_test_func, params)
+
+    # print("Results:", results)
+
+    print(f"Processed {len(results)} tasks")
+    print("Execution stats:", executor.get_stats())
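For context, a minimal usage sketch of the new executor (the import path is inferred from the file location and is an assumption, as is the example function):

from oafuncs._script.parallel import ParallelExecutor  # assumed path

def square(x):
    return x * x

if __name__ == "__main__":  # needed for the spawn start method on Windows
    params = [(i,) for i in range(20)]  # each tuple is unpacked as *args
    with ParallelExecutor(max_workers=4, mem_per_process=0.5) as ex:
        results = ex.run(square, params)
    # A batch this small takes the non-chunked path, so results preserve
    # input order; a task that raises comes back as None.
    print(results[:5])  # [0, 1, 4, 9, 16]

The default worker count is min(logical cores, available memory / mem_per_process), so raising mem_per_process is how you reserve headroom for memory-heavy tasks.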
oafuncs-0.0.98.4/oafuncs/_script/parallel_test.py (new file)

@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Author: Liu Kun && 16031215@qq.com
+Date: 2025-04-08 16:18:49
+LastEditors: Liu Kun && 16031215@qq.com
+LastEditTime: 2025-04-08 16:18:50
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\parallel_test.py
+Description:
+EditPlatform: vscode
+ComputerInfo: XPS 15 9510
+SystemInfo: Windows 11
+Python Version: 3.12
+"""