oafuncs 0.0.88__py2.py3-none-any.whl → 0.0.90__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
  # coding=utf-8
  """
  Author: Liu Kun && 16031215@qq.com
- Date: 2024-11-01 10:31:09
+ Date: 2024-11-02 11:07:49
  LastEditors: Liu Kun && 16031215@qq.com
- LastEditTime: 2024-12-08 10:20:45
+ LastEditTime: 2025-01-07 16:31:36
  FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
  Description:
  EditPlatform: vscode
@@ -16,182 +16,188 @@ Python Version: 3.12
  import datetime
  import os
  import random
+ import re
  import time
  import warnings
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from pathlib import Path
  from threading import Lock
- import re

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  import requests
- from bs4 import BeautifulSoup
  from rich import print
  from rich.progress import Progress
- import glob
+
+ from oafuncs.oa_down.user_agent import get_ua
+ from oafuncs.oa_file import file_size, mean_size

  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")

  __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list"]

- # time resolution
- data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
-
- # hourly data
- # dataset: GLBv0.08, GLBu0.08, GLBy0.08
- data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
-
- # version
- # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
- # version of GLBu0.08: 93.0
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
- # version of GLBy0.08: 93.0
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
- # version of ESPC_D: V02
- data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
-
- # info details
- # time range
- # GLBv0.08
- # Submitting an out-of-range time on the web page returns the dataset's actual time range, which was used to correct the ranges below
- # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
- # For the other datasets the hours are assumed to run from 00 to 21 for now
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
- # GLBu0.08
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
- # GLBy0.08
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
- # ESPC-D
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
-
- # variable
- variable_info = {
-     "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
-     "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
-     "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
-     "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
-     "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
-     "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
-     "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
-     "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
-     "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
- }
-
- # classification method
- # year_different: the data of different years is stored in different files
- # same_path: the data of different years is stored in the same file
- # var_different: the data of different variables is stored in different files
- # var_year_different: the data of different variables and years is stored in different files
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
-
- # download info
- # base url
- # GLBv0.08 53.X
- url_53x = {}
- for y_53x in range(1994, 2016):
-     # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
-     url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
- # GLBv0.08 56.3
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
- # GLBv0.08 57.2
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
- # GLBv0.08 92.8
- url_928 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
- # GLBv0.08 57.7
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
- # GLBv0.08 92.9
- url_929 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
- # GLBv0.08 93.0
- url_930_v = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
- # GLBu0.08 93.0
- url_930_u = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
- # GLBy0.08 93.0
- uv3z_930_y = {}
- ts3z_930_y = {}
- ssh_930_y = {}
- for y_930_y in range(2018, 2025):
-     uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
-     ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
-     ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
- # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
- url_930_y = {
-     "uv3z": uv3z_930_y,
-     "ts3z": ts3z_930_y,
-     "ssh": ssh_930_y,
- }
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
- # ESPC-D-V02
- u3z_espc_d_v02_y = {}
- v3z_espc_d_v02_y = {}
- t3z_espc_d_v02_y = {}
- s3z_espc_d_v02_y = {}
- ssh_espc_d_v02_y = {}
- for y_espc_d_v02 in range(2024, 2030):
-     u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
-     v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
-     t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
-     s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
-     ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
- url_espc_d_v02_y = {
-     "u3z": u3z_espc_d_v02_y,
-     "v3z": v3z_espc_d_v02_y,
-     "t3z": t3z_espc_d_v02_y,
-     "s3z": s3z_espc_d_v02_y,
-     "ssh": ssh_espc_d_v02_y,
- }
- data_info['hourly']['dataset']['ESPC_D']['version']['V02']['url'] = url_espc_d_v02_y
-
- var_group = {
-     "uv3z": ["u", "v", "u_b", "v_b"],
-     "ts3z": ["temp", "salt", "temp_b", "salt_b"],
-     "ssh": ["ssh"],
- }
- single_var_group = {
-     "u3z": ["u"],
-     "v3z": ["v"],
-     "t3z": ["temp"],
-     "s3z": ["salt"],
-     "ssh": ["ssh"],
- }
+
+ def get_initial_data():
+     global variable_info, data_info, var_group, single_var_group
+     # ----------------------------------------------
+     # variable
+     variable_info = {
+         "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
+         "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
+         "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
+         "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
+         "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
+         "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
+         "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
+         "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
+         "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
+     }
+     # ----------------------------------------------
+     # time resolution
+     data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
+
+     # hourly data
+     # dataset: GLBv0.08, GLBu0.08, GLBy0.08
+     data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
+
+     # version
+     # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
+     # version of GLBu0.08: 93.0
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
+     # version of GLBy0.08: 93.0
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
+     # version of ESPC_D: V02
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
+
+     # info details
+     # time range
+     # GLBv0.08
+     # Submitting an out-of-range time on the web page returns the dataset's actual time range, which was used to correct the ranges below
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
+     # GLBu0.08
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
+     # GLBy0.08
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
+     # ESPC-D
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
+
+     # classification method
+     # year_different: the data of different years is stored in different files
+     # same_path: the data of different years is stored in the same file
+     # var_different: the data of different variables is stored in different files
+     # var_year_different: the data of different variables and years is stored in different files
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
+
+     # download info
+     # base url
+     # GLBv0.08 53.X
+     url_53x = {}
+     for y_53x in range(1994, 2016):
+         # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
+         url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
+     # GLBv0.08 56.3
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
+     # GLBv0.08 57.2
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
+     # GLBv0.08 92.8
+     url_928 = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
+     # GLBv0.08 57.7
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
+     # GLBv0.08 92.9
+     url_929 = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
+     # GLBv0.08 93.0
+     url_930_v = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
+     # GLBu0.08 93.0
+     url_930_u = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
+     # GLBy0.08 93.0
+     uv3z_930_y = {}
+     ts3z_930_y = {}
+     ssh_930_y = {}
+     for y_930_y in range(2018, 2025):
+         uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
+         ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
+         ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
+     # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
+     url_930_y = {
+         "uv3z": uv3z_930_y,
+         "ts3z": ts3z_930_y,
+         "ssh": ssh_930_y,
+     }
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
+     # ESPC-D-V02
+     u3z_espc_d_v02_y = {}
+     v3z_espc_d_v02_y = {}
+     t3z_espc_d_v02_y = {}
+     s3z_espc_d_v02_y = {}
+     ssh_espc_d_v02_y = {}
+     for y_espc_d_v02 in range(2024, 2030):
+         u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
+         v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
+         t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
+         s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
+         ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
+     url_espc_d_v02_y = {
+         "u3z": u3z_espc_d_v02_y,
+         "v3z": v3z_espc_d_v02_y,
+         "t3z": t3z_espc_d_v02_y,
+         "s3z": s3z_espc_d_v02_y,
+         "ssh": ssh_espc_d_v02_y,
+     }
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["url"] = url_espc_d_v02_y
+     # ----------------------------------------------
+     var_group = {
+         "uv3z": ["u", "v", "u_b", "v_b"],
+         "ts3z": ["temp", "salt", "temp_b", "salt_b"],
+         "ssh": ["ssh"],
+     }
+     # ----------------------------------------------
+     single_var_group = {
+         "u3z": ["u"],
+         "v3z": ["v"],
+         "t3z": ["temp"],
+         "s3z": ["salt"],
+         "ssh": ["ssh"],
+     }
+
+     return variable_info, data_info, var_group, single_var_group


  def draw_time_range(pic_save_folder=None):
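
Note: the substantive change in this hunk is structural. The module-level metadata tables (variable_info, data_info, var_group, single_var_group) move into get_initial_data(), which publishes them as globals and also returns them, so the tables are only built once a download is actually requested. A minimal sketch of this lazy-initialization pattern, with a cut-down table for illustration (the real tables are far larger):

    def get_initial_data():
        global variable_info, data_info
        variable_info = {"u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"}}
        data_info = {"hourly": {"dataset": {"GLBv0.08": {"version": {"93.0": {}}}}}}
        return variable_info, data_info

    def some_entry_point():
        # Every public entry point calls get_initial_data() first,
        # so the globals exist before any lookup.
        get_initial_data()
        print(variable_info["u"]["var_name"])  # -> water_u

    some_entry_point()
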
@@ -556,79 +562,6 @@ def clear_existing_file(file_full_path):
      print(f"{file_full_path} has been removed")


- def find_file(parent_path, fname, mode="path"):
-     """
-     description:
-     param {*} parent_path: The parent path where the files are located
-     param {*} fname: The file name pattern to search for
-     param {*} mode: 'path' to return the full path of the files, 'file' to return only the file names
-     return {*} A list of file paths or file names if files are found, None otherwise
-     """
-
-     def natural_sort_key(s):
-         """Generate a key for natural sorting"""
-         return [int(text) if text.isdigit() else text.lower() for text in re.split("([0-9]+)", s)]
-
-     # Join parent_path and fname into the full search pattern
-     search_pattern = os.path.join(str(parent_path), fname)
-
-     # Use the glob module to find all matching files
-     matched_files = glob.glob(search_pattern)
-
-     # If no files were found, return None
-     if not matched_files:
-         return None
-
-     # Replaces the natsorted call in find_files
-     matched_files = sorted(matched_files, key=natural_sort_key)
-
-     # Decide what to return based on the mode parameter
-     if mode == "file":
-         # Return only the file names
-         result = [os.path.basename(file) for file in matched_files]
-     else:  # default is 'path'
-         # Return the absolute paths of the files
-         result = [os.path.abspath(file) for file in matched_files]
-
-     return result
-
-
- def file_size(file_path, unit="KB"):
-     # Check whether the file exists
-     if not os.path.exists(file_path):
-         return "File does not exist"
-
-     # Get the file size in bytes
-     file_size = os.path.getsize(file_path)
-
-     # Unit conversion table
-     unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
-
-     # Check that the requested unit is valid
-     if unit not in unit_dict:
-         return "Invalid unit; choose one of PB, TB, GB, MB, KB"
-
-     # Convert the file size to the requested unit
-     converted_size = file_size / unit_dict[unit]
-
-     return converted_size
-
-
- # ** Compute the mean size of matching files under a folder
- def mean_size(parent_path, fname):
-     flist = find_file(parent_path, fname)
-     if flist:
-         size_list = [file_size(f) for f in flist if file_size(f) != 0]
-     else:
-         size_list = []
-     if size_list:
-         min_size, max_size = min(size_list), max(size_list)
-         mean_size = sum(size_list) / len(size_list)
-     else:
-         mean_size, min_size, max_size = 0, 0, 0
-     return mean_size, min_size, max_size
-
-
  def check_existing_file(file_full_path, min_size):
      if os.path.exists(file_full_path):
          print(f"[bold #FFA54F]{file_full_path} exists")
@@ -651,87 +584,28 @@ def check_existing_file(file_full_path, min_size):
      return False


- def get_ua():
-     current_dir = os.path.dirname(os.path.abspath(__file__))
-     ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
-
-     with open(ua_file_txt, "r") as f:
-         ua_list = f.readlines()
-         # Strip newlines and drop empty lines
-         ua_list = [line.strip() for line in ua_list if line.strip()]
-
-     # if current_platform == 'Linux':
-     #     ua_list = [line for line in ua_list if 'Linux' in line]
-
-     return random.choice(ua_list)
-
-
- def get_proxy_file():
-     # Get the absolute path of the current script
-     script_dir = os.path.dirname(os.path.abspath(__file__))
-     # Build the absolute path to ip.txt
-     ip_file_txt = os.path.join(script_dir, "ip.txt")
-     with open(ip_file_txt, "r") as f:
-         ips = f.readlines()
-     ip_list = []
-     for ip in ips:
-         ip_list.append(ip.strip())
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
-     # print(f'Using proxy: {proxies}')
-     return proxies
-
-
- def scrape_and_categorize_proxies(choose_protocol="http"):
-     url = "https://topproxylinks.com/"
-     # Send an HTTP request to fetch the page
-     response = requests.get(url)
-     # Parse the page with BeautifulSoup
-     soup = BeautifulSoup(response.text, "html.parser")
-
-     # Initialize a dict to store proxies by protocol
-     proxies_dict = {"http": [], "socks4": [], "socks5": []}
-
-     # Find all rows in the table
-     tbody = soup.find("tbody")
-
-     if tbody:
-         for row in tbody.find_all("tr"):
-             # Extract the protocol, proxy, and country cells
-             cells = row.find_all("td")
-             protocol = cells[0].text.strip().lower()
-             proxy = cells[1].text.strip()
-
-             # Store the proxy under its protocol
-             if protocol in proxies_dict:
-                 proxies_dict[protocol].append(proxy)
-
-     if choose_protocol in proxies_dict:
-         proxies_list = proxies_dict[choose_protocol]
-     else:
-         proxies_list = proxies_dict["http"]
-
-     return proxies_list
-
-
- def get_proxy():
-     ip_list = scrape_and_categorize_proxies(choose_protocol="http")
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
-     print(f"Using proxy: {proxies}")
-     return proxies
-
-
  def download_file(target_url, store_path, file_name, check=False):
      # Check if the file exists
      fname = Path(store_path) / file_name
      file_name_split = file_name.split("_")
-     same_file = f'{file_name_split[0]}_{file_name_split[1]}*nc'
-     fsize_mean, fsize_min, fsize_max = mean_size(store_path, same_file)
-     set_min_size = fsize_mean - 0.5 * (fsize_max - fsize_min)
-     set_min_size = set_min_size if set_min_size > 0 else fsize_min
+     file_name_split = file_name_split[:-1]
+     # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
+     same_file = "_".join(file_name_split) + "*nc"
+
+     if same_file not in fsize_dict.keys():
+         # print(f'Same file name: {same_file}')
+         fsize_dict[same_file] = {"size": 0, "count": 0}
+
+     if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
+         # Re-estimate the minimum size for the first 30 files; afterwards it is taken as representative of all files and no longer updated, to save time
+         fsize_mean = mean_size(store_path, same_file, max_num=30)
+         set_min_size = fsize_mean * 0.8
+         fsize_dict[same_file]["size"] = set_min_size
+         fsize_dict[same_file]["count"] += 1
+     else:
+         set_min_size = fsize_dict[same_file]["size"]
      if check:
-         if check_existing_file(fname, set_min_size*0.8):
+         if check_existing_file(fname, set_min_size):
              count_dict["skip"] += 1
              return
      clear_existing_file(fname)
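
Note: the rewritten block replaces a per-download directory scan (the old mean/min/max heuristic) with a per-pattern cache. The expected minimum size for a file pattern is re-estimated via mean_size() for the first 30 downloads of that pattern, after which the cached value is reused. A self-contained sketch of the idea; mean_size is stubbed here, while in the package it is imported from oafuncs.oa_file:

    fsize_dict = {}  # pattern -> {"size": cached minimum size, "count": times estimated}

    def mean_size(store_path, pattern, max_num=30):
        return 1000.0  # stub: mean size of up to max_num files matching pattern

    def expected_min_size(store_path, pattern):
        entry = fsize_dict.setdefault(pattern, {"size": 0, "count": 0})
        if entry["count"] < 30 or entry["size"] == 0:
            # Re-estimate for the first 30 files of a pattern; afterwards the
            # cached value is treated as representative and never recomputed.
            entry["size"] = mean_size(store_path, pattern, max_num=30) * 0.8
            entry["count"] += 1
        return entry["size"]

    print(expected_min_size(".", "HYCOM_water_u*nc"))  # -> 800.0
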
@@ -744,13 +618,11 @@ def download_file(target_url, store_path, file_name, check=False):
      request_times = 0

      def calculate_wait_time(time_str, target_url):
-         import re
-
          # Regular expression matching times in YYYYMMDDHH format
          time_pattern = r"\d{10}"

          # Two example strings
-         # str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
+         # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
          # str2 = 'HYCOM_water_u_2018010100.nc'

          # Find the times with the regular expression
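
Note: calculate_wait_time pulls the timestamps out of a file name with the ten-digit pattern; on the two example strings from the comments it behaves like this:

    import re

    time_pattern = r"\d{10}"
    str1 = "HYCOM_water_u_2018010100-2018010112.nc"
    str2 = "HYCOM_water_u_2018010100.nc"
    print(re.findall(time_pattern, str1))  # ['2018010100', '2018010112']
    print(re.findall(time_pattern, str2))  # ['2018010100']
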
@@ -796,13 +668,8 @@ def download_file(target_url, store_path, file_name, check=False):
          with open(filename, 'wb') as f:
              f.write(response.content) """

-         if find_proxy:
-             proxies = get_proxy()
-             response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
-         else:
-             response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
+         response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
          response.raise_for_status()  # raises HTTPError if the request does not return 200
-
          # Save the file
          with open(fname, "wb") as f:
              print(f"[bold #96cbd7]Downloading {file_name}...")
@@ -923,7 +790,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
          submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
          file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
          if download_time_end is not None:
-             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
+             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"  # the two times must not be joined with an underscore, or later lookups for files of the same variable will break
          download_file(submit_url, store_path, file_name, check)
      else:
          if download_time < "2024081012":
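
Note: the inline comment added here (translated above) is the reason for switching the separator between the two timestamps from an underscore to a hyphen. download_file builds its "same file" glob by splitting the name on underscores and dropping the last field, so an underscore between the timestamps would leak the start time into the pattern. A quick demonstration:

    def same_file_pattern(file_name):
        parts = file_name.split("_")[:-1]  # drop the last underscore-separated field
        return "_".join(parts) + "*nc"

    # Hyphen separator: the whole time span is one field, so the pattern
    # reduces to the variable name, as intended.
    print(same_file_pattern("HYCOM_water_u_2018010100-2018010112.nc"))  # HYCOM_water_u*nc

    # Underscore separator: only the end time is dropped and the start time
    # leaks into the pattern, so sibling files of the same variable no longer match.
    print(same_file_pattern("HYCOM_water_u_2018010100_2018010112.nc"))  # HYCOM_water_u_2018010100*nc
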
@@ -948,14 +815,14 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
              # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
              file_name = f"HYCOM_{key}_{download_time}.nc"
              if download_time_end is not None:
-                 file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
+                 file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"  # the two times must not be joined with an underscore, or later lookups for files of the same variable will break
              download_file(submit_url, store_path, file_name, check)
          else:
              for v in var:
                  submit_url = get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                  file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                  if download_time_end is not None:
-                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}_{download_time_end}.nc"
+                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
                  download_file(submit_url, store_path, file_name, check)


@@ -1073,28 +940,30 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
  def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
      """
      Description:
-     Download the data of single time or a series of time
+         Download the data of single time or a series of time

      Parameters:
-     var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-     time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-     time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
-     lon_min: float, the minimum longitude, default is 0
-     lon_max: float, the maximum longitude, default is 359.92
-     lat_min: float, the minimum latitude, default is -80
-     lat_max: float, the maximum latitude, default is 90
-     depth: float, the depth, default is None, if you wanna get the data of single depth, you can set the depth, suggest to set the depth in [0, 5000]
-     level: int, the level number, default is None, if you wanna get the data of single level, you can set the level, suggest to set the level in [1, 40]
-     store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
-     dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
-     version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
-     num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
-     check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
-     ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
+         var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
+         time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
+         time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
+         lon_min: float, the minimum longitude, default is 0
+         lon_max: float, the maximum longitude, default is 359.92
+         lat_min: float, the minimum latitude, default is -80
+         lat_max: float, the maximum latitude, default is 90
+         depth: float, the depth, default is None, if you wanna get the data of single depth, you can set the depth, suggest to set the depth in [0, 5000]
+         level: int, the level number, default is None, if you wanna get the data of single level, you can set the level, suggest to set the level in [1, 40]
+         store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
+         dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
+         version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
+         num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
+         check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
+         ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file

      Returns:
-     None
+         None
      """
+     get_initial_data()
+
      # Print the info and resolve the dataset and version names
      if dataset_name is None and version_name is None:
          print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
@@ -1154,8 +1023,8 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
      """ global current_platform
      current_platform = platform.system() """

-     global find_proxy
-     find_proxy = False
+     global fsize_dict
+     fsize_dict = {}

      download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)

@@ -1225,8 +1094,7 @@ def how_to_use():


  if __name__ == "__main__":
-     # help(hycom3h.download)
-     time_s, time_e = "2024081012", "2024081115"
+     time_s, time_e = "2024101012", "2024101018"
      merge_name = f"{time_s}_{time_e}"  # name of the merged file
      root_path = r"G:\Data\HYCOM\3hourly_test"
      location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
@@ -1256,7 +1124,7 @@ if __name__ == "__main__":
1256
1124
  download_switch, single_var = True, False
1257
1125
  combine_switch = False
1258
1126
  copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
1259
-
1127
+
1260
1128
  # draw_time_range(pic_save_folder=r'I:\Delete')
1261
1129
 
1262
1130
  if download_switch:
@@ -1288,6 +1156,6 @@ if __name__ == "__main__":
1288
1156
  merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
1289
1157
  if combine_switch:
1290
1158
  # 这里的var_name必须是官方变量名,不能再是简写了
1291
- merge5nc(file_list, var_name, 'time', merge_path_name)
1159
+ merge(file_list, var_name, 'time', merge_path_name)
1292
1160
  if copy_switch:
1293
1161
  copy_file(merge_path_name, copy_dir) """