PyPI - oafuncs - Versions diffs - 0.0.97.16__py3-none-any.whl → 0.0.97.17__py3-none-any.whl - Mend

oafuncs 0.0.97.16__py3-none-any.whl → 0.0.97.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

oafuncs/_script/netcdf_modify.py +10 -2
oafuncs/oa_cmap.py +211 -95
oafuncs/oa_data.py +157 -218
oafuncs/oa_date.py +71 -37
oafuncs/oa_down/hycom_3hourly.py +208 -319
oafuncs/oa_down/hycom_3hourly_20250407.py +1295 -0
oafuncs/oa_down/idm.py +4 -4
oafuncs/oa_draw.py +224 -124
oafuncs/oa_file.py +279 -333
oafuncs/oa_help.py +10 -0
oafuncs/oa_nc.py +197 -164
oafuncs/oa_python.py +51 -25
oafuncs/oa_tool.py +61 -22
{oafuncs-0.0.97.16.dist-info → oafuncs-0.0.97.17.dist-info}/METADATA +1 -1
{oafuncs-0.0.97.16.dist-info → oafuncs-0.0.97.17.dist-info}/RECORD +19 -18
/oafuncs/_script/{replace_file_concent.py → replace_file_content.py} +0 -0
{oafuncs-0.0.97.16.dist-info → oafuncs-0.0.97.17.dist-info}/WHEEL +0 -0
{oafuncs-0.0.97.16.dist-info → oafuncs-0.0.97.17.dist-info}/licenses/LICENSE.txt +0 -0
{oafuncs-0.0.97.16.dist-info → oafuncs-0.0.97.17.dist-info}/top_level.txt +0 -0

oafuncs/oa_down/hycom_3hourly.py CHANGED

@@ -2,10 +2,10 @@
 # coding=utf-8
 """
 Author: Liu Kun && 16031215@qq.com
-Date: 2025-01-29 19:05:09
+Date: 2025-04-07 10:51:09
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2025-01-29 19:05:10
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly_20250129 copy.py
+LastEditTime: 2025-04-07 10:51:09
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly copy.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
@@ -34,7 +34,7 @@ from rich.progress import Progress
 from oafuncs.oa_down.idm import downloader as idm_downloader
 from oafuncs.oa_down.user_agent import get_ua
-from oafuncs.oa_file import file_size, mean_size
+from oafuncs.oa_file import file_size
 from oafuncs.oa_nc import check as check_nc
 from oafuncs.oa_nc import modify as modify_nc
@@ -415,10 +415,12 @@ def _check_time_in_dataset_and_version(time_input, time_end=None):
                     have_data = True
     # 输出结果
-    print(f"[bold red]{time_input_str} is in the following dataset and version:")
+    if match_time is None:
+        print(f"[bold red]{time_input_str} is in the following dataset and version:")
     if have_data:
-        for d, v, trange in zip(d_list, v_list, trange_list):
-            print(f"[bold blue]{d} {v} {trange}")
+        if match_time is None:
+            for d, v, trange in zip(d_list, v_list, trange_list):
+                print(f"[bold blue]{d} {v} {trange}")
         if is_single_time:
             return True
         else:
@@ -501,12 +503,11 @@ def _direct_choose_dataset_and_version(time_input, time_end=None):
             # 检查时间是否在数据集版本的时间范围内
             if time_start >= time_s and time_end <= time_e:
-                # print(f'[bold purple]dataset: {dataset_name}, version: {version_name} is chosen')
-                # return dataset_name, version_name
                 dataset_name_out, version_name_out = dataset_name, version_name
     if dataset_name_out is not None and version_name_out is not None:
-        print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
+        if match_time is None:
+            print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
     # 如果没有找到匹配的数据集和版本，会返回 None
     return dataset_name_out, version_name_out
@@ -583,10 +584,9 @@ def _check_existing_file(file_full_path, avg_size):
         delta_size_ratio = (fsize - avg_size) / avg_size
         if abs(delta_size_ratio) > 0.025:
             if check_nc(file_full_path):
-                # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
                 return True
             else:
-                print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
+                # print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
                 return False
         else:
             return True
@@ -594,33 +594,14 @@ def _check_existing_file(file_full_path, avg_size):
         return False
-def _get_mean_size30(store_path, same_file):
-    if same_file not in fsize_dict.keys():
-        # print(f'Same file name: {same_file}')
-        fsize_dict[same_file] = {"size": 0, "count": 0}
-    if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
-        # 更新30次文件最小值，后续认为可以代表所有文件，不再更新占用时间
-        fsize_mean = mean_size(store_path, same_file, max_num=30)
-        set_min_size = fsize_mean * 0.95
-        fsize_dict[same_file]["size"] = set_min_size
-        fsize_dict[same_file]["count"] += 1
-    else:
-        set_min_size = fsize_dict[same_file]["size"]
-    return set_min_size
 def _get_mean_size_move(same_file, current_file):
-    # 获取锁
-    with fsize_dict_lock:  # 全局锁，确保同一时间只能有一个线程访问
-        # 初始化字典中的值，如果文件不在字典中
+    with fsize_dict_lock:
         if same_file not in fsize_dict.keys():
             fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
-        tolerance_ratio = 0.025  # 容忍的阈值比例
+        tolerance_ratio = 0.025
         current_file_size = file_size(current_file)
-        # 如果列表不为空，则计算平均值，否则保持为1
         if fsize_dict[same_file]["size_list"]:
             fsize_dict[same_file]["mean_size"] = sum(fsize_dict[same_file]["size_list"]) / len(fsize_dict[same_file]["size_list"])
             fsize_dict[same_file]["mean_size"] = max(fsize_dict[same_file]["mean_size"], 1.0)
@@ -630,19 +611,15 @@ def _get_mean_size_move(same_file, current_file):
         size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
         if abs(size_difference_ratio) > tolerance_ratio:
-            if check_nc(current_file):
-                # print(f"File size is abnormal but can be opened normally, file size: {current_file_size:.2f} KB")
-                # 文件可以正常打开，但大小异常，保留当前文件大小
+            if check_nc(current_file,print_messages=False):
                 fsize_dict[same_file]["size_list"] = [current_file_size]
                 fsize_dict[same_file]["mean_size"] = current_file_size
             else:
                 _clear_existing_file(current_file)
-                print(f"File size is abnormal, may need to be downloaded again, file size: {current_file_size:.2f} KB")
+                # print(f"File size is abnormal, may need to be downloaded again, file size: {current_file_size:.2f} KB")
         else:
-            # 添加当前文件大小到列表中，并更新计数
             fsize_dict[same_file]["size_list"].append(current_file_size)
-    # 返回调整后的平均值，这里根据您的需求，返回的是添加新值之前的平均值
     return fsize_dict[same_file]["mean_size"]
@@ -656,7 +633,6 @@ def _check_ftime(nc_file, tname="time", if_print=False):
         ds.close()
         real_time = str(real_time)[:13]
         real_time = real_time.replace("-", "").replace("T", "")
-        # -----------------------------------------------------
         f_time = re.findall(r"\d{10}", nc_file)[0]
         if real_time == f_time:
             return True
@@ -671,91 +647,66 @@ def _check_ftime(nc_file, tname="time", if_print=False):
 def _correct_time(nc_file):
-    # 打开NC文件
     dataset = nc.Dataset(nc_file)
-    # 读取时间单位
     time_units = dataset.variables["time"].units
-    # 关闭文件
     dataset.close()
-    # 解析时间单位字符串以获取时间原点
     origin_str = time_units.split("since")[1].strip()
     origin_datetime = datetime.datetime.strptime(origin_str, "%Y-%m-%d %H:%M:%S")
-    # 从文件名中提取日期字符串
     given_date_str = re.findall(r"\d{10}", str(nc_file))[0]
-    # 将提取的日期字符串转换为datetime对象
     given_datetime = datetime.datetime.strptime(given_date_str, "%Y%m%d%H")
-    # 计算给定日期与时间原点之间的差值（以小时为单位）
     time_difference = (given_datetime - origin_datetime).total_seconds()
     if "hours" in time_units:
         time_difference /= 3600
     elif "days" in time_units:
         time_difference /= 3600 * 24
-    # 修改NC文件中的时间变量
     modify_nc(nc_file, "time", None, time_difference)
-def _download_file(target_url, store_path, file_name, check=False):
-    # Check if the file exists
+def _download_file(target_url, store_path, file_name, cover=False):
     fname = Path(store_path) / file_name
     file_name_split = file_name.split("_")
     file_name_split = file_name_split[:-1]
-    # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
     same_file = "_".join(file_name_split) + "*nc"
     if match_time is not None:
-        if check_nc(fname):
+        if check_nc(fname, print_messages=False):
             if not _check_ftime(fname, if_print=True):
                 if match_time:
                     _correct_time(fname)
                     count_dict["skip"] += 1
                 else:
                     _clear_existing_file(fname)
-                    # print(f"[bold #ffe5c0]File time error, {fname}")
                     count_dict["no_data"] += 1
             else:
                 count_dict["skip"] += 1
                 print(f"[bold green]{file_name} is correct")
         return
-    if check:
-        if same_file not in fsize_dict.keys():  # 对第一个文件单独进行检查，因为没有大小可以对比
-            check_nc(fname, delete_switch=True)
+    if not cover and os.path.exists(fname):
+        print(f"[bold #FFA54F]{fname} exists, skipping ...")
+        count_dict["skip"] += 1
+        return
+    if same_file not in fsize_dict.keys():
+        check_nc(fname, delete_if_invalid=True, print_messages=False)
-        # set_min_size = _get_mean_size30(store_path, same_file) # 原方案，只30次取平均值；若遇变化，无法判断
-        get_mean_size = _get_mean_size_move(same_file, fname)
+    get_mean_size = _get_mean_size_move(same_file, fname)
+    if _check_existing_file(fname, get_mean_size):
+        count_dict["skip"] += 1
+        return
-        if _check_existing_file(fname, get_mean_size):
-            count_dict["skip"] += 1
-            return
     _clear_existing_file(fname)
     if not use_idm:
-        # -----------------------------------------------
         print(f"[bold #f0f6d0]Requesting {file_name} ...")
-        # 创建会话
         s = requests.Session()
         download_success = False
         request_times = 0
         def calculate_wait_time(time_str, target_url):
-            # 定义正则表达式，匹配YYYYMMDDHH格式的时间
             time_pattern = r"\d{10}"
-            # 定义两个字符串
-            # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
-            # str2 = 'HYCOM_water_u_2018010100.nc'
-            # 使用正则表达式查找时间
             times_in_str = re.findall(time_pattern, time_str)
-            # 计算每个字符串中的时间数量
             num_times_str = len(times_in_str)
             if num_times_str > 1:
@@ -764,7 +715,6 @@ def _download_file(target_url, store_path, file_name, check=False):
                 delta_t = delta_t / 3 + 1
             else:
                 delta_t = 1
-            # 单个要素最多等待5分钟，不宜太短，太短可能请求失败；也不宜太长，太长可能会浪费时间
             num_var = int(target_url.count("var="))
             if num_var <= 0:
                 num_var = 1
@@ -773,31 +723,19 @@ def _download_file(target_url, store_path, file_name, check=False):
         max_timeout = calculate_wait_time(file_name, target_url)
         print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-        # print(f'Download_start_time: {datetime.datetime.now()}')
         download_time_s = datetime.datetime.now()
         order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
         while not download_success:
             if request_times >= 10:
-                # print(f'下载失败，已重试 {request_times} 次\n可先跳过，后续再试')
                 print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
                 count_dict["fail"] += 1
                 break
             if request_times > 0:
-                # print(f'\r正在重试第 {request_times} 次', end="")
                 print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
-            # 尝试下载文件
             try:
                 headers = {"User-Agent": get_ua()}
-                """ response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
-                response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
-                # 保存文件
-                with open(filename, 'wb') as f:
-                    f.write(response.content) """
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # 启用流式传输
-                response.raise_for_status()  # 如果请求返回的不是200，将抛出HTTPError异常
-                # 保存文件
+                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
+                response.raise_for_status()
                 with open(fname, "wb") as f:
                     print(f"[bold #96cbd7]Downloading {file_name} ...")
                     for chunk in response.iter_content(chunk_size=1024):
@@ -806,14 +744,12 @@ def _download_file(target_url, store_path, file_name, check=False):
                 f.close()
-                # print(f'\r文件 {fname} 下载成功', end="")
                 if os.path.exists(fname):
                     download_success = True
                     download_time_e = datetime.datetime.now()
                     download_delta = download_time_e - download_time_s
                     print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
                     count_dict["success"] += 1
-                    # print(f'Download_end_time: {datetime.datetime.now()}')
             except requests.exceptions.HTTPError as errh:
                 print(f"Http Error: {errh}")
@@ -833,7 +769,6 @@ def _download_file(target_url, store_path, file_name, check=False):
 def _check_hour_is_valid(ymdh_str):
-    # hour should be 00, 03, 06, 09, 12, 15, 18, 21
     hh = int(str(ymdh_str[-2:]))
     if hh in [0, 3, 6, 9, 12, 15, 18, 21]:
         return True
@@ -849,18 +784,15 @@ def _check_dataset_version(dataset_name, version_name, download_time, download_t
         else:
             return None, None
-    # 确保下载时间是一个字符串
     download_time_str = str(download_time)
     if len(download_time_str) == 8:
         download_time_str = download_time_str + "00"
-    # 检查小时是否有效（如果需要的话）
     if download_time_end is None and not _check_hour_is_valid(download_time_str):
         print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
         raise ValueError("The hour is invalid")
-    # 根据是否检查整个天来设置时间范围
     if download_time_end is not None:
         if len(str(download_time_end)) == 8:
             download_time_end = str(download_time_end) + "21"
@@ -876,7 +808,6 @@ def _check_dataset_version(dataset_name, version_name, download_time, download_t
 def _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
-    # year_str = str(download_time)[:4]
     ymdh_str = str(download_time)
     if depth is not None and level_num is not None:
         print("Please ensure the depth or level_num is None")
@@ -889,14 +820,13 @@ def _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_ma
         print(f"Data of single level ({level_num}) will be downloaded...")
         which_mode = "level"
     else:
-        # print("Full depth or full level data will be downloaded...")
         which_mode = "full"
     query_dict = _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
     submit_url = _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
     return submit_url
-def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
+def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, cover=False):
     print("[bold #ecdbfe]-" * mark_len)
     download_time = str(download_time)
     if download_time_end is not None:
@@ -921,8 +851,8 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
             submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
             if download_time_end is not None:
-                file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"  # 这里时间不能用下划线，不然后续处理查找同一变量文件会出问题
-            _download_file(submit_url, store_path, file_name, check)
+                file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"
+            _download_file(submit_url, store_path, file_name, cover)
         else:
             if download_time < "2024081012":
                 varlist = [_ for _ in var]
@@ -943,18 +873,17 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
                         for v in current_group[1:]:
                             new_str = f"{new_str}&var={variable_info[v]['var_name']}"
                         submit_url = submit_url.replace(old_str, new_str)
-                        # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
                         file_name = f"HYCOM_{key}_{download_time}.nc"
                         if download_time_end is not None:
-                            file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"  # 这里时间不能用下划线，不然后续处理查找同一变量文件会出问题
-                    _download_file(submit_url, store_path, file_name, check)
+                            file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
+                    _download_file(submit_url, store_path, file_name, cover)
             else:
                 for v in var:
                     submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                     if download_time_end is not None:
                         file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
-                    _download_file(submit_url, store_path, file_name, check)
+                    _download_file(submit_url, store_path, file_name, cover)
 def _convert_full_name_to_short_name(full_name):
@@ -967,214 +896,184 @@ def _convert_full_name_to_short_name(full_name):
     return False
-def _download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
-    """
-    # 并行下载任务
-    # 这个函数是为了并行下载而设置的，是必须的，直接调用direct_download并行下载会出问题
-    任务封装：将每个任务需要的数据和操作封装在一个函数中，这样每个任务都是独立的，不会相互干扰。
-    本情况下，download_task函数的作用是将每个下载任务封装起来，包括它所需的所有参数。
-    这样，每个任务都是独立的，有自己的参数和数据，不会与其他任务共享或修改任何数据。
-    因此，即使多个任务同时执行，也不会出现数据交互错乱的问题。
-    """
-    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
+def _download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover):
+    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, cover)
 def _done_callback(future, progress, task, total, counter_lock):
-    """
-    # 并行下载任务的回调函数
-    # 这个函数是为了并行下载而设置的，是必须的，直接调用direct_download并行下载会出问题
-    回调函数：当一个任务完成后，会调用这个函数，这样可以及时更新进度条，显示任务的完成情况。
-    本情况下，done_callback函数的作用是当一个任务完成后，更新进度条的进度，显示任务的完成情况。
-    这样，即使多个任务同时执行，也可以及时看到每个任务的完成情况，不会等到所有任务都完成才显示。
-    """
     global parallel_counter
     with counter_lock:
         parallel_counter += 1
         progress.update(task, advance=1, description=f"[cyan]{bar_desc} {parallel_counter}/{total}")
-def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, interval_hour=3):
-    """
-    Description:
-    Download the data of single time or a series of time
-    Parameters:
-    var: str, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-    time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-    time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-    lon_min: float, the minimum longitude, default is 0
-    lon_max: float, the maximum longitude, default is 359.92
-    lat_min: float, the minimum latitude, default is -80
-    lat_max: float, the maximum latitude, default is 90
-    depth: float, the depth, default is None
-    level: int, the level number, default is None
-    store_path: str, the path to store the data, default is None
-    dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'
-    version_name: str, the version name, default is None, example: '53.X', '56.3'
-    num_workers: int, the number of workers, default is None
-    Returns:
-    None
-    """
+def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, cover=False, interval_hour=3):
     ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
-    if num_workers is not None and num_workers > 1:  # 如果使用多线程下载，用于进度条显示
+    if num_workers is not None and num_workers > 1:
         global parallel_counter
         parallel_counter = 0
-        counter_lock = Lock()  # 创建一个锁，线程安全的计数器
+        counter_lock = Lock()
     if ymdh_time_s == ymdh_time_e:
-        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, check)
+        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
     elif int(ymdh_time_s) < int(ymdh_time_e):
-        print("Downloading a series of files...")
+        if match_time is None:
+            print("*" * mark_len)
+            print("Downloading a series of files...")
         time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
         with Progress() as progress:
             task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
-            if ftimes == 1:
-                if num_workers is None or num_workers <= 1:
-                    # 串行方式
-                    for i, time_str in enumerate(time_list):
-                        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
-                        progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
-                else:
-                    # 并行方式
-                    with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                        futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
-                        """ for i, future in enumerate(futures):
-                            future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i+1}/{len(time_list)}")) """
-                        for feature in as_completed(futures):
-                            _done_callback(feature, progress, task, len(time_list), counter_lock)
+            if num_workers is None or num_workers <= 1:
+                for i, time_str in enumerate(time_list):
+                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
+                    progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
             else:
-                # new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
-                new_time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour * ftimes, "hour")
-                total_num = len(new_time_list)
-                if num_workers is None or num_workers <= 1:
-                    # 串行方式
-                    for i, time_str in enumerate(new_time_list):
-                        time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
-                        time_str_end = time_list[time_str_end_index]
-                        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
-                        progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{total_num}")
-                else:
-                    # 并行方式
-                    with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                        futures = [executor.submit(_download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
-                        """ for i, future in enumerate(futures):
-                            future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i+1}/{total_num}")) """
-                        for feature in as_completed(futures):
-                            _done_callback(feature, progress, task, len(time_list), counter_lock)
+                with ThreadPoolExecutor(max_workers=num_workers) as executor:
+                    futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
+                    for feature in as_completed(futures):
+                        _done_callback(feature, progress, task, len(time_list), counter_lock)
     else:
         print("[bold red]Please ensure the time_s is no more than time_e")
-def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=None, interval_hour=3):
+def download(
+    variables,
+    start_time,
+    end_time=None,
+    lon_min=0,
+    lon_max=359.92,
+    lat_min=-80,
+    lat_max=90,
+    depth=None,
+    level=None,
+    output_dir=None,
+    dataset=None,
+    version=None,
+    workers=None,
+    overwrite=False,
+    idm_path=None,
+    validate_time=None,
+    interval_hours=3,
+):
     """
-    Description:
-        Download the data of single time or a series of time
+    Download data for a single time or a series of times.
     Parameters:
-        var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-        time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-        time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
-        lon_min: float, the minimum longitude, default is 0
-        lon_max: float, the maximum longitude, default is 359.92
-        lat_min: float, the minimum latitude, default is -80
-        lat_max: float, the maximum latitude, default is 90
-        depth: float, the depth, default is None, if you wanna get the data of single depth, you can set the depth, suggest to set the depth in [0, 5000]
-        level: int, the level number, default is None, if you wanna get the data of single level, you can set the level, suggest to set the level in [1, 40]
-        store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
-        dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
-        version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
-        num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
-        check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
-        ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
-        idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
-        fill_time: bool or None, the mode to fill the time, default is None. None: only download the data; True: modify the real time of data to the time in the file name; False: check the time in the file name and the real time of data, if not match, delete the file
-        interval_hour: int, the interval time to download the data, default is 3, if set, the interval time will be used to download the data; example: 3, 6, ...
+        variables (str or list): Variable names to download. Examples include:
+            'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b'
+            or their full names like 'water_u', 'water_v', etc.
+        start_time (str): Start time in the format 'YYYYMMDDHH' or 'YYYYMMDD'.
+            If hour is included, it must be one of [00, 03, 06, 09, 12, 15, 18, 21].
+        end_time (str, optional): End time in the format 'YYYYMMDDHH' or 'YYYYMMDD'.
+            If not provided, only data for the start_time will be downloaded.
+        lon_min (float, optional): Minimum longitude. Default is 0.
+        lon_max (float, optional): Maximum longitude. Default is 359.92.
+        lat_min (float, optional): Minimum latitude. Default is -80.
+        lat_max (float, optional): Maximum latitude. Default is 90.
+        depth (float, optional): Depth in meters. If specified, data for a single depth
+            will be downloaded. Suggested range: [0, 5000].
+        level (int, optional): Vertical level number. If specified, data for a single
+            level will be downloaded. Suggested range: [1, 40].
+        output_dir (str, optional): Directory to save downloaded files. If not provided,
+            files will be saved in the current working directory.
+        dataset (str, optional): Dataset name. Examples: 'GLBv0.08', 'GLBu0.08', etc.
+            If not provided, the dataset will be chosen based on the time range.
+        version (str, optional): Dataset version. Examples: '53.X', '56.3', etc.
+            If not provided, the version will be chosen based on the time range.
+        workers (int, optional): Number of parallel workers. Default is 1. Maximum is 10.
+        overwrite (bool, optional): Whether to overwrite existing files. Default is False.
+        idm_path (str, optional): Path to the Internet Download Manager (IDM) executable.
+            If provided, IDM will be used for downloading.
+        validate_time (bool or None, optional): Time validation mode. Default is None.
+            - None: Only download data.
+            - True: Modify the real time in the data to match the file name.
+            - False: Check if the real time matches the file name. If not, delete the file.
+        interval_hours (int, optional): Time interval in hours for downloading data.
+            Default is 3. Examples: 3, 6, etc.
     Returns:
         None
+    Example:
+        >>> download(
+        ...     variables='u',
+        ...     start_time='2024083100',
+        ...     end_time='2024090100',
+        ...     lon_min=0,
+        ...     lon_max=359.92,
+        ...     lat_min=-80,
+        ...     lat_max=90,
+        ...     depth=None,
+        ...     level=None,
+        ...     output_dir=None,
+        ...     dataset=None,
+        ...     version=None,
+        ...     workers=4,
+        ...     overwrite=False,
+        ...     idm_path=None,
+        ...     validate_time=None,
+        ...     interval_hours=3,
+        ... )
     """
     from oafuncs.oa_tool import pbar
-    from oafuncs.oa_cmap import get as get_cmap
     _get_initial_data()
-    # 打印信息并处理数据集和版本名称
-    if dataset_name is None and version_name is None:
-        print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
-        print("If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.")
-    elif dataset_name is None and version_name is not None:
-        print("Please ensure the dataset_name is not None")
-        print("If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.")
-    elif dataset_name is not None and version_name is None:
-        print("Please ensure the version_name is not None")
-        print("If you do not add the version_name, both the dataset and version will be chosen according to the download_time.")
+    if dataset is None and version is None:
+        if validate_time is None:
+            print("Dataset and version will be chosen based on the time range.")
+            print("If multiple datasets or versions exist, the latest one will be used.")
+    elif dataset is None:
+        print("Please provide a dataset name if specifying a version.")
+    elif version is None:
+        print("Please provide a version if specifying a dataset name.")
     else:
-        print("The dataset_name and version_name are both set by yourself.")
-        print("Please ensure the dataset_name and version_name are correct.")
+        print("Using the specified dataset and version.")
-    if isinstance(var, list):
-        if len(var) == 1:
-            var = _convert_full_name_to_short_name(var[0])
+    if isinstance(variables, list):
+        if len(variables) == 1:
+            variables = _convert_full_name_to_short_name(variables[0])
         else:
-            var = [_convert_full_name_to_short_name(v) for v in var]
-    elif isinstance(var, str):
-        var = _convert_full_name_to_short_name(var)
+            variables = [_convert_full_name_to_short_name(v) for v in variables]
+    elif isinstance(variables, str):
+        variables = _convert_full_name_to_short_name(variables)
     else:
-        raise ValueError("The var is invalid")
-    if var is False:
-        raise ValueError("The var is invalid")
-    if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
-        print("Please ensure the lon_min, lon_max, lat_min, lat_max are in the range")
-        print("The range of lon_min, lon_max is 0~359.92")
-        print("The range of lat_min, lat_max is -80~90")
-        raise ValueError("The lon or lat is invalid")
-    if ftimes != 1:
-        print("Please ensure the ftimes is in [1, 8]")
-        ftimes = max(min(ftimes, 8), 1)
-    if store_path is None:
-        store_path = str(Path.cwd())
+        raise ValueError("Invalid variable(s) provided.")
+    if variables is False:
+        raise ValueError("Invalid variable(s) provided.")
+    if not (0 <= lon_min <= 359.92 and 0 <= lon_max <= 359.92 and -80 <= lat_min <= 90 and -80 <= lat_max <= 90):
+        raise ValueError("Longitude or latitude values are out of range.")
+    if output_dir is None:
+        output_dir = str(Path.cwd())
     else:
-        os.makedirs(str(store_path), exist_ok=True)
-    if num_workers is not None:
-        num_workers = max(min(num_workers, 10), 1)  # 暂时不限制最大值，再检查的时候可以多开一些线程
-        # num_workers = int(max(num_workers, 1))
-    time_s = str(time_s)
-    if len(time_s) == 8:
-        time_s += "00"
-    if time_e is None:
-        time_e = time_s[:]
+        os.makedirs(output_dir, exist_ok=True)
+    if workers is not None:
+        workers = max(min(workers, 10), 1)
+    start_time = str(start_time)
+    if len(start_time) == 8:
+        start_time += "00"
+    if end_time is None:
+        end_time = start_time[:]
     else:
-        time_e = str(time_e)
-        if len(time_e) == 8:
-            time_e += "21"
+        end_time = str(end_time)
+        if len(end_time) == 8:
+            end_time += "21"
     global count_dict
     count_dict = {"success": 0, "fail": 0, "skip": 0, "no_data": 0, "total": 0, "no_data_list": []}
-    """ global current_platform
-    current_platform = platform.system() """
     global fsize_dict
     fsize_dict = {}
     global fsize_dict_lock
     fsize_dict_lock = Lock()
-    if fill_time is not None:
-        num_workers = 1
     global use_idm, given_idm_engine, idm_download_list, bar_desc
-    if idm_engine is not None:
+    if idm_path is not None:
         use_idm = True
-        num_workers = 1
-        given_idm_engine = idm_engine
+        workers = 1
+        given_idm_engine = idm_path
         idm_download_list = []
         bar_desc = "Submitting to IDM ..."
     else:
@@ -1182,51 +1081,46 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
         bar_desc = "Downloading ..."
     global match_time
-    match_time = fill_time
+    match_time = validate_time
     global mark_len
     mark_len = 100
-    _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes, int(interval_hour))
-    if idm_engine is not None:
+    if validate_time is not None:
+        workers = 1
+        print('*' * mark_len)
+        print("[bold red]Only checking the time of existing files.")
+        bar_desc = "Checking time ..."
+    _download_hourly_func(
+        variables,
+        start_time,
+        end_time,
+        lon_min,
+        lon_max,
+        lat_min,
+        lat_max,
+        depth,
+        level,
+        output_dir,
+        dataset,
+        version,
+        workers,
+        overwrite,
+        int(interval_hours),
+    )
+    if idm_path is not None:
         print("[bold #ecdbfe]*" * mark_len)
-        str_info = "All files have been submitted to IDM for downloading"
-        str_info = str_info.center(mark_len, "*")
-        print(f"[bold #3dfc40]{str_info}")
+        print(f"[bold #3dfc40]{'All files have been submitted to IDM for downloading'.center(mark_len, '*')}")
         print("[bold #ecdbfe]*" * mark_len)
         if idm_download_list:
-            """ file_download_time = 60  # 预设下载时间为1分钟
-            for f in pbar(idm_download_list,cmap='bwr',prefix='HYCOM: '):
-                file_download_start_time = time.time()
-                wait_success = 0
-                success = False
-                while not success:
-                    if check_nc(f,print_switch=False):
-                        count_dict["success"] += 1
-                        success = True
-                        # print(f"[bold #3dfc40]File [bold #dfff73]{f} [#3dfc40]has been downloaded successfully")
-                        file_download_end_time = time.time()
-                        file_download_time = file_download_end_time - file_download_start_time
-                        file_download_time = int(file_download_time)
-                        # print(f"[bold #3dfc40]Time: {file_download_time} seconds")
-                        file_download_time = max(60, file_download_time)  # 预设下载时间为1分钟起步
-                    else:
-                        wait_success += 1
-                        # print(f"[bold #ffe5c0]Waiting {file_download_time} seconds to check the file {f}...")
-                        time.sleep(file_download_time)
-                        if wait_success >= 10:
-                            success = True
-                            # print(f'{f} download failed')
-                            print(f"[bold #ffe5c0]Waiting for more than 10 times, skipping the file {f}...")
-                            count_dict["fail"] += 1
-                # print("[bold #ecdbfe]-" * mark_len) """
             remain_list = idm_download_list.copy()
-            for f_count in pbar(range(len(idm_download_list)), cmap=get_cmap('diverging_1'), prefix="HYCOM: "):
+            for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
                 success = False
                 while not success:
                     for f in remain_list:
-                        if check_nc(f, print_switch=False):
+                        if check_nc(f, print_messages=False):
                             count_dict["success"] += 1
                             success = True
                             remain_list.remove(f)
@@ -1237,12 +1131,9 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
     print("[bold #ecdbfe]=" * mark_len)
     if count_dict["fail"] > 0:
-        print("[bold #be5528]Please try again to download the failed data later")
+        print("[bold #be5528]Please try again to download the failed data later.")
     if count_dict["no_data"] > 0:
-        if count_dict["no_data"] == 1:
-            print(f"[bold #f90000]There is {count_dict['no_data']} data that does not exist in any dataset and version")
-        else:
-            print(f"[bold #f90000]These are {count_dict['no_data']} data that do not exist in any dataset and version")
+        print(f"[bold #f90000]{count_dict['no_data']} data entries do not exist in any dataset or version.")
         for no_data in count_dict["no_data_list"]:
             print(f"[bold #d81b60]{no_data}")
     print("[bold #ecdbfe]=" * mark_len)
@@ -1265,24 +1156,22 @@ if __name__ == "__main__":
     single_var = False
-    # draw_time_range(pic_save_folder=r'I:\Delete')
     options = {
-        "var": var_list,
-        "time_s": "2025010300",
-        "time_e": "2025010321",
-        "store_path": r"I:\Data\HYCOM\3hourly",
+        "variables": var_list,
+        "start_time": "2025010300",
+        "end_time": "2025010309",
+        "output_dir": r"I:\Data\HYCOM\3hourly_test",
         "lon_min": 105,
         "lon_max": 130,
         "lat_min": 15,
         "lat_max": 45,
-        "num_workers": 3,
-        "check": True,
-        "depth": None,  # or 0-5000 meters
-        "level": None,  # or 1-40 levels
-        "ftimes": 1,
-        # "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe",  # 查漏补缺不建议开启
-        "fill_time": None,
+        "workers": 1,
+        "overwrite": False,
+        "depth": None,
+        "level": None,
+        "validate_time": True,
+        "idm_path": r'D:\Programs\Internet Download Manager\IDMan.exe',
+        "interval_hours": 3,
     }
     if single_var:

oafuncs 0.0.97.16__py3-none-any.whl → 0.0.97.17__py3-none-any.whl

oafuncs 0.0.97.16py3-none-any.whl → 0.0.97.17py3-none-any.whl