oafuncs 0.0.93__py2.py3-none-any.whl → 0.0.95__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
oafuncs/oa_data.py CHANGED
@@ -18,10 +18,11 @@ import multiprocessing as mp
18
18
  from concurrent.futures import ThreadPoolExecutor
19
19
 
20
20
  import numpy as np
21
+ import xarray as xr
21
22
  from scipy.interpolate import griddata
23
+ import salem
22
24
 
23
-
24
- __all__ = ["interp_2d", "ensure_list"]
25
+ __all__ = ["interp_2d", "ensure_list", "mask_shapefile"]
25
26
 
26
27
 
27
28
  def ensure_list(input_data):
@@ -44,7 +45,6 @@ def ensure_list(input_data):
44
45
  return [str(input_data)]
45
46
 
46
47
 
47
-
48
48
  def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
49
49
  """
50
50
  Perform 2D interpolation on the last two dimensions of a multi-dimensional array.
@@ -87,6 +87,8 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", par
87
87
  raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
88
88
 
89
89
  # 创建网格和展平数据
90
+ target_x, target_y = np.array(target_x), np.array(target_y)
91
+ origin_x, origin_y = np.array(origin_x), np.array(origin_y)
90
92
  target_points = np.column_stack((target_y.ravel(), target_x.ravel()))
91
93
  origin_points = np.column_stack((origin_y.ravel(), origin_x.ravel()))
92
94
 
@@ -109,12 +111,38 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", par
109
111
  elif len(data.shape) == 4:
110
112
  interpolated_data = np.stack([np.stack([interp_single(data[i, j], target_points, origin_points, method) for j in range(data.shape[1])]) for i in range(data.shape[0])])
111
113
 
112
- return np.array(interpolated_data)
114
+ return np.squeeze(np.array(interpolated_data))
115
+
113
116
 
117
+ def mask_shapefile(data: np.ndarray, lons: np.ndarray, lats: np.ndarray, shapefile_path: str) -> xr.DataArray:
118
+ """
119
+ Masks a 2D data array using a shapefile.
114
120
 
121
+ Parameters:
122
+ - data: 2D numpy array of data to be masked.
123
+ - lons: 1D numpy array of longitudes.
124
+ - lats: 1D numpy array of latitudes.
125
+ - shapefile_path: Path to the shapefile used for masking.
126
+
127
+ Returns:
128
+ - Masked xarray DataArray.
129
+ """
130
+ """
131
+ https://cloud.tencent.com/developer/article/1701896
132
+ """
133
+ try:
134
+ # import geopandas as gpd
135
+ # shp_f = gpd.read_file(shapefile_path)
136
+ shp_f = salem.read_shapefile(shapefile_path)
137
+ data_da = xr.DataArray(data, coords=[("latitude", lats), ("longitude", lons)])
138
+ masked_data = data_da.salem.roi(shape=shp_f)
139
+ return masked_data
140
+ except Exception as e:
141
+ print(f"An error occurred: {e}")
142
+ return None
115
143
 
116
- if __name__ == "__main__":
117
144
 
145
+ if __name__ == "__main__":
118
146
  pass
119
147
  """ import time
120
148
 
@@ -2,10 +2,10 @@
2
2
  # coding=utf-8
3
3
  """
4
4
  Author: Liu Kun && 16031215@qq.com
5
- Date: 2024-11-02 11:07:49
5
+ Date: 2025-01-29 19:05:09
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2025-01-07 16:31:36
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
7
+ LastEditTime: 2025-01-29 19:05:10
8
+ FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly_20250129 copy.py
9
9
  Description:
10
10
  EditPlatform: vscode
11
11
  ComputerInfo: XPS 15 9510
@@ -13,6 +13,9 @@ SystemInfo: Windows 11
13
13
  Python Version: 3.12
14
14
  """
15
15
 
16
+
17
+
18
+
16
19
  import datetime
17
20
  import os
18
21
  import random
@@ -24,19 +27,19 @@ from pathlib import Path
24
27
  from threading import Lock
25
28
 
26
29
  import matplotlib.pyplot as plt
30
+ import netCDF4 as nc
27
31
  import numpy as np
28
32
  import pandas as pd
29
- import xarray as xr
30
33
  import requests
34
+ import xarray as xr
31
35
  from rich import print
32
36
  from rich.progress import Progress
33
- import netCDF4 as nc
34
37
 
38
+ from oafuncs.oa_down.idm import downloader as idm_downloader
35
39
  from oafuncs.oa_down.user_agent import get_ua
36
40
  from oafuncs.oa_file import file_size, mean_size
37
41
  from oafuncs.oa_nc import check as check_nc
38
42
  from oafuncs.oa_nc import modify as modify_nc
39
- from oafuncs.oa_down.idm import downloader as idm_downloader
40
43
 
41
44
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
42
45
 
@@ -575,18 +578,12 @@ def _check_existing_file(file_full_path, avg_size):
575
578
  if abs(delta_size_ratio) > 0.025:
576
579
  if check_nc(file_full_path):
577
580
  # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
578
- if not _check_ftime(file_full_path, if_print=True):
579
- return False
580
- else:
581
- return True
581
+ return True
582
582
  else:
583
583
  print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
584
584
  return False
585
585
  else:
586
- if not _check_ftime(file_full_path, if_print=True):
587
- return False
588
- else:
589
- return True
586
+ return True
590
587
  else:
591
588
  return False
592
589
 
@@ -705,10 +702,25 @@ def _download_file(target_url, store_path, file_name, check=False):
705
702
  file_name_split = file_name_split[:-1]
706
703
  # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
707
704
  same_file = "_".join(file_name_split) + "*nc"
705
+
706
+ if match_time is not None:
707
+ if check_nc(fname):
708
+ if not _check_ftime(fname, if_print=True):
709
+ if match_time:
710
+ _correct_time(fname)
711
+ count_dict['skip'] += 1
712
+ else:
713
+ _clear_existing_file(fname)
714
+ # print(f"[bold #ffe5c0]File time error, {fname}")
715
+ count_dict["no_data"] += 1
716
+ else:
717
+ count_dict["skip"] += 1
718
+ print(f"[bold green]{file_name} is correct")
719
+ return
708
720
 
709
721
  if check:
710
722
  if same_file not in fsize_dict.keys(): # 对第一个文件单独进行检查,因为没有大小可以对比
711
- check_nc(fname, if_delete=True)
723
+ check_nc(fname, delete_switch=True)
712
724
 
713
725
  # set_min_size = _get_mean_size30(store_path, same_file) # 原方案,只30次取平均值;若遇变化,无法判断
714
726
  get_mean_size = _get_mean_size_move(same_file, fname)
@@ -788,15 +800,6 @@ def _download_file(target_url, store_path, file_name, check=False):
788
800
 
789
801
  f.close()
790
802
 
791
- if not _check_ftime(fname, if_print=True):
792
- if match_time:
793
- _correct_time(fname)
794
- else:
795
- _clear_existing_file(fname)
796
- # print(f"[bold #ffe5c0]File time error, {fname}")
797
- count_dict["no_data"] += 1
798
- break
799
-
800
803
  # print(f'\r文件 {fname} 下载成功', end="")
801
804
  if os.path.exists(fname):
802
805
  download_success = True
@@ -1059,7 +1062,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
1059
1062
  print("[bold red]Please ensure the time_s is no more than time_e")
1060
1063
 
1061
1064
 
1062
- def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=False):
1065
+ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=None):
1063
1066
  """
1064
1067
  Description:
1065
1068
  Download the data of single time or a series of time
@@ -1081,7 +1084,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1081
1084
  check: bool, whether to check the existing file; default is False. If set to True, an existing file is checked and not downloaded again; otherwise, the existing file is overwritten
1082
1085
  ftimes: int, the number of times stored in one file; default is 1, in which case the data of a single time is downloaded per file; the maximum is 8, in which case the data of 8 times is downloaded into one file
1083
1086
  idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
1084
- fill_time: bool, whether to match the time, default is False, if set to True, the time in the file name will be corrected according to the time in the file; else, the data will be skip if the time is not correct. Because the real time of some data that has been downloaded does not match the time in the file name, eg. the required time is 2024110100, but the time in the file name is 2024110103, so the data will be skip if the fill_time is False. Note: it is not the right time data, so it is not recommended to set fill_time to True
1087
+ fill_time: bool or None, the time-handling mode; default is None. None: only download the data; True: modify the real time of the data to match the time in the file name; False: compare the time in the file name with the real time of the data and delete the file if they do not match
1085
1088
 
1086
1089
  Returns:
1087
1090
  None
@@ -1152,6 +1155,9 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1152
1155
 
1153
1156
  global fsize_dict_lock
1154
1157
  fsize_dict_lock = Lock()
1158
+
1159
+ if fill_time is not None:
1160
+ num_workers = 1
1155
1161
 
1156
1162
  global use_idm, given_idm_engine, idm_download_list
1157
1163
  if idm_engine is not None:
@@ -1163,42 +1169,31 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1163
1169
  use_idm = False
1164
1170
 
1165
1171
  global match_time
1166
- if fill_time:
1167
- match_time = True
1168
- else:
1169
- match_time = False
1172
+ match_time = fill_time
1170
1173
 
1171
1174
  _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
1172
1175
 
1173
- if idm_download_list:
1174
- for f in idm_download_list:
1175
- wait_success = 0
1176
- success = False
1177
- while not success:
1178
- if check_nc(f):
1179
- if match_time:
1180
- _correct_time(f)
1176
+ if idm_engine is not None:
1177
+ if idm_download_list:
1178
+ for f in idm_download_list:
1179
+ wait_success = 0
1180
+ success = False
1181
+ while not success:
1182
+ if check_nc(f):
1181
1183
  count_dict["success"] += 1
1182
- else:
1183
- if not _check_ftime(f):
1184
- _clear_existing_file(f)
1185
- count_dict["no_data"] += 1
1186
- count_dict["no_data_list"].append(str(f).split("_")[-1].split(".")[0])
1187
- else:
1188
- count_dict["success"] += 1
1189
- success = True
1190
- else:
1191
- wait_success += 1
1192
- time.sleep(3)
1193
- if wait_success >= 20:
1194
1184
  success = True
1195
- # print(f'{f} download failed')
1196
- count_dict["fail"] += 1
1185
+ else:
1186
+ wait_success += 1
1187
+ time.sleep(3)
1188
+ if wait_success >= 20:
1189
+ success = True
1190
+ # print(f'{f} download failed')
1191
+ count_dict["fail"] += 1
1197
1192
 
1198
1193
  count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
1199
-
1200
1194
  print("[bold #ecdbfe]-" * 160)
1201
- print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
1195
+ print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
1196
+ print("[bold #ecdbfe]-" * 160)
1202
1197
  if count_dict["fail"] > 0:
1203
1198
  print("[bold #be5528]Please try again to download the failed data later")
1204
1199
  if count_dict["no_data"] > 0:
@@ -1290,11 +1285,11 @@ if __name__ == "__main__":
1290
1285
  "lat_max": 45,
1291
1286
  "num_workers": 3,
1292
1287
  "check": True,
1293
- "depth": None, # or 0-5000 meters
1294
- "level": None, # or 1-40 levels
1288
+ "depth": None, # or 0-5000 meters
1289
+ "level": None, # or 1-40 levels
1295
1290
  "ftimes": 1,
1296
- "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe", # 查漏补缺不建议开启
1297
- "fill_time": False
1291
+ # "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe", # 查漏补缺不建议开启
1292
+ "fill_time": False,
1298
1293
  }
1299
1294
 
1300
1295
  if single_var: