jacksung-dev 0.0.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. jacksung/__init__.py +1 -0
  2. jacksung/ai/GeoAttX.py +356 -0
  3. jacksung/ai/GeoNet/__init__.py +0 -0
  4. jacksung/ai/GeoNet/m_block.py +393 -0
  5. jacksung/ai/GeoNet/m_blockV2.py +442 -0
  6. jacksung/ai/GeoNet/m_network.py +107 -0
  7. jacksung/ai/GeoNet/m_networkV2.py +91 -0
  8. jacksung/ai/__init__.py +0 -0
  9. jacksung/ai/latex_tool.py +199 -0
  10. jacksung/ai/metrics.py +181 -0
  11. jacksung/ai/utils/__init__.py +0 -0
  12. jacksung/ai/utils/cmorph.py +42 -0
  13. jacksung/ai/utils/data_parallelV2.py +90 -0
  14. jacksung/ai/utils/fy.py +333 -0
  15. jacksung/ai/utils/goes.py +161 -0
  16. jacksung/ai/utils/gsmap.py +24 -0
  17. jacksung/ai/utils/imerg.py +159 -0
  18. jacksung/ai/utils/metsat.py +164 -0
  19. jacksung/ai/utils/norm_util.py +109 -0
  20. jacksung/ai/utils/util.py +300 -0
  21. jacksung/libs/times.ttf +0 -0
  22. jacksung/utils/__init__.py +1 -0
  23. jacksung/utils/base_db.py +72 -0
  24. jacksung/utils/cache.py +71 -0
  25. jacksung/utils/data_convert.py +273 -0
  26. jacksung/utils/exception.py +27 -0
  27. jacksung/utils/fastnumpy.py +115 -0
  28. jacksung/utils/figure.py +251 -0
  29. jacksung/utils/hash.py +26 -0
  30. jacksung/utils/image.py +221 -0
  31. jacksung/utils/log.py +86 -0
  32. jacksung/utils/login.py +149 -0
  33. jacksung/utils/mean_std.py +66 -0
  34. jacksung/utils/multi_task.py +129 -0
  35. jacksung/utils/number.py +6 -0
  36. jacksung/utils/nvidia.py +140 -0
  37. jacksung/utils/time.py +87 -0
  38. jacksung/utils/web.py +63 -0
  39. jacksung_dev-0.0.4.15.dist-info/LICENSE +201 -0
  40. jacksung_dev-0.0.4.15.dist-info/METADATA +228 -0
  41. jacksung_dev-0.0.4.15.dist-info/RECORD +44 -0
  42. jacksung_dev-0.0.4.15.dist-info/WHEEL +5 -0
  43. jacksung_dev-0.0.4.15.dist-info/entry_points.txt +3 -0
  44. jacksung_dev-0.0.4.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,333 @@
1
+ import sys
2
+ import threading
3
+
4
+ sys.path.append('../')
5
+ import shutil
6
+ from osgeo import gdal, osr
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ from random import randint
10
+ import os
11
+ from jacksung.utils.data_convert import nc2np, np2tif
12
+ from jacksung.utils.image import crop_png, zoom_image, zoomAndDock
13
+ from jacksung.utils.cache import Cache
14
+ import rasterio
15
+ from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
16
+ from matplotlib.colors import LinearSegmentedColormap
17
+ import cartopy.feature as cfeature
18
+ import cartopy.crs as ccrs
19
+ from datetime import datetime, timedelta
20
+ import netCDF4 as nc
21
+ from jacksung.utils.data_convert import np2tif, Coordinate
22
+ from tqdm import tqdm
23
+ from jacksung.utils.multi_task import MultiTasks, type_process
24
+ import cv2
25
+
26
# Module-wide cache of (spatial reference, GCP list) pairs produced by
# get_reference(), looked up by the sub-satellite longitude of a file.
reference_cache = Cache(10)

# Output grid used when re-projecting a full-disk scene. left/right are
# offsets that get added to the sub-satellite longitude; width/height are the
# raster size in pixels.
x_range = dict(left=-60, top=60, bottom=-60, right=60, width=2400, height=2400)
# Same geographic window as x_range on a 480 x 480 raster.
min_x_range = dict(left=-60, top=60, bottom=-60, right=60, width=480, height=480)

# Line/column navigation constants consumed by xy2coordinate(), selected with
# its `res` argument (presumably the nominal resolution in metres -- confirm).
static_params = {
    4000: {'l': 2747, 'c': 2747,
           'COFF': 1373.5, 'CFAC': 10233137,
           'LOFF': 1373.5, 'LFAC': 10233137},
    2000: {'l': 5495, 'c': 5495,
           'COFF': 2747.5, 'CFAC': 20466274,
           'LOFF': 2747.5, 'LFAC': 20466274},
    1000: {'l': 10991, 'c': 10991,
           'COFF': 5495.5, 'CFAC': 40932549,
           'LOFF': 5495.5, 'LFAC': 40932549},
    500: {'l': 21983, 'c': 21983,
          'COFF': 10991.5, 'CFAC': 81865099,
          'LOFF': 10991.5, 'LFAC': 81865099},
    250: {'l': 43967, 'c': 43967,
          'COFF': 21983.5, 'CFAC': 163730199,
          'LOFF': 21983.5, 'LFAC': 163730199},
}
35
+
36
+
37
def getFY_coord(ld):
    """Return the ``Coordinate`` of the full output grid centred on ``ld``.

    :param ld: longitude the ``x_range`` left/right offsets are added to.
    :return: ``Coordinate`` built from ``x_range`` (extent plus raster height and width).
    """
    grid = x_range
    west = ld + grid['left']
    east = ld + grid['right']
    return Coordinate(left=west, top=grid['top'], right=east, bottom=grid['bottom'],
                      h=grid['height'], w=grid['width'])
41
+
42
+
43
def getFY_coord_min(ld):
    """Return the ``Coordinate`` of the reduced-size output grid centred on ``ld``.

    :param ld: longitude the ``min_x_range`` left/right offsets are added to.
    :return: ``Coordinate`` built from ``min_x_range`` (extent plus raster height and width).
    """
    grid = min_x_range
    west = ld + grid['left']
    east = ld + grid['right']
    return Coordinate(left=west, top=grid['top'], right=east, bottom=grid['bottom'],
                      h=grid['height'], w=grid['width'])
46
+
47
+
48
def getFY_coord_clip(area=((100, 140, 10), (20, 60, 10))):
    """Return the ``Coordinate`` of a clipped window on a fixed 800 x 800 raster.

    :param area: ``((lon_min, lon_max, _), (lat_min, lat_max, _))``; the third
        entry of each tuple is not read here.
    :return: ``Coordinate`` covering the requested longitude/latitude box.
    """
    lon_spec = area[0]
    lat_spec = area[1]
    return Coordinate(left=lon_spec[0], top=lat_spec[1], right=lon_spec[1], bottom=lat_spec[0],
                      h=800, w=800)
50
+
51
+
52
# Convert FY4 nominal-grid line/column numbers to longitude/latitude.
def xy2coordinate(l, c, ld=105, res=4000):
    """Map a line/column position of the nominal disk grid to (lon, lat) in degrees.

    :param l: line number (scalar or NumPy array).
    :param c: column number (scalar or NumPy array).
    :param ld: sub-satellite longitude added to the computed longitude offset.
    :param res: key into ``static_params`` selecting the COFF/CFAC/LOFF/LFAC constants.
    :return: ``(lon, lat)``; positions for which the square root below gets a
        negative argument come out as NaN.
    """
    # Ellipsoid semi-axes and satellite distance from the Earth's centre
    # (values look like kilometres -- confirm against the product spec).
    ea = 6378.137
    eb = 6356.7523
    h = 42164

    nav = static_params[res]
    COFF = nav['COFF']
    CFAC = nav['CFAC']
    LOFF = nav['LOFF']
    LFAC = nav['LFAC']

    # Scan angles in radians derived from the column / line offsets.
    x = (np.pi * (c - COFF)) / (180 * (2 ** -16) * CFAC)
    y = (np.pi * (l - LOFF)) / (180 * (2 ** -16) * LFAC)
    cos_x, sin_x = np.cos(x), np.sin(x)
    cos_y, sin_y = np.cos(y), np.sin(y)

    sd_t1 = np.square(h * cos_x * cos_y)
    sd_t2 = np.square(cos_y) + np.square(ea) / np.square(eb) * np.square(sin_y)
    sd_t3 = np.square(h) - np.square(ea)
    sd = np.sqrt(sd_t1 - sd_t2 * sd_t3)
    sn = (h * cos_x * cos_y - sd) / (cos_y ** 2 + ea ** 2 / eb ** 2 * sin_y ** 2)

    s1 = h - sn * cos_x * cos_y
    s2 = sn * sin_x * cos_y
    s3 = -sn * sin_y
    sxy = np.sqrt(s1 ** 2 + s2 ** 2)

    lon = 180 / np.pi * np.arctan(s2 / s1) + ld
    lat = 180 / np.pi * np.arctan(ea ** 2 / eb ** 2 * s3 / sxy)
    return lon, lat
80
+
81
+
82
def convert_file2idx(file_name):
    """Turn an ``HHMM`` (optionally ``HHMM.npy``) name into a quarter-hour index.

    :param file_name: string whose first four characters are hour and minute.
    :return: ``hour * 4 + minute // 15``.
    """
    stem = file_name.replace('.npy', '')
    hours = int(stem[:2])
    minutes = int(stem[2:4])
    return hours * 4 + minutes // 15
86
+
87
+
88
def get_reference(ld):
    """Build the georeferencing inputs used to warp a 2748 x 2748 disk image.

    The nominal grid is sampled every 50 lines/columns. Each sample that maps
    to a valid longitude/latitude becomes a GDAL ground control point, unless
    an already accepted point lies within 10 degrees of it.

    :param ld: sub-satellite longitude forwarded to ``xy2coordinate``.
    :return: ``(spatial_reference, gcps_list)`` -- a WGS84 ``osr.SpatialReference``
        and the list of ``gdal.GCP`` objects.
    """
    step = 50
    accepted = []  # (lon, lat) of the control points kept so far
    gcps_list = []
    for l in range(0, 2748, step):
        for c in range(0, 2748, step):
            lon, lat = xy2coordinate(l, c, ld=ld)
            # xy2coordinate yields NaN where its square root has a negative argument.
            if np.isnan(lon) or np.isnan(lat):
                continue
            # Thin the control points: skip anything within 10 degrees
            # (squared distance <= 100) of a point that is already kept.
            if any((k_lon - lon) ** 2 + (k_lat - lat) ** 2 <= 100 for k_lon, k_lat in accepted):
                continue
            accepted.append((lon, lat))
            gcps_list.append(gdal.GCP(lon, lat, 0, c, l))
    # Spatial reference the control points are expressed in.
    spatial_reference = osr.SpatialReference()
    spatial_reference.SetWellKnownGeogCS('WGS84')
    return spatial_reference, gcps_list
118
+
119
+
120
def get_agri_file_path(data_path, data_date):
    """Locate the FY4B AGRI full-disk L1 file that starts at ``data_date``.

    Files are expected under ``{data_path}/{year}/{month}/{day}`` (numbers not
    zero-padded) and named like
    ``FY4B-_AGRI--_N_DISK_1330E_L1-_FDI-_MULT_NOM_20221007234500_20221007235959_4000M_V0001.HDF``.

    :param data_path: root directory of the downloaded files.
    :param data_date: ``datetime`` of the scan start; the end stamp is 14 min 59 s later.
    :return: the file path when it exists on disk, otherwise ``None``.
    """
    window_end = data_date + timedelta(minutes=14, seconds=59)
    day_dir = rf'{data_path}/{data_date.year}/{data_date.month}/{data_date.day}'
    begin_stamp = data_date.strftime('%Y%m%d%H%M%S')
    end_stamp = window_end.strftime('%Y%m%d%H%M%S')
    candidate = rf'{day_dir}/FY4B-_AGRI--_N_DISK_{get_ld(data_date)}0E_L1-_FDI-_MULT_NOM_{begin_stamp}_{end_stamp}_4000M_V0001.HDF'
    return candidate if os.path.exists(candidate) else None
132
+
133
+
134
def getNPfromHDFClip(ld, file_path, file_type='FDI', lock=None, area=((100, 140, 10), (20, 60, 10)), cache=None):
    """Load a re-projected scene and cut an 800 x 800 pixel window out of it.

    The window centre is the centre of ``area``. Offsets are converted to
    pixels with a factor of 20 per degree and applied relative to the centred
    ``800:1600`` window.

    :param ld: sub-satellite longitude of the scene.
    :param file_path: path of the HDF file to read.
    :param file_type: forwarded to ``getNPfromHDF``.
    :param lock: optional lock forwarded to ``getNPfromHDF``.
    :param area: ``((lon_min, lon_max, _), (lat_min, lat_max, _))``.
    :param cache: optional cache forwarded to ``getNPfromHDF``.
    :return: array ``[:, 800, 800]`` or ``None`` when the loader has no data.
    """
    lon_d = int((ld - (area[0][0] + area[0][1]) / 2) * 20)
    lat_d = int(((area[1][0] + area[1][1]) / 2) * 20)
    np_data = getNPfromHDF(file_path, file_type, lock, cache=cache)
    # The loader may hand back a (data, coord) pair; only the data is sliced.
    if isinstance(np_data, tuple):
        np_data = np_data[0]
    # Propagate "no data" instead of failing on None[...].
    if np_data is None:
        return None
    return np_data[:, 800 - lat_d:1600 - lat_d, 800 - lon_d:1600 - lon_d]
140
+
141
+
142
def getNPfromHDF(hdf_path, file_type='FDI', lock=None, cache=None, return_coord=False, only_coord=False):
    """Read an FY4 file and return it re-projected onto the 0.05 degree grid.

    The return shape depends only on ``return_coord``/``only_coord``, whether
    or not a cache is supplied.

    :param hdf_path: path of the file; its name is parsed with ``prase_filename``.
    :param file_type: ``'FDI'`` or ``'QPE'``, forwarded to ``_getNPfromHDF``.
    :param lock: optional lock held while the file is opened.
    :param cache: optional cache object; results are stored under
        ``hdf_path + file_type`` and a missing result is remembered as the
        string ``'None'`` so it is not recomputed.
    :param return_coord: when true return ``(data, coord)``, or ``(None, None)``
        if there is no data.
    :param only_coord: when true return just the ``Coordinate`` without reading data.
    :return: see ``return_coord`` / ``only_coord``; data is ``None`` when unavailable.
    """
    ld = prase_filename(os.path.basename(hdf_path))['position']
    coord = Coordinate(left=ld - 60, top=60, right=ld + 60, bottom=-60, x_res=0.05, y_res=0.05)
    if only_coord:
        return coord

    if cache is None:
        n_data = _getNPfromHDF(hdf_path, file_type=file_type, lock=lock)
    else:
        cache_key = hdf_path + file_type
        n_data = cache.get_key_in_cache(cache_key)
        if n_data is None:
            n_data = _getNPfromHDF(hdf_path, file_type=file_type, lock=lock)
            # A lookup miss is reported as None, so "no data" is cached under a
            # string sentinel to tell the two cases apart.
            cache.add_key(cache_key, 'None' if n_data is None else n_data)
        elif isinstance(n_data, str) and n_data == 'None':
            n_data = None

    if not return_coord:
        return n_data
    if n_data is None:
        return None, None
    return n_data, coord
165
+
166
+
167
def _getNPfromHDF(hdf_path, file_type='FDI', lock=None):
    """Read the raw arrays of an FY4 file and re-project them.

    :param hdf_path: path of the HDF/NC file.
    :param file_type: ``'FDI'`` reads ``NOMChannel01``..``NOMChannel15`` from the
        ``Data`` group and keeps channel indices 6..14 of them; ``'QPE'`` reads
        the ``Precipitation`` variable.
    :param lock: optional lock held only while the dataset is being opened.
    :return: the array produced by ``_getNPfromHDF_worker``, or ``None`` when the
        file cannot be opened or the worker reports NaNs.
    :raises Exception: for an unknown ``file_type``.
    """
    file_info = prase_filename(os.path.basename(hdf_path))
    if file_type not in ('FDI', 'QPE'):
        raise Exception(rf'file_type {file_type} err')

    if lock:
        lock.acquire()
    try:
        ds = nc.Dataset(hdf_path)
    except Exception:
        # Opening failed: report it and signal "no data" rather than carrying
        # on with an unbound dataset handle.
        print(f'open {hdf_path} failed')
        return None
    finally:
        if lock:
            lock.release()

    try:
        if file_type == 'FDI':
            f = ds.groups['Data']
            np_data = np.zeros((15, 2748, 2748), dtype=np.float32)
            for i in range(1, 16):
                data = np.array(f[f'NOMChannel{i:02d}'][:]).astype(np.float32)
                # Values above 10000 are treated as invalid.
                data[data > 10000] = np.nan
                np_data[i - 1] = data
            in_out_idx = [6, 15]
        else:
            np_data = np.array(ds['Precipitation'][:]).astype(np.float32)
            np_data = np_data[np.newaxis, :]
            in_out_idx = [0, 1]
    finally:
        ds.close()

    position = file_info['position']
    r = reference_cache.get_key_in_cache(position)
    if r is None:
        print(f'get reference of {position}')
        # Keep the freshly built reference locally instead of depending on
        # what add_key() returns.
        r = get_reference(ld=position)
        reference_cache.add_key(position, r)

    return _getNPfromHDF_worker(np_data, file_info['start'], r=r, ld=position, to_file=False,
                                in_out_idx=in_out_idx)
205
+
206
+
207
def _getNPfromHDF_worker(read_np_data, current_date, data_dir=None, ld=None, r=None, to_file=True, in_out_idx=(6, 15)):
    """Warp selected channels of a raw scene onto the ``x_range`` grid.

    The raw array is written as per-channel GeoTIFFs into a scratch directory.
    Each requested channel is registered with ``_registration`` and read back.
    The scratch directory is removed on every exit path.

    :param read_np_data: raw array handed to ``np2tif``.
    :param current_date: ``datetime`` used to name the intermediate files and output.
    :param data_dir: output root, used only when ``to_file`` is true.
    :param ld: sub-satellite longitude forwarded to ``_registration``.
    :param r: ``(spatial_reference, gcps_list)`` as returned by ``get_reference``.
    :param to_file: save the result with ``np.save`` instead of returning it.
    :param in_out_idx: ``(first, stop)`` channel indices to process.
    :return: float16 array ``[stop - first, height, width]`` when ``to_file`` is
        false; ``None`` when any warped channel contains NaN or when saving to file.
    """
    import tempfile

    file = current_date.strftime("%H%M") + '.npy'
    save_path = f'{current_date.year}{current_date.month}{current_date.day}{file.replace(".npy", "")}'
    in_idx, out_idx = in_out_idx
    np_data = np.zeros((out_idx - in_idx, x_range['height'], x_range['width']), dtype=np.float16)

    # mkdtemp guarantees a directory no other worker is using.
    tmp_dir = tempfile.mkdtemp(prefix='make_temp', dir=os.getcwd())
    try:
        np2tif(read_np_data, tmp_dir, save_path, print_log=False)
        for i in range(in_idx, out_idx):
            out_path = f'{tmp_dir}/{save_path}-{i}-ctrl.tif'
            _registration(f'{tmp_dir}/{save_path}-{i}.tif', out_path, ld, *r)
            img = cv2.imread(out_path, -1)
            if np.isnan(img).any():
                return None
            np_data[i - in_idx] = img.astype(np.float16)
    finally:
        # Always clean up, including when a step above raises.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    if to_file:
        out_dir = f'{data_dir}/dataset/{current_date.year}/{current_date.month}/{current_date.day}'
        os.makedirs(out_dir, exist_ok=True)
        np.save(f'{out_dir}/{convert_file2idx(file)}', np_data)
        return None
    return np_data
232
+
233
+
234
# Parse the name of an FY file.
def prase_filename(filename):
    """Split an FY product file name into its fields.

    Example name:
    ``FY4B-_AGRI--_N_DISK_1050E_L1-_FDI-_MULT_NOM_20250606171500_20250606172959_4000M_V0001.HDF``

    The longitude field is four digits in tenths of a degree followed by a
    hemisphere letter (``1050E`` -> 105.0, ``1047E`` -> 104.7). A field with
    fewer than four digits is read as whole degrees. The hemisphere letter is
    not interpreted.

    :param filename: base name of the file (no directory part).
    :return: dict with ``satellite``, ``sensor``, ``area``, ``position`` (float
        degrees), ``file_level``, ``data_name``, ``start``/``end`` (``datetime``)
        and ``resolution``.
    """
    m_list = filename.replace('.HDF', '').split('_')
    lon_digits = ''.join(ch for ch in m_list[4] if ch.isdigit())
    if len(lon_digits) >= 4:
        position = float(lon_digits) / 10
    else:
        position = float(lon_digits)
    return {'satellite': m_list[0], 'sensor': m_list[1], 'area': m_list[3], 'position': round(position, 2),
            'file_level': m_list[5], 'data_name': m_list[6], 'start': datetime.strptime(m_list[9], '%Y%m%d%H%M%S'),
            'end': datetime.strptime(m_list[10], '%Y%m%d%H%M%S'), 'resolution': m_list[11]}
241
+
242
+
243
def _registration(input_path, out_path, ld, spatial_reference, gcps_list):
    """Georeference a raster with control points and warp it to EPSG:4326 via GDAL.

    The input file is opened in update mode and the control points are
    attached to it. It is then warped with a thin-plate-spline transform and
    nearest-neighbour resampling onto the ``x_range`` grid centred on ``ld``.

    :param input_path: raster file to register (opened with ``GA_Update``).
    :param out_path: path of the warped GeoTIFF to write.
    :param ld: longitude the ``x_range`` left/right offsets are added to.
    :param spatial_reference: ``osr.SpatialReference`` the control points refer to.
    :param gcps_list: list of ``gdal.GCP`` control points.
    :raises RuntimeError: when GDAL cannot open ``input_path``.
    """
    dataset = gdal.Open(input_path, gdal.GA_Update)
    if dataset is None:
        # gdal.Open reports failure by returning None unless GDAL exceptions are enabled.
        raise RuntimeError(f'open {input_path} failed')
    # Attach the control points.
    dataset.SetGCPs(gcps_list, spatial_reference.ExportToWkt())
    # NOTE(review): GRIORA_* is the RasterIO resampling enum; gdal.Warp documents
    # GRA_*. Both nearest-neighbour constants appear to be 0 -- confirm before
    # switching resampling methods.
    gdal.Warp(out_path, dataset,
              format='GTiff',
              outputBounds=[ld + x_range['left'], x_range['bottom'], ld + x_range['right'], x_range['top']],
              resampleAlg=gdal.GRIORA_NearestNeighbour,
              width=x_range['width'],
              height=x_range['height'],
              tps=True,
              dstSRS='EPSG:4326')
267
+
268
+
269
def _prase_nc_worker(root_path, target_dir, file, lock=None):
    """Convert one FY4 HDF file to an ``.npy`` file; report success as a bool.

    Input is read from ``{root_path}/{target_dir}/{y}/{m}/{d}/{file}`` and the
    result is stored as ``{root_path}/npy/{y}/{m}/{d}/{quarter_hour_index}.npy``.

    :param root_path: root directory holding both input and output trees.
    :param target_dir: sub-directory of ``root_path`` with the HDF files.
    :param file: HDF file name, parsed with ``prase_filename``.
    :param lock: optional lock forwarded to ``getNPfromHDF``.
    :return: ``True`` when the output exists or was written, ``False`` when the
        input is missing, yields no data, or any exception occurs.
    """
    ps = prase_filename(file)
    start = ps["start"]
    stamp = start.strftime("%Y%m%d %H:%M")
    day_part = f'{start.year}/{start.month}/{start.day}'
    hdf_path = f'{root_path}/{target_dir}/{day_part}/{file}'
    save_path = f'{root_path}/npy/{day_part}'
    slot = convert_file2idx(start.strftime("%H%M"))

    if not os.path.exists(hdf_path):
        tqdm.write(f'## {stamp} None ##')
        return False
    if os.path.exists(f'{save_path}/{slot}.npy'):
        tqdm.write(f'## {stamp} already exist ##')
        return True

    try:
        n_data = getNPfromHDF(hdf_path, lock=lock)
        os.makedirs(save_path, exist_ok=True)
        if n_data is None:
            tqdm.write(f'## {stamp} None ##')
            return False
        np.save(f'{save_path}/{slot}', n_data)
    except Exception:
        # Best effort: any failure is logged and reported to the caller as False.
        tqdm.write(f'## {stamp} err ##')
        return False

    tqdm.write(f'{stamp} down')
    return True
294
+
295
+
296
# Convert FY4 HDF files to npy files.
def make_fynp(root_path, target_dir, time_set):
    """Queue every ``.HDF`` file of the given days for conversion and run the queue.

    Names of files whose conversion returned a false result are written to
    ``err.log`` in the current working directory.

    :param root_path: root directory, forwarded to ``_prase_nc_worker``.
    :param target_dir: sub-directory of ``root_path`` holding ``{y}/{m}/{d}`` folders.
    :param time_set: iterable of dates whose day folders are scanned.
    """
    mt = MultiTasks(40)
    for current_date in time_set:
        day_dir = f'{root_path}/{target_dir}/{current_date.year}/{current_date.month}/{current_date.day}'
        hdf_names = (name for name in os.listdir(day_dir) if name.endswith('.HDF'))
        for file in hdf_names:
            mt.add_task(file, _prase_nc_worker, [root_path, target_dir, file, mt.thread_mutex])

    outcomes = mt.execute_task()
    err_list = [key + '\n' for key, flag in outcomes.items() if not flag]
    with open(f'err.log', 'w') as f:
        f.writelines(err_list)
    print(f'all done, {len(err_list)} in err.log')
312
+
313
+
314
def get_ld(data):
    """Return the sub-satellite longitude in effect at the given time.

    :param data: ``datetime`` to look up.
    :return: ``133`` before 2024-03-01 00:00, ``105`` from then on.
    """
    switch_over = datetime(2024, 3, 1)
    return 133 if data < switch_over else 105
319
+
320
+
321
# Build the FY4B AGRI file name for a scan start time.
def get_filename_by_date(file_date):
    """Return the FY4B AGRI full-disk L1 file name for a scan starting at ``file_date``.

    The longitude field is written in tenths of a degree (``1330E`` / ``1050E``),
    which is the form ``get_agri_file_path`` uses and real product names carry.

    :param file_date: ``datetime`` of the scan start; the end stamp is 14 min 59 s later.
    :return: file name such as
        ``FY4B-_AGRI--_N_DISK_1330E_L1-_FDI-_MULT_NOM_20221007234500_20221007235959_4000M_V0001.HDF``.
    """
    ld = get_ld(file_date)
    start_stamp = file_date.strftime("%Y%m%d%H%M%S")
    end_stamp = (file_date + timedelta(minutes=14, seconds=59)).strftime("%Y%m%d%H%M%S")
    filename = rf'FY4B-_AGRI--_N_DISK_{ld}0E_L1-_FDI-_MULT_NOM_{start_stamp}_{end_stamp}_4000M_V0001.HDF'
    return filename
326
+
327
+
328
if __name__ == '__main__':
    # Manual smoke run: read one sample full-disk file and dump it as GeoTIFF.
    # A QPE product can be read the same way by passing file_type='QPE'.
    sample_hdf = rf'FY4B-_AGRI--_N_DISK_1330E_L1-_FDI-_MULT_NOM_20221230030000_20221230031459_4000M_V0001.HDF'
    np_data = getNPfromHDF(sample_hdf)
    np2tif(np_data, save_path='.', out_name='FY4B', left=133 - 60, top=60, x_res=0.05, y_res=0.05,
           dtype=np.float32)
@@ -0,0 +1,161 @@
1
+ import os
2
+ from datetime import datetime, timedelta
3
+ import netCDF4 as nc
4
+ import numpy as np
5
+ from einops import rearrange, repeat
6
+ from jacksung.utils.data_convert import np2tif, get_transform_from_lonlat_matrices, Coordinate
7
+ import xarray as xr
8
+ from pyresample import create_area_def, kd_tree
9
+ from pyresample.geometry import AreaDefinition
10
+ from jacksung.utils.time import Stopwatch
11
+ from jacksung.utils.cache import Cache
12
+
13
+
14
def get_resample_infos(hdf_path, lock=None, cache=None):
    """Pre-compute nearest-neighbour lookup tables from the GOES grid to a lat/lon grid.

    The target is a 0.05 degree EPSG:4326 grid spanning +/-60 degrees of
    latitude and +/-60 degrees of longitude around the sub-satellite point.
    When the western edge falls below -180 the target is split at the
    antimeridian into two areas.

    :param hdf_path: path of a GOES netCDF file providing the projection and x/y axes.
    :param lock: optional lock held only while the file is opened.
    :param cache: optional cache object; results are stored and looked up by the
        rounded sub-satellite longitude.
    :return: list with one ``kd_tree.get_neighbour_info`` result per target area.
    """
    if lock:
        lock.acquire()
    try:
        ds = nc.Dataset(hdf_path)
    finally:
        if lock:
            lock.release()

    try:
        ld = round(float(ds['nominal_satellite_subpoint_lon'][:]), 2)
        if cache is not None:
            cache_result = cache.get_key_in_cache(ld)
            if cache_result is not None:
                return cache_result
        # Source GEOS projection. Everything needed later is copied out here
        # so the dataset can be closed.
        goes_proj = ds['goes_imager_projection']
        h = float(goes_proj.perspective_point_height)
        x = ds['x'][:] * h  # scan angle scaled by the perspective point height
        y = ds['y'][:] * h
        projection = {
            'proj': 'geos',
            'lon_0': goes_proj.longitude_of_projection_origin,
            'h': goes_proj.perspective_point_height,
            'x_0': 0,
            'y_0': 0,
            'a': goes_proj.semi_major_axis,
            'b': goes_proj.semi_minor_axis,
            'sweep': goes_proj.sweep_angle_axis,
        }
    finally:
        ds.close()

    # Extent = pixel centres grown outward by half a pixel on every side.
    # y[-1] is used as the lower bound and y[0] as the upper bound, i.e. y is
    # descending, so its spacing is negative and needs abs() to grow outward.
    half_pixel_width = (x[1] - x[0]) / 2.0
    half_pixel_height = abs(y[1] - y[0]) / 2.0
    area_extent = (x[0] - half_pixel_width, y[-1] - half_pixel_height,
                   x[-1] + half_pixel_width, y[0] + half_pixel_height)
    goes_area = AreaDefinition(
        area_id='goes_fixed_grid', proj_id='goes_geos', description='GOES Fixed Grid',
        projection=projection,
        width=len(x), height=len(y), area_extent=area_extent)

    left = ld - 60
    right = ld + 60
    target_areas = []
    if left < -180:
        # The window crosses the 180 degree meridian: resample it in two parts.
        # Western part, expressed in positive longitudes.
        target_areas.append(create_area_def(
            area_id='wgs84_left', projection='EPSG:4326', area_extent=[left + 360, -60, 180, 60],
            resolution=(0.05, 0.05), units='degrees'))
        # Eastern part.
        target_areas.append(create_area_def(
            area_id='wgs84_right', projection='EPSG:4326', area_extent=[-180, -60, right, 60],
            resolution=(0.05, 0.05), units='degrees'))
    else:
        target_areas.append(create_area_def(
            area_id='wgs84', projection='EPSG:4326', area_extent=[left, -60, right, 60],
            resolution=(0.05, 0.05), units='degrees'))

    # One nearest neighbour within 5000 (radius_of_influence) per target pixel.
    resample_infos = [
        kd_tree.get_neighbour_info(goes_area, target_area, radius_of_influence=5000, neighbours=1)
        for target_area in target_areas
    ]
    if cache is not None:
        cache.add_key(ld, resample_infos)
    return resample_infos
78
+
79
+
80
def getSingleChannelNPfromHDF(hdf_path, lock=None, return_coord=False, only_coord=False, resample_infos=None):
    """Read the ``Rad`` variable of one GOES file and resample it to the lat/lon grid.

    :param hdf_path: path of the GOES netCDF file.
    :param lock: optional lock held only while the file is opened.
    :param return_coord: when true return ``(data, coord)`` instead of just the data.
    :param only_coord: when true return only the ``Coordinate``; ``Rad`` is not read.
    :param resample_infos: lookup tables from ``get_resample_infos``; computed
        from this file when omitted.
    :return: resampled 2-D array (NaN where no source pixel was found), optionally
        with its ``Coordinate``.
    """
    if lock:
        lock.acquire()
    try:
        ds = nc.Dataset(hdf_path)
    finally:
        if lock:
            lock.release()

    try:
        ld = round(float(ds['nominal_satellite_subpoint_lon'][:]), 2)
        coord = Coordinate(left=ld - 60, bottom=-60, right=ld + 60, top=60, x_res=0.05, y_res=0.05)
        if only_coord:
            return coord
        np_data = np.array(ds['Rad'][:]).astype(np.float32)
    finally:
        ds.close()

    if resample_infos is None:
        resample_infos = get_resample_infos(hdf_path, lock=lock)

    np_datas = []
    for valid_input_index, valid_output_index, index_array, _distance_array in resample_infos:
        # Width of this target part: number of target pixels divided by the 2400 rows.
        part_width = int(len(valid_output_index) / 2400)
        np_datas.append(kd_tree.get_sample_from_neighbour_info(
            'nn', output_shape=(coord.h, part_width), data=np_data, valid_input_index=valid_input_index,
            valid_output_index=valid_output_index, index_array=index_array, fill_value=np.nan))
    # Stitch the parts (two when the window crosses the antimeridian) side by side.
    np_data = np.concatenate(np_datas, axis=1)
    if return_coord:
        return np_data, coord
    return np_data
110
+
111
+
112
def get_filename_by_date_from_dir(dir_path, date, satellite='G18'):
    """Find the per-channel ``.nc`` files of one satellite for one scan time.

    File names are split on ``_``. Field 3 carries the start stamp
    (``sYYYYDDDHHMM...``), field 2 the satellite id, and the fourth ``-``
    separated part of field 1 ends with the channel number.

    :param dir_path: directory to scan.
    :param date: ``datetime`` that must equal the file's start time to the minute.
    :param satellite: satellite id to match, e.g. ``'G18'``.
    :return: dict mapping channel number (int) to file name.
    """
    matches = {}
    for entry in os.listdir(dir_path):
        if not entry.endswith('.nc'):
            continue
        parts = entry.split('_')
        stamp = parts[3]
        jan_first = datetime(year=int(stamp[1:5]), month=1, day=1)
        offset = timedelta(days=int(stamp[5:8]) - 1, hours=int(stamp[8:10]), minutes=int(stamp[10:12]))
        file_date = jan_first + offset
        if date == file_date and parts[2] == satellite:
            channel = int(parts[1].split('-')[3][3:])
            matches[channel] = entry
    return matches
126
+
127
+
128
def getNPfromDir(dir_path, date, satellite='G18', lock=None, return_coord=False, infos=None, cache=None):
    """Assemble channels 8..16 of one GOES scan into a single ``[9, H, W]`` array.

    Files for other channels found in the directory are ignored. Without this
    filter a channel below 8 would produce a negative index and silently
    overwrite another channel's slot.

    :param dir_path: directory holding the per-channel ``.nc`` files.
    :param date: scan start time to select.
    :param satellite: satellite id to select, e.g. ``'G18'``.
    :param lock: optional lock used while files are opened.
    :param return_coord: when true return ``(data, coord)``.
    :param infos: pre-computed resample tables; computed from the first file when omitted.
    :param cache: optional cache forwarded to ``get_resample_infos``.
    :return: array with one resampled channel per slot, optionally with its ``Coordinate``.
    :raises Exception: when a channel cannot be read or fewer than 9 channels are present.
    """
    first_channel = 8
    channel_total = 9
    np_data = None
    coord = None
    data_channel_count = 0
    files = get_filename_by_date_from_dir(dir_path, date, satellite)
    for channel, file in files.items():
        slot = channel - first_channel
        if not 0 <= slot < channel_total:
            continue
        file_path = os.path.join(dir_path, file)
        if infos is None:
            infos = get_resample_infos(file_path, lock=lock, cache=cache)
        channel_data, coord = getSingleChannelNPfromHDF(
            file_path, lock=lock, return_coord=True, resample_infos=infos)
        if channel_data is None:
            raise Exception(f"文件{file},通道 {channel} 数据获取失败")
        if np_data is None:
            np_data = np.full([channel_total] + list(channel_data.shape), np.nan)
        np_data[slot] = channel_data
        data_channel_count += 1
    if data_channel_count < channel_total:
        raise Exception(
            f"文件夹{dir_path}中,卫星 {satellite} 在时间 {date} 的数据通道不完整,仅获取到 {data_channel_count} 个通道")
    if return_coord:
        return np_data, coord
    return np_data
151
+
152
+
153
if __name__ == '__main__':
    # Manual smoke run: convert the same scan time for two satellites and write
    # each result as GeoTIFF. The third tuple entry is the longitude used to
    # derive the left edge of the output.
    sample_dir = rf'D:\python_Project\Huayu_Global\file_download\2022\12\30'
    sample_time = datetime(year=2022, month=12, day=30, hour=3, minute=0)
    for sat_id, out_name, centre_lon in (('G18', 'GOES18', -137), ('G16', 'GOES16', -75.2)):
        np_data = getNPfromDir(sample_dir, sample_time, satellite=sat_id)
        np2tif(np_data, save_path='test_goes', out_name=out_name,
               left=centre_lon - 60, top=60, x_res=0.05, y_res=0.05, dtype=np.float32)
@@ -0,0 +1,24 @@
1
+ import netCDF4 as nc
2
+ import numpy as np
3
+ from einops import rearrange, repeat
4
+ from jacksung.utils.data_convert import np2tif, get_transform_from_lonlat_matrices
5
+
6
+
7
def getNPfromHDF(hdf_path, lock=None, save_file=True):
    """Read the first ``hourlyPrecipRateGC`` field of a GSMaP file as a 2-D array.

    Negative values are replaced by NaN and the rows are reversed.

    :param hdf_path: path of the netCDF file.
    :param lock: optional lock held only while the file is opened.
    :param save_file: also write the array as GeoTIFF ``gsmap`` under ``np2tif_dir``
        (left=-180, top=90, 0.1 degree resolution).
    :return: float32 array.
    """
    if lock:
        lock.acquire()
    try:
        ds = nc.Dataset(hdf_path)
    finally:
        # Release the lock even when the file cannot be opened.
        if lock:
            lock.release()
    try:
        np_data = np.array(ds['hourlyPrecipRateGC'][:]).astype(np.float32)[0]
    finally:
        ds.close()
    np_data[np_data < 0] = np.nan
    np_data = np_data[::-1, :]
    if save_file:
        np2tif(np_data, save_path='np2tif_dir', out_name='gsmap', dtype='float32',
               left=-180, top=90, x_res=0.1, y_res=0.1)
    return np_data
21
+
22
+
23
if __name__ == '__main__':
    # Manual smoke run on one sample GSMaP file.
    sample_nc = rf'D:\python_Project\Huayu_Global\file_download\gsmap_now_rain.20220702.0300.nc'
    getNPfromHDF(sample_nc)
@@ -0,0 +1,159 @@
1
+ from jacksung.utils.data_convert import nc2np, np2tif
2
+ import numpy as np
3
+ import netCDF4 as nc
4
+ from einops import rearrange
5
+ import os
6
+ import shutil
7
+ import requests
8
+ import time
9
+ from selenium import webdriver
10
+ from selenium.webdriver.chrome.service import Service
11
+ from selenium.webdriver.common.by import By
12
+ import platform
13
+
14
+
15
class Downloader:
    """Download IMERG files listed in a text file through a logged-in Chrome session.

    URLs that were fetched successfully are appended to ``downloaded.txt`` in
    the current working directory and skipped on later runs.
    """

    def __init__(self, download_file_path, username, passwd, save_path=None):
        """
        :param download_file_path: text file with one download URL per line.
        :param username: Earthdata login name typed into the login form.
        :param passwd: Earthdata password typed into the login form.
        :param save_path: browser download directory; defaults to ``D:\\imerg`` on
            Windows and ``/mnt/data1/szj/imerg`` elsewhere.
        """
        self.download_file_path = download_file_path
        if save_path is not None:
            self.save_path = save_path
        elif platform.system().lower() == 'windows':
            self.save_path = 'D:\\imerg'
        else:
            self.save_path = '/mnt/data1/szj/imerg'
        self.username = username
        self.passwd = passwd

    def make_driver(self, url, is_headless=False, tmp_path=None, download_dir=None):
        """Start Chrome through Selenium, open ``url`` and return the driver.

        :param url: first page to load.
        :param is_headless: run Chrome with ``--headless``.
        :param tmp_path: optional crash-dump directory passed to Chrome.
        :param download_dir: optional default download directory.
        :return: the ``webdriver.Chrome`` instance.
        """
        options = webdriver.ChromeOptions()
        if tmp_path:
            options.add_argument("crash-dumps-dir=" + tmp_path)
        options.add_argument("--no-sandbox")
        options.add_argument('--disable-dev-shm-usage')
        # NOTE(review): the flags below switch off web security and certificate
        # checks in a session that later submits credentials -- confirm they
        # are really required.
        options.add_argument("--disable-web-security")
        options.add_argument("--allow-running-insecure-content")
        options.add_argument('--user-agent=Mozilla/5.0')
        options.add_argument('--ignore-ssl-errors=yes')
        options.add_argument('--allow-insecure-localhost')
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("--lang=zh-CN")  # UI language: Simplified Chinese (en-US for English)
        options.add_experimental_option("detach", True)
        if download_dir:
            options.add_experimental_option("prefs", {
                "download.default_directory": download_dir,
            })
        options.set_capability('pageLoadStrategy', 'none')
        options.set_capability("unhandledPromptBehavior", "accept")
        if is_headless:
            options.add_argument('--headless')
        print('driver is going to init!')
        if platform.system().lower() == 'windows':
            driver_path = None
        else:
            driver_path = os.path.expanduser("~/chrome/chromedriver")
        driver = webdriver.Chrome(service=Service(driver_path) if driver_path else None, options=options)
        driver.implicitly_wait(10)
        driver.set_page_load_timeout(10)
        print('driver is inited!')
        print(f'请求地址:{url}')
        driver.get(url)
        return driver

    # Direct download with a console progress bar.
    def progressbar(self, url, path):
        """Stream ``url`` into directory ``path`` while printing progress.

        The local name is the last URL segment without the
        ``3B-HHR.MS.MRG.3IMERG.`` prefix and with ``.0000.V07B.HDF5`` shortened
        to ``.HDF5``, e.g. ``20230101-S000000-E002959.HDF5``.

        :param url: file URL.
        :param path: target directory, created when missing.
        """
        os.makedirs(path, exist_ok=True)
        start = time.time()
        basename = url.rsplit('/', 1)[-1]
        name = basename.replace('3B-HHR.MS.MRG.3IMERG.', '').replace('.0000.V07B.HDF5', '.HDF5')
        size = 0  # bytes received so far
        chunk_size = 1024
        with requests.get(url, stream=True) as response:
            if response.status_code != 200:
                print(f'{url} download failed, status code: {response.status_code}')
                return
            # The header may be absent; 0 disables the percentage display.
            content_size = int(response.headers.get('content-length', 0))
            print('Start download,[File size]:{size:.2f} MB'.format(
                size=content_size / chunk_size / 1024))
            filepath = os.path.join(path, name)
            with open(filepath, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    if content_size:
                        print('\r' + '[下载进度]:%s%.2f%%' % (
                            '>' * int(size * 50 / content_size), float(size / content_size * 100)), end=' ')
        end = time.time()
        print('Download completed!,times: %.2f秒' % (end - start))

    def simulate(self, url, driver, path, timeout=None):
        """Trigger a browser download and file the result under a per-date folder.

        Waits until ``path`` exists, then moves it to
        ``{dirname(path)}/{YYYYMMDD}/{basename(path)}``. The date is taken from
        the fifth dot-separated field of the file name, e.g.
        ``3B-HHR-E.MS.MRG.3IMERG.20230101-S033000-E035959.0210.V07B.HDF5``.

        :param url: download URL opened in the browser.
        :param driver: object with a ``get(url)`` method (the Selenium driver).
        :param path: where the browser is expected to store the file.
        :param timeout: optional limit in seconds for the wait; ``None`` waits forever.
        :raises TimeoutError: when ``timeout`` elapses before the file appears.
        """
        start = time.time()
        driver.get(url)
        while not os.path.exists(path):
            if timeout is not None and time.time() - start > timeout:
                raise TimeoutError(f'{path} did not appear within {timeout} seconds')
            time.sleep(1)
        basename = os.path.basename(path)
        date = basename.split('.')[4].split('-')[0]
        move_path = os.path.join(os.path.dirname(path), date, basename)
        os.makedirs(os.path.dirname(move_path), exist_ok=True)
        shutil.move(path, move_path)
        end = time.time()
        print('Download completed!,times: %.2f秒' % (end - start))

    def start_download(self):
        """Log in to Earthdata and download every URL not yet in ``downloaded.txt``."""
        download_file_path = self.download_file_path
        print(f'开始下载:{download_file_path}')
        with open(download_file_path, 'r') as f:
            urls = [line.strip() for line in f if line.strip()]

        # A missing history file simply means nothing has been downloaded yet.
        downloaded = set()
        if os.path.exists('downloaded.txt'):
            with open('downloaded.txt', 'r') as save_f:
                downloaded = {u.strip() for u in save_f if u.count('.pdf') == 0}

        driver = self.make_driver('https://urs.earthdata.nasa.gov', download_dir=self.save_path, is_headless=True)
        try:
            time.sleep(10)  # give the login page time to load
            username = driver.find_element(By.ID, 'username')
            passwd = driver.find_element(By.ID, 'password')
            username.send_keys(self.username)
            passwd.send_keys(self.passwd)
            driver.find_element(By.NAME, 'commit').click()
            time.sleep(5)
            with open('downloaded.txt', 'a') as save_f_w:
                for url in urls:
                    if url in downloaded:
                        print(f'{url} already downloaded')
                        continue
                    print(url)
                    file_path = self.save_path + os.sep + url.split('/')[-1]
                    try:
                        self.simulate(url, driver, file_path)
                        save_f_w.write(url + '\n')
                    except Exception as e:
                        # Best effort: report the failure and go on with the next URL.
                        print(f'{url} download failed: {e}')
        finally:
            # End the browser session even when login or a download raises.
            driver.quit()
143
+
144
+
145
def getNPfromHDF(hdf_path, lock=None, save_file=True):
    """Read the first ``Grid/precipitation`` field of an IMERG file as a 2-D array.

    The first entry along the leading axis is transposed (``'w h->h w'``), its
    rows are reversed and negative values are set to 0.

    :param hdf_path: path of the HDF5 file.
    :param lock: optional lock held only while the file is opened.
    :param save_file: also write the array as GeoTIFF ``IMERG`` under ``np2tif_dir``
        (left=-180, top=90, 0.1 degree resolution).
    :return: float32 array.
    """
    if lock:
        lock.acquire()
    try:
        ds = nc.Dataset(hdf_path)
    finally:
        # Release the lock even when the file cannot be opened.
        if lock:
            lock.release()
    try:
        np_data = np.array(ds.groups['Grid']['precipitation'][:]).astype(np.float32)
    finally:
        ds.close()
    # presumably stored as (time, lon, lat) -- confirm against the product spec
    np_data = rearrange(np_data[0], 'w h->h w')[::-1, :]
    np_data[np_data < 0] = 0
    if save_file:
        np2tif(np_data, save_path='np2tif_dir', left=-180, top=90, x_res=0.1, y_res=0.1, out_name='IMERG',
               dtype='float32')
    return np_data