metradar 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. metradar/__init__.py +7 -0
  2. metradar/cnrad_level2.py +1326 -0
  3. metradar/comm_func.py +135 -0
  4. metradar/construct_aws_refvpr_mainprog.py +515 -0
  5. metradar/construct_aws_refvpr_mainprog_cams.py +310 -0
  6. metradar/construct_aws_refvpr_mainprog_datan3d.py +386 -0
  7. metradar/construct_aws_refvpr_mainprog_swan.py +306 -0
  8. metradar/decode_fmt_pyart.py +200 -0
  9. metradar/decode_pup_rose.py +1993 -0
  10. metradar/draw_mosaic_new.py +421 -0
  11. metradar/draw_radar_aws_jilin_new.py +206 -0
  12. metradar/draw_radar_comp_func.py +1379 -0
  13. metradar/exceptions.py +50 -0
  14. metradar/geo_transforms_pyart.py +627 -0
  15. metradar/get_cross_section_from_pyart.py +354 -0
  16. metradar/get_tlogp_from_sharppy.py +93 -0
  17. metradar/grid.py +281 -0
  18. metradar/grid_data.py +64 -0
  19. metradar/main_pydda.py +653 -0
  20. metradar/make_gif.py +24 -0
  21. metradar/make_mosaic_mp_archive.py +538 -0
  22. metradar/mosaic_merge.py +64 -0
  23. metradar/mosaic_quickdraw.py +338 -0
  24. metradar/nowcast_by_pysteps.py +219 -0
  25. metradar/oa_couhua.py +166 -0
  26. metradar/oa_dig_func.py +955 -0
  27. metradar/parse_pal.py +148 -0
  28. metradar/pgmb_io.py +169 -0
  29. metradar/prepare_for_radar_draw.py +197 -0
  30. metradar/read_new_mosaic.py +33 -0
  31. metradar/read_new_mosaic_func.py +231 -0
  32. metradar/retrieve_cmadaas.py +3126 -0
  33. metradar/retrieve_micaps_server.py +2061 -0
  34. metradar/rose_structer.py +807 -0
  35. metradar/trans_nc_pgmb.py +62 -0
  36. metradar/trans_new_mosaic_nc.py +309 -0
  37. metradar/trans_polor2grid_func.py +203 -0
  38. metradar-0.1.0.dist-info/METADATA +12 -0
  39. metradar-0.1.0.dist-info/RECORD +41 -0
  40. metradar-0.1.0.dist-info/WHEEL +5 -0
  41. metradar-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2061 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Copyright (c) 2019 NMC Developers.
4
+ # Distributed under the terms of the GPL V3 License.
5
+
6
+ """
7
+ This is the retrieve module which get data from MICAPS cassandra service
8
+ with Python API.
9
+ Checking url, like:
10
+ http://10.32.8.164:8080/DataService?requestType=getLatestDataName&directory=ECMWF_HR/TMP/850&fileName=&filter=*.024
11
+ """
12
+
13
+ import warnings
14
+ import re
15
+ import http.client
16
+ import urllib.parse
17
+ import pickle
18
+ import bz2
19
+ from io import BytesIO
20
+ from datetime import datetime, timedelta
21
+ import numpy as np
22
+ import xarray as xr
23
+ import pandas as pd
24
+ from tqdm import tqdm
25
+ import DataBlock_pb2
26
+ import config as CONFIG
27
+ import os
28
+ # add by zwj 20210311
29
+ import struct
30
+ def _structure_size(structure):
31
+ """ Find the size of a structure in bytes. """
32
+ return struct.calcsize('<' + ''.join([i[1] for i in structure]))
33
+
34
+ def _unpack_from_buf(buf, pos, structure):
35
+ """ Unpack a structure from a buffer. """
36
+ size = _structure_size(structure)
37
+ return _unpack_structure(buf[pos:pos + size], structure)
38
+
39
+ def _unpack_structure(string, structure):
40
+ """ Unpack a structure from a string """
41
+ fmt = '<' + ''.join([i[1] for i in structure]) # little-endian
42
+ lst = struct.unpack(fmt, string)
43
+ return dict(zip([i[0] for i in structure], lst))
44
+
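+ # Illustrative note: a "structure" here is a list of (field_name, struct_format) pairs,
+ # e.g. [('year', 'h'), ('month', 'h')] describes two little-endian int16 fields, so
+ # _unpack_from_buf(buf, 0, [('year', 'h'), ('month', 'h')]) returns {'year': ..., 'month': ...}.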
45
+ def get_http_result(host, port, url):
46
+ """
47
+ Get the http contents.
48
+ """
49
+
50
+ http_client = None
51
+ try:
52
+ http_client = http.client.HTTPConnection(host, port, timeout=120)
53
+ http_client.request('GET', url)
54
+ response = http_client.getresponse()
55
+ return response.status, response.read()
56
+ except Exception as e:
57
+ print(e)
58
+ return 0,  # a 1-tuple; callers unpacking (status, response) get a ValueError, which they catch
59
+ finally:
60
+ if http_client:
61
+ http_client.close()
62
+
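+ # Example call (host/port taken from the checking URL in the module docstring):
+ #   status, content = get_http_result("10.32.8.164", 8080,
+ #       "/DataService?requestType=getLatestDataName&directory=ECMWF_HR/TMP/850&fileName=&filter=*.024")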
63
+ gds_element_id_dict ={
64
+ "经度": 1,
65
+ "纬度": 2,
66
+ "测站高度": 3,
67
+ "测站级别": 4,
68
+ "测站类型": 5,
69
+ "气压传感器海拔高度": 6,
70
+ "温湿传感器离地面高度": 7,
71
+ "温湿传感器距水面高度": 8,
72
+ "风速传感器距地面高度": 9,
73
+ "风传感器距甲板平台高度": 10,
74
+ "风速传感器距水面高度": 11,
75
+ "移动平台移动方向": 12,
76
+ "移动平台移动速度": 13,
77
+ "海盐传感器距海面深度": 14,
78
+ "浪高传感器距海面高度": 15,
79
+ "浮标方位": 16,
80
+ "总水深": 17,
81
+ "海面_水面以下深度": 18,
82
+ "船面距海面高度": 19,
83
+ "方位或方位角": 20,
84
+ "字符型站名": 21,
85
+ "风向": 201,
86
+ "风速": 203,
87
+ "平均风向_1分钟": 205,
88
+ "平均风速_1分钟": 207,
89
+ "平均风向_2分钟": 209,
90
+ "平均风速_2分钟": 211,
91
+ "平均风向_10分钟": 213,
92
+ "平均风速_10分钟": 215,
93
+ "最大风速的风向": 217,
94
+ "最大风速": 219,
95
+ "瞬时风向": 221,
96
+ "瞬时风速": 223,
97
+ "极大风速的风向": 225,
98
+ "极大风速": 227,
99
+ "过去6小时极大瞬时风速的风向": 229,
100
+ "过去6小时极大瞬时风速": 231,
101
+ "过去12小时极大瞬时风速的风向": 233,
102
+ "过去12小时极大瞬时风速": 235,
103
+ "风力": 237,
104
+ "海平面气压": 401,
105
+ "变压_3小时": 403,
106
+ "变压_24小时": 405,
107
+ "本站气压": 407,
108
+ "最高气压": 409,
109
+ "最低气压": 411,
110
+ "气压": 413,
111
+ "日平均气压": 415,
112
+ "日平均海平面气压": 417,
113
+ "高度_探空": 419,
114
+ "位势高度_探空": 421,
115
+ "温度": 601,
116
+ "最高气温": 603,
117
+ "最低气温": 605,
118
+ "变温_24小时": 607,
119
+ "过去24小时最高气温": 609,
120
+ "过去24小时最低气温": 611,
121
+ "日平均气温": 613,
122
+ "露点温度": 801,
123
+ "温度露点差": 803,
124
+ "相对湿度": 805,
125
+ "最小相对湿度": 807,
126
+ "日平均相对湿度": 809,
127
+ "水汽压": 811,
128
+ "日平均水汽压": 813,
129
+ "降水量": 1001,
130
+ "降水_1小时": 1003,
131
+ "降水_3小时": 1005,
132
+ "降水_6小时": 1007,
133
+ "降水_12小时": 1009,
134
+ "降水_24小时": 1011,
135
+ "日总降水": 1013,
136
+ "降水量_20_08时": 1015,
137
+ "降水量_08_20时": 1017,
138
+ "降水量_20_20时": 1019,
139
+ "降水量_08_08时": 1021,
140
+ "蒸发": 1023,
141
+ "蒸发_大型": 1025,
142
+ "可降水分_预报降水量": 1027,
143
+ "平均水平能见度_1分钟": 1201,
144
+ "平均水平能见度_10分钟": 1203,
145
+ "最小水平能见度": 1205,
146
+ "水平能见度_人工": 1207,
147
+ "总云量": 1401,
148
+ "低云量": 1403,
149
+ "云底高度": 1405,
150
+ "低云状": 1407,
151
+ "中云状": 1409,
152
+ "高云状": 1411,
153
+ "日平均总云量": 1413,
154
+ "日平均低云量": 1415,
155
+ "云量_低云或中云": 1417,
156
+ "云类型 ": 1419,
157
+ "现在天气": 1601,
158
+ "过去天气1": 1603,
159
+ "过去天气2": 1605,
160
+ "龙卷类型": 1801,
161
+ "龙卷所在方位": 1803,
162
+ "最大冰雹直径": 1805,
163
+ "雷暴": 1807,
164
+ "电流强度_闪电定位": 1809,
165
+ "地面温度": 2001,
166
+ "最高地面温度": 2003,
167
+ "最低地面温度": 2005,
168
+ "过去12小时最低地面温度": 2007,
169
+ "地温_5cm": 2009,
170
+ "地温_10cm": 2011,
171
+ "地温_15cm": 2013,
172
+ "地温_20cm": 2015,
173
+ "地温_40cm": 2017,
174
+ "地温_80cm": 2019,
175
+ "地温_160cm": 2021,
176
+ "地温_320cm": 2023,
177
+ "草面_雪面温度": 2025,
178
+ "草面_雪面最高温度": 2027,
179
+ "草面_雪面最低温度": 2029,
180
+ "日平均地面温度": 2031,
181
+ "日平均5cm地温": 2033,
182
+ "日平均10cm地温": 2035,
183
+ "日平均15cm地温": 2037,
184
+ "日平均20cm地温": 2039,
185
+ "日平均40cm地温": 2041,
186
+ "日平均80cm地温": 2043,
187
+ "日平均160cm地温": 2045,
188
+ "日平均320cm地温": 2047,
189
+ "日平均草面_雪面温度": 2049,
190
+ "地面状态": 2201,
191
+ "积雪深度": 2203,
192
+ "雪压": 2205,
193
+ "电线积冰直径": 2207,
194
+ "电线积冰_现象": 2209,
195
+ "电线积冰_南北方向直径": 2211,
196
+ "电线积冰_南北方向厚度": 2213,
197
+ "电线积冰_南北方向重量": 2215,
198
+ "电线积冰_东西方向直径": 2217,
199
+ "电线积冰_东西方向厚度": 2219,
200
+ "电线积冰_东西方向重量": 2221,
201
+ "船上结冰原因": 2223,
202
+ "船上结冰厚度": 2225,
203
+ "船上结冰速度": 2227,
204
+ "海冰密集度": 2229,
205
+ "冰情发展": 2231,
206
+ "冰总量和类型": 2233,
207
+ "冰缘方位": 2235,
208
+ "冰情": 2237,
209
+ "最高气压出现时间": 10001,
210
+ "最低气压出现时间": 10003,
211
+ "最高气温出现时间": 10005,
212
+ "最低气温出现时间": 10007,
213
+ "最小相对湿度出现时间": 10009,
214
+ "最大风速出现时间": 10011,
215
+ "极大风速出现时间": 10013,
216
+ "最高地面温度出现时间": 10015,
217
+ "最低地面温度出现时间": 10017,
218
+ "草面_雪面最低温度出现时间": 10019,
219
+ "草面_雪面最高温度出现时间": 10021,
220
+ "最小水平能见度出现时间": 10023,
221
+ "天气出现时间": 10025,
222
+ "海表最高温度出现时间": 10027,
223
+ "海表最低温度出现时间": 10029,
224
+ "最大波高出现时间": 10031,
225
+ "风速表类型": 2401,
226
+ "湿球温度测量方法": 2403,
227
+ "海面温度测量方法": 2405,
228
+ "洋流测量方法": 2407,
229
+ "气压倾向特征": 2409,
230
+ "海面温度": 2601,
231
+ "湿球温度": 2603,
232
+ "海面盐度": 2605,
233
+ "海表最高温度": 2607,
234
+ "海表最低温度": 2609,
235
+ "海水温度": 2611,
236
+ "海水盐度": 2613,
237
+ "海面海流方向": 2801,
238
+ "海面海流速度": 2803,
239
+ "洋流方向和速度的平均周期": 2805,
240
+ "表层海洋面流速": 2807,
241
+ "表层海洋面波向": 2809,
242
+ "海流方向": 2811,
243
+ "海流速度": 2813,
244
+ "波浪方向": 3001,
245
+ "波浪周期": 3003,
246
+ "波浪高度": 3005,
247
+ "风浪方向": 3007,
248
+ "风浪周期": 3009,
249
+ "风浪高度": 3011,
250
+ "第一涌浪方向": 3013,
251
+ "第一涌浪周期": 3015,
252
+ "第一涌浪高度": 3017,
253
+ "第二涌浪方向": 3019,
254
+ "第二涌浪周期": 3021,
255
+ "第二涌浪高度": 3023,
256
+ "有效波高": 3025,
257
+ "有效波高的周期": 3027,
258
+ "平均波高": 3029,
259
+ "平均波周期": 3031,
260
+ "最大波高": 3033,
261
+ "最大波高的周期": 3035,
262
+ "人工测量浪高": 3037,
263
+ "仪器测量浪高": 3039,
264
+ "浪级代码": 3041
265
+ }
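+ # Maps element names (kept in Chinese because they become DataFrame column names
+ # downstream) to MICAPS GDS station-data element IDs; get_station_data() inverts
+ # this mapping to rename the numeric element-ID columns it decodes.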
266
+
267
+ class GDSDataService:
268
+ def __init__(self):
269
+ # set the MICAPS GDS server address
270
+ self.berror = False
271
+ if CONFIG is None:
272
+ self.berror=True
273
+
274
+ cc = CONFIG._get_config_from_rcfile()
275
+
276
+ self.gdsIp = cc['MICAPS']['GDS_IP']
277
+ self.gdsPort = cc['MICAPS']['GDS_PORT']
278
+ # print(self.gdsIp)
279
+ # print(self.gdsPort)
280
+ def getLatestDataName(self, directory, filter):
281
+ return get_http_result(
282
+ self.gdsIp, self.gdsPort, "/DataService" +
283
+ self.get_concate_url("getLatestDataName", directory, "", filter))
284
+
285
+ def getData(self, directory, fileName):
286
+ return get_http_result(
287
+ self.gdsIp, self.gdsPort, "/DataService" +
288
+ self.get_concate_url("getData", directory, fileName, ""))
289
+
290
+ def getFileList(self,directory):
291
+ return get_http_result(
292
+ self.gdsIp, self.gdsPort, "/DataService" +
293
+ self.get_concate_url("getFileList", directory, "",""))
294
+
295
+ # concatenate the request parameters into the url
296
+ def get_concate_url(self, requestType, directory, fileName, filter):
297
+ url = ""
298
+ url += "?requestType=" + requestType
299
+ url += "&directory=" + directory
300
+ url += "&fileName=" + fileName
301
+ url += "&filter=" + filter
302
+ return urllib.parse.quote(url, safe=':/?=&')
303
+
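+ # For example, get_concate_url("getData", "ECMWF_HR/TMP/850", "18021708.024", "")
+ # yields "?requestType=getData&directory=ECMWF_HR/TMP/850&fileName=18021708.024&filter=";
+ # non-ASCII path components (e.g. Chinese station names) are percent-encoded by quote().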
304
+
305
+ def get_file_list(path, latest=None):
306
+ """return file list of cassandra data servere path
307
+
308
+ Args:
309
+ path (string): cassandra data servere path.
310
+ latest (integer): get the latest n files.
311
+
312
+ Returns:
313
+ list: list of filenames.
314
+ """
315
+
316
+ # connect to data service
317
+ service = GDSDataService()
318
+ if service.berror == True:
319
+ return None
320
+ # get all files under the specified directory
321
+ status, response = service.getFileList(path)
322
+ MappingResult = DataBlock_pb2.MapResult()
323
+ file_list = []
324
+ if status == 200:
325
+ if MappingResult is not None:
326
+ # parse the protobuf response
327
+ MappingResult.ParseFromString(response)
328
+ results = MappingResult.resultMap
329
+ # loop over the entries in the directory
330
+ for name_size_pair in results.items():
331
+ if (name_size_pair[1] != 'D'):
332
+ file_list.append(name_size_pair[0])
333
+
334
+ # sort the file list
335
+ if latest is not None:
336
+ file_list.sort(reverse=True)
337
+ file_list = file_list[0:min(len(file_list), latest)]
338
+
339
+ return file_list
340
+
341
+
342
+ def get_latest_initTime(directory, suffix="*.006"):
343
+ """
344
+ Get the latest initial time string.
345
+
346
+ Args:
347
+ directory (string): the data directory on the service.
348
+ suffix (string, optional): the filename filter pattern.
349
+ Examples:
350
+ >>> initTime = get_latest_initTime("ECMWF_HR/TMP/850")
351
+ """
352
+
353
+ # connect to data service
354
+ service = GDSDataService()
355
+ if service.berror == True:
356
+ return None
357
+ # get latest data filename
358
+ try:
359
+ status, response = service.getLatestDataName(directory, suffix)
360
+ except ValueError:
361
+ print('Can not retrieve data from ' + directory)
362
+ return None
363
+ StringResult = DataBlock_pb2.StringResult()
364
+ if status == 200:
365
+ StringResult.ParseFromString(response)
366
+ if StringResult is not None:
367
+ filename = StringResult.name
368
+ if filename == '':
369
+ return None
370
+ else:
371
+ return filename.split('.')[0]
372
+ else:
373
+ return None
374
+
375
+ # extract initial time
376
+ # return filename.split(".")[0]
377
+ return None
378
+
379
+
380
+ def get_model_grid(directory, filename=None, suffix="*.024",
381
+ varname='data', varattrs={'units':''}, scale_off=None,
382
+ levattrs={'long_name':'pressure_level', 'units':'hPa',
383
+ '_CoordinateAxisType':'Pressure'}, cache=True):
384
+ """
385
+ Retrieve numeric model grid forecast from MICAPS cassandra service.
386
+ Support ensemble member forecast.
387
+ :param directory: the data directory on the service
388
+ :param filename: the data filename, if none, will be the latest file.
389
+ :param suffix: the filename filter pattern which will be used to
390
+ find the specified file.
391
+ :param varname: set variable name.
392
+ :param varattrs: set variable attributes, dictionary type.
393
+ :param scale_off: [scale, offset], return values = values*scale + offset.
394
+ :param levattrs: set level coordinate attributes, dictionary type.
395
+ :param cache: cache retrieved data to local directory, default is True.
396
+ :return: data, xarray type
397
+ :Examples:
398
+ >>> data = get_model_grid("ECMWF_HR/TMP/850")
399
+ >>> data_ens = get_model_grid("ECMWF_ENSEMBLE/RAW/HGT/500", filename='18021708.024')
400
+ >>> data_ens = get_model_grid('ECMWF_ENSEMBLE/RAW/TMP_2M', '19083008.024')
401
+ """
402
+
403
+ # get data file name
404
+ if filename is None:
405
+ try:
406
+ # connect to data service
407
+ service = GDSDataService()
408
+ if service.berror == True:
409
+ return None
410
+ status, response = service.getLatestDataName(directory, suffix)
411
+ except ValueError:
412
+ print('Can not retrieve data from ' + directory)
413
+ return None
414
+ StringResult = DataBlock_pb2.StringResult()
415
+ if status == 200:
416
+ StringResult.ParseFromString(response)
417
+ if StringResult is not None:
418
+ filename = StringResult.name
419
+ if filename == '':
420
+ return None
421
+ else:
422
+ return None
423
+
424
+ # retrieve data from cached file
425
+ if cache:
426
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
427
+ if cache_file.is_file():
428
+ with open(cache_file, 'rb') as f:
429
+ data = pickle.load(f)
430
+ return data
431
+
432
+ # get data contents
433
+ try:
434
+ service = GDSDataService()
435
+ if service.berror == True:
436
+ return None
437
+ status, response = service.getData(directory, filename)
438
+ except ValueError:
439
+ print('Can not retrieve data ' + filename + ' from ' + directory)
440
+ return None
441
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
442
+ if status == 200:
443
+ ByteArrayResult.ParseFromString(response)
444
+ if ByteArrayResult.errorCode == 1:
445
+ return None
446
+ if ByteArrayResult is not None:
447
+ byteArray = ByteArrayResult.byteArray
448
+ if byteArray == b'':
449
+ print('There is no data ' + filename + ' in ' + directory)
450
+ return None
451
+
452
+ # define head information structure (278 bytes)
453
+ head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
454
+ ('modelName', 'S20'), ('element', 'S50'),
455
+ ('description', 'S30'), ('level', 'f4'),
456
+ ('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
457
+ ('hour', 'i4'), ('timezone', 'i4'),
458
+ ('period', 'i4'), ('startLongitude', 'f4'),
459
+ ('endLongitude', 'f4'), ('longitudeGridSpace', 'f4'),
460
+ ('longitudeGridNumber', 'i4'),
461
+ ('startLatitude', 'f4'), ('endLatitude', 'f4'),
462
+ ('latitudeGridSpace', 'f4'),
463
+ ('latitudeGridNumber', 'i4'),
464
+ ('isolineStartValue', 'f4'),
465
+ ('isolineEndValue', 'f4'),
466
+ ('isolineSpace', 'f4'),
467
+ ('perturbationNumber', 'i2'),
468
+ ('ensembleTotalNumber', 'i2'),
469
+ ('minute', 'i2'), ('second', 'i2'),
470
+ ('Extent', 'S92')]
471
+
472
+ # read head information
473
+ head_info = np.frombuffer(byteArray[0:278], dtype=head_dtype)
474
+
475
+ # get required grid information
476
+ data_type = head_info['type'][0]
477
+ nlon = head_info['longitudeGridNumber'][0]
478
+ nlat = head_info['latitudeGridNumber'][0]
479
+ nmem = head_info['ensembleTotalNumber'][0]
480
+
481
+ # define data structure
482
+ if data_type == 4:
483
+ data_dtype = [('data', 'f4', (nlat, nlon))]
484
+ data_len = nlat * nlon * 4
485
+ elif data_type == 11:
486
+ data_dtype = [('data', 'f4', (2, nlat, nlon))]
487
+ data_len = 2 * nlat * nlon * 4
488
+ else:
489
+ raise Exception("Data type is not supported")
490
+
491
+ # read data
492
+ if nmem == 0:
493
+ data = np.frombuffer(byteArray[278:], dtype=data_dtype)
494
+ data = np.squeeze(data['data'])
495
+ else:
496
+ if data_type == 4:
497
+ data = np.full((nmem, nlat, nlon), np.nan)
498
+ else:
499
+ data = np.full((2, nmem, nlat, nlon), np.nan)
500
+ ind = 0
501
+ for imem in range(nmem):
502
+ head_info_mem = np.frombuffer(
503
+ byteArray[ind:(ind+278)], dtype=head_dtype)
504
+ ind += 278
505
+ data_mem = np.frombuffer(
506
+ byteArray[ind:(ind+data_len)], dtype=data_dtype)
507
+ ind += data_len
508
+ number = head_info_mem['perturbationNumber'][0]
509
+ if data_type == 4:
510
+ data[number, :, :] = np.squeeze(data_mem['data'])
511
+ else:
512
+ data[:, number, :, :] = np.squeeze(data_mem['data'])
513
+
514
+ # scale and offset the data, if necessary.
515
+ if scale_off is not None:
516
+ data = data * scale_off[0] + scale_off[1]
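+ # e.g. scale_off=[0.01, 0.0] would rescale values stored as integer hundredths
+ # back to physical units (illustrative values only).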
517
+
518
+ # construct longitude and latitude coordinates
519
+ slon = head_info['startLongitude'][0]
520
+ dlon = head_info['longitudeGridSpace'][0]
521
+ slat = head_info['startLatitude'][0]
522
+ dlat = head_info['latitudeGridSpace'][0]
523
+ lon = np.arange(nlon) * dlon + slon
524
+ lat = np.arange(nlat) * dlat + slat
525
+ level = np.array([head_info['level'][0]])
526
+
527
+ # construct initial time and forecast hour
528
+ init_time = datetime(head_info['year'][0], head_info['month'][0],
529
+ head_info['day'][0], head_info['hour'][0])
530
+ fhour = np.array([head_info['period'][0]], dtype=float)  # np.float alias was removed in NumPy 1.24
531
+ time = init_time + timedelta(hours=fhour[0])
532
+ init_time = np.array([init_time], dtype='datetime64[ms]')
533
+ time = np.array([time], dtype='datetime64[ms]')
534
+
535
+ # construct ensemble number
536
+ if nmem != 0:
537
+ number = np.arange(nmem)
538
+
539
+ # define coordinates
540
+ time_coord = ('time', time)
541
+ lon_coord = ('lon', lon, {
542
+ 'long_name':'longitude', 'units':'degrees_east', '_CoordinateAxisType':'Lon'})
543
+ lat_coord = ('lat', lat, {
544
+ 'long_name':'latitude', 'units':'degrees_north', '_CoordinateAxisType':'Lat'})
545
+ if level[0] != 0:
546
+ level_coord = ('level', level, levattrs)
547
+ if nmem != 0:
548
+ number_coord = ('number', number, {'_CoordinateAxisType':'Ensemble'})
549
+
550
+ # create to xarray
551
+ if data_type == 4:
552
+ if nmem == 0:
553
+ if level[0] == 0:
554
+ data = data[np.newaxis, ...]
555
+ data = xr.Dataset({
556
+ varname:(['time', 'lat', 'lon'], data, varattrs)},
557
+ coords={
558
+ 'time':time_coord, 'lat':lat_coord, 'lon':lon_coord})
559
+ else:
560
+ data = data[np.newaxis, np.newaxis, ...]
561
+ data = xr.Dataset({
562
+ varname:(['time', 'level', 'lat', 'lon'], data, varattrs)},
563
+ coords={
564
+ 'time':time_coord, 'level':level_coord,
565
+ 'lat':lat_coord, 'lon':lon_coord})
566
+ else:
567
+ if level[0] == 0:
568
+ data = data[np.newaxis, ...]
569
+ data = xr.Dataset({
570
+ varname:(['time', 'number', 'lat', 'lon'], data, varattrs)},
571
+ coords={
572
+ 'time':time_coord, 'number':number_coord,
573
+ 'lat':lat_coord, 'lon':lon_coord})
574
+ else:
575
+ data = data[np.newaxis, :, np.newaxis, ...]
576
+ data = xr.Dataset({
577
+ varname:(['time', 'number', 'level', 'lat', 'lon'], data, varattrs)},
578
+ coords={
579
+ 'time':time_coord, 'number':number_coord, 'level':level_coord,
580
+ 'lat':lat_coord, 'lon':lon_coord})
581
+ elif data_type == 11:
582
+ speedattrs = {'long_name':'wind speed', 'units':'m/s'}
583
+ angleattrs = {'long_name':'wind angle', 'units':'degree'}
584
+ if nmem == 0:
585
+ speed = np.squeeze(data[0, :, :])
586
+ angle = np.squeeze(data[1, :, :])
587
+ if level[0] == 0:
588
+ speed = speed[np.newaxis, ...]
589
+ angle = angle[np.newaxis, ...]
590
+ data = xr.Dataset({
591
+ 'speed': (['time', 'lat', 'lon'], speed, speedattrs),
592
+ 'angle': (['time', 'lat', 'lon'], angle, angleattrs)},
593
+ coords={'lon': lon_coord, 'lat': lat_coord, 'time': time_coord})
594
+ else:
595
+ speed = speed[np.newaxis, np.newaxis, ...]
596
+ angle = angle[np.newaxis, np.newaxis, ...]
597
+ data = xr.Dataset({
598
+ 'speed': (['time', 'level', 'lat', 'lon'], speed, speedattrs),
599
+ 'angle': (['time', 'level', 'lat', 'lon'], angle, angleattrs)},
600
+ coords={'lon': lon_coord, 'lat': lat_coord,
601
+ 'level': level_coord, 'time': time_coord})
602
+ else:
603
+ speed = np.squeeze(data[0, :, :, :])
604
+ angle = np.squeeze(data[1, :, :, :])
605
+ if level[0] == 0:
606
+ speed = speed[np.newaxis, ...]
607
+ angle = angle[np.newaxis, ...]
608
+ data = xr.Dataset({
609
+ 'speed': (
610
+ ['time', 'number', 'lat', 'lon'], speed, speedattrs),
611
+ 'angle': (
612
+ ['time', 'number', 'lat', 'lon'], angle, angleattrs)},
613
+ coords={
614
+ 'lon': lon_coord, 'lat': lat_coord,
615
+ 'number': number_coord, 'time': time_coord})
616
+ else:
617
+ speed = speed[np.newaxis, :, np.newaxis, ...]
618
+ angle = angle[np.newaxis, :, np.newaxis, ...]
619
+ data = xr.Dataset({
620
+ 'speed': (
621
+ ['time', 'number', 'level', 'lat', 'lon'],
622
+ speed, speedattrs),
623
+ 'angle': (
624
+ ['time', 'number', 'level', 'lat', 'lon'],
625
+ angle, angleattrs)},
626
+ coords={
627
+ 'lon': lon_coord, 'lat': lat_coord, 'level': level_coord,
628
+ 'number': number_coord, 'time': time_coord})
629
+
630
+ # add time coordinates
631
+ data.coords['forecast_reference_time'] = init_time[0]
632
+ data.coords['forecast_period'] = ('time', fhour, {
633
+ 'long_name':'forecast_period', 'units':'hour'})
634
+
635
+ # add attributes
636
+ data.attrs['Conventions'] = "CF-1.6"
637
+ data.attrs['Origin'] = 'MICAPS Cassandra Server'
638
+
639
+ # sort latitude coordinates
640
+ data = data.loc[{'lat':sorted(data.coords['lat'].values)}]
641
+
642
+ # cache data
643
+ if cache:
644
+ with open(cache_file, 'wb') as f:
645
+ pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
646
+
647
+ # return data
648
+ return data
649
+
650
+ else:
651
+ return None
652
+ else:
653
+ return None
654
+
655
+
656
+ def get_model_grids(directory, filenames, allExists=True, pbar=False, **kargs):
657
+ """
658
+ Retrieve multiple time grids from MICAPS cassandra service.
659
+
660
+ Args:
661
+ directory (string): the data directory on the service.
662
+ filenames (list): the list of filenames.
663
+ allExists (boolean): all files should exist, or return None.
664
+ pbar (boolean): Show progress bar, default to False.
665
+ **kargs: key arguments passed to get_model_grid function.
666
+ """
667
+
668
+ dataset = []
669
+ if pbar:
670
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
671
+ else:
672
+ tqdm_filenames = filenames
673
+ for filename in tqdm_filenames:
674
+ data = get_model_grid(directory, filename=filename, **kargs)
675
+ if data:
676
+ dataset.append(data)
677
+ else:
678
+ if allExists:
679
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
680
+ return None
681
+
682
+ return xr.concat(dataset, dim='time')
683
+
684
+
685
+ def get_model_points(directory, filenames, points, **kargs):
686
+ """
687
+ Retrieve point time series from MICAPS cassandra service.
688
+ Return xarray, (time, points)
689
+
690
+ Args:
691
+ directory (string): the data directory on the service.
692
+ filenames (list): the list of filenames.
693
+ points (dict): dictionary, {'lon':[...], 'lat':[...]}.
694
+ **kargs: key arguments passed to get_model_grids function.
695
+ Examples:
696
+ >>> directory = "NWFD_SCMOC/TMP/2M_ABOVE_GROUND"
697
+ >>> fhours = np.arange(3, 75, 3)
698
+ >>> filenames = ["19083008."+str(fhour).zfill(3) for fhour in fhours]
699
+ >>> points = {'lon':[116.3833, 110.0], 'lat':[39.9, 32]}
700
+ >>> data = get_model_points(directory, filenames, points)
701
+ """
702
+
703
+ data = get_model_grids(directory, filenames, **kargs)
704
+ if data:
705
+ return data.interp(lon=('points', points['lon']), lat=('points', points['lat']))
706
+ else:
707
+ return None
708
+
709
+
710
+ def get_model_3D_grid(directory, filename, levels, allExists=True, pbar=False, **kargs):
711
+ """
712
+ Retrieve 3D [level, lat, lon] grids from MICAPS cassandra service.
713
+
714
+ Args:
715
+ directory (string): the data directory on the service, which includes all levels.
716
+ filename (string): the data file name.
717
+ levels (list): the high levels.
718
+ allExists (boolean): all levels should exist; if not, return None.
719
+ pbar (boolean): show progress bar.
720
+ **kargs: key arguments passed to get_model_grid function.
721
+ Examples:
722
+ >>> directory = "ECMWF_HR/TMP"
723
+ >>> levels = [1000, 950, 925, 900, 850, 800, 700, 600, 500, 400, 300, 250, 200, 100]
724
+ >>> filename = "19083008.024"
725
+ >>> data = get_model_3D_grid(directory, filename, levels)
726
+ """
727
+
728
+ dataset = []
729
+ if pbar:
730
+ tqdm_levels = tqdm(levels, desc=directory+": ")
731
+ else:
732
+ tqdm_levels = levels
733
+ for level in tqdm_levels:
734
+ if directory[-1] == '/':
735
+ dataDir = directory + str(int(level)).strip()
736
+ else:
737
+ dataDir = directory + '/' + str(int(level)).strip()
738
+ data = get_model_grid(dataDir, filename=filename, **kargs)
739
+ if data:
740
+ dataset.append(data)
741
+ else:
742
+ if allExists:
743
+ warnings.warn("{} doese not exists.".format(dataDir+'/'+filename))
744
+ return None
745
+
746
+ return xr.concat(dataset, dim='level')
747
+
748
+
749
+ def get_model_3D_grids(directory, filenames, levels, allExists=True, pbar=True, **kargs):
750
+ """
751
+ Retrieve 3D [time, level, lat, lon] grids from MICAPS cassandra service.
752
+
753
+ Args:
754
+ directory (string): the data directory on the service, which includes all levels.
755
+ filenames (list): the list of data filenames, should be the same initial time.
756
+ levels (list): the high levels.
757
+ allExists (bool, optional): all files should exist, or return None. Defaults to True.
758
+ pbar (boolean): Show progress bar, default to True.
759
+ **kargs: key arguments passed to get_model_grid function.
760
+ Examples:
761
+ >>> directory = "ECMWF_HR/TMP"
762
+ >>> levels = [1000, 950, 925, 900, 850, 800, 700, 600, 500, 400, 300, 250, 200, 100]
763
+ >>> fhours = np.arange(0, 75, 3)
764
+ >>> filenames = ["19083008."+str(fhour).zfill(3) for fhour in fhours]
765
+ >>> data = get_model_3D_grids(directory, filenames, levels)
766
+ """
767
+
768
+ dataset = []
769
+ if pbar:
770
+ tqdm_filenames = tqdm(filenames, desc=directory+": ")
771
+ else:
772
+ tqdm_filenames = filenames
773
+ for filename in tqdm_filenames:
774
+ dataset_temp = []
775
+ for level in levels:
776
+ if directory[-1] == '/':
777
+ dataDir = directory + str(int(level)).strip()
778
+ else:
779
+ dataDir = directory + '/' + str(int(level)).strip()
780
+ data = get_model_grid(dataDir, filename=filename, **kargs)
781
+ if data:
782
+ dataset_temp.append(data)
783
+ else:
784
+ if allExists:
785
+ warnings.warn("{} doese not exists.".format(dataDir+'/'+filename))
786
+ return None
787
+ dataset.append(xr.concat(dataset_temp, dim='level'))
788
+
789
+ return xr.concat(dataset, dim='time')
790
+
791
+
792
+ def get_model_profiles(directory, filenames, levels, points, **kargs):
793
+ """
794
+ Retrieve time series of vertical profile from 3D [time, level, lat, lon] grids from MICAPS cassandra service.
795
+
796
+ Args:
797
+ directory (string): the data directory on the service, which includes all levels.
798
+ filenames (list): the list of data filenames or one file.
799
+ levels (list): the high levels.
800
+ points (dict): dictionary, {'lon':[...], 'lat':[...]}.
801
+ **kargs: key arguments passed to get_model_3D_grids function.
802
+ Examples:
803
+ directory = "ECMWF_HR/TMP"
804
+ levels = [1000, 950, 925, 900, 850, 800, 700, 600, 500, 400, 300, 250, 200, 100]
805
+ filenames = ["20021320.024"]
806
+ points = {'lon':[116.3833, 110.0], 'lat':[39.9, 32]}
807
+ data = get_model_profiles(directory, filenames, levels, points)
808
+ """
809
+
810
+ data = get_model_3D_grids(directory, filenames, levels, **kargs)
811
+ if data:
812
+ return data.interp(lon=('points', points['lon']), lat=('points', points['lat']))
813
+ else:
814
+ return None
815
+
816
+
817
+ def get_station_data(directory, filename=None, suffix="*.000", dropna=True, cache=True):
818
+ """
819
+ Retrieve station data from MICAPS cassandra service.
820
+ :param directory: the data directory on the service
821
+ :param filename: the data filename, if none, will be the latest file.
822
+ :param suffix: the filename filter pattern which will
823
+ be used to find the specified file.
824
+ :param dropna: the column which values is all na will be dropped.
825
+ :param cache: cache retrieved data to local directory, default is True.
826
+ :return: pandas DataFrame.
827
+ :example:
828
+ >>> data = get_station_data("SURFACE/PLOT_10MIN")
829
+ >>> data = get_station_data("SURFACE/TMP_MAX_24H_NATIONAL", filename="20190705150000.000")
830
+ """
831
+
832
+ # get data file name
833
+ if filename is None:
834
+ try:
835
+ # connect to data service
836
+ service = GDSDataService()
837
+ if service.berror == True:
838
+ return None
839
+ status, response = service.getLatestDataName(directory, suffix)
840
+ except ValueError:
841
+ print('Can not retrieve data from ' + directory)
842
+ return None
843
+ StringResult = DataBlock_pb2.StringResult()
844
+ if status == 200:
845
+ StringResult.ParseFromString(response)
846
+ if StringResult is not None:
847
+ filename = StringResult.name
848
+ if filename == '':
849
+ return None
850
+ else:
851
+ return None
852
+
853
+ # retrieve data from cached file
854
+ if cache:
855
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
856
+ if cache_file.is_file():
857
+ with open(cache_file, 'rb') as f:
858
+ data = pickle.load(f)
859
+ return data
860
+
861
+ # get data contents
862
+ try:
863
+ service = GDSDataService()
864
+ if service.berror == True:
865
+ return None
866
+ status, response = service.getData(directory, filename)
867
+ except ValueError:
868
+ print('Can not retrieve data ' + filename + ' from ' + directory)
869
+ return None
870
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
871
+ if status == 200:
872
+ ByteArrayResult.ParseFromString(response)
873
+ if ByteArrayResult is not None:
874
+ byteArray = ByteArrayResult.byteArray
875
+
876
+ # define head structure
877
+ head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
878
+ ('description', 'S100'),
879
+ ('level', 'f4'), ('levelDescription', 'S50'),
880
+ ('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
881
+ ('hour', 'i4'), ('minute', 'i4'), ('second', 'i4'),
882
+ ('Timezone', 'i4'), ('extent', 'S100')]
883
+
884
+ # read head information
885
+ head_info = np.frombuffer(byteArray[0:288], dtype=head_dtype)
886
+ ind = 288
887
+ # zwj add 20210830
888
+ if head_info.size==0:
889
+ return None
890
+ # read the number of stations
891
+ station_number = np.frombuffer(
892
+ byteArray[ind:(ind+4)], dtype='i4')[0]
893
+ ind += 4
894
+
895
+ # read the number of elements
896
+ element_number = np.frombuffer(
897
+ byteArray[ind:(ind+2)], dtype='i2')[0]
898
+ ind += 2
899
+
900
+ # construct record structure
901
+ element_type_map = {
902
+ 1: 'b1', 2: 'i2', 3: 'i4', 4: 'i8', 5: 'f4', 6: 'f8', 7: 'S'}
903
+ element_map = {}
904
+ for i in range(element_number):
905
+ element_id = str(
906
+ np.frombuffer(byteArray[ind:(ind+2)], dtype='i2')[0])
907
+ ind += 2
908
+ element_type = np.frombuffer(
909
+ byteArray[ind:(ind+2)], dtype='i2')[0]
910
+ ind += 2
911
+ element_map[element_id] = element_type_map[element_type]
912
+
913
+ # loop every station to retrieve record
914
+ record_head_dtype = [
915
+ ('ID', 'i4'), ('lon', 'f4'), ('lat', 'f4'), ('numb', 'i2')]
916
+ records = []
917
+ # zwj add 20210830
918
+ if station_number==0:
919
+ return None
920
+
921
+ for i in range(station_number):
922
+ record_head = np.frombuffer(
923
+ byteArray[ind:(ind+14)], dtype=record_head_dtype)
924
+ ind += 14
925
+ record = {
926
+ 'ID': record_head['ID'][0], 'lon': record_head['lon'][0],
927
+ 'lat': record_head['lat'][0]}
928
+ for j in range(record_head['numb'][0]): # the number of elements differs per record; missing values are not stored
929
+ element_id = str(
930
+ np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
931
+ ind += 2
932
+ element_type = element_map[element_id]
933
+ if element_type == 'S': # if the element type is string, we need to read the string length first
934
+ str_len = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
935
+ ind += 2
936
+ element_type = element_type + str(str_len)
937
+ element_len = int(element_type[1:])
938
+ record[element_id] = np.frombuffer(
939
+ byteArray[ind:(ind + element_len)],
940
+ dtype=element_type)[0]
941
+ ind += element_len
942
+ records += [record]
943
+
944
+ # convert to pandas data frame
945
+ records = pd.DataFrame(records)
946
+ records.set_index('ID', inplace=True)  # the bare call was a no-op without inplace/assignment
947
+
948
+ # get time
949
+ time = datetime(
950
+ head_info['year'][0], head_info['month'][0],
951
+ head_info['day'][0], head_info['hour'][0],
952
+ head_info['minute'][0], head_info['second'][0])
953
+ records['time'] = time
954
+
955
+ # change column name for common observation
956
+ cur_dict = {str(value): key for key, value in gds_element_id_dict.items()}
957
+
958
+ records.rename(columns=cur_dict,inplace=True)
959
+ # records.rename(columns={'3': 'Alt', '4': 'Grade', '5': 'Type', '21': 'Name',
960
+ # '201': 'Wind_angle', '203': 'Wind_speed', '205': 'Wind_angle_1m_avg', '207': 'Wind_speed_1m_avg',
961
+ # '209': 'Wind_angle_2m_avg', '211': 'Wind_speed_2m_avg', '213': 'Wind_angle_10m_avg', '215': 'Wind_speed_10m_avg',
962
+ # '217': 'Wind_angle_max', '219': 'Wind_speed_max', '221': 'Wind_angle_instant', '223': 'Wind_speed_instant',
963
+ # '225': 'Gust_angle', '227': 'Gust_speed', '229': 'Gust_angle_6h', '231': 'Gust_speed_6h',
964
+ # '233': 'Gust_angle_12h', '235': 'Gust_speed_12h', '237': 'Wind_power',
965
+ # '401': 'Sea_level_pressure', '403': 'Pressure_3h_trend', '405': 'Pressure_24h_trend',
966
+ # '407': 'Station_pressure', '409': 'Pressure_max', '411': 'Pressure_min', '413': 'Pressure',
967
+ # '415': 'Pressure_day_avg', '417': 'SLP_day_avg', '419': 'Hight', '421': 'Geopotential_hight',
968
+ # '601': 'Temp', '603': 'Temp_max', '605': 'Temp_min', '607': 'Temp_24h_trend',
969
+ # '609': 'Temp_24h_max', '611':'Temp_24h_min', '613': 'Temp_dav_avg',
970
+ # '801': 'Dewpoint', '803': 'Dewpoint_depression', '805': 'Relative_humidity',
971
+ # '807': 'Relative_humidity_min', '809': 'Relative_humidity_day_avg',
972
+ # '811': 'Water_vapor_pressure', '813': 'Water_vapor_pressure_day_avg',
973
+ # '1001': 'Rain', '1003': 'Rain_1h', '1005': 'Rain_3h', '1007': 'Rain_6h', '1009': 'Rain_12h', '1013': 'Rain_day',
974
+ # '1015': 'Rain_20-08', '1017': 'Rain_08-20', '1019': 'Rain_20-20', '1021': 'Rain_08-08',
975
+ # '1023': 'Evaporation', '1025': 'Evaporation_large', '1027': 'Precipitable_water',
976
+ # '1201': 'Vis_1min', '1203': 'Vis_10min', '1205': 'Vis_min', '1207': 'Vis_manual',
977
+ # '1401': 'Total_cloud_cover', '1403': 'Low_cloud_cover', '1405': 'Cloud_base_hight',
978
+ # '1407': 'Low_cloud', '1409': 'Middle_cloud', '1411': 'High_cloud',
979
+ # '1413': 'TCC_day_avg', '1415': 'LCC_day_avg', '1417': 'Cloud_cover', '1419': 'Cloud_type',
980
+ # '1601': 'Weather_current', '1603': 'Weather_past_1', '1605': 'Weather_past_2',
981
+ # '2001': 'Surface_temp', '2003': 'Surface_temp_max', '2005': 'Surface_temp_min'},
982
+ # inplace=True)
983
+
984
+ # drop all NaN columns
985
+ if dropna:
986
+ records = records.dropna(axis=1, how='all')
987
+
988
+ # cache records
989
+ if cache:
990
+ with open(cache_file, 'wb') as f:
991
+ pickle.dump(records, f, protocol=pickle.HIGHEST_PROTOCOL)
992
+
993
+ # return
994
+ print(filename)
995
+ return records
996
+ else:
997
+ return None
998
+ else:
999
+ return None
1000
+
1001
+
1002
+ def get_station_dataset(directory, filenames, allExists=True, pbar=False, **kargs):
1003
+ """
1004
+ Retrieve multiple station observation from MICAPS cassandra service.
1005
+
1006
+ Args:
1007
+ directory (string): the data directory on the service.
1008
+ filenames (list): the list of filenames.
1009
+ allExists (boolean): all files should exist, or return None.
1010
+ pbar (boolean): Show progress bar, default to False.
1011
+ **kargs: key arguments passed to get_station_data function.
1012
+ """
1013
+
1014
+ dataset = []
1015
+ if pbar:
1016
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
1017
+ else:
1018
+ tqdm_filenames = filenames
1019
+ for filename in tqdm_filenames:
1020
+ data = get_station_data(directory, filename=filename, **kargs)
1021
+ if data is not None:
1022
+ dataset.append(data)
1023
+ else:
1024
+ if allExists:
1025
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
1026
+ return None
1027
+
1028
+ return pd.concat(dataset)
1029
+
1030
+ # zwj 20210311
1031
+ def get_fy_awx_zwj(directory, filename=None, suffix="*.AWX", units='', cache=True):
1032
+ """
1033
+ Retrieve FY satellite cloud awx format file.
1034
+ The awx file format follows "气象卫星分发产品及其格式规范AWX2.1" (the FY satellite distribution products and format specification, AWX 2.1); see:
1035
+ http://satellite.nsmc.org.cn/PortalSite/StaticContent/DocumentDownload.aspx?TypeID=10
1036
+ :param directory: the data directory on the service
1037
+ :param filename: the data filename, if none, will be the latest file.
1038
+ :param suffix: the filename filter pattern which will be used to
1039
+ find the specified file.
1040
+ :param units: data units, default is ''.
1041
+ :param cache: cache retrieved data to local directory, default is True.
1042
+ :return: the raw AWX byte stream (bytes); decoding is left to the caller.
1043
+ :Examples:
1044
+ >>> directory = "SATELLITE/FY2E/L1/IR1/EQUAL"
1045
+ >>> data = get_fy_awx_zwj(directory)
1046
+ """
1047
+
1048
+ service = GDSDataService()
1049
+ if service.berror == True:
1050
+ return None
1051
+ status, response = service.getData(directory, filename)
1052
+
1053
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1054
+ if status == 200:
1055
+ ByteArrayResult.ParseFromString(response)
1056
+ if ByteArrayResult is not None:
1057
+ byteArray = ByteArrayResult.byteArray
1058
+ if byteArray == b'':
1059
+ print('There is no data ' + filename + ' in ' + directory)
1060
+ return None
1061
+
1062
+ return byteArray
1063
+
1064
+ def get_stadata_from_mdfs(filename=None):
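+ """
+ Read a MICAPS MDFS station-data file from local disk and return a pandas DataFrame,
+ using the same record layout as get_station_data. The path below is hypothetical:
+ >>> records = get_stadata_from_mdfs("/data/mdfs/20210830080000.000")
+ """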
1065
+
1066
+ if not os.path.exists(filename):
1067
+ print(filename + ' :file not exist!')
1068
+ return None
1069
+
1070
+ fin = open(filename,'rb')
1071
+ byteArray = fin.read()
1072
+ # byteArray = ByteArrayResult.byteArray
1073
+
1074
+ # define head structure
1075
+ head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
1076
+ ('description', 'S100'),
1077
+ ('level', 'f4'), ('levelDescription', 'S50'),
1078
+ ('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
1079
+ ('hour', 'i4'), ('minute', 'i4'), ('second', 'i4'),
1080
+ ('Timezone', 'i4'), ('extent', 'S100')]
1081
+
1082
+ # read head information
1083
+ head_info = np.frombuffer(byteArray[0:288], dtype=head_dtype)
1084
+ ind = 288
1085
+ # zwj add 20210830
1086
+ if head_info.size==0:
1087
+ return None
1088
+ # read the number of stations
1089
+ station_number = np.frombuffer(
1090
+ byteArray[ind:(ind+4)], dtype='i4')[0]
1091
+ ind += 4
1092
+
1093
+ # read the number of elements
1094
+ element_number = np.frombuffer(
1095
+ byteArray[ind:(ind+2)], dtype='i2')[0]
1096
+ ind += 2
1097
+
1098
+ # construct record structure
1099
+ element_type_map = {
1100
+ 1: 'b1', 2: 'i2', 3: 'i4', 4: 'i8', 5: 'f4', 6: 'f8', 7: 'S'}
1101
+ element_map = {}
1102
+ for i in range(element_number):
1103
+ element_id = str(
1104
+ np.frombuffer(byteArray[ind:(ind+2)], dtype='i2')[0])
1105
+ ind += 2
1106
+ element_type = np.frombuffer(
1107
+ byteArray[ind:(ind+2)], dtype='i2')[0]
1108
+ ind += 2
1109
+ element_map[element_id] = element_type_map[element_type]
1110
+
1111
+ # loop every station to retrieve record
1112
+ record_head_dtype = [
1113
+ ('ID', 'i4'), ('lon', 'f4'), ('lat', 'f4'), ('numb', 'i2')]
1114
+ records = []
1115
+ # zwj add 20210830
1116
+ if station_number==0:
1117
+ return None
1118
+
1119
+ for i in range(station_number):
1120
+ record_head = np.frombuffer(
1121
+ byteArray[ind:(ind+14)], dtype=record_head_dtype)
1122
+ ind += 14
1123
+ record = {
1124
+ 'ID': record_head['ID'][0], 'lon': record_head['lon'][0],
1125
+ 'lat': record_head['lat'][0]}
1126
+ for j in range(record_head['numb'][0]): # the number of elements differs per record; missing values are not stored
1127
+ element_id = str(
1128
+ np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
1129
+ ind += 2
1130
+ element_type = element_map[element_id]
1131
+ if element_type == 'S': # if the element type is string, we need to read the string length first
1132
+ str_len = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
1133
+ ind += 2
1134
+ element_type = element_type + str(str_len)
1135
+ element_len = int(element_type[1:])
1136
+ record[element_id] = np.frombuffer(
1137
+ byteArray[ind:(ind + element_len)],
1138
+ dtype=element_type)[0]
1139
+ ind += element_len
1140
+ records += [record]
1141
+
1142
+ # convert to pandas data frame
1143
+ records = pd.DataFrame(records)
1144
+ records.set_index('ID', inplace=True)  # the bare call was a no-op without inplace/assignment
1145
+
1146
+ # get time
1147
+ time = datetime(
1148
+ head_info['year'][0], head_info['month'][0],
1149
+ head_info['day'][0], head_info['hour'][0],
1150
+ head_info['minute'][0], head_info['second'][0])
1151
+ records['time'] = time
1152
+
1153
+ # change column name for common observation
1154
+ cur_dict = {str(value): key for key, value in gds_element_id_dict.items()}
1155
+
1156
+ records.rename(columns=cur_dict,inplace=True)
1157
+
1158
+ # drop all NaN columns
1159
+ # if dropna:
1160
+ # records = records.dropna(axis=1, how='all')
1161
+
1162
+ return records
1163
+
1164
+ def get_fy_awx(directory, filename=None, suffix="*.AWX", units='', cache=True):
1165
+ """
1166
+ Retrieve FY satellite cloud awx format file.
1167
+ The awx file format follows "气象卫星分发产品及其格式规范AWX2.1" (the FY satellite distribution products and format specification, AWX 2.1); see:
1168
+ http://satellite.nsmc.org.cn/PortalSite/StaticContent/DocumentDownload.aspx?TypeID=10
1169
+ :param directory: the data directory on the service
1170
+ :param filename: the data filename, if none, will be the latest file.
1171
+ :param suffix: the filename filter pattern which will be used to
1172
+ find the specified file.
1173
+ :param units: data units, default is ''.
1174
+ :param cache: cache retrieved data to local directory, default is True.
1175
+ :return: satellite information and data.
1176
+ :Examples:
1177
+ >>> directory = "SATELLITE/FY2E/L1/IR1/EQUAL"
1178
+ >>> data = get_fy_awx(directory)
1179
+ """
1180
+
1181
+ # get data file name
1182
+ if filename is None:
1183
+ try:
1184
+ # connect to data service
1185
+ service = GDSDataService()
1186
+ if service.berror == True:
1187
+ return None
1188
+ status, response = service.getLatestDataName(directory, suffix)
1189
+ except ValueError:
1190
+ print('Can not retrieve data from ' + directory)
1191
+ return None
1192
+ StringResult = DataBlock_pb2.StringResult()
1193
+ if status == 200:
1194
+ StringResult.ParseFromString(response)
1195
+ if StringResult is not None:
1196
+ filename = StringResult.name
1197
+ if filename == '':
1198
+ return None
1199
+ else:
1200
+ return None
1201
+
1202
+ # retrieve data from cached file
1203
+ if cache:
1204
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
1205
+ if cache_file.is_file():
1206
+ with open(cache_file, 'rb') as f:
1207
+ data = pickle.load(f)
1208
+ return data
1209
+
1210
+ # get data contents
1211
+ try:
1212
+ service = GDSDataService()
1213
+ if service.berror == True:
1214
+ return None
1215
+ status, response = service.getData(directory, filename)
1216
+ except ValueError:
1217
+ print('Can not retrieve data ' + filename + ' from ' + directory)
1218
+ return None
1219
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1220
+ if status == 200:
1221
+ ByteArrayResult.ParseFromString(response)
1222
+ if ByteArrayResult is not None:
1223
+ byteArray = ByteArrayResult.byteArray
1224
+ if byteArray == b'':
1225
+ print('There is no data ' + filename + ' in ' + directory)
1226
+ return None
1227
+ CODE1 = 'B'
1228
+ CODE2 = 'H'
1229
+ INT1 = 'B'
1230
+ INT2 = 'H'
1231
+ INT4 = 'I'
1232
+ REAL4 = 'f'
1233
+ REAL8 = 'd'
1234
+ SINT1 = 'b'
1235
+ SINT2 = 'h'
1236
+ SINT4 = 'i'
1237
+ # the first class file head
1238
+ head1_dtype = [
1239
+ ('SAT96', '12s'), # SAT96 filename
1240
+ ('byteSequence', SINT2), # byte order of integers: 0 = little-endian, otherwise big-endian
1241
+ ('firstClassHeadLength', SINT2), # length of the first-class file head
1242
+ ('secondClassHeadLength', SINT2), # length of the second-class file head
1243
+ ('padDataLength', SINT2), # length of the padding segment
1244
+ ('recordLength', SINT2), # record length (bytes); image products: record length = image width; grid products: record length = grid columns x grid value size
1245
+ ('headRecordNumber', SINT2), # number of records occupied by the file head (first/second-class heads plus padding/extension)
1246
+ ('dataRecordNumber', SINT2), # number of records occupied by the product data
1247
+ ('productCategory', SINT2), # product category: 1 geostationary, 2 polar-orbiting, 3 gridded quantitative, 4 discrete, 5 graphics and analysis
1248
+ ('compressMethod', SINT2), # compression: 0 none, 1 run-length, 2 LZW, 3 special
1249
+ ('formatString', '8s'), # format description string, 'SAT2004'
1250
+ ('qualityFlag', SINT2)] # data quality flag: 1 fully reliable, 2 basically reliable, 3 has missing values but usable, 4 unusable
1251
+ # head1_info = np.frombuffer(byteArray[0:40], dtype=head1_dtype)
1252
+ head1_info = _unpack_from_buf(byteArray,0, head1_dtype) # zwj modified 20210311
1253
+ head1_info['SAT96'] = 'SAT2004 '
1254
+ head1_info['formatString'] = 'SAT2004 '
1255
+
1256
+ ind = 40
1257
+
1258
+ if head1_info['productCategory']:
1259
+ # the second class file head
1260
+ head2_dtype = [
1261
+ ('satelliteName', '8s'), # 卫星名
1262
+ ('year', SINT2), ('month', SINT2),
1263
+ ('day', SINT2), ('hour', SINT2),
1264
+ ('minute', SINT2),
1265
+ ('channel', SINT2), # channel number: 1 infrared, 2 water vapor, 3 split-window infrared, 4 visible, 5 mid-infrared, 6 reserved
1266
+ ('flagOfProjection', SINT2), # projection: 0 unprojected, 1 Lambert, 2 Mercator, 3 polar stereographic, 4 lat/lon, 5 equal-area
1267
+ ('widthOfImage', SINT2),
1268
+ ('heightOfImage', SINT2),
1269
+ ('scanLineNumberOfImageTopLeft', SINT2),
1270
+ ('pixelNumberOfImageTopLeft', SINT2),
1271
+ ('sampleRatio', SINT2),
1272
+ ('latitudeOfNorth', SINT2),
1273
+ ('latitudeOfSouth', SINT2),
1274
+ ('longitudeOfWest', SINT2),
1275
+ ('longitudeOfEast', SINT2),
1276
+ ('centerLatitudeOfProjection', SINT2),
1277
+ ('centerLongitudeOfProjection', SINT2),
1278
+ ('standardLatitude1', SINT2),
1279
+ ('standardLatitude2', SINT2),
1280
+ ('horizontalResolution', SINT2),
1281
+ ('verticalResolution', SINT2),
1282
+ ('overlapFlagGeoGrid', SINT2),
1283
+ ('overlapValueGeoGrid', SINT2),
1284
+ ('dataLengthOfColorTable', SINT2),
1285
+ ('dataLengthOfCalibration', SINT2),
1286
+ ('dataLengthOfGeolocation', SINT2),
1287
+ ('reserved', SINT2)]
1288
+
1289
+ # head2_info = np.frombuffer(byteArray[ind:(ind+64)], dtype=head2_dtype)
1290
+ head2_info = _unpack_from_buf(byteArray,ind, head2_dtype) # zwj modified 20210311
1291
+ head2_info['satelliteName'] = 'SAT2004 '
1292
+
1293
+
1294
+ ind += 64
1295
+
1296
+ # color table
1297
+ if head2_info['dataLengthOfColorTable'] != 0:
1298
+ table_R = np.frombuffer(byteArray[ind:(ind + 256)], dtype='u1')
1299
+ ind += 256
1300
+ table_G = np.frombuffer(byteArray[ind:(ind + 256)], dtype='u1')
1301
+ ind += 256
1302
+ table_B = np.frombuffer(byteArray[ind:(ind + 256)], dtype='u1')
1303
+ ind += 256
1304
+
1305
+ # calibration table
1306
+ calibration_table = None
1307
+ if head2_info['dataLengthOfCalibration'] != 0:
1308
+ calibration_table = np.frombuffer(byteArray[ind:(ind + 2048)], dtype='i2')
1309
+ calibration_table = calibration_table * 0.01
1310
+ if (np.array_equal(calibration_table[0::4], calibration_table[1::4]) and
1311
+ np.array_equal(calibration_table[0::4], calibration_table[2::4]) and
1312
+ np.array_equal(calibration_table[0::4], calibration_table[3::4])):
1313
+ # This is a trick, refer to http://bbs.06climate.com/forum.php?mod=viewthread&tid=89296
1314
+ calibration_table = calibration_table[0::4]
1315
+ ind += 2048
1316
+
1317
+ # geolocation table
1318
+ if head2_info['dataLengthOfGeolocation'] != 0:
1319
+ geolocation_dtype = [
1320
+ ('coordinate', SINT2),
1321
+ ('source', SINT2),
1322
+ ('delta', SINT2),
1323
+ ('left_top_lat', SINT2),
1324
+ ('left_top_lon', SINT2),
1325
+ ('horizontalNumber', SINT2),
1326
+ ('verticalNumber', SINT2),
1327
+ ('reserved', SINT2)]
1328
+ # geolocation_info = np.frombuffer(byteArray[ind:(ind+16)], dtype=geolocation_dtype)
1329
+ geolocation_info = _unpack_from_buf(byteArray, ind, geolocation_dtype) # zwj modified 20210311
1330
+ ind += 16
1331
+ geolocation_length = geolocation_info['horizontalNumber'] * geolocation_info['verticalNumber'] * 2
1332
+ geolocation_table = np.frombuffer(byteArray[ind:(ind+geolocation_length)], dtype='i2')
1333
+ ind += geolocation_length
1334
+
1335
+ # pad field
1336
+ pad_field = np.frombuffer(byteArray[ind:(ind+head1_info['padDataLength'])], dtype='u1')
1337
+ ind += head1_info['padDataLength']
1338
+
1339
+ # retrieve data records
1340
+ data_len = (int(head1_info['dataRecordNumber'])*
1341
+ head1_info['recordLength'])
1342
+ data = np.frombuffer(byteArray[ind:(ind + data_len)], dtype='u1', count=data_len)
1343
+ if calibration_table is not None:
1344
+ data = calibration_table[data]
1345
+ data.shape = (head1_info['dataRecordNumber'], head1_info['recordLength'])
1346
+
1347
+ # construct longitude and latitude coordinates
1348
+ lat = (
1349
+ head2_info['latitudeOfNorth']/100. -
1350
+ np.arange(head2_info['heightOfImage'])*head2_info['verticalResolution']/100.)
1351
+ lon = (
1352
+ head2_info['longitudeOfWest']/100. +
1353
+ np.arange(head2_info['widthOfImage'])*head2_info['horizontalResolution']/100.)
1354
+
1355
+ # construct time
1356
+ time = datetime(
1357
+ head2_info['year'], head2_info['month'],
1358
+ head2_info['day'], head2_info['hour'], head2_info['minute'])
1359
+ time = np.array([time], dtype='datetime64[ms]')
1360
+
1361
+ # define coordinates
1362
+ time_coord = ('time', time)
1363
+ lon_coord = ('lon', lon, {
1364
+ 'long_name':'longitude', 'units':'degrees_east', '_CoordinateAxisType':'Lon'})
1365
+ lat_coord = ('lat', lat, {
1366
+ 'long_name':'latitude', 'units':'degrees_north', '_CoordinateAxisType':'Lat'})
1367
+ channel_coord = ('channel', head2_info['channel'], {'long_name':'channel', 'units':''})
1368
+
1369
+ # create xarray
1370
+ data = data[np.newaxis, ...]
1371
+ varattrs = {
1372
+ 'productCategory': head1_info['productCategory'], # product category: 1 geostationary, 2 polar-orbiting, 3 gridded, 4 discrete, 5 graphics and analysis
1373
+ 'formatString': head1_info['formatString'], # product format name
1374
+ 'qualityFlag': head1_info['qualityFlag'], # product quality flag
1375
+ 'satelliteName': head2_info['satelliteName'], # satellite name
1376
+ 'flagOfProjection': head2_info['flagOfProjection'], # projection: 0 unprojected, 1 Lambert, 2 Mercator, 3 polar stereographic, 4 lat/lon, 5 equal-area
1377
+ 'units': units}
1378
+ data = xr.Dataset({
1379
+ 'image':(['time', 'lat', 'lon'], data, varattrs)},
1380
+ coords={ 'time':time_coord, 'lat':lat_coord, 'lon':lon_coord})
1381
+
1382
+ # add attributes
1383
+ data.attrs['Conventions'] = "CF-1.6"
1384
+ data.attrs['Origin'] = 'MICAPS Cassandra Server'
1385
+
1386
+ # cache data
1387
+ if cache:
1388
+ with open(cache_file, 'wb') as f:
1389
+ pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
1390
+
1391
+ # return
1392
+ return data
1393
+ else:
1394
+ print("The productCategory is not supported.")
1395
+ return None
1396
+ else:
1397
+ return None
1398
+ else:
1399
+ return None
1400
+
1401
+
1402
+ def get_fy_awxs(directory, filenames, allExists=True, pbar=False, **kargs):
1403
+ """
1404
+ Retrieve multiple satellite images from MICAPS cassandra service.
1405
+
1406
+ Args:
1407
+ directory (string): the data directory on the service.
1408
+ filenames (list): the list of filenames.
1409
+ allExists (boolean): all files should exist, or return None.
1410
+ pbar (boolean): Show progress bar, default to False.
1411
+ **kargs: key arguments passed to get_fy_awx function.
1412
+ """
1413
+
1414
+ dataset = []
1415
+ if pbar:
1416
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
1417
+ else:
1418
+ tqdm_filenames = filenames
1419
+ for filename in tqdm_filenames:
1420
+ data = get_fy_awx(directory, filename=filename, **kargs)
1421
+ if data:
1422
+ dataset.append(data)
1423
+ else:
1424
+ if allExists:
1425
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
1426
+ return None
1427
+
1428
+ return xr.concat(dataset, dim='time')
1429
+
1430
+
1431
+ def get_radar_mosaic(directory, filename=None, suffix="*.BIN", cache=True):
1432
+ """
1433
+ Read and process the national radar echo mosaic data produced by the CMA CRaMS system.
1434
+ :param directory: the data directory on the service
1435
+ :param filename: the data filename, if none, will be the latest file.
1436
+ :param suffix: the filename filter pattern which will be used to
1437
+ find the specified file.
1438
+ :param cache: cache retrieved data to local directory, default is True.
1439
+ :return: xarray object.
1440
+ :Example:
1441
+ >>> data = get_radar_mosaic("RADARMOSAIC/CREF/")
1442
+ """
1443
+
1444
+ # get data file name
1445
+ if filename is None:
1446
+ try:
1447
+ # connect to data service
1448
+ service = GDSDataService()
1449
+ if service.berror == True:
1450
+ return None
1451
+ status, response = service.getLatestDataName(directory, suffix)
1452
+ except ValueError:
1453
+ print('Can not retrieve data from ' + directory)
1454
+ return None
1455
+ StringResult = DataBlock_pb2.StringResult()
1456
+ if status == 200:
1457
+ StringResult.ParseFromString(response)
1458
+ if StringResult is not None:
1459
+ filename = StringResult.name
1460
+ if filename == '':
1461
+ return None
1462
+ else:
1463
+ return None
1464
+
1465
+ # retrieve data from cached file
1466
+ if cache:
1467
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
1468
+ if cache_file.is_file():
1469
+ with open(cache_file, 'rb') as f:
1470
+ data = pickle.load(f)
1471
+ return data
1472
+
1473
+ # get data contents
1474
+ try:
1475
+ service = GDSDataService()
1476
+ if service.berror == True:
1477
+ return None
1478
+ status, response = service.getData(directory, filename)
1479
+ except ValueError:
1480
+ print('Can not retrieve data ' + filename + ' from ' + directory)
1481
+ return None
1482
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1483
+ if status == 200:
1484
+ ByteArrayResult.ParseFromString(response)
1485
+ if ByteArrayResult is not None:
1486
+ byteArray = ByteArrayResult.byteArray
1487
+ if byteArray == b'':
1488
+ print('There is no data ' + filename + ' in ' + directory)
1489
+ return None
1490
+ return byteArray
1491
+ # # define head structure
1492
+ # head_dtype = [
1493
+ # ('description', 'S128'),
1494
+ # # product name, QREF = basic reflectivity, CREF = composite reflectivity,
1495
+ # # VIL = vertically integrated liquid, OHP = one-hour precipitation
1496
+ # ('name', 'S32'),
1497
+ # ('organization', 'S16'),
1498
+ # ('grid_flag', 'u2'), # lat/lon grid data flag, fixed value 19532
1499
+ # ('data_byte', 'i2'), # bytes per data unit, fixed value 2
1500
+ # ('slat', 'f4'), # southern latitude of the data area (degrees)
1501
+ # ('wlon', 'f4'), # western longitude of the data area (degrees)
1502
+ # ('nlat', 'f4'), # northern latitude of the data area (degrees)
1503
+ # ('elon', 'f4'), # eastern longitude of the data area (degrees)
1504
+ # ('clat', 'f4'), # center latitude of the data area (degrees)
1505
+ # ('clon', 'f4'), # center longitude of the data area (degrees)
1506
+ # ('rows', 'i4'), # number of rows in the data area
1507
+ # ('cols', 'i4'), # number of columns per row
1508
+ # ('dlat', 'f4'), # latitudinal resolution (degrees)
1509
+ # ('dlon', 'f4'), # longitudinal resolution (degrees)
1510
+ # ('nodata', 'f4'), # value encoding areas with no data
1511
+ # ('levelbybtes', 'i4'), # bytes per data level
1512
+ # ('levelnum', 'i2'), # number of data levels
1513
+ # ('amp', 'i2'), # value amplification factor
1514
+ # ('compmode', 'i2'), # 1 if the data are stored compressed, otherwise 0
1515
+ # ('dates', 'u2'), # observation time, days since 1970-01-01
1516
+ # ('seconds', 'i4'), # observation time, seconds within the day
1517
+ # ('min_value', 'i2'), # minimum amplified data value
1518
+ # ('max_value', 'i2'), # maximum amplified data value
1519
+ # ('reserved', 'i2', 6) # reserved bytes
1520
+ # ]
1521
+
1522
+ # # read head information
1523
+ # head_info = np.frombuffer(byteArray[0:256], dtype=head_dtype)
1524
+ # ind = 256
1525
+
1526
+ # # get data information
1527
+ # varname = head_info['name'][0].decode("utf-8", 'ignore').rsplit('\x00')[0]
1528
+ # longname = {'CREF': 'Composite Reflectivity', 'QREF': 'Basic Reflectivity',
1529
+ # 'VIL': 'Vertically Integrated Liquid', 'OHP': 'One Hour Precipitation'}
1530
+ # units = head_info['organization'][0].decode("utf-8", 'ignore').rsplit('\x00')[0]
1531
+ # amp = head_info['amp'][0]
1532
+
1533
+ # # define data variable
1534
+ # rows = head_info['rows'][0]
1535
+ # cols = head_info['cols'][0]
1536
+ # dlat = head_info['dlat'][0]
1537
+ # dlon = head_info['dlon'][0]
1538
+ # data = np.full(rows*cols, -9999, dtype=np.int32)
1539
+
1540
+ # # put data into array
1541
+ # while ind < len(byteArray):
1542
+ # irow = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
1543
+ # ind += 2
1544
+ # icol = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
1545
+ # ind += 2
1546
+ # if irow == -1 or icol == -1:
1547
+ # break
1548
+ # nrec = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
1549
+ # ind += 2
1550
+ # recd = np.frombuffer(
1551
+ # byteArray[ind:(ind + 2*nrec)], dtype='i2', count=nrec)
1552
+ # ind += 2*nrec
1553
+ # position = (irow-1)*cols+icol-1
1554
+ # data[position:(position+nrec)] = recd
1555
+
1556
+ # # reshape data
1557
+ # data.shape = (rows, cols)
1558
+
1559
+ # # deal missing data and restore values
1560
+ # data = data.astype(np.float32)
1561
+ # data[data < 0] = np.nan
1562
+ # data /= amp
1563
+
1564
+ # # set longitude and latitude coordinates
1565
+ # lat = head_info['nlat'][0] - np.arange(rows)*dlat - dlat/2.0
1566
+ # lon = head_info['wlon'][0] + np.arange(cols)*dlon - dlon/2.0
1567
+
1568
+ # # reverse latitude axis
1569
+ # data = np.flip(data, 0)
1570
+ # lat = lat[::-1]
1571
+
1572
+ # # set time coordinates
1573
+ # time = datetime(1970, 1, 1) + timedelta(
1574
+ # days=head_info['dates'][0].astype(np.float64),
1575
+ # seconds=head_info['seconds'][0].astype(np.float64))
1576
+ # time = np.array([time], dtype='datetime64[m]')
1577
+ # data = np.expand_dims(data, axis=0)
1578
+
1579
+ # # define coordinates
1580
+ # time_coord = ('time', time)
1581
+ # lon_coord = ('lon', lon, {
1582
+ # 'long_name':'longitude', 'units':'degrees_east', '_CoordinateAxisType':'Lon'})
1583
+ # lat_coord = ('lat', lat, {
1584
+ # 'long_name':'latitude', 'units':'degrees_north', '_CoordinateAxisType':'Lat'})
1585
+
1586
+ # # create xarray
1587
+ # varattrs = {'long_name': longname.get(varname, 'radar mosaic'),
1588
+ # 'short_name': varname, 'units': units}
1589
+ # data = xr.Dataset({'data':(['time', 'lat', 'lon'], data, varattrs)},
1590
+ # coords={'time':time_coord, 'lat':lat_coord, 'lon':lon_coord})
1591
+
1592
+ # # add attributes
1593
+ # data.attrs['Conventions'] = "CF-1.6"
1594
+ # data.attrs['Origin'] = 'MICAPS Cassandra Server'
1595
+
1596
+ # # cache data
1597
+ # if cache:
1598
+ # with open(cache_file, 'wb') as f:
1599
+ # pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
1600
+
1601
+ # # return
1602
+ # return data
1603
+ else:
1604
+ return None
1605
+ else:
1606
+ return None
1607
+
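+ # Usage sketch (not part of the original module): the mosaic retrieval function
+ # above returns the raw byteArray rather than a decoded xarray Dataset; the
+ # decoding logic is kept only as comments. Assuming the commented-out head_dtype
+ # definition above is uncommented, the 256-byte header could be inspected like
+ # this (the directory name is hypothetical):
+ #
+ # buf = get_radar_mosaic("RADARMOSAIC/CREF")
+ # if buf is not None:
+ #     head = np.frombuffer(buf[0:256], dtype=head_dtype)
+ #     rows, cols = head['rows'][0], head['cols'][0]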
1608
+ # 朱文剑: retrieve standard-format radar base-data products
1609
+ def get_radar_fmt(directory, filename=None, suffix="*.BZ2", cache=True):
1610
+ """
1611
+ This function reads radar base data in the standard (FMT) format.
1612
+ :param directory: the data directory on the service
1613
+ :param filename: the data filename, if none, will be the latest file.
1614
+ :param suffix: the filename filter pattern which will be used to
1615
+ find the specified file.
1616
+ :param cache: cache retrieved data to local directory, default is True.
1617
+ :return: bytes, the raw file content.
1618
+ :Example:
1619
+ >>> data = get_radar_fmt("SINGLERADAR/ARCHIVES/PRE_QC/北京大兴")
1620
+ """
1621
+
1622
+ # get data file name
1623
+ if filename is None:
1624
+ try:
1625
+ # connect to data service
1626
+ service = GDSDataService()
1627
+ if service.berror == True:
1628
+ return None
1629
+ status, response = service.getLatestDataName(directory, suffix)
1630
+ except ValueError:
1631
+ print('Cannot retrieve data from ' + directory)
1632
+ return None
1633
+ StringResult = DataBlock_pb2.StringResult()
1634
+ if status == 200:
1635
+ StringResult.ParseFromString(response)
1636
+ if StringResult is not None:
1637
+ filename = StringResult.name
1638
+ if filename == '':
1639
+ return None
1640
+ else:
1641
+ return None
1642
+
1643
+ # retrieve data from cached file
1644
+ if cache:
1645
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
1646
+ if cache_file.is_file():
1647
+ with open(cache_file, 'rb') as f:
1648
+ data = pickle.load(f)
1649
+ return data
1650
+
1651
+ # get data contents
1652
+ try:
1653
+ service = GDSDataService()
1654
+ if service.berror == True:
1655
+ return None
1656
+ status, response = service.getData(directory, filename)
1657
+ except ValueError:
1658
+ print('Cannot retrieve data ' + filename + ' from ' + directory)
1659
+ return None
1660
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1661
+ if status == 200:
1662
+ ByteArrayResult.ParseFromString(response)
1663
+ if ByteArrayResult is not None:
1664
+ byteArray = ByteArrayResult.byteArray
1665
+ if byteArray == b'':
1666
+ print('There is no data ' + filename + ' in ' + directory)
1667
+ return None
1668
+
1669
+ return byteArray
1670
+
1671
+ else:
1672
+ return None
1673
+ else:
1674
+ return None
1675
+
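+ # Usage sketch (assumption, not part of the original module): the default
+ # suffix "*.BZ2" suggests the bytes returned by get_radar_fmt are usually a
+ # BZ2-compressed standard-format base-data file, so a caller would typically
+ # decompress before decoding (bz2 is already imported at module level):
+ #
+ # raw = get_radar_fmt("SINGLERADAR/ARCHIVES/PRE_QC/北京大兴")
+ # if raw is not None:
+ #     content = bz2.decompress(raw) if raw[:3] == b'BZh' else raw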
1676
+ def get_radar_mosaics(directory, filenames, allExists=True, pbar=False, **kargs):
1677
+ """
1678
+ Retrieve multiple radar mosaics from MICAPS cassandra service.
1679
+
1680
+ Args:
1681
+ directory (string): the data directory on the service.
1682
+ filenames (list): the list of filenames.
1683
+ allExists (boolean): all files should exist, or return None.
1684
+ pbar (boolean): Show progress bar, default to False.
1685
+ **kargs: key arguments passed to the get_radar_mosaic function.
1686
+ """
1687
+
1688
+ dataset = []
1689
+ if pbar:
1690
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
1691
+ else:
1692
+ tqdm_filenames = filenames
1693
+ for filename in tqdm_filenames:
1694
+ data = get_radar_mosaic(directory, filename=filename, **kargs)
1695
+ if data is not None:
1696
+ dataset.append(data)
1697
+ else:
1698
+ if allExists:
1699
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
1700
+ return None
1701
+
1702
+ return xr.concat(dataset, dim='time')
1703
+
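+ # Usage sketch (hypothetical filenames, not part of the original module):
+ # retrieve several mosaic times at once and stack them along the time axis.
+ #
+ # files = ["ACHN_CREF_20210311_0000.BIN", "ACHN_CREF_20210311_0010.BIN"]
+ # mosaics = get_radar_mosaics("RADARMOSAIC/CREF", files, pbar=True)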
1704
+
1705
+ def get_tlogp(directory, filename=None, suffix="*.000", cache=True):
1706
+ """
1707
+ This function reads TLOGP data from the MICAPS server; the file format is the same as the MICAPS type-5 format.
1708
+ :param directory: the data directory on the service
1709
+ :param filename: the data filename, if none, will be the latest file.
1710
+ :param suffix: the filename filter pattern which will be used to
1711
+ find the specified file.
1712
+ :param cache: cache retrieved data to local directory, default is True.
1713
+ :return: pandas DataFrame object.
1714
+ >>> data = get_tlogp("UPPER_AIR/TLOGP/")
1715
+ """
1716
+
1717
+ # get data file name
1718
+ if filename is None:
1719
+ try:
1720
+ # connect to data service
1721
+ service = GDSDataService()
1722
+ if service.berror == True:
1723
+ return None
1724
+ status, response = service.getLatestDataName(directory, suffix)
1725
+ except ValueError:
1726
+ print('Cannot retrieve data from ' + directory)
1727
+ return None
1728
+ StringResult = DataBlock_pb2.StringResult()
1729
+ if status == 200:
1730
+ StringResult.ParseFromString(response)
1731
+ if StringResult is not None:
1732
+ filename = StringResult.name
1733
+ if filename == '':
1734
+ return None
1735
+ else:
1736
+ return None
1737
+
1738
+ # retrieve data from cached file
1739
+ if cache:
1740
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
1741
+ if cache_file.is_file():
1742
+ with open(cache_file, 'rb') as f:
1743
+ records = pickle.load(f)
1744
+ return records
1745
+
1746
+ # get data contents
1747
+ try:
1748
+ service = GDSDataService()
1749
+ if service.berror == True:
1750
+ return None
1751
+ status, response = service.getData(directory, filename)
1752
+ except ValueError:
1753
+ print('Cannot retrieve data ' + filename + ' from ' + directory)
1754
+ return None
1755
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1756
+ if status == 200:
1757
+ ByteArrayResult.ParseFromString(response)
1758
+ if ByteArrayResult is not None:
1759
+ byteArray = ByteArrayResult.byteArray
1760
+ if byteArray == b'':
1761
+ print('There is no data ' + filename + ' in ' + directory)
1762
+ return None
1763
+
1764
+ # decode bytes to string
1765
+ txt = byteArray.decode("utf-8")
1766
+ txt = list(filter(None, re.split(' |\n', txt)))
1767
+
1768
+ # observation date and time
1769
+ if len(txt[3]) < 4:
1770
+ year = int(txt[3]) + 2000
1771
+ else:
1772
+ year = int(txt[3])
1773
+ month = int(txt[4])
1774
+ day = int(txt[5])
1775
+ hour = int(txt[6])
1776
+ time = datetime(year, month, day, hour)
1777
+
1778
+ # the number of records
1779
+ number = int(txt[7])
1780
+ if number < 1:
1781
+ return None
1782
+
1783
+ # cut the data
1784
+ txt = txt[8:]
1785
+
1786
+ # put the data into dictionary
1787
+ index = 0
1788
+ records = []
1789
+ while index < len(txt):
1790
+ # get the record information
1791
+ ID = txt[index].strip()
1792
+ lon = float(txt[index+1])
1793
+ lat = float(txt[index+2])
1794
+ alt = float(txt[index+3])
1795
+ number = int(int(txt[index+4])/6)
1796
+ index += 5
1797
+
1798
+ # get the sounding records
1799
+ for i in range(number):
1800
+ record = {
1801
+ 'ID': ID, 'lon': lon, 'lat': lat, 'alt': alt,
1802
+ 'time': time,
1803
+ 'p': float(txt[index]), 'h': float(txt[index+1]),
1804
+ 't': float(txt[index+2]), 'td': float(txt[index+3]),
1805
+ 'wind_angle': float(txt[index+4]),
1806
+ 'wind_speed': float(txt[index+5])}
1807
+ records.append(record)
1808
+ index += 6
1809
+
1810
+ # transform to pandas data frame
1811
+ records = pd.DataFrame(records)
1812
+ records.set_index('ID', inplace=True)
1813
+
1814
+ # cache data
1815
+ if cache:
1816
+ with open(cache_file, 'wb') as f:
1817
+ pickle.dump(records, f, protocol=pickle.HIGHEST_PROTOCOL)
1818
+
1819
+ # return
1820
+ return records
1821
+ else:
1822
+ return None
1823
+ else:
1824
+ return None
1825
+
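+ # Usage sketch (not part of the original module): each row of the returned
+ # DataFrame is one sounding level, indexed by station ID, with columns
+ # lon, lat, alt, time, p, h, t, td, wind_angle and wind_speed. The station
+ # ID below is hypothetical.
+ #
+ # records = get_tlogp("UPPER_AIR/TLOGP/")
+ # if records is not None:
+ #     profile = records.loc['54511'].sort_values('p', ascending=False)
+ #     print(profile[['p', 'h', 't', 'td']])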
1826
+
1827
+ def get_tlogps(directory, filenames, allExists=True, pbar=False, **kargs):
1828
+ """
1829
+ Retrieve multiple TLOGP observations from MICAPS cassandra service.
1830
+
1831
+ Args:
1832
+ directory (string): the data directory on the service.
1833
+ filenames (list): the list of filenames.
1834
+ allExists (boolean): all files should exist, or return None.
1835
+ pbar (boolean): Show progress bar, default to False.
1836
+ **kargs: key arguments passed to the get_tlogp function.
1837
+ """
1838
+
1839
+ dataset = []
1840
+ if pbar:
1841
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
1842
+ else:
1843
+ tqdm_filenames = filenames
1844
+ for filename in tqdm_filenames:
1845
+ data = get_tlogp(directory, filename=filename, **kargs)
1846
+ if data is not None:
1847
+ dataset.append(data)
1848
+ else:
1849
+ if allExists:
1850
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
1851
+ return None
1852
+
1853
+ return pd.concat(dataset)
1854
+
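+ # Usage sketch (hypothetical filenames, not part of the original module):
+ # combine two observation times into a single DataFrame; get_tlogps simply
+ # concatenates the per-file frames.
+ #
+ # files = ["20210311080000.000", "20210311200000.000"]
+ # soundings = get_tlogps("UPPER_AIR/TLOGP/", files)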
1855
+
1856
+ def get_swan_radar(directory, filename=None, suffix="*.000", scale=[0.1, 0],
1857
+ varattrs={'long_name': 'quantitative_precipitation_forecast', 'short_name': 'QPF', 'units': 'mm'},
1858
+ cache=True, attach_forecast_period=True):
1859
+ """
1860
+ This function reads SWAN D131 gridded data from the MICAPS server.
1861
+ refer to https://www.taodocs.com/p-274692126.html
1862
+ :param directory: the data directory on the service
1863
+ :param filename: the data filename, if none, will be the latest file.
1864
+ :param suffix: the filename filter pattern which will be used to
1865
+ find the specified file.
1866
+ :param scale: data value will be scaled = (data + scale[1]) * scale[0], normally,
1867
+ CREF, CAPPI: [0.5, -66]
1868
+ radar echo height, VIL, OHP, ...: [0.1, 0]
1869
+ :param varattrs: dictionary, variable attributes.
1870
+ :param cache: cache retrieved data to local directory, default is True.
1871
+ :return: xarray Dataset object.
1872
+ >>> data = get_swan_radar("RADARMOSAIC/EXTRAPOLATION/QPF/")
1873
+ """
1874
+
1875
+ # get data file name
1876
+ if filename is None:
1877
+ try:
1878
+ # connect to data service
1879
+ service = GDSDataService()
1880
+ if service.berror == True:
1881
+ return None
1882
+ status, response = service.getLatestDataName(directory, suffix)
1883
+ except ValueError:
1884
+ print('Cannot retrieve data from ' + directory)
1885
+ return None
1886
+ StringResult = DataBlock_pb2.StringResult()
1887
+ if status == 200:
1888
+ StringResult.ParseFromString(response)
1889
+ if StringResult is not None:
1890
+ filename = StringResult.name
1891
+ if filename == '':
1892
+ return None
1893
+ else:
1894
+ return None
1895
+
1896
+ # retrieve data from cached file
1897
+ if cache:
1898
+ cache_file = CONFIG.get_cache_file(directory, filename, name="MICAPS_DATA")
1899
+ if cache_file.is_file():
1900
+ with open(cache_file, 'rb') as f:
1901
+ data = pickle.load(f)
1902
+ return data
1903
+
1904
+ # get data contents
1905
+ try:
1906
+ service = GDSDataService()
1907
+ if service.berror == True:
1908
+ return None
1909
+ status, response = service.getData(directory, filename)
1910
+ except ValueError:
1911
+ print('Cannot retrieve data ' + filename + ' from ' + directory)
1912
+ return None
1913
+ ByteArrayResult = DataBlock_pb2.ByteArrayResult()
1914
+ if status == 200:
1915
+ ByteArrayResult.ParseFromString(response)
1916
+ if ByteArrayResult is not None:
1917
+ byteArray = ByteArrayResult.byteArray
1918
+ if byteArray == b'':
1919
+ print('There is no data ' + filename + ' in ' + directory)
1920
+ return None
1921
+
1922
+ # define head structure
1923
+ head_dtype = [
1924
+ ('ZonName', 'S12'),
1925
+ ('DataName', 'S38'),
1926
+ ('Flag', 'S8'),
1927
+ ('Version', 'S8'),
1928
+ ('year', 'i2'),
1929
+ ('month', 'i2'),
1930
+ ('day', 'i2'),
1931
+ ('hour', 'i2'),
1932
+ ('minute', 'i2'),
1933
+ ('interval', 'i2'),
1934
+ ('XNumGrids', 'i2'),
1935
+ ('YNumGrids', 'i2'),
1936
+ ('ZNumGrids', 'i2'),
1937
+ ('RadarCount', 'i4'),
1938
+ ('StartLon', 'f4'),
1939
+ ('StartLat', 'f4'),
1940
+ ('CenterLon', 'f4'),
1941
+ ('CenterLat', 'f4'),
1942
+ ('XReso', 'f4'),
1943
+ ('YReso', 'f4'),
1944
+ ('ZhighGrids', 'f4', 40),
1945
+ ('RadarStationName', 'S20', 16),
1946
+ ('RadarLongitude', 'f4', 20),
1947
+ ('RadarLatitude', 'f4', 20),
1948
+ ('RadarAltitude', 'f4', 20),
1949
+ ('MosaicFlag', 'S1', 20),
1950
+ ('m_iDataType', 'i2'),
1951
+ ('m_iLevelDimension', 'i2'),
1952
+ ('Reserved', 'S168')]
1953
+
1954
+ # read head information
1955
+ head_info = np.frombuffer(byteArray[0:1024], dtype=head_dtype)
1956
+ ind = 1024
1957
+
1958
+ # get coordinates
1959
+ nlon = head_info['XNumGrids'][0].astype(np.int64)
1960
+ nlat = head_info['YNumGrids'][0].astype(np.int64)
1961
+ nlev = head_info['ZNumGrids'][0].astype(np.int64)
1962
+ dlon = head_info['XReso'][0].astype(np.float64)
1963
+ dlat = head_info['YReso'][0].astype(np.float64)
1964
+ lat = head_info['StartLat'][0] - np.arange(nlat)*dlat - dlat/2.0
1965
+ lon = head_info['StartLon'][0] + np.arange(nlon)*dlon - dlon/2.0
1966
+ level = head_info['ZhighGrids'][0][0:nlev]
1967
+
1968
+ # retrieve data records
1969
+ data_type = ['u1', 'u1', 'u2', 'i2']
1970
+ data_type = data_type[head_info['m_iDataType'][0]]
1971
+ data_len = (nlon * nlat * nlev)
1972
+ data = np.frombuffer(
1973
+ byteArray[ind:(ind + data_len*int(data_type[1]))],
1974
+ dtype=data_type, count=data_len)
1975
+
1976
+ # convert data type
1977
+ data.shape = (nlev, nlat, nlon)
1978
+ data = data.astype(np.float32)
1979
+ data = (data + scale[1]) * scale[0]
1980
+
1981
+ # reverse latitude axis
1982
+ data = np.flip(data, 1)
1983
+ lat = lat[::-1]
1984
+
1985
+ # set time coordinates
1986
+ init_time = datetime(
1987
+ head_info['year'][0], head_info['month'][0],
1988
+ head_info['day'][0], head_info['hour'][0], head_info['minute'][0])
1989
+ if attach_forecast_period:
1990
+ fhour = int(filename.split('.')[1])/60.0
1991
+ else:
1992
+ fhour = 0
1993
+ fhour = np.array([fhour], dtype=np.float64)
1994
+ time = init_time + timedelta(hours=fhour[0])
1995
+ init_time = np.array([init_time], dtype='datetime64[ms]')
1996
+ time = np.array([time], dtype='datetime64[ms]')
1997
+
1998
+ # define coordinates
1999
+ time_coord = ('time', time)
2000
+ lon_coord = ('lon', lon, {
2001
+ 'long_name':'longitude', 'units':'degrees_east', '_CoordinateAxisType':'Lon'})
2002
+ lat_coord = ('lat', lat, {
2003
+ 'long_name':'latitude', 'units':'degrees_north', '_CoordinateAxisType':'Lat'})
2004
+ level_coord = ('level', level, {
2005
+ 'long_name':'height', 'units':'m'})
2006
+
2007
+ # create xarray
2008
+ data = np.expand_dims(data, axis=0)
2009
+ data = xr.Dataset({'data':(['time', 'level', 'lat', 'lon'], data, varattrs)},
2010
+ coords={'time':time_coord, 'level':level_coord, 'lat':lat_coord, 'lon':lon_coord})
2011
+
2012
+ # add time coordinates
2013
+ data.coords['forecast_reference_time'] = init_time[0]
2014
+ data.coords['forecast_period'] = ('time', fhour, {
2015
+ 'long_name':'forecast_period', 'units':'hour'})
2016
+
2017
+ # add attributes
2018
+ data.attrs['Conventions'] = "CF-1.6"
2019
+ data.attrs['Origin'] = 'MICAPS Cassandra Server'
2020
+
2021
+ # cache data
2022
+ if cache:
2023
+ with open(cache_file, 'wb') as f:
2024
+ pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
2025
+
2026
+ # return
2027
+ return data
2028
+ else:
2029
+ return None
2030
+ else:
2031
+ return None
2032
+
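+ # Usage sketch (not part of the original module): for composite reflectivity the
+ # docstring above suggests scale=[0.5, -66], i.e. a stored byte value of 100 is
+ # restored as (100 - 66) * 0.5 = 17.0 dBZ. The directory and varattrs below are
+ # illustrative only.
+ #
+ # cref = get_swan_radar(
+ #     "RADARMOSAIC/CREF/", scale=[0.5, -66],
+ #     varattrs={'long_name': 'composite_reflectivity',
+ #               'short_name': 'CREF', 'units': 'dBZ'},
+ #     attach_forecast_period=False)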
2033
+
2034
+ def get_swan_radars(directory, filenames, allExists=True, pbar=False, **kargs):
2035
+ """
2036
+ Retrieve multiple SWAN D131 radar grids from MICAPS cassandra service.
2037
+
2038
+ Args:
2039
+ directory (string): the data directory on the service.
2040
+ filenames (list): the list of filenames.
2041
+ allExists (boolean): all files should exist, or return None.
2042
+ pbar (boolean): Show progress bar, default to False.
2043
+ **kargs: key arguments passed to the get_swan_radar function.
2044
+ """
2045
+
2046
+ dataset = []
2047
+ if pbar:
2048
+ tqdm_filenames = tqdm(filenames, desc=directory + ": ")
2049
+ else:
2050
+ tqdm_filenames = filenames
2051
+ for filename in tqdm_filenames:
2052
+ data = get_swan_radar(directory, filename=filename, **kargs)
2053
+ if data is not None:
2054
+ dataset.append(data)
2055
+ else:
2056
+ if allExists:
2057
+ warnings.warn("{} doese not exists.".format(directory+'/'+filename))
2058
+ return None
2059
+
2060
+ return xr.concat(dataset, dim='time')
2061
+
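+ # Usage sketch (hypothetical filenames, not part of the original module): with
+ # attach_forecast_period=True, get_swan_radar reads the forecast period in
+ # minutes from the field after the first '.' in each filename, so a sequence of
+ # extrapolation QPF leads can be stacked along the time dimension:
+ #
+ # files = ["20210311_2000.030", "20210311_2000.060"]   # 30- and 60-minute leads
+ # qpf = get_swan_radars("RADARMOSAIC/EXTRAPOLATION/QPF/", files)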