geospacelab 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geospacelab/__init__.py +1 -1
- geospacelab/datahub/sources/cdaweb/downloader.py +1 -0
- geospacelab/datahub/sources/madrigal/downloader.py +33 -8
- geospacelab/datahub/sources/madrigal/isr/eiscat/__init__.py +146 -47
- geospacelab/datahub/sources/madrigal/isr/eiscat/downloader.py +235 -9
- geospacelab/datahub/sources/madrigal/isr/eiscat/loader.py +71 -36
- geospacelab/datahub/sources/madrigal/isr/millstonehill/basic/variable_config.py +10 -11
- geospacelab/datahub/sources/madrigal/isr/pfisr/downloader.py +13 -4
- geospacelab/datahub/sources/madrigal/isr/pfisr/fitted/__init__.py +100 -51
- geospacelab/datahub/sources/madrigal/isr/pfisr/fitted/downloader.py +3 -1
- geospacelab/datahub/sources/madrigal/isr/pfisr/fitted/variable_config.py +10 -11
- geospacelab/datahub/sources/madrigal/isr/pfisr/vi/__init__.py +95 -52
- geospacelab/datahub/sources/madrigal/isr/pfisr/vi/downloader.py +1 -1
- geospacelab/datahub/sources/madrigal/isr/pfisr/vi/variable_config.py +3 -1
- geospacelab/datahub/sources/supermag/supermag_api.py +2 -1
- geospacelab/toolbox/utilities/numpyarray.py +137 -1
- geospacelab/visualization/mpl/panels.py +20 -4
- {geospacelab-0.9.4.dist-info → geospacelab-0.10.1.dist-info}/METADATA +2 -2
- {geospacelab-0.9.4.dist-info → geospacelab-0.10.1.dist-info}/RECORD +22 -22
- {geospacelab-0.9.4.dist-info → geospacelab-0.10.1.dist-info}/WHEEL +1 -1
- {geospacelab-0.9.4.dist-info → geospacelab-0.10.1.dist-info}/licenses/LICENSE +0 -0
- {geospacelab-0.9.4.dist-info → geospacelab-0.10.1.dist-info}/top_level.txt +0 -0
geospacelab/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ __author__ = "Lei Cai"
|
|
|
6
6
|
__copyright__ = "Copyright 2021, GeospaceLAB"
|
|
7
7
|
__credits__ = ["Lei Cai"]
|
|
8
8
|
__license__ = "BSD-3-Clause License"
|
|
9
|
-
__version__ = "0.
|
|
9
|
+
__version__ = "0.10.1"
|
|
10
10
|
__maintainer__ = "Lei Cai"
|
|
11
11
|
__email__ = "lei.cai@oulu.fi"
|
|
12
12
|
__status__ = "Developing"
|
|
@@ -116,6 +116,7 @@ class Downloader(object):
|
|
|
116
116
|
if file_path_local.is_file():
|
|
117
117
|
mylog.simpleinfo.info("The file {} has been downloaded.".format(file_path_local.name))
|
|
118
118
|
if not self.force_download:
|
|
119
|
+
self.done = True
|
|
119
120
|
return
|
|
120
121
|
files_error = []
|
|
121
122
|
mylog.simpleinfo.info("Downloading {} ...".format(file_path_remote))
|
|
@@ -192,8 +193,12 @@ class Downloader(object):
|
|
|
192
193
|
for file in files:
|
|
193
194
|
matching = 0
|
|
194
195
|
for fnp in include_file_name_patterns:
|
|
196
|
+
if isinstance(fnp, str):
|
|
197
|
+
fnp = [fnp]
|
|
195
198
|
if isinstance(fnp, list):
|
|
196
|
-
fnp = '.*' + '.*'.join(fnp) + '.*'
|
|
199
|
+
fnp = r'.*' + '.*'.join(fnp) + '.*'
|
|
200
|
+
else:
|
|
201
|
+
raise AttributeError
|
|
197
202
|
rc = re.compile(fnp)
|
|
198
203
|
file_name = pathlib.Path(file.name).name
|
|
199
204
|
rm = rc.match(file_name.lower())
|
|
@@ -212,8 +217,12 @@ class Downloader(object):
|
|
|
212
217
|
for file in files:
|
|
213
218
|
matching = 1
|
|
214
219
|
for fnp in exclude_file_name_patterns:
|
|
220
|
+
if isinstance(fnp, str):
|
|
221
|
+
fnp = [fnp]
|
|
215
222
|
if isinstance(fnp, list):
|
|
216
|
-
fnp = '.*' + '.*'.join(fnp) + '.*'
|
|
223
|
+
fnp = r'.*' + '.*'.join(fnp) + '.*'
|
|
224
|
+
else:
|
|
225
|
+
raise AttributeError
|
|
217
226
|
rc = re.compile(fnp)
|
|
218
227
|
file_name = pathlib.Path(file.name).name
|
|
219
228
|
rm = rc.match(file_name.lower())
|
|
@@ -232,8 +241,12 @@ class Downloader(object):
|
|
|
232
241
|
for file in files:
|
|
233
242
|
matching = 0
|
|
234
243
|
for fnp in include_file_type_patterns:
|
|
244
|
+
if isinstance(fnp, str):
|
|
245
|
+
fnp = [fnp]
|
|
235
246
|
if isinstance(fnp, list):
|
|
236
|
-
fnp = '.*' + '.*'.join(fnp) + '.*'
|
|
247
|
+
fnp = r'.*' + '.*'.join(fnp) + '.*'
|
|
248
|
+
else:
|
|
249
|
+
raise AttributeError
|
|
237
250
|
rc = re.compile(fnp)
|
|
238
251
|
rm = rc.match(file.kindatdesc.lower())
|
|
239
252
|
if rm is not None:
|
|
@@ -251,8 +264,12 @@ class Downloader(object):
|
|
|
251
264
|
for file in files:
|
|
252
265
|
matching = 1
|
|
253
266
|
for fnp in exclude_file_type_patterns:
|
|
267
|
+
if isinstance(fnp, str):
|
|
268
|
+
fnp = [fnp]
|
|
254
269
|
if isinstance(fnp, list):
|
|
255
|
-
fnp = '.*' + '.*'.join(fnp) + '.*'
|
|
270
|
+
fnp = r'.*' + '.*'.join(fnp) + '.*'
|
|
271
|
+
else:
|
|
272
|
+
raise AttributeError
|
|
256
273
|
rc = re.compile(fnp)
|
|
257
274
|
rm = rc.match(file.kindatdesc.lower())
|
|
258
275
|
if rm is not None:
|
|
@@ -281,7 +298,7 @@ class Downloader(object):
|
|
|
281
298
|
if display:
|
|
282
299
|
mylog.simpleinfo.info("Listing matched experiments and files ...")
|
|
283
300
|
exp_info = Downloader.get_exp_info(exps, include_file_info=True)
|
|
284
|
-
mylog.simpleinfo.info("{:>10s}\t{:<24s}\t{:<24s}\t{:<16s}\t{:<15s}\t{:<40.40s}\t{:<
|
|
301
|
+
mylog.simpleinfo.info("{:>10s}\t{:<24s}\t{:<24s}\t{:<16s}\t{:<15s}\t{:<40.40s}\t{:<50.50s}\t{:<80.80s}".format(
|
|
285
302
|
'EXP NUM', 'START TIME', 'END TIME', 'DURATION (hour)', 'EXP ID', 'EXP Name', 'File Name', 'File Type'
|
|
286
303
|
)
|
|
287
304
|
)
|
|
@@ -293,7 +310,7 @@ class Downloader(object):
|
|
|
293
310
|
):
|
|
294
311
|
|
|
295
312
|
for file in exp_info['FILES'][ind]:
|
|
296
|
-
line_str = "{:>10d}\t{:<24s}\t{:<24s}\t{:<16.1f}\t{:<15d}\t{:<40.40s}\t{:<
|
|
313
|
+
line_str = "{:>10d}\t{:<24s}\t{:<24s}\t{:<16.1f}\t{:<15d}\t{:<40.40s}\t{:<50.50s}\t{:<80.80s}".format(
|
|
297
314
|
ind + 1,
|
|
298
315
|
dt_fr.strftime("%Y-%m-%d %H:%M:%S"),
|
|
299
316
|
dt_to.strftime("%Y-%m-%d %H:%M:%S"),
|
|
@@ -437,8 +454,12 @@ class Downloader(object):
|
|
|
437
454
|
for exp in exps:
|
|
438
455
|
matching = 0
|
|
439
456
|
for enp in include_exp_name_patterns:
|
|
457
|
+
if isinstance(enp, str):
|
|
458
|
+
enp = [enp]
|
|
440
459
|
if isinstance(enp, list):
|
|
441
|
-
enp = '.*' + '.*'.join(enp) + '.*'
|
|
460
|
+
enp = r'.*' + '.*'.join(enp) + '.*'
|
|
461
|
+
else:
|
|
462
|
+
raise AttributeError
|
|
442
463
|
rc = re.compile(enp)
|
|
443
464
|
rm = rc.match(exp.name.lower())
|
|
444
465
|
if rm is not None:
|
|
@@ -456,8 +477,12 @@ class Downloader(object):
|
|
|
456
477
|
for exp in exps:
|
|
457
478
|
matching = 1
|
|
458
479
|
for enp in exclude_exp_name_patterns:
|
|
480
|
+
if isinstance(enp, str):
|
|
481
|
+
enp = [enp]
|
|
459
482
|
if isinstance(enp, list):
|
|
460
|
-
enp = '.*' + '.*'.join(enp) + '.*'
|
|
483
|
+
enp = r'.*' + '.*'.join(enp) + '.*'
|
|
484
|
+
else:
|
|
485
|
+
raise AttributeError
|
|
461
486
|
rc = re.compile(enp)
|
|
462
487
|
rm = rc.match(exp.name.lower())
|
|
463
488
|
if rm is not None:
|
|
@@ -10,6 +10,8 @@ __docformat__ = "reStructureText"
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
import datetime
|
|
13
|
+
import re
|
|
14
|
+
|
|
13
15
|
import numpy as np
|
|
14
16
|
|
|
15
17
|
import geospacelab.datahub as datahub
|
|
@@ -27,9 +29,9 @@ default_dataset_attrs = {
|
|
|
27
29
|
'kind': 'sourced',
|
|
28
30
|
'database': madrigal_database,
|
|
29
31
|
'facility': 'EISCAT',
|
|
30
|
-
'data_file_type': '
|
|
32
|
+
'data_file_type': 'madrigal-hdf5',
|
|
31
33
|
'data_file_ext': 'hdf5',
|
|
32
|
-
'data_root_dir': prf.datahub_data_root_dir / 'Madrigal' / 'EISCAT' / '
|
|
34
|
+
'data_root_dir': prf.datahub_data_root_dir / 'Madrigal' / 'EISCAT' / 'Analyzed',
|
|
33
35
|
'allow_download': True,
|
|
34
36
|
'status_control': False,
|
|
35
37
|
'rasidual_contorl': False,
|
|
@@ -49,7 +51,7 @@ default_variable_names = [
|
|
|
49
51
|
|
|
50
52
|
# default_data_search_recursive = True
|
|
51
53
|
|
|
52
|
-
default_attrs_required = ['site', 'antenna',
|
|
54
|
+
default_attrs_required = ['site', 'antenna',]
|
|
53
55
|
|
|
54
56
|
|
|
55
57
|
class Dataset(datahub.DatasetSourced):
|
|
@@ -63,13 +65,15 @@ class Dataset(datahub.DatasetSourced):
|
|
|
63
65
|
self.site = kwargs.pop('site', '')
|
|
64
66
|
self.antenna = kwargs.pop('antenna', '')
|
|
65
67
|
self.experiment = kwargs.pop('experiment', '')
|
|
68
|
+
self.experiment_ids = kwargs.pop('exp_ids', [])
|
|
66
69
|
self.pulse_code = kwargs.pop('pulse_code', '')
|
|
67
70
|
self.scan_mode = kwargs.pop('scan_mode', '')
|
|
68
71
|
self.modulation = kwargs.pop('modulation', '')
|
|
69
72
|
self.data_file_type = kwargs.pop('data_file_type', '')
|
|
70
73
|
self.affiliation = kwargs.pop('affiliation', '')
|
|
71
74
|
self.allow_download = kwargs.pop('allow_download', True)
|
|
72
|
-
self.
|
|
75
|
+
self.gate_num = kwargs.pop('gate_num', None)
|
|
76
|
+
self.metadata = {}
|
|
73
77
|
self.add_AACGM = kwargs.pop('add_AACGM', True)
|
|
74
78
|
self.add_APEX = kwargs.pop('add_APEX', False)
|
|
75
79
|
|
|
@@ -102,6 +106,8 @@ class Dataset(datahub.DatasetSourced):
|
|
|
102
106
|
|
|
103
107
|
if str(self.data_file_type):
|
|
104
108
|
self.data_file_ext = self.data_file_type.split('-')[1]
|
|
109
|
+
if (self.load_mode == 'AUTO') and (self.data_file_type=='eiscat-mat'):
|
|
110
|
+
raise AttributeError
|
|
105
111
|
|
|
106
112
|
def label(self, **kwargs):
|
|
107
113
|
label = super().label()
|
|
@@ -111,7 +117,9 @@ class Dataset(datahub.DatasetSourced):
|
|
|
111
117
|
self.check_data_files(**kwargs)
|
|
112
118
|
|
|
113
119
|
for file_path in self.data_file_paths:
|
|
114
|
-
load_obj = self.loader(file_path, file_type=self.data_file_type)
|
|
120
|
+
load_obj = self.loader(file_path, file_type=self.data_file_type, gate_num=self.gate_num)
|
|
121
|
+
if self.gate_num is None:
|
|
122
|
+
self.gate_num = load_obj.gate_num
|
|
115
123
|
|
|
116
124
|
for var_name in self._variables.keys():
|
|
117
125
|
self._variables[var_name].join(load_obj.variables[var_name])
|
|
@@ -125,6 +133,12 @@ class Dataset(datahub.DatasetSourced):
|
|
|
125
133
|
self.experiment = rawdata_path.split('/')[-1].split('@')[0]
|
|
126
134
|
self.affiliation = load_obj.metadata['affiliation']
|
|
127
135
|
self.metadata = load_obj.metadata
|
|
136
|
+
|
|
137
|
+
inds_cmb = np.argsort(self['DATETIME'].flatten())
|
|
138
|
+
if any(np.diff(np.array(inds_cmb))<0):
|
|
139
|
+
for var_name in self.keys():
|
|
140
|
+
self[var_name].value = self[var_name].value[inds_cmb, :]
|
|
141
|
+
|
|
128
142
|
if self.add_AACGM or self.add_APEX:
|
|
129
143
|
self.calc_lat_lon()
|
|
130
144
|
# self.select_beams(field_aligned=True)
|
|
@@ -253,57 +267,142 @@ class Dataset(datahub.DatasetSourced):
|
|
|
253
267
|
def search_data_files(self, **kwargs):
|
|
254
268
|
dt_fr = self.dt_fr
|
|
255
269
|
dt_to = self.dt_to
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
file_patterns
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
270
|
+
done = False
|
|
271
|
+
if not list(self.experiment_ids):
|
|
272
|
+
diff_days = dttool.get_diff_days(dt_fr, dt_to)
|
|
273
|
+
day0 = dttool.get_start_of_the_day(dt_fr)
|
|
274
|
+
for i in range(diff_days + 1):
|
|
275
|
+
thisday = day0 + datetime.timedelta(days=i)
|
|
276
|
+
initial_file_dir = self.data_root_dir / self.site / thisday.strftime('%Y')
|
|
277
|
+
|
|
278
|
+
file_patterns = []
|
|
279
|
+
if self.data_file_type == 'eiscat-hdf5':
|
|
280
|
+
file_patterns.append('EISCAT')
|
|
281
|
+
elif self.data_file_type == 'madrigal-hdf5':
|
|
282
|
+
file_patterns.append('MAD6400')
|
|
283
|
+
else:
|
|
284
|
+
raise NotImplementedError
|
|
285
|
+
file_patterns.append(thisday.strftime('%Y-%m-%d'))
|
|
286
|
+
if str(self.pulse_code):
|
|
287
|
+
file_patterns.append(self.pulse_code)
|
|
288
|
+
if str(self.modulation):
|
|
289
|
+
file_patterns.append(self.modulation)
|
|
290
|
+
file_patterns.append(self.antenna.lower())
|
|
291
|
+
|
|
292
|
+
# remove empty str
|
|
293
|
+
file_patterns = [pattern for pattern in file_patterns if str(pattern)]
|
|
294
|
+
|
|
295
|
+
search_pattern = '*' + '*'.join(file_patterns) + '*'
|
|
296
|
+
done = super().search_data_files(
|
|
297
|
+
initial_file_dir=initial_file_dir,
|
|
298
|
+
search_pattern=search_pattern, allow_multiple_files=True)
|
|
299
|
+
|
|
300
|
+
# Validate file paths
|
|
301
|
+
if not done and self.allow_download:
|
|
302
|
+
done = self.download_data()
|
|
303
|
+
if done:
|
|
304
|
+
done = super().search_data_files(
|
|
305
|
+
initial_file_dir=initial_file_dir,
|
|
306
|
+
search_pattern=search_pattern,
|
|
307
|
+
allow_multiple_files=True
|
|
308
|
+
)
|
|
309
|
+
else:
|
|
310
|
+
print('Cannot find files from the online database!')
|
|
311
|
+
else:
|
|
312
|
+
initial_file_dir = self.data_root_dir
|
|
313
|
+
for exp_id in self.experiment_ids:
|
|
314
|
+
file_patterns = []
|
|
315
|
+
if self.data_file_type == 'eiscat-hdf5':
|
|
316
|
+
file_patterns.append('EISCAT')
|
|
317
|
+
elif self.data_file_type == 'madrigal-hdf5':
|
|
318
|
+
file_patterns.append('MAD')
|
|
289
319
|
else:
|
|
290
|
-
|
|
320
|
+
raise NotImplementedError
|
|
321
|
+
file_patterns.append(thisday.strftime('%Y-%m-%d'))
|
|
322
|
+
if str(self.pulse_code):
|
|
323
|
+
file_patterns.append(self.pulse_code)
|
|
324
|
+
if str(self.modulation):
|
|
325
|
+
file_patterns.append(self.modulation)
|
|
326
|
+
file_patterns.append(self.antenna.lower())
|
|
327
|
+
# remove empty str
|
|
328
|
+
file_patterns = [pattern for pattern in file_patterns if str(pattern)]
|
|
329
|
+
search_pattern = f"*EID-{exp_id}*/*{'*'.join(file_patterns)}*"
|
|
330
|
+
done = super().search_data_files(
|
|
331
|
+
initial_file_dir=initial_file_dir,
|
|
332
|
+
search_pattern=search_pattern, recursive=True, allow_multiple_files=True
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
if not done and self.allow_download:
|
|
336
|
+
done = self.download_data()
|
|
337
|
+
if done:
|
|
338
|
+
done = super().search_data_files(
|
|
339
|
+
initial_file_dir=initial_file_dir,
|
|
340
|
+
search_pattern=search_pattern, recursive=True, allow_multiple_files=True
|
|
341
|
+
)
|
|
342
|
+
else:
|
|
343
|
+
print('The requested experiment (ID: {}) does not exist in the online database!'.format(exp_id))
|
|
344
|
+
if len(done) > 1:
|
|
345
|
+
mylog.StreamLogger.warning(
|
|
346
|
+
"Multiple data files detected! " +
|
|
347
|
+
"Specify the experiment pulse code and modulation may constrain the searching condition.")
|
|
348
|
+
for fp in done:
|
|
349
|
+
mylog.simpleinfo.info(str(fp))
|
|
350
|
+
self._check_multiple_files()
|
|
291
351
|
|
|
292
352
|
return done
|
|
293
353
|
|
|
354
|
+
def _check_multiple_files(self):
|
|
355
|
+
file_paths = self.data_file_paths
|
|
356
|
+
exp_ids = []
|
|
357
|
+
for fp in file_paths:
|
|
358
|
+
rc = re.compile(r"EID\-([\d]+)")
|
|
359
|
+
res = rc.search(str(fp))
|
|
360
|
+
exp_ids.append(res.groups()[0])
|
|
361
|
+
exp_ids_unique = [eid for eid in np.unique(exp_ids)]
|
|
362
|
+
|
|
363
|
+
file_paths_new = []
|
|
364
|
+
for eid in exp_ids_unique:
|
|
365
|
+
inds_id = np.where(np.array(exp_ids)==eid)[0]
|
|
366
|
+
fps_sub = []
|
|
367
|
+
for ii in inds_id:
|
|
368
|
+
fp = file_paths[ii]
|
|
369
|
+
rc = re.compile(r".*_([\d]{8}T[\d]{6}).*_([\d]{8}T[\d]{6}).*[\d]{4}\-[\d]{2}\-[\d]{2}_([\w.]+)@.*")
|
|
370
|
+
res = rc.search(str(fp))
|
|
371
|
+
dt_0 = datetime.datetime.strptime(res.groups()[0], '%Y%m%dT%H%M%S')
|
|
372
|
+
dt_1 = datetime.datetime.strptime(res.groups()[1], '%Y%m%dT%H%M%S')
|
|
373
|
+
if (dt_0 >= self.dt_to) or (dt_1<=self.dt_fr):
|
|
374
|
+
continue
|
|
375
|
+
if str(self.pulse_code):
|
|
376
|
+
if self.pulse_code not in res.groups()[2].lower():
|
|
377
|
+
continue
|
|
378
|
+
if str(self.modulation):
|
|
379
|
+
if self.modulation not in res.groups()[2].lower():
|
|
380
|
+
continue
|
|
381
|
+
if '_v' in res.groups()[2].lower():
|
|
382
|
+
continue
|
|
383
|
+
fps_sub.extend([fp])
|
|
384
|
+
if len(fps_sub) > 1:
|
|
385
|
+
mylog.StreamLogger.warning("Multiple data files for a single experiment detected!")
|
|
386
|
+
# for fp in fps_sub:
|
|
387
|
+
# mylog.simpleinfo.info(str(fp))
|
|
388
|
+
# fps_sub = fps_sub[0]
|
|
389
|
+
file_paths_new.extend(fps_sub)
|
|
390
|
+
self.data_file_paths = file_paths_new
|
|
391
|
+
|
|
392
|
+
|
|
294
393
|
def download_data(self):
|
|
295
394
|
if self.data_file_type == 'eiscat-hdf5':
|
|
296
395
|
download_obj = self.downloader(dt_fr=self.dt_fr, dt_to=self.dt_to,
|
|
297
|
-
|
|
298
|
-
data_file_root_dir=self.data_root_dir
|
|
396
|
+
antennas=[self.antenna], kind_data='eiscat',
|
|
397
|
+
data_file_root_dir=self.data_root_dir,
|
|
398
|
+
exclude_file_type_patterns=['pp']
|
|
399
|
+
)
|
|
299
400
|
elif self.data_file_type == 'madrigal-hdf5':
|
|
300
401
|
download_obj = self.downloader(dt_fr=self.dt_fr, dt_to=self.dt_to,
|
|
301
|
-
|
|
302
|
-
data_file_root_dir=self.data_root_dir
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
sites=[self.site], kind_data='eiscat',
|
|
306
|
-
data_file_root_dir=self.data_root_dir)
|
|
402
|
+
antennas=[self.antenna], kind_data='madrigal',
|
|
403
|
+
data_file_root_dir=self.data_root_dir,
|
|
404
|
+
exclude_file_type_patterns=['pp']
|
|
405
|
+
)
|
|
307
406
|
else:
|
|
308
407
|
raise TypeError
|
|
309
408
|
return download_obj.done
|
|
@@ -28,17 +28,242 @@ import geospacelab.datahub.sources.madrigal as madrigal
|
|
|
28
28
|
import geospacelab.toolbox.utilities.pylogging as mylog
|
|
29
29
|
import geospacelab.toolbox.utilities.pydatetime as dttool
|
|
30
30
|
|
|
31
|
+
from geospacelab.datahub.sources.madrigal.downloader import Downloader as DownloaderBase
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Downloader(DownloaderBase):
|
|
35
|
+
|
|
36
|
+
def __init__(
|
|
37
|
+
self,
|
|
38
|
+
dt_fr: datetime.datetime, dt_to: datetime,
|
|
39
|
+
antennas=None,
|
|
40
|
+
data_file_root_dir=None,
|
|
41
|
+
kind_data="madrigal",
|
|
42
|
+
user_fullname=madrigal.default_user_fullname,
|
|
43
|
+
user_email=madrigal.default_user_email,
|
|
44
|
+
user_affiliation=madrigal.default_user_affiliation,
|
|
45
|
+
include_exp_name_patterns: list=None,
|
|
46
|
+
exclude_exp_name_patterns: list=None,
|
|
47
|
+
include_exp_ids: list=None,
|
|
48
|
+
exclude_exp_ids: list=None,
|
|
49
|
+
include_file_name_patterns: list = None,
|
|
50
|
+
exclude_file_name_patterns: list = None,
|
|
51
|
+
include_file_type_patterns=None,
|
|
52
|
+
exclude_file_type_patterns=None,
|
|
53
|
+
direct_download = True,
|
|
54
|
+
force_download = False,
|
|
55
|
+
dry_run: bool=False,
|
|
56
|
+
madrigal_url: str = "http://madrigal.eiscat.se/",
|
|
57
|
+
):
|
|
58
|
+
|
|
59
|
+
# dt_fr = dttool.get_start_of_the_day(dt_fr)
|
|
60
|
+
# dt_to = dttool.get_end_of_the_day(dt_to)
|
|
61
|
+
|
|
62
|
+
icodes = []
|
|
63
|
+
for a in antennas:
|
|
64
|
+
icodes.extend(instrument_codes[a])
|
|
65
|
+
icodes = [int(i) for i in np.unique(icodes)]
|
|
66
|
+
|
|
67
|
+
self.kind_data=kind_data
|
|
31
68
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# download_obj = Downloader(dt_fr, dt_to, sites=sites, kind_data="madrigal")
|
|
37
|
-
# schedule = EISCATSchedule(dt_fr=dt_fr, dt_to=dt_to)
|
|
38
|
-
# schedule.to_txt()
|
|
69
|
+
if data_file_root_dir is None:
|
|
70
|
+
self.data_file_root_dir = pfr.datahub_data_root_dir / 'Madrigal' / 'EISCAT' / 'Analyzed'
|
|
71
|
+
else:
|
|
72
|
+
self.data_file_root_dir = data_file_root_dir
|
|
39
73
|
|
|
74
|
+
super().__init__(
|
|
75
|
+
dt_fr=dt_fr, dt_to=dt_to, icodes=icodes,
|
|
76
|
+
include_exp_name_patterns=include_exp_name_patterns,
|
|
77
|
+
exclude_exp_name_patterns=exclude_exp_name_patterns,
|
|
78
|
+
include_exp_ids=include_exp_ids,
|
|
79
|
+
exclude_exp_ids=exclude_exp_ids,
|
|
80
|
+
include_file_name_patterns=include_file_name_patterns,
|
|
81
|
+
exclude_file_name_patterns=exclude_file_name_patterns,
|
|
82
|
+
include_file_type_patterns=include_file_type_patterns,
|
|
83
|
+
exclude_file_type_patterns=exclude_file_type_patterns,
|
|
84
|
+
data_file_root_dir=data_file_root_dir,
|
|
85
|
+
force_download=force_download, direct_download=direct_download, dry_run=dry_run,
|
|
86
|
+
madrigal_url=madrigal_url,
|
|
87
|
+
user_fullname=user_fullname, user_email=user_email, user_affiliation=user_affiliation)
|
|
88
|
+
|
|
89
|
+
def download(self, **kwargs):
|
|
90
|
+
if self.kind_data.lower() == 'eiscat':
|
|
91
|
+
self.download_EISCAT()
|
|
92
|
+
elif self.kind_data.lower() == 'madrigal':
|
|
93
|
+
self.download_MAD()
|
|
94
|
+
else:
|
|
95
|
+
raise NotImplementedError
|
|
96
|
+
return
|
|
97
|
+
|
|
98
|
+
def download_EISCAT(self):
|
|
99
|
+
exps, database = self.get_exp_list(
|
|
100
|
+
dt_fr=dttool.get_start_of_the_day(self.dt_fr),
|
|
101
|
+
dt_to=dttool.get_end_of_the_day(self.dt_to),
|
|
102
|
+
include_exp_name_patterns=self.include_exp_name_patterns,
|
|
103
|
+
exclude_exp_name_patterns=self.exclude_exp_name_patterns,
|
|
104
|
+
include_exp_ids=self.include_exp_ids,
|
|
105
|
+
exclude_exp_ids=self.exclude_exp_ids,
|
|
106
|
+
icodes=self.icodes,
|
|
107
|
+
madrigal_url=self.madrigal_url,
|
|
108
|
+
display=True)
|
|
109
|
+
self.exp_list = list(exps)
|
|
110
|
+
self.database = database
|
|
111
|
+
|
|
112
|
+
cookies = {
|
|
113
|
+
'user_email': self.user_email,
|
|
114
|
+
'user_fullname': self.user_fullname,
|
|
115
|
+
'user_affiliation': self.user_affiliation
|
|
116
|
+
}
|
|
117
|
+
for exp in exps:
|
|
118
|
+
dt_fr_exp = datetime.datetime(
|
|
119
|
+
exp.startyear, exp.startmonth, exp.startday, exp.starthour, exp.startmin, exp.startsec
|
|
120
|
+
)
|
|
121
|
+
dt_to_exp = datetime.datetime(
|
|
122
|
+
exp.endyear, exp.endmonth, exp.endday, exp.endhour, exp.endmin, exp.endsec
|
|
123
|
+
)
|
|
124
|
+
if (dt_fr_exp >= self.dt_to) or (dt_to_exp <= self.dt_fr):
|
|
125
|
+
continue
|
|
126
|
+
try:
|
|
127
|
+
res = re.search(r'([\d]{4}\-[\d]{2}\-[\d]{2})_(\w+)@(\w+)', exp.name)
|
|
128
|
+
thisday = datetime.datetime.strptime(res.groups()[0], "%Y-%m-%d")
|
|
129
|
+
pulse_code = res.groups()[1]
|
|
130
|
+
antenna_ = res.groups()[2]
|
|
131
|
+
if 'uhf' in antenna_.lower():
|
|
132
|
+
antenna = 'UHF'
|
|
133
|
+
elif 'vhf' in antenna_.lower():
|
|
134
|
+
antenna = 'VHF'
|
|
135
|
+
elif any([a in antenna_.lower() for a in ['32m', '42m', 'esr']]):
|
|
136
|
+
antenna = 'ESR'
|
|
137
|
+
elif 'sod' in antenna_.lower():
|
|
138
|
+
antenna = 'SOD'
|
|
139
|
+
elif 'kir' in antenna_.lower():
|
|
140
|
+
antenna = 'KIR'
|
|
141
|
+
else:
|
|
142
|
+
raise NotImplementedError
|
|
143
|
+
except Exception as e:
|
|
144
|
+
print(e)
|
|
145
|
+
mylog.StreamLogger.warning("Parsing the experiment name was failed! EXP: {}".format(exp.name))
|
|
146
|
+
|
|
147
|
+
url = "https://madrigal.eiscat.se/madrigal/showExperiment?experiment_list=" + str(exp.id) + "&show_plots="
|
|
148
|
+
|
|
149
|
+
r = requests.get(url, cookies=cookies)
|
|
150
|
+
soup = bs4.BeautifulSoup(r.text, 'html.parser')
|
|
151
|
+
links = soup.find_all('a', href=True)
|
|
152
|
+
for link in links:
|
|
153
|
+
href = link['href']
|
|
154
|
+
if any(href.endswith(s) for s in ['.png', '.tar.gz', '.hdf5']):
|
|
155
|
+
filename = href.split('/')[-1]
|
|
156
|
+
file_dir_local = self.data_file_root_dir / antenna / thisday.strftime("%Y") / \
|
|
157
|
+
(exp.name + '_EID-' + str(exp.id) + '_'
|
|
158
|
+
+ dt_fr_exp.strftime("%Y%m%dT%H%M%S") + '_'
|
|
159
|
+
+ dt_to_exp.strftime("%Y%m%dT%H%M%S"))
|
|
160
|
+
file_dir_local.mkdir(parents=True, exist_ok=True)
|
|
161
|
+
|
|
162
|
+
remote_file = requests.get(href)
|
|
163
|
+
file_path = file_dir_local / filename
|
|
164
|
+
if file_path.is_file() and not self.force_download:
|
|
165
|
+
print("The file {} has been downloaded.".format(filename))
|
|
166
|
+
self.done=True
|
|
167
|
+
continue
|
|
168
|
+
mylog.simpleinfo.info(
|
|
169
|
+
'Downloading "{} ..."'.format(filename)
|
|
170
|
+
)
|
|
171
|
+
with open(file_path, "wb") as eiscat:
|
|
172
|
+
eiscat.write(remote_file.content)
|
|
173
|
+
mylog.simpleinfo.info('Saved to {}.'.format(file_dir_local))
|
|
174
|
+
self.done = True
|
|
175
|
+
return
|
|
176
|
+
|
|
177
|
+
def download_MAD(self, **kwargs):
|
|
178
|
+
|
|
179
|
+
exps, database = self.get_exp_list(
|
|
180
|
+
dt_fr=dttool.get_start_of_the_day(self.dt_fr),
|
|
181
|
+
dt_to=dttool.get_end_of_the_day(self.dt_to),
|
|
182
|
+
include_exp_name_patterns=self.include_exp_name_patterns,
|
|
183
|
+
exclude_exp_name_patterns=self.exclude_exp_name_patterns,
|
|
184
|
+
include_exp_ids=self.include_exp_ids,
|
|
185
|
+
exclude_exp_ids=self.exclude_exp_ids,
|
|
186
|
+
icodes=self.icodes,
|
|
187
|
+
madrigal_url=self.madrigal_url,
|
|
188
|
+
display=True)
|
|
189
|
+
self.exp_list = list(exps)
|
|
190
|
+
self.database = database
|
|
191
|
+
|
|
192
|
+
exps, exps_error = self.get_online_file_list(
|
|
193
|
+
exp_list=self.exp_list, database=database,
|
|
194
|
+
include_file_name_patterns=self.include_file_name_patterns,
|
|
195
|
+
exclude_file_name_patterns=self.exclude_file_name_patterns,
|
|
196
|
+
include_file_type_patterns=self.include_file_type_patterns,
|
|
197
|
+
exclude_file_type_patterns=self.exclude_file_type_patterns,
|
|
198
|
+
display=True
|
|
199
|
+
)
|
|
200
|
+
self.exp_list_error = list(exps_error)
|
|
201
|
+
|
|
202
|
+
file_paths = []
|
|
203
|
+
for exp in exps:
|
|
204
|
+
dt_fr_exp = datetime.datetime(
|
|
205
|
+
exp.startyear, exp.startmonth, exp.startday, exp.starthour, exp.startmin, exp.startsec
|
|
206
|
+
)
|
|
207
|
+
dt_to_exp = datetime.datetime(
|
|
208
|
+
exp.endyear, exp.endmonth, exp.endday, exp.endhour, exp.endmin, exp.endsec
|
|
209
|
+
)
|
|
210
|
+
# if (dt_fr_exp >= self.dt_to) or (dt_to_exp <= self.dt_fr):
|
|
211
|
+
# continue
|
|
212
|
+
for file in list(exp.files):
|
|
213
|
+
|
|
214
|
+
file_path_remote = pathlib.Path(file.name)
|
|
215
|
+
file_name_remote = file_path_remote.name
|
|
216
|
+
|
|
217
|
+
try:
|
|
218
|
+
res = re.search(r'([\d]{4}\-[\d]{2}\-[\d]{2})_(\w+)@(\w+)', exp.name)
|
|
219
|
+
thisday = datetime.datetime.strptime(res.groups()[0], "%Y-%m-%d")
|
|
220
|
+
pulse_code = res.groups()[1]
|
|
221
|
+
antenna_ = res.groups()[2]
|
|
222
|
+
if 'uhf' in antenna_.lower():
|
|
223
|
+
antenna = 'UHF'
|
|
224
|
+
elif 'vhf' in antenna_.lower():
|
|
225
|
+
antenna = 'VHF'
|
|
226
|
+
elif any([a in antenna_.lower() for a in ['32m', '42m', 'esr']]):
|
|
227
|
+
antenna = 'ESR'
|
|
228
|
+
elif 'sod' in antenna_.lower():
|
|
229
|
+
antenna = 'SOD'
|
|
230
|
+
elif 'kir' in antenna_.lower():
|
|
231
|
+
antenna = 'KIR'
|
|
232
|
+
else:
|
|
233
|
+
raise NotImplementedError
|
|
234
|
+
except Exception as e:
|
|
235
|
+
print(e)
|
|
236
|
+
mylog.StreamLogger.warning("Parsing the experiment name was failed! EXP: {}".format(exp.name))
|
|
237
|
+
|
|
238
|
+
file_dir_local = self.data_file_root_dir / antenna / thisday.strftime("%Y") / \
|
|
239
|
+
(exp.name + '_EID-' + str(exp.id) + '_'
|
|
240
|
+
+ dt_fr_exp.strftime("%Y%m%dT%H%M%S") + '_'
|
|
241
|
+
+ dt_to_exp.strftime("%Y%m%dT%H%M%S"))
|
|
242
|
+
file_dir_local.mkdir(parents=True, exist_ok=True)
|
|
40
243
|
|
|
41
|
-
|
|
244
|
+
file_name_local = file_name_remote
|
|
245
|
+
file_path_local = file_dir_local / file_name_local
|
|
246
|
+
|
|
247
|
+
super().download(
|
|
248
|
+
file_path_remote=file.name, file_path_local=file_path_local,
|
|
249
|
+
file_format='hdf5')
|
|
250
|
+
file_paths.append(file_path_local)
|
|
251
|
+
return file_paths
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def test():
|
|
255
|
+
dt_0 = datetime.datetime(2016, 3, 9, 19)
|
|
256
|
+
dt_1 = datetime.datetime(2016, 3, 9, 23, 59)
|
|
257
|
+
downloader = DownloaderNew(
|
|
258
|
+
dt_fr=dt_0, dt_to=dt_1,
|
|
259
|
+
kind_data='EISCAT',
|
|
260
|
+
antennas=['UHF', 'VHF'],
|
|
261
|
+
force_download=True,
|
|
262
|
+
exclude_file_type_patterns=['pp']
|
|
263
|
+
)
|
|
264
|
+
pass
|
|
265
|
+
|
|
266
|
+
class DownloaderOld(object):
|
|
42
267
|
"""Download the quickplots and archieved analyzed results from EISCAT schedule webpage
|
|
43
268
|
"""
|
|
44
269
|
|
|
@@ -220,7 +445,8 @@ class Downloader(object):
|
|
|
220
445
|
instrument_codes = {
|
|
221
446
|
'UHF': [72],
|
|
222
447
|
'VHF': [74],
|
|
223
|
-
'
|
|
448
|
+
'42m': [95],
|
|
449
|
+
'32m': [95],
|
|
224
450
|
'SOD': [73, 76],
|
|
225
451
|
'KIR': [71, 75]
|
|
226
452
|
}
|