geospacelab 0.11.4__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geospacelab/__init__.py +1 -1
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/downloader.py +103 -0
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/edr_aur/__init__.py +17 -7
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/edr_aur/downloader.py +13 -62
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/__init__.py +317 -0
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/downloader.py +44 -0
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/loader.py +198 -0
- geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/variable_config.py +149 -0
- geospacelab/datahub/sources/cdaweb/downloader.py +396 -97
- geospacelab/datahub/sources/cdaweb/downloader_backup.py +93 -0
- geospacelab/datahub/sources/cdaweb/omni/__init__.py +26 -14
- geospacelab/datahub/sources/cdaweb/omni/downloader.py +97 -84
- geospacelab/datahub/sources/esa_eo/swarm/advanced/efi_tct02/__init__.py +1 -1
- geospacelab/datahub/sources/esa_eo/swarm/advanced/efi_tct02/downloader.py +1 -1
- geospacelab/datahub/sources/gfz/hpo/__init__.py +1 -1
- geospacelab/datahub/sources/gfz/hpo/variable_config.py +3 -1
- geospacelab/datahub/sources/madrigal/isr/pfisr/fitted/loader.py +1 -1
- geospacelab/datahub/sources/madrigal/satellites/dmsp/downloader.py +2 -1
- geospacelab/datahub/sources/tud/champ/dns_acc/__init__.py +24 -7
- geospacelab/datahub/sources/tud/champ/dns_acc/downloader.py +29 -36
- geospacelab/datahub/sources/tud/champ/dns_acc/loader.py +28 -2
- geospacelab/datahub/sources/tud/champ/wnd_acc/__init__.py +68 -10
- geospacelab/datahub/sources/tud/champ/wnd_acc/downloader.py +29 -36
- geospacelab/datahub/sources/tud/champ/wnd_acc/loader.py +36 -7
- geospacelab/datahub/sources/tud/champ/wnd_acc/variable_config.py +3 -3
- geospacelab/datahub/sources/tud/downloader.py +288 -113
- geospacelab/datahub/sources/tud/goce/dns_acc/__init__.py +354 -0
- geospacelab/datahub/sources/tud/goce/dns_acc/downloader.py +42 -0
- geospacelab/datahub/sources/tud/goce/dns_acc/loader.py +66 -0
- geospacelab/datahub/sources/tud/goce/dns_acc/variable_config.py +139 -0
- geospacelab/datahub/sources/tud/goce/dns_wnd_acc/__init__.py +3 -3
- geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/__init__.py +339 -0
- geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/downloader.py +42 -0
- geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/loader.py +84 -0
- geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/variable_config.py +212 -0
- geospacelab/datahub/sources/tud/goce/wnd_acc/__init__.py +339 -0
- geospacelab/datahub/sources/tud/goce/wnd_acc/downloader.py +42 -0
- geospacelab/datahub/sources/tud/goce/wnd_acc/loader.py +65 -0
- geospacelab/datahub/sources/tud/goce/wnd_acc/variable_config.py +188 -0
- geospacelab/datahub/sources/tud/grace/dns_acc/__init__.py +6 -3
- geospacelab/datahub/sources/tud/grace/dns_acc/downloader.py +29 -37
- geospacelab/datahub/sources/tud/grace/wnd_acc/__init__.py +21 -4
- geospacelab/datahub/sources/tud/grace/wnd_acc/downloader.py +29 -39
- geospacelab/datahub/sources/tud/grace/wnd_acc/loader.py +5 -1
- geospacelab/datahub/sources/tud/grace/wnd_acc/variable_config.py +74 -0
- geospacelab/datahub/sources/tud/grace_fo/dns_acc/__init__.py +6 -3
- geospacelab/datahub/sources/tud/grace_fo/dns_acc/downloader.py +35 -40
- geospacelab/datahub/sources/tud/grace_fo/wnd_acc/__init__.py +20 -4
- geospacelab/datahub/sources/tud/grace_fo/wnd_acc/downloader.py +29 -44
- geospacelab/datahub/sources/tud/grace_fo/wnd_acc/loader.py +4 -0
- geospacelab/datahub/sources/tud/grace_fo/wnd_acc/variable_config.py +73 -0
- geospacelab/datahub/sources/tud/swarm/dns_acc/__init__.py +27 -5
- geospacelab/datahub/sources/tud/swarm/dns_acc/downloader.py +29 -38
- geospacelab/datahub/sources/tud/swarm/dns_pod/__init__.py +24 -5
- geospacelab/datahub/sources/tud/swarm/dns_pod/downloader.py +29 -38
- geospacelab/datahub/sources/tud/swarm/dns_pod/loader.py +3 -0
- geospacelab/datahub/sources/wdc/asysym/downloader.py +2 -2
- geospacelab/visualization/mpl/panels.py +7 -3
- {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/METADATA +1 -1
- {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/RECORD +63 -45
- {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/WHEEL +1 -1
- {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/licenses/LICENSE +0 -0
- {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@ __docformat__ = "reStructureText"
|
|
|
12
12
|
import datetime
|
|
13
13
|
import pathlib
|
|
14
14
|
|
|
15
|
+
import ftplib
|
|
15
16
|
import requests
|
|
16
17
|
import bs4
|
|
17
18
|
import re
|
|
@@ -22,132 +23,430 @@ import geospacelab.toolbox.utilities.pylogging as mylog
|
|
|
22
23
|
from geospacelab.config import prf
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
class
|
|
26
|
-
|
|
26
|
+
class DownloaderBase(object):
    """
    Common state and helpers shared by the protocol-specific downloaders.

    Parameters
    ----------
    dt_fr : datetime.datetime
        The start datetime for downloading data.
    dt_to : datetime.datetime
        The end datetime for downloading data.
    root_dir_local : str or pathlib.Path
        The root directory in the local disk to store the downloaded data files.
        When omitted, ``prf.datahub_data_root_dir`` is used.
    direct_download : bool
        Whether to start downloading once the Downloader object is created.
    force_download : bool
        Whether to force re-download even if the data files are already in the local disk.
    dry_run : bool
        Whether to only print the downloading information without actual downloading.
    download_from : object, optional
        Accepted for the benefit of subclasses; this base class does not use it.
    **kwargs
        Accepted and ignored by this base class.

    Attributes
    ----------
    done : None or list of bool
        ``None`` until a download has been attempted; afterwards one flag per file.
    file_paths_local : list of pathlib.Path
        The local file paths of the downloaded data files.
    file_paths_remote : list of str
        The remote file paths of the data files to be downloaded.
    file_paths_source : list
        Initialised empty; not otherwise touched by this class.
    """

    def __init__(
            self,
            dt_fr=None,
            dt_to=None,
            root_dir_local=None,
            direct_download=False,
            force_download=False,
            dry_run=False,
            download_from=None,
            **kwargs
    ):

        self.force_download = force_download
        self.dry_run = dry_run

        self.dt_fr = dt_fr
        self.dt_to = dt_to
        self.file_paths_source = []
        self.file_paths_local = []
        self.file_paths_remote = []
        self.done = None

        if root_dir_local is None:
            root_dir_local = prf.datahub_data_root_dir
        # Normalise to a Path: a plain str is an accepted input, but the
        # directory is later combined with file names through the "/" operator.
        self.root_dir_local = pathlib.Path(root_dir_local)

        if direct_download:
            self.download()

    def download(self, *args, **kwargs):
        """Hook for subclasses; the base implementation does nothing."""
        pass

    def _to_download(self, file_path, with_suffix=None):
        """
        Decide whether a file has to be (re-)downloaded.

        Parameters
        ----------
        file_path : str or pathlib.Path
            The intended local path of the file.
        with_suffix : str, optional
            When given, the existence check is done on ``file_path`` with its
            suffix replaced by this value instead of on ``file_path`` itself.

        Returns
        -------
        bool
            ``True`` when the file is missing or ``force_download`` is set,
            ``False`` when an existing file should be kept.
        """
        file_path = pathlib.Path(file_path)
        if with_suffix is not None:
            is_file = file_path.with_suffix(with_suffix).is_file()
        else:
            is_file = file_path.is_file()

        if not is_file:
            return True

        if self.force_download:
            mylog.simpleinfo.info(
                "The file {} exists in the directory {}: Forced redownloading the file ...".format(
                    file_path.name, file_path.parent.resolve()
                )
            )
            return True

        mylog.simpleinfo.info(
            "The file {} exists in the directory {}: Skipped downloading.".format(
                file_path.name, file_path.parent.resolve()
            )
        )
        return False

    def _search_files(self, file_list, file_name_patterns):
        """
        Filter ``file_list`` by a sequence of name fragments.

        The fragments are joined with ``.*`` into a single regular expression,
        so they must occur in the given order and are themselves interpreted
        as regular-expression syntax.

        Parameters
        ----------
        file_list : iterable of str
            Candidate file names (or hrefs).
        file_name_patterns : list of str or None
            Ordered fragments; ``None`` or an empty list keeps every entry.

        Returns
        -------
        list of str
            The entries of ``file_list`` that match, in their original order.
        """
        if not file_name_patterns:
            # No filter requested: joining ``None`` would raise a TypeError.
            return list(file_list)
        search_pattern = '.*' + '.*'.join(file_name_patterns) + '.*'
        fn_regex = re.compile(search_pattern)
        return [file_name for file_name in file_list if fn_regex.match(file_name)]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class DownloaderFromFTPBase(DownloaderBase):
    """
    Base downloader for downloading data files from FTP server.

    Parameters
    ----------
    ftp_host : str
        The FTP host address.
    ftp_port : int
        The FTP port [21].
    username : str
        The login name ['anonymous'].
    password : str
        The login password [''].
    root_dir_remote : str
        The directory in the FTP that stores the data. When omitted, a value
        set as class attribute by a subclass is kept; otherwise '/' is used.
    """

    root_dir_remote = None

    def __init__(
            self,
            dt_fr, dt_to,
            ftp_host=None, ftp_port=21,
            username='anonymous', password='',
            root_dir_local=None,
            root_dir_remote=None,
            direct_download=False, force_download=False, dry_run=False,
            **kwargs
    ):

        self.ftp_host = ftp_host
        self.ftp_port = ftp_port
        self.username = username
        self.password = password
        if root_dir_remote is not None:
            self.root_dir_remote = root_dir_remote
        elif self.root_dir_remote is None:
            # Only fall back to the server root when no subclass has provided
            # a class-level default.
            self.root_dir_remote = '/'

        super(DownloaderFromFTPBase, self).__init__(
            dt_fr, dt_to,
            root_dir_local=root_dir_local,
            direct_download=direct_download,
            force_download=force_download,
            dry_run=dry_run,
            **kwargs
        )

    def download(
            self,
            with_TLS=False,
            subdirs: None | list = None,
            file_name_patterns: None | list = None,
            **kwargs
    ):
        """
        Connect to the FTP server, search for matching files and save them.

        Parameters
        ----------
        with_TLS : bool
            Use ``ftplib.FTP_TLS`` instead of ``ftplib.FTP``.
        subdirs : list of str, optional
            Directories entered one after another, starting from ``root_dir_remote``.
        file_name_patterns : list of str, optional
            Ordered name fragments used to filter the directory listing.
        **kwargs
            ``timeout`` (seconds, default 30) and ``encoding`` (default 'utf-8')
            are consumed here; anything else is ignored.
        """
        timeout = kwargs.pop('timeout', 30)  # seconds
        encoding = kwargs.pop('encoding', 'utf-8')

        ftp_class = ftplib.FTP_TLS if with_TLS else ftplib.FTP

        with ftp_class(encoding=encoding) as ftp:
            ftp.connect(self.ftp_host, self.ftp_port, timeout)
            ftp.login(user=self.username, passwd=self.password)
            ftp.cwd(self.root_dir_remote)

            self.file_paths_remote = self.search_from_ftp(
                ftp, subdirs=subdirs, file_name_patterns=file_name_patterns
            )
            self.save_files_from_ftp(ftp)

    def search_from_ftp(self, ftp, subdirs: None | list = None, file_name_patterns: None | list = None):
        """
        List the files of a remote directory and filter them by name.

        Parameters
        ----------
        ftp : ftplib.FTP
            A connected, logged-in FTP session. Its working directory is changed.
        subdirs : list of str, optional
            Directories entered one after another before listing.
        file_name_patterns : list of str, optional
            Ordered name fragments passed to ``_search_files``; ``None`` keeps all files.

        Returns
        -------
        list of str
            Remote paths built from the working directory and each matching name.
        """
        for subdir in subdirs or []:
            ftp.cwd(subdir)

        try:
            files = ftp.nlst()
        except ftplib.error_perm as resp:
            if str(resp) != "550 No files found":
                raise
            mylog.StreamLogger.warning("No files in this directory.")
            # An empty directory is not an error: continue with no candidates.
            files = []

        file_names = self._search_files(
            file_list=files, file_name_patterns=file_name_patterns or []
        )

        # Query the working directory once; strip the trailing slash so that
        # files located in "/" do not end up as "//name".
        dir_remote = ftp.pwd().rstrip('/')
        return [dir_remote + '/' + fn for fn in file_names]

    @staticmethod
    def _relative_remote_path(path_remote, root_remote):
        """Return ``path_remote`` without the leading ``root_remote`` and without leading slashes."""
        root_remote = str(root_remote) if root_remote else ''
        if root_remote and path_remote.startswith(root_remote):
            path_remote = path_remote[len(root_remote):]
        # A leading "/" would make pathlib treat the result as absolute and
        # silently drop the local root directory when the two are joined.
        return path_remote.lstrip('/')

    def save_files_from_ftp(self, ftp, file_paths_local=None, root_dir_remote=None):
        """
        Download every entry of ``self.file_paths_remote`` through ``ftp``.

        Parameters
        ----------
        ftp : ftplib.FTP
            A connected, logged-in FTP session.
        file_paths_local : list of str or pathlib.Path, optional
            Target paths, one per remote file. When omitted, each remote path is
            mirrored below ``self.root_dir_local`` relative to ``root_dir_remote``.
        root_dir_remote : str, optional
            Remote root used for the mirroring; defaults to ``self.root_dir_remote``.

        Notes
        -----
        ``self.done`` is reset to one flag per remote file. A flag becomes
        ``True`` when the file was skipped because it already exists or when
        the transfer finished with a 226 reply. In dry-run mode nothing is
        transferred and all flags stay ``False``.
        """
        if root_dir_remote is None:
            root_dir_remote = self.root_dir_remote

        if file_paths_local is None:
            root_dir_local = pathlib.Path(self.root_dir_local)
            self.file_paths_local = [
                root_dir_local / self._relative_remote_path(fp_remote, root_dir_remote)
                for fp_remote in self.file_paths_remote
            ]
        else:
            # Honour the caller-supplied targets.
            self.file_paths_local = [pathlib.Path(fp) for fp in file_paths_local]
        self.done = [False] * len(self.file_paths_remote)

        if self.dry_run:
            for fp_remote, fp_local in zip(self.file_paths_remote, self.file_paths_local):
                mylog.simpleinfo.info(
                    f"Dry run: Downloading the file {fp_remote} to {fp_local} ..."
                )
            return

        for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
            if not self._to_download(fp_local):
                self.done[i] = True
                continue
            mylog.simpleinfo.info(
                f"Downloading the file: {fp_remote} ..."
            )

            file_dir_remote, sep, file_name_remote = fp_remote.rpartition('/')
            if sep and not file_dir_remote:
                file_dir_remote = '/'  # the file sits directly in the server root

            fp_local.parent.resolve().mkdir(exist_ok=True, parents=True)

            if file_dir_remote and file_dir_remote != ftp.pwd():
                ftp.cwd(file_dir_remote)

            # SIZE is refused by some servers while in ASCII mode, so switch to
            # binary first; the size only feeds the progress bar.
            try:
                ftp.voidcmd('TYPE I')
                total = ftp.size(file_name_remote)
            except ftplib.Error:
                total = None

            bufsize = 1024
            succeeded = False
            pbar = tqdm.tqdm(total=total)
            try:
                with open(fp_local, 'wb') as f:
                    def write_chunk(data):
                        f.write(data)
                        pbar.update(len(data))
                    res = ftp.retrbinary('RETR ' + file_name_remote, write_chunk, bufsize)
                succeeded = res.startswith('226')
            finally:
                pbar.close()
                if not succeeded:
                    # Remove the partial file only after it has been closed.
                    fp_local.unlink(missing_ok=True)

            if succeeded:
                self.done[i] = True
                mylog.simpleinfo.info("Saved as {}.".format(fp_local))
            else:
                mylog.StreamLogger.warning('The downloading is not completed...: {}'.format(res))
                mylog.StreamLogger.error("Error in downloading the file {}.".format(file_name_remote))
        return
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class DownloaderFromHTTPBase(DownloaderBase):
    """
    Base downloader for downloading data files from HTTP server.

    Parameters
    ----------
    base_url : str
        The base URL of the HTTP server.
    root_dir_remote : str
        The directory in the HTTP server that stores the data. When omitted,
        the class attribute ``root_dir_remote`` is kept.
    """

    root_dir_remote = None

    def __init__(
            self,
            dt_fr, dt_to,
            base_url=None,
            root_dir_local=None,
            root_dir_remote=None,
            direct_download=False, force_download=False, dry_run=False,
            **kwargs
    ):

        self.base_url = base_url

        if root_dir_remote is not None:
            self.root_dir_remote = root_dir_remote

        super(DownloaderFromHTTPBase, self).__init__(
            dt_fr, dt_to,
            root_dir_local=root_dir_local,
            direct_download=direct_download,
            force_download=force_download,
            dry_run=dry_run,
            **kwargs
        )

    def download(
            self,
            subdirs: None | list = None,
            file_name_patterns: None | list = None,
            **kwargs
    ):
        """
        Search the remote directory listing and save the matching files.

        Parameters
        ----------
        subdirs : list of str, optional
            Path components appended to the base URL and remote root.
        file_name_patterns : list of str, optional
            Ordered name fragments used to filter the listing.
        **kwargs
            Accepted and ignored.
        """
        self.file_paths_remote = self.search_from_http(
            subdirs=subdirs, file_name_patterns=file_name_patterns
        )
        self.save_files_from_http()

    def search_from_http(self, subdirs: None | list = None, file_name_patterns: None | list = None, **kwargs):
        """
        Collect the links of a remote directory page that match the name patterns.

        Parameters
        ----------
        subdirs : list of str, optional
            Path components appended after ``base_url`` and ``root_dir_remote``.
        file_name_patterns : list of str, optional
            Ordered name fragments passed to ``_search_files``; ``None`` keeps all links.
        **kwargs
            ``timeout`` (seconds, default 30) is used for the listing request.

        Returns
        -------
        list of str
            ``<page url>/<href>`` for every matching link; empty when the page
            does not answer with status 200.
        """
        timeout = kwargs.pop('timeout', 30)  # seconds

        url_patterns = [self.base_url]
        # Test the value itself: str(None) is the non-empty string 'None'.
        if self.root_dir_remote:
            url_patterns.append(str(self.root_dir_remote))
        if subdirs is not None:
            url_patterns.extend(subdirs)
        url = '/'.join(url_patterns)

        with requests.get(url, timeout=timeout) as r:
            if r.status_code != 200:
                mylog.StreamLogger.warning(
                    f"Cannot list the directory {url} (HTTP status {r.status_code})."
                )
                return []
            page = r.text

        soup = bs4.BeautifulSoup(page, 'html.parser')
        a_tags = soup.find_all('a', href=True)
        hrefs = [a_tag['href'] for a_tag in a_tags]

        hrefs = self._search_files(file_list=hrefs, file_name_patterns=file_name_patterns or [])

        return [url + '/' + href for href in hrefs]

    @staticmethod
    def _relative_remote_path(path_remote, root_remote):
        """Return ``path_remote`` without the leading ``root_remote`` and without leading slashes."""
        root_remote = str(root_remote) if root_remote else ''
        if root_remote and path_remote.startswith(root_remote):
            path_remote = path_remote[len(root_remote):]
        # A leading "/" would make pathlib treat the result as absolute and
        # silently drop the local root directory when the two are joined.
        return path_remote.lstrip('/')

    def save_files_from_http(self, file_paths_local=None, root_dir_remote=None):
        """
        Download every entry of ``self.file_paths_remote``.

        Parameters
        ----------
        file_paths_local : list of str or pathlib.Path, optional
            Target paths, one per remote file. When omitted, each URL is mirrored
            below ``self.root_dir_local`` relative to ``base_url/root_dir_remote``.
        root_dir_remote : str, optional
            Remote root used for the mirroring; defaults to ``self.root_dir_remote``.

        Notes
        -----
        ``self.done`` is reset to one flag per remote file. A flag becomes
        ``True`` when the file was skipped because it already exists or when
        the download helper reports success. In dry-run mode nothing is
        transferred and all flags stay ``False``.
        """
        if root_dir_remote is None:
            root_dir_remote = self.root_dir_remote
        if root_dir_remote:
            root_url = self.base_url + '/' + str(root_dir_remote)
        else:
            root_url = self.base_url

        if file_paths_local is None:
            root_dir_local = pathlib.Path(self.root_dir_local)
            self.file_paths_local = [
                root_dir_local / self._relative_remote_path(fp_remote, root_url)
                for fp_remote in self.file_paths_remote
            ]
        else:
            self.file_paths_local = file_paths_local
        self.done = [False] * len(self.file_paths_remote)

        if self.dry_run:
            for fp_remote, fp_local in zip(self.file_paths_remote, self.file_paths_local):
                mylog.simpleinfo.info(
                    f"Dry run: Downloading the file {fp_remote} to {fp_local} ..."
                )
            return

        for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
            if not self._to_download(fp_local):
                self.done[i] = True
                continue

            mylog.simpleinfo.info(
                f"Downloading the file: {fp_remote} ..."
            )

            res = self._download_by_requests_get(fp_remote, fp_local)
            # The helper reports failures as negative codes, which are truthy;
            # a plain truth test would count every failure as a success.
            failed = (not res) or (isinstance(res, int) and res < 0)
            if failed:
                mylog.StreamLogger.error(f"Error during downloading. Code: {res}.")
            else:
                mylog.simpleinfo.info(f"Saved as {fp_local}.")
                self.done[i] = True

        return

    @staticmethod
    def _download_by_requests_get(
            url, file_path_local,
            params=None, stream=True, allow_redirects=True,
            file_block_size=1024,
            **kwargs
    ):
        """
        Stream one URL into a local file with a progress bar.

        Parameters
        ----------
        url : str
            The address of the file.
        file_path_local : str or pathlib.Path
            Where to store the file; missing parent directories are created.
        params, stream, allow_redirects, **kwargs
            Forwarded to ``requests.get``.
        file_block_size : int
            Chunk size in bytes used while streaming.

        Returns
        -------
        int
            ``1`` on success, ``-2`` when the server does not answer with status
            200, ``-1`` when an exception occurs or the number of received bytes
            differs from a non-zero ``content-length`` header.
        """
        file_path_local = pathlib.Path(file_path_local)
        started_writing = False
        try:
            # The context manager releases the connection on every exit path.
            with requests.get(
                    url, params=params, stream=stream, allow_redirects=allow_redirects, **kwargs
            ) as r:
                if r.status_code != 200:
                    return -2

                total_size_in_bytes = int(r.headers.get('content-length', 0))

                file_path_local.parent.mkdir(parents=True, exist_ok=True)
                progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
                try:
                    started_writing = True
                    with open(file_path_local, 'wb') as file:
                        for data in r.iter_content(file_block_size):
                            progress_bar.update(len(data))
                            file.write(data)
                finally:
                    progress_bar.close()
                n_received = progress_bar.n

            if total_size_in_bytes != 0 and n_received != total_size_in_bytes:
                mylog.StreamLogger.error("Something wrong during the download!")
                file_path_local.unlink(missing_ok=True)
                return -1
        except Exception as e:
            mylog.StreamLogger.error(f"Exception during downloading the file from {url}: {e}")
            # Drop a partial file, but never an older copy that was left
            # untouched because the request failed before writing began.
            if started_writing:
                file_path_local.unlink(missing_ok=True)
            return -1
        return 1
|
|
132
413
|
|
|
133
|
-
def search_from_ftp(self, ftp, file_name_patterns=None):
|
|
134
|
-
paths = []
|
|
135
|
-
|
|
136
|
-
return paths
|
|
137
414
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
if file_path.is_file():
|
|
142
|
-
mylog.simpleinfo.info(
|
|
143
|
-
"The file {} exists in the directory {}.".format(file_path.name, file_path.parent.resolve()))
|
|
415
|
+
class CDAWebHTTPDownloader(DownloaderFromHTTPBase):
    """
    Downloader for downloading data files from CDAWeb HTTP server.

    The server address is fixed to ``https://cdaweb.gsfc.nasa.gov`` and the
    class-level remote root is ``pub/data``.

    Parameters
    ----------
    dt_fr, dt_to : datetime.datetime
        The start and end datetimes, forwarded to the parent constructor.
    root_dir_local : str or pathlib.Path, optional
        The local root directory; defaults to ``prf.datahub_data_root_dir / 'CDAWeb'``.
    root_dir_remote : str, optional
        The directory in the CDAWeb HTTP server that stores the data; forwarded
        unchanged to the parent constructor.
    direct_download, force_download, dry_run : bool
        Forwarded unchanged to the parent constructor.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """

    root_dir_remote = 'pub/data'

    def __init__(
            self,
            dt_fr, dt_to,
            root_dir_local=None,
            root_dir_remote=None,
            direct_download=False, force_download=False, dry_run=False,
            **kwargs
    ):
        local_root = (
            prf.datahub_data_root_dir / 'CDAWeb'
            if root_dir_local is None
            else root_dir_local
        )

        super().__init__(
            dt_fr, dt_to,
            base_url='https://cdaweb.gsfc.nasa.gov',
            root_dir_local=local_root,
            root_dir_remote=root_dir_remote,
            direct_download=direct_download,
            force_download=force_download,
            dry_run=dry_run,
            **kwargs
        )
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import pathlib
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import geospacelab.toolbox.utilities.pydatetime as dttool
|
|
6
|
+
import geospacelab.toolbox.utilities.pylogging as mylog
|
|
7
|
+
from geospacelab.config import prf
|
|
8
|
+
from geospacelab.datahub.sources.cdaweb.downloader import Downloader as DownloaderBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Downloader(DownloaderBase):
    """
    Downloader for the DMSP SSUSI EDR-AURORA files on CDAWeb (backup module).

    NOTE(review): the base class is imported as ``Downloader`` from
    ``geospacelab.datahub.sources.cdaweb.downloader`` and is called with the
    ``data_file_root_dir`` keyword and ``save_file_from_http`` method -
    confirm that this name and API still exist in that module.

    Parameters
    ----------
    dt_fr, dt_to : datetime.datetime
        The start and end datetimes of the requested interval.
    sat_id : str
        The satellite identifier, e.g. 'F17'. Its lower-case form is used to
        build remote directory and file names, so it must not be ``None``.
    orbit_id : str, optional
        When given, files are searched by 'REV' plus this value instead of by date.
    direct_download, force_download, dry_run : bool
        Forwarded to the base class.
    data_file_root_dir : pathlib.Path, optional
        Defaults to ``prf.datahub_data_root_dir / 'CDAWeb' / 'DMSP' / 'SSUSI' / 'EDR_AUR'``.
    """

    def __init__(
            self,
            dt_fr=None, dt_to=None,
            sat_id=None,
            orbit_id=None,
            direct_download=True,
            force_download=False,
            data_file_root_dir=None,
            dry_run=False,
    ):
        product = 'EDR_AUR'
        if data_file_root_dir is None:
            ssusi_root = prf.datahub_data_root_dir / 'CDAWeb' / 'DMSP' / 'SSUSI'
            data_file_root_dir = ssusi_root / product

        self.sat_id = sat_id
        self.orbit_id = orbit_id
        sat_dir = 'dmsp' + self.sat_id.lower()
        self.source_subdirs = ['dmsp', sat_dir, 'ssusi', 'data', 'edr-aurora']

        super().__init__(
            dt_fr, dt_to,
            data_file_root_dir=data_file_root_dir,
            direct_download=direct_download,
            force_download=force_download,
            dry_run=dry_run,
        )

    def search_from_http(self, file_name_patterns=None, allow_multiple_files=True):
        """
        Search the daily remote directories for matching files.

        The requested interval is widened by three hours on both sides and
        every day it touches is searched. Both arguments are accepted but not
        used: the name patterns are rebuilt for each day.

        Returns
        -------
        list
            The concatenated results of the base-class search for each day.
        """
        margin = datetime.timedelta(hours=3)
        search_fr = self.dt_fr - margin
        search_to = self.dt_to + margin
        n_days = dttool.get_diff_days(search_fr, search_to)
        first_day = dttool.get_start_of_the_day(search_fr)

        source_file_paths = []
        for day_offset in range(n_days + 1):
            this_day = first_day + datetime.timedelta(days=day_offset)
            sdoy = '{:03d}'.format(dttool.get_doy(this_day))
            subdirs = copy.deepcopy(self.source_subdirs) + [str(this_day.year), sdoy]

            # Select by date stamp unless a specific orbit was requested.
            if self.orbit_id is None:
                selector = [this_day.strftime("%Y") + sdoy + 'T']
            else:
                selector = ['REV', self.orbit_id]
            file_name_patterns = [
                'dmsp' + self.sat_id.lower(),
                'ssusi',
                'edr-aurora',
                *selector,
                '.nc',
            ]

            source_file_paths.extend(
                super().search_from_http(subdirs=subdirs, file_name_patterns=file_name_patterns)
            )
        return source_file_paths

    def save_file_from_http(self, url, file_dir=None, file_name=None):
        """
        Save one file, deriving the default target directory from the URL.

        The year and day of year are read from the third-last and second-last
        '/'-separated components of ``url``. ``file_name`` is accepted but not used.
        """
        url_parts = url.split('/')
        sy = url_parts[-3]
        sdoy = url_parts[-2]
        this_day = dttool.convert_doy_to_datetime(int(sy), int(sdoy))
        if file_dir is None:
            day_dir = this_day.strftime("%Y%m%d")
            file_dir = self.data_file_root_dir / self.sat_id.upper() / sy / day_dir
        super().save_file_from_http(url, file_dir=file_dir)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
    # Manual example: the constructor starts the download immediately because
    # ``direct_download`` defaults to True.
    example_request = dict(
        dt_fr=datetime.datetime(2011, 1, 6),
        dt_to=datetime.datetime(2011, 1, 6, 12),
        sat_id='F17',
        orbit_id='21523',
        dry_run=False,
    )
    downloader = Downloader(**example_request)
|