geospacelab 0.11.4__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. geospacelab/__init__.py +1 -1
  2. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/downloader.py +103 -0
  3. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/edr_aur/__init__.py +17 -7
  4. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/edr_aur/downloader.py +13 -62
  5. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/__init__.py +317 -0
  6. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/downloader.py +44 -0
  7. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/loader.py +198 -0
  8. geospacelab/datahub/sources/cdaweb/dmsp/ssusi/sdr_disk/variable_config.py +149 -0
  9. geospacelab/datahub/sources/cdaweb/downloader.py +396 -97
  10. geospacelab/datahub/sources/cdaweb/downloader_backup.py +93 -0
  11. geospacelab/datahub/sources/cdaweb/omni/__init__.py +26 -14
  12. geospacelab/datahub/sources/cdaweb/omni/downloader.py +97 -84
  13. geospacelab/datahub/sources/esa_eo/swarm/advanced/efi_tct02/__init__.py +1 -1
  14. geospacelab/datahub/sources/esa_eo/swarm/advanced/efi_tct02/downloader.py +1 -1
  15. geospacelab/datahub/sources/gfz/hpo/__init__.py +1 -1
  16. geospacelab/datahub/sources/gfz/hpo/variable_config.py +3 -1
  17. geospacelab/datahub/sources/madrigal/isr/pfisr/fitted/loader.py +1 -1
  18. geospacelab/datahub/sources/madrigal/satellites/dmsp/downloader.py +2 -1
  19. geospacelab/datahub/sources/tud/champ/dns_acc/__init__.py +24 -7
  20. geospacelab/datahub/sources/tud/champ/dns_acc/downloader.py +29 -36
  21. geospacelab/datahub/sources/tud/champ/dns_acc/loader.py +28 -2
  22. geospacelab/datahub/sources/tud/champ/wnd_acc/__init__.py +68 -10
  23. geospacelab/datahub/sources/tud/champ/wnd_acc/downloader.py +29 -36
  24. geospacelab/datahub/sources/tud/champ/wnd_acc/loader.py +36 -7
  25. geospacelab/datahub/sources/tud/champ/wnd_acc/variable_config.py +3 -3
  26. geospacelab/datahub/sources/tud/downloader.py +288 -113
  27. geospacelab/datahub/sources/tud/goce/dns_acc/__init__.py +354 -0
  28. geospacelab/datahub/sources/tud/goce/dns_acc/downloader.py +42 -0
  29. geospacelab/datahub/sources/tud/goce/dns_acc/loader.py +66 -0
  30. geospacelab/datahub/sources/tud/goce/dns_acc/variable_config.py +139 -0
  31. geospacelab/datahub/sources/tud/goce/dns_wnd_acc/__init__.py +3 -3
  32. geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/__init__.py +339 -0
  33. geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/downloader.py +42 -0
  34. geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/loader.py +84 -0
  35. geospacelab/datahub/sources/tud/goce/dns_wnd_acc_v01/variable_config.py +212 -0
  36. geospacelab/datahub/sources/tud/goce/wnd_acc/__init__.py +339 -0
  37. geospacelab/datahub/sources/tud/goce/wnd_acc/downloader.py +42 -0
  38. geospacelab/datahub/sources/tud/goce/wnd_acc/loader.py +65 -0
  39. geospacelab/datahub/sources/tud/goce/wnd_acc/variable_config.py +188 -0
  40. geospacelab/datahub/sources/tud/grace/dns_acc/__init__.py +6 -3
  41. geospacelab/datahub/sources/tud/grace/dns_acc/downloader.py +29 -37
  42. geospacelab/datahub/sources/tud/grace/wnd_acc/__init__.py +21 -4
  43. geospacelab/datahub/sources/tud/grace/wnd_acc/downloader.py +29 -39
  44. geospacelab/datahub/sources/tud/grace/wnd_acc/loader.py +5 -1
  45. geospacelab/datahub/sources/tud/grace/wnd_acc/variable_config.py +74 -0
  46. geospacelab/datahub/sources/tud/grace_fo/dns_acc/__init__.py +6 -3
  47. geospacelab/datahub/sources/tud/grace_fo/dns_acc/downloader.py +35 -40
  48. geospacelab/datahub/sources/tud/grace_fo/wnd_acc/__init__.py +20 -4
  49. geospacelab/datahub/sources/tud/grace_fo/wnd_acc/downloader.py +29 -44
  50. geospacelab/datahub/sources/tud/grace_fo/wnd_acc/loader.py +4 -0
  51. geospacelab/datahub/sources/tud/grace_fo/wnd_acc/variable_config.py +73 -0
  52. geospacelab/datahub/sources/tud/swarm/dns_acc/__init__.py +27 -5
  53. geospacelab/datahub/sources/tud/swarm/dns_acc/downloader.py +29 -38
  54. geospacelab/datahub/sources/tud/swarm/dns_pod/__init__.py +24 -5
  55. geospacelab/datahub/sources/tud/swarm/dns_pod/downloader.py +29 -38
  56. geospacelab/datahub/sources/tud/swarm/dns_pod/loader.py +3 -0
  57. geospacelab/datahub/sources/wdc/asysym/downloader.py +2 -2
  58. geospacelab/visualization/mpl/panels.py +7 -3
  59. {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/METADATA +1 -1
  60. {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/RECORD +63 -45
  61. {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/WHEEL +1 -1
  62. {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/licenses/LICENSE +0 -0
  63. {geospacelab-0.11.4.dist-info → geospacelab-0.12.0.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@ __docformat__ = "reStructureText"
12
12
  import datetime
13
13
  import pathlib
14
14
 
15
+ import ftplib
15
16
  import requests
16
17
  import bs4
17
18
  import re
@@ -22,132 +23,430 @@ import geospacelab.toolbox.utilities.pylogging as mylog
22
23
  from geospacelab.config import prf
23
24
 
24
25
 
25
- class Downloader(object):
26
-
26
+ class DownloaderBase(object):
27
+ """
28
+
29
+ Parameters
30
+ ----------
31
+ dt_fr : datetime.datetime
32
+ The start datetime for downloading data.
33
+ dt_to : datetime.datetime
34
+ The end datetime for downloading data.
35
+ root_dir_local : str or pathlib.Path
36
+ The root directory in the local disk to store the downloaded data files.
37
+ direct_download : bool
38
+ Whether to start downloading once the Downloader object is created.
39
+ force_download : bool
40
+ Whether to force re-download even the data files are already in the local disk.
41
+ dry_run : bool
42
+ Whether to only print the downloading information without actual downloading.
43
+ done : list of bool
44
+ Whether the downloading is done. The length of the list is the number of files to be downloaded.
45
+ file_paths_local : list of pathlib.Path
46
+ The local file paths of the downloaded data files.
47
+ file_paths_remote : list of str
48
+ The remote file paths of the data files to be downloaded.
49
+
50
+ """
27
51
  def __init__(
28
- self,
29
- dt_fr,
30
- dt_to,
31
- direct_download=True,
52
+ self,
53
+ dt_fr=None,
54
+ dt_to=None,
55
+ root_dir_local=None,
56
+ direct_download=False,
32
57
  force_download=False,
33
- data_file_root_dir=None,
34
58
  dry_run=False,
35
- from_ftp=False,
36
- ):
59
+ download_from = None,
60
+ **kwargs
61
+ ):
37
62
 
38
- self.url_base = "https://cdaweb.gsfc.nasa.gov/pub/data/"
39
63
  self.force_download = force_download
40
- self.dry_run=dry_run
41
- self.from_ftp=from_ftp
64
+ self.dry_run = dry_run
42
65
 
43
66
  self.dt_fr = dt_fr
44
67
  self.dt_to = dt_to
45
- self.source_file_paths = []
46
- self.done = False
47
-
48
- if data_file_root_dir is None:
49
- self.data_file_root_dir = prf.datahub_data_root_dir
68
+ self.file_paths_source = []
69
+ self.file_paths_local = []
70
+ self.done = None
71
+ self.file_paths_local = []
72
+ self.file_paths_remote = []
73
+
74
+ if root_dir_local is None:
75
+ self.root_dir_local = prf.datahub_data_root_dir
50
76
  else:
51
- self.data_file_root_dir = data_file_root_dir
77
+ self.root_dir_local = root_dir_local
52
78
 
53
79
  if direct_download:
54
80
  self.download()
81
+
82
+ def download(self, *args, **kwargs):
83
+ pass
84
+
85
+ def _to_download(self, file_path, with_suffix=None):
86
+
87
+ to_download = True
88
+
89
+ if with_suffix is not None:
90
+ is_file = file_path.with_suffix(with_suffix).is_file()
91
+ else:
92
+ is_file = file_path.is_file()
93
+ if is_file:
94
+ if self.force_download:
95
+ mylog.simpleinfo.info(
96
+ "The file {} exists in the directory {}: Forced redownloading the file ...".format(
97
+ file_path.name, file_path.parent.resolve()
98
+ )
99
+ )
100
+ else:
101
+ mylog.simpleinfo.info(
102
+ "The file {} exists in the directory {}: Skipped downloading.".format(
103
+ file_path.name, file_path.parent.resolve()
104
+ )
105
+ )
106
+ to_download = False
107
+ return to_download
108
+
109
+ def _search_files(self, file_list, file_name_patterns):
110
+ search_pattern = '.*' + '.*'.join(file_name_patterns) + '.*'
111
+ fn_regex = re.compile(search_pattern)
112
+ file_names = list(filter(fn_regex.match, file_list))
113
+ return file_names
114
+
115
+
116
+
117
+ class DownloaderFromFTPBase(DownloaderBase):
118
+ """
119
+ Base downloader for downloading data files from FTP server.
120
+
121
+ Parameters
122
+ ----------
123
+ ftp_host : str
124
+ The FTP host address.
125
+ ftp_port : int
126
+ The FTP port [21].
127
+ root_dir_remote : str
128
+ The directory in the FTP that stores the data.
129
+ """
130
+
131
+ root_dir_remote = None
55
132
 
56
- def download(self,):
57
- if self.from_ftp:
58
- raise NotImplementedError
133
+ def __init__(
134
+ self,
135
+ dt_fr, dt_to,
136
+ ftp_host=None, ftp_port=21,
137
+ username='anonymous', password='',
138
+ root_dir_local=None,
139
+ root_dir_remote=None,
140
+ direct_download=False, force_download=False, dry_run=False,
141
+ **kwargs
142
+ ):
143
+
144
+ self.ftp_host = ftp_host
145
+ self.ftp_port = ftp_port
146
+ self.username = username
147
+ self.password = password
148
+ if root_dir_remote is not None:
149
+ self.root_dir_remote = root_dir_remote
59
150
  else:
60
- self.download_from_http()
151
+ self.root_dir_remote = '/'
152
+
153
+ super(DownloaderFromFTPBase, self).__init__(
154
+ dt_fr, dt_to,
155
+ root_dir_local=root_dir_local,
156
+ direct_download=direct_download,
157
+ force_download=force_download,
158
+ dry_run=dry_run,
159
+ **kwargs
160
+ )
161
+
162
+ def download(
163
+ self,
164
+ with_TLS=False,
165
+ subdirs: None | list=None,
166
+ file_name_patterns: None | list=None,
167
+ **kwargs
168
+ ):
169
+
170
+ timeout = kwargs.pop('timeout', 30) # seconds
171
+ encoding = kwargs.pop('encoding', 'utf-8')
172
+
173
+ if with_TLS:
174
+ FTP_CLASS = ftplib.FTP_TLS
175
+ else:
176
+ FTP_CLASS = ftplib.FTP
61
177
 
62
- def search_from_http(self, subdirs=None, file_name_patterns=None):
63
- url = self.url_base + '/'.join(subdirs)
64
- r = requests.get(url)
178
+ with FTP_CLASS(encoding=encoding) as ftp:
179
+ ftp.connect(self.ftp_host, self.ftp_port, timeout)
180
+ ftp.login(user=self.username, passwd=self.password)
181
+ ftp.cwd(self.root_dir_remote)
182
+
183
+ self.file_paths_remote = self.search_from_ftp(
184
+ ftp, subdirs=subdirs, file_name_patterns=file_name_patterns
185
+ )
186
+ self.save_files_from_ftp(ftp)
187
+
188
+ def search_from_ftp(self, ftp, subdirs: None | list=None, file_name_patterns: None | list=None):
189
+ if subdirs is not None:
190
+ for subdir in subdirs:
191
+ ftp.cwd(subdir)
192
+ try:
193
+ files = ftp.nlst()
194
+ except ftplib.error_perm as resp:
195
+ if str(resp) == "550 No files found":
196
+ mylog.StreamLogger.warning("No files in this directory.")
197
+ else:
198
+ raise
199
+
200
+ file_names = self._search_files(
201
+ file_list=files, file_name_patterns=file_name_patterns
202
+ )
203
+
204
+ file_paths_remote = []
205
+ for fn in file_names:
206
+ file_paths_remote.append(ftp.pwd() + '/' + fn)
207
+ return file_paths_remote
208
+
209
+ def save_files_from_ftp(self, ftp, file_paths_local=None, root_dir_remote=None):
210
+ if root_dir_remote is None:
211
+ root_dir_remote = self.root_dir_remote
212
+
213
+ if file_paths_local is None:
214
+ self.file_paths_local = []
215
+ for fp_remote in self.file_paths_remote:
216
+ self.file_paths_local.append(
217
+ self.root_dir_local / fp_remote.replace(root_dir_remote, '')
218
+ )
219
+ self.done = [False] * len(self.file_paths_remote)
220
+
221
+ if self.dry_run:
222
+ for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
223
+ mylog.simpleinfo.info(
224
+ f"Dry run: Downloading the file {fp_remote} to {fp_local} ..."
225
+ )
226
+ return
227
+
228
+ for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
229
+ to_download = self._to_download(fp_local)
230
+
231
+ if not to_download:
232
+ self.done[i] = True
233
+ continue
234
+ mylog.simpleinfo.info(
235
+ f"Downloading the file: {fp_remote} ..."
236
+ )
237
+
238
+ file_dir_remote = fp_remote.rsplit('/', 1)[0]
239
+ file_name_remote = fp_remote.split('/')[-1]
240
+
241
+ file_dir_local = fp_local.parent.resolve()
242
+ file_dir_local.mkdir(exist_ok=True, parents=True)
243
+
244
+ with open(fp_local, 'w+b') as f:
245
+ if file_dir_remote != ftp.pwd():
246
+ ftp.cwd(file_dir_remote)
247
+
248
+ bufsize=1024
249
+ total=ftp.size(fp_remote)
250
+ pbar=tqdm(total=total)
251
+ def bar(data):
252
+ f.write(data)
253
+ pbar.update(len(data))
254
+ res = ftp.retrbinary('RETR '+ file_name_remote, bar, bufsize)
255
+ pbar.close()
256
+ # res = ftp.retrbinary('RETR ' + file_name_remote, f.write)
257
+ # mylog.simpleinfo.info(res)
258
+ if not res.startswith('226'):
259
+ mylog.StreamLogger.warning('The downloading is not compiled...: {}'.format(res))
260
+ fp_local.unlink(missing_ok=True)
261
+ self.done[i] = True
262
+ mylog.simpleinfo.info("Saved as {}.".format(fp_local))
263
+ if not self.done[i]:
264
+ mylog.StreamLogger.error("Error in downloading the file {}.".format(file_name_remote))
265
+ return
266
+
267
+
268
+ class DownloaderFromHTTPBase(DownloaderBase):
269
+ """
270
+ Base downloader for downloading data files from HTTP server.
271
+
272
+ Parameters
273
+ ----------
274
+ base_url : str
275
+ The base URL of the HTTP server.
276
+ root_dir_remote : str
277
+ The directory in the HTTP server that stores the data.
278
+ """
279
+
280
+ root_dir_remote = None
65
281
 
282
+ def __init__(
283
+ self,
284
+ dt_fr, dt_to,
285
+ base_url=None,
286
+ root_dir_local=None,
287
+ root_dir_remote=None,
288
+ direct_download=False, force_download=False, dry_run=False,
289
+ **kwargs
290
+ ):
291
+
292
+ self.base_url = base_url
293
+
294
+ if root_dir_remote is not None:
295
+ self.root_dir_remote = root_dir_remote
296
+
297
+ super(DownloaderFromHTTPBase, self).__init__(
298
+ dt_fr, dt_to,
299
+ root_dir_local=root_dir_local,
300
+ direct_download=direct_download,
301
+ force_download=force_download,
302
+ dry_run=dry_run,
303
+ **kwargs
304
+ )
305
+
306
+ def download(
307
+ self,
308
+ subdirs: None | list=None,
309
+ file_name_patterns: None | list=None,
310
+ **kwargs
311
+ ):
312
+
313
+ self.file_paths_remote = self.search_from_http(
314
+ subdirs=subdirs, file_name_patterns=file_name_patterns
315
+ )
316
+ self.save_files_from_http()
317
+
318
+
319
+ def search_from_http(self, subdirs: None | list=None, file_name_patterns: None | list=None, **kwargs):
320
+ url_patterns = [self.base_url]
321
+ if str(self.root_dir_remote):
322
+ url_patterns.append(self.root_dir_remote)
323
+ if subdirs is not None:
324
+ url_patterns.extend(subdirs)
325
+ url = '/'.join(url_patterns)
326
+ r = requests.get(url)
66
327
  soup = bs4.BeautifulSoup(r.text, 'html.parser')
67
328
  a_tags = soup.find_all('a', href=True)
68
329
  hrefs = [a_tag['href'] for a_tag in a_tags]
330
+
331
+ hrefs = self._search_files(file_list=hrefs, file_name_patterns=file_name_patterns)
69
332
 
70
- search_pattern = '.*' + '.*'.join(file_name_patterns) + '.*'
71
- fn_regex = re.compile(search_pattern)
72
- hrefs = list(filter(fn_regex.match, hrefs))
73
-
74
- paths = []
333
+ file_paths_remote = []
75
334
  for href in hrefs:
76
- paths.append(url + '/' + href)
77
- return paths
78
-
79
- def download_from_http(self, ):
80
-
81
- source_file_paths = self.search_from_http()
82
-
83
- for url in source_file_paths:
84
- if self.dry_run:
85
- print(f"Dry run: {url}.")
86
- else:
87
- self.save_file_from_http(url=url)
88
-
89
- def save_file_from_http(self, url, file_dir=None, file_name=None):
90
- if file_name is None:
91
- file_name = url.split('/')[-1]
92
- file_path = file_dir / file_name
93
- if file_path.is_file():
335
+ file_paths_remote.append(url + '/' + href)
336
+ r.close()
337
+ return file_paths_remote
338
+
339
+ def save_files_from_http(self, file_paths_local=None, root_dir_remote=None):
340
+ if root_dir_remote is None:
341
+ root_dir_remote = self.root_dir_remote
342
+ if str(root_dir_remote):
343
+ root_url = self.base_url + '/' + root_dir_remote
344
+ else:
345
+ root_url = self.base_url
346
+
347
+ if file_paths_local is None:
348
+ self.file_paths_local = []
349
+ for fp_remote in self.file_paths_remote:
350
+ self.file_paths_local.append(
351
+ self.root_dir_local / fp_remote.replace(root_url, '')
352
+ )
353
+ else:
354
+ self.file_paths_local = file_paths_local
355
+ self.done = [False] * len(self.file_paths_remote)
356
+
357
+ if self.dry_run:
358
+ for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
94
359
  mylog.simpleinfo.info(
95
- "The file {} exists in the directory {}.".format(file_path.name, file_path.parent.resolve()))
96
- if not self.force_download:
97
- self.done = True
98
- return
99
-
100
- file_path.parent.resolve().mkdir(parents=True, exist_ok=True)
101
- mylog.simpleinfo.info(f'Downloading {file_name} ...')
102
- res = self._download_by_requests_get(url, data_file_path=file_path)
360
+ f"Dry run: Downloading the file {fp_remote} to {fp_local} ..."
361
+ )
362
+ return
363
+
364
+ for i, (fp_remote, fp_local) in enumerate(zip(self.file_paths_remote, self.file_paths_local)):
365
+ to_download = self._to_download(fp_local)
366
+
367
+ if not to_download:
368
+ self.done[i] = True
369
+ continue
370
+
371
+ mylog.simpleinfo.info(
372
+ f"Downloading the file: {fp_remote} ..."
373
+ )
374
+
375
+ res = self._download_by_requests_get(fp_remote, fp_local)
103
376
  if res:
104
- mylog.simpleinfo.info(f'Saved in {file_dir}')
105
- self.done = True
106
- else:
107
- mylog.StreamLogger.error(f"Error during downloading. Code: {res}.")
377
+ mylog.simpleinfo.info(f"Saved as {fp_local}.")
378
+ self.done[i] = True
108
379
 
109
- return
380
+ return
110
381
 
111
382
  @staticmethod
112
- def _download_by_requests_get(url, data_file_path=None, params=None, stream=True, allow_redirects=True, **kwargs):
113
- r = requests.get(url, params=params, stream=stream, allow_redirects=allow_redirects, **kwargs)
114
- if r.status_code != 200:
115
- return -2
116
-
117
- total_size_in_bytes = int(r.headers.get('content-length', 0))
118
- block_size = 1024 # 1 Kibibyte
119
- progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
120
-
121
- with open(data_file_path, 'wb') as file:
122
- for data in r.iter_content(block_size):
123
- progress_bar.update(len(data))
124
- file.write(data)
125
- progress_bar.close()
126
- if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
127
- mylog.StreamLogger.error("Something wrong during the download!")
128
- data_file_path.unlink(missing_ok=True)
383
+ def _download_by_requests_get(
384
+ url, file_path_local,
385
+ params=None, stream=True, allow_redirects=True,
386
+ file_block_size=1024,
387
+ **kwargs
388
+ ):
389
+ try:
390
+ r = requests.get(url, params=params, stream=stream, allow_redirects=allow_redirects, **kwargs)
391
+ if r.status_code != 200:
392
+ return -2
393
+
394
+ total_size_in_bytes = int(r.headers.get('content-length', 0))
395
+
396
+ progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
397
+
398
+ file_path_local.parent.mkdir(parents=True, exist_ok=True)
399
+ with open(file_path_local, 'wb') as file:
400
+ for data in r.iter_content(file_block_size):
401
+ progress_bar.update(len(data))
402
+ file.write(data)
403
+ progress_bar.close()
404
+ if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
405
+ mylog.StreamLogger.error("Something wrong during the download!")
406
+ file_path_local.unlink(missing_ok=True)
407
+ return -1
408
+ r.close()
409
+ except Exception as e:
410
+ mylog.StreamLogger.error(f"Exception during downloading the file from {url}: {e}")
129
411
  return -1
130
-
131
412
  return 1
132
413
 
133
- def search_from_ftp(self, ftp, file_name_patterns=None):
134
- paths = []
135
-
136
- return paths
137
414
 
138
- def save_file(self, file_dir, file_name, r_source_file):
139
- file_path = file_dir / file_name
140
-
141
- if file_path.is_file():
142
- mylog.simpleinfo.info(
143
- "The file {} exists in the directory {}.".format(file_path.name, file_path.parent.resolve()))
415
+ class CDAWebHTTPDownloader(DownloaderFromHTTPBase):
416
+ """
417
+ Downloader for downloading data files from CDAWeb HTTP server.
144
418
 
145
- if self.force_download:
146
- file_path.parent.resolve().mkdir(parents=True, exist_ok=True)
147
- with open(file_path, "wb") as f:
148
- mylog.simpleinfo.info(
149
- "Downloading {} to the directory {} ...".format(file_path.name, file_path.parent.resolve())
150
- )
151
- f.write(r_source_file.content)
152
- mylog.simpleinfo.info("Done")
419
+ Parameters
420
+ ----------
421
+ base_url : str
422
+ The base URL of the CDAWeb HTTP server.
423
+ root_dir_remote : str
424
+ The directory in the CDAWeb HTTP server that stores the data.
425
+ """
153
426
 
427
+ root_dir_remote = 'pub/data'
428
+
429
+ def __init__(
430
+ self,
431
+ dt_fr, dt_to,
432
+ root_dir_local=None,
433
+ root_dir_remote=None,
434
+ direct_download=False, force_download=False, dry_run=False,
435
+ **kwargs
436
+ ):
437
+
438
+ base_url = 'https://cdaweb.gsfc.nasa.gov'
439
+
440
+ if root_dir_local is None:
441
+ root_dir_local = prf.datahub_data_root_dir / 'CDAWeb'
442
+
443
+ super().__init__(
444
+ dt_fr, dt_to,
445
+ base_url=base_url,
446
+ root_dir_local=root_dir_local,
447
+ root_dir_remote=root_dir_remote,
448
+ direct_download=direct_download,
449
+ force_download=force_download,
450
+ dry_run=dry_run,
451
+ **kwargs
452
+ )
@@ -0,0 +1,93 @@
1
+ import datetime
2
+ import pathlib
3
+ import copy
4
+
5
+ import geospacelab.toolbox.utilities.pydatetime as dttool
6
+ import geospacelab.toolbox.utilities.pylogging as mylog
7
+ from geospacelab.config import prf
8
+ from geospacelab.datahub.sources.cdaweb.downloader import Downloader as DownloaderBase
9
+
10
+
11
+ class Downloader(DownloaderBase):
12
+
13
+ def __init__(
14
+ self,
15
+ dt_fr=None, dt_to=None,
16
+ sat_id=None,
17
+ orbit_id=None,
18
+ direct_download=True,
19
+ force_download=False,
20
+ data_file_root_dir = None,
21
+ dry_run=False,
22
+ ):
23
+ product = 'EDR_AUR'
24
+ if data_file_root_dir is None:
25
+ data_file_root_dir = prf.datahub_data_root_dir / 'CDAWeb' / 'DMSP' / 'SSUSI' / product
26
+ self.sat_id = sat_id
27
+ self.orbit_id = orbit_id
28
+ self.source_subdirs = ['dmsp', 'dmsp'+self.sat_id.lower(), 'ssusi', 'data', 'edr-aurora']
29
+
30
+ super().__init__(
31
+ dt_fr, dt_to,
32
+ data_file_root_dir=data_file_root_dir,
33
+ direct_download=direct_download,force_download=force_download,dry_run=dry_run
34
+ )
35
+
36
+
37
+ def search_from_http(self, file_name_patterns=None, allow_multiple_files=True):
38
+
39
+ dt_fr_1 = self.dt_fr - datetime.timedelta(hours=3)
40
+ dt_to_1 = self.dt_to + datetime.timedelta(hours=3)
41
+ diff_days = dttool.get_diff_days(dt_fr_1, dt_to_1)
42
+ dt0 = dttool.get_start_of_the_day(dt_fr_1)
43
+ source_file_paths = []
44
+ for nd in range(diff_days + 1):
45
+ this_day = dt0 + datetime.timedelta(days=nd)
46
+ doy = dttool.get_doy(this_day)
47
+ sdoy = '{:03d}'.format(doy)
48
+ subdirs = copy.deepcopy(self.source_subdirs)
49
+ subdirs.extend(
50
+ [str(this_day.year), sdoy]
51
+ )
52
+
53
+ if self.orbit_id is None:
54
+ file_name_patterns = [
55
+ 'dmsp' + self.sat_id.lower(),
56
+ 'ssusi',
57
+ 'edr-aurora',
58
+ this_day.strftime("%Y") + sdoy + 'T',
59
+ '.nc'
60
+ ]
61
+ else:
62
+ file_name_patterns = [
63
+ 'dmsp' + self.sat_id.lower(),
64
+ 'ssusi',
65
+ 'edr-aurora',
66
+ 'REV',
67
+ self.orbit_id,
68
+ '.nc'
69
+ ]
70
+ paths = super().search_from_http(subdirs=subdirs, file_name_patterns=file_name_patterns)
71
+ source_file_paths.extend(paths)
72
+ return source_file_paths
73
+
74
+ def save_file_from_http(self, url, file_dir=None, file_name=None):
75
+
76
+ sy = url.split('/')[-3]
77
+ sdoy = url.split('/')[-2]
78
+ year = int(sy)
79
+ this_day = dttool.convert_doy_to_datetime(year, int(sdoy))
80
+ if file_dir is None:
81
+ file_dir = self.data_file_root_dir / self.sat_id.upper() / sy / this_day.strftime("%Y%m%d")
82
+ super().save_file_from_http(url, file_dir=file_dir)
83
+
84
+
85
+
86
+ if __name__ == "__main__":
87
+ downloader = Downloader(
88
+ dt_fr = datetime.datetime(2011, 1, 6),
89
+ dt_to = datetime.datetime(2011, 1, 6, 12),
90
+ sat_id='F17',
91
+ orbit_id='21523',
92
+ dry_run=False,
93
+ )