cdasws 1.8.10__py3-none-any.whl → 1.8.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cdasws/cdasws.py CHANGED
@@ -24,108 +24,175 @@
24
24
  #
25
25
  # NOSA HEADER END
26
26
  #
27
- # Copyright (c) 2018-2019 United States Government as represented by
27
+ # Copyright (c) 2018-2025 United States Government as represented by
28
28
  # the National Aeronautics and Space Administration. No copyright is
29
29
  # claimed in the United States under Title 17, U.S.Code. All Other
30
30
  # Rights Reserved.
31
31
  #
32
32
 
33
+
33
34
  """
34
- Package for accessing the Coordinate Data Analysis System (CDAS)
35
+ Module for accessing the Coordinated Data Analysis System (CDAS)
35
36
  web services <https://cdaweb.gsfc.nasa.gov/WebServices/REST/>.<br>
36
37
 
37
- Copyright &copy; 2018-2019 United States Government as represented by the
38
+ Copyright &copy; 2018-2025 United States Government as represented by the
38
39
  National Aeronautics and Space Administration. No copyright is claimed in
39
40
  the United States under Title 17, U.S.Code. All Other Rights Reserved.
41
+
42
+ Notes
43
+ -----
44
+ <ul>
45
+ <li>Due to rate limiting implemented by the CDAS web services, an
46
+ attempt to make simultaneous requests from many threads is likely
47
+ to actually reduce performance. At this time, it is best to make
48
+ calls from five or fewer threads.</li>
49
+ <li>Since CDAS data has datetime values with a UTC timezone, all
50
+ client provided datetime values should have a timezone of UTC.
51
+ If a given value's timezone is not UTC, the value is adjusted to
52
+ UTC. If a given value has no timezone (is naive), a UTC timezone
53
+ is set.</li>
54
+ </ul>
40
55
  """
41
56
 
57
+
58
+ import sys
42
59
  import os
43
60
  import platform
44
61
  import logging
62
+ import re
63
+ from importlib.util import find_spec
45
64
  import urllib.parse
65
+ from urllib.parse import urlparse
46
66
  import json
47
67
  from operator import itemgetter
48
- from datetime import datetime, timezone
68
+ import time
69
+ from datetime import datetime, timedelta, timezone
70
+ import xml.etree.ElementTree as ET
49
71
  from tempfile import mkstemp
50
- from typing import Dict, List, Tuple, Union
72
+ from typing import Any, Callable, Dict, List, Tuple, Union
73
+
51
74
  import requests
52
75
  import dateutil.parser
53
- import spacepy.datamodel as spdm # type: ignore
54
76
 
55
-
56
-
57
- class TimeInterval:
77
+ from cdasws.datarepresentation import DataRepresentation
78
+ from cdasws.datarequest import AudioRequest, DataRequest
79
+ from cdasws.datarequest import CdfFormat, CdfRequest, Compression
80
+ from cdasws.datarequest import ImageFormat, GraphOptions, GraphRequest
81
+ from cdasws.datarequest import TextFormat, TextRequest, ThumbnailRequest
82
+ from cdasws.timeinterval import TimeInterval
83
+
84
+
85
+ # requires python >= 3.4
86
+ #if find_spec('spacepy.datamodel') is not None:
87
+ # import spacepy.datamodel as spdm # type: ignore
88
+ # SPDM_AVAILABLE = True
89
+ #else:
90
+ # SPDM_AVAILABLE = False
91
+ # python < 3.4
92
+ try:
93
+ import spacepy.datamodel as spdm # type: ignore
94
+ SPDM_AVAILABLE = True
95
+ except ImportError:
96
+ SPDM_AVAILABLE = False
97
+
98
+ try:
99
+ from cdflib.xarray import cdf_to_xarray
100
+ import xarray as xr
101
+ CDF_XARRAY_AVAILABLE = True
102
+ except ImportError:
103
+ try:
104
+ import cdflib as cdf
105
+ import xarray as xr # pylint: disable=ungrouped-imports
106
+ CDF_XARRAY_AVAILABLE = True
107
+ def cdf_to_xarray(filename, to_datetime=False, to_unixtime=False,
108
+ fillval_to_nan=False):
109
+ """
110
+ Reads a CDF into an xarray.dataset. This function exists
111
+ to provide compatility with cdflib >= 1.0.1 for older
112
+ releases of cdflib.
113
+
114
+ Parameters:
115
+ -----------
116
+ filename
117
+ The path to the CDF file to read.
118
+ to_datetime
119
+ Whether or not to convert CDF_EPOCH/EPOCH_16/TT2000 to
120
+ datetime, or leave them as is.
121
+ to_unixtime
122
+ Whether or not to convert CDF_EPOCH/EPOCH_16/TT2000 to
123
+ unixtime, or leave them as is.
124
+ fillval_to_nan
125
+ If True, any data values that match the FILLVAL
126
+ attribute for a variable will be set to NaN.
127
+
128
+ Returns
129
+ -------
130
+ xarray.dataset
131
+ An XArray Dataset object.
132
+ """
133
+ return cdf.cdf_to_xarray(filename, to_datetime=to_datetime, # pylint: disable=no-member
134
+ to_unixtime=to_unixtime,
135
+ fillval_to_nan=fillval_to_nan)
136
+ except ImportError:
137
+ CDF_XARRAY_AVAILABLE = False
138
+
139
+
140
+ try:
141
+ import requests_cache
142
+ CACHE_AVAILABLE = True
143
+ except ImportError:
144
+ CACHE_AVAILABLE = False
145
+
146
+ from cdasws import __version__, RETRY_LIMIT, NAMESPACES as NS
147
+
148
+
149
+ def _get_data_progress(
150
+ progress: float,
151
+ msg: str,
152
+ value: Dict) -> int:
58
153
  """
59
- A time interval constisting of a start and end datetime.
154
+ A get_data progress callback which adjusts the progress value for
155
+ the download portion of a larger operation and then calls the
156
+ "real" progress callback function with this adjusted progress value.
60
157
 
61
- Attributes
158
+ Parameters
62
159
  ----------
63
- start
64
- Start time of interval.
65
- end
66
- End time of interval.
160
+ progress
161
+ Measure of progress.
162
+ msg
163
+ Message describing progress of get_data call.
164
+ value
165
+ Dictionary containing the function to call and values for
166
+ computing the adjusted progress value.
167
+ Returns
168
+ -------
169
+ int
170
+ Flag indicating whether to continue with getting the data.
171
+ 0 to continue. 1 to abort getting the data.
67
172
  """
68
- def __init__(self, start: Union[datetime, str],
69
- end: Union[datetime, str]):
70
- """
71
- Constructs a TimeInterval object.
72
-
73
- Parameters
74
- ----------
75
- start
76
- Start time of interval.
77
- end
78
- End time of interval.
79
- Raises
80
- ------
81
- ValueError
82
- If the given start/end datetime values are invalid.
83
- """
84
-
85
- if isinstance(start, datetime):
86
- self.start = start
87
- elif isinstance(start, str):
88
- self.start = dateutil.parser.parse(start)
89
- else:
90
- raise ValueError('unrecognized datetime value')
91
-
92
- self.start.astimezone(timezone.utc)
173
+ progress_callback = value.get('progressCallback', None)
174
+ progress_user_value = value.get('progressUserValue', None)
175
+ adjusted_progress = value['progressStart'] + \
176
+ value['progressFraction'] * progress
93
177
 
94
- if isinstance(end, datetime):
95
- self.end = end
96
- elif isinstance(end, str):
97
- self.end = dateutil.parser.parse(end)
98
- else:
99
- raise ValueError('unrecognized datetime value')
100
-
101
- self.end.astimezone(timezone.utc)
178
+ if progress_callback is not None:
102
179
 
103
- def __str__(self):
104
- return self.start.isoformat() + ' ' + self.end.isoformat()
180
+ return progress_callback(adjusted_progress, msg,
181
+ progress_user_value)
182
+ return 0
105
183
 
106
- def __eq__(self, other):
107
- return self.start == other.start and self.end == other.end
108
-
109
- @staticmethod
110
- def basic_iso_format(value: datetime) -> str:
111
- """
112
- Produces the basic (minimal) ISO 8601 format of the given
113
- datetime.
114
184
 
115
- Parameters
116
- ----------
117
- value
118
- datetime value to convert to string.
119
- Returns
120
- -------
121
- str
122
- Basic ISO 8601 format time string.
123
- """
124
- return value.isoformat().replace('+00:00', 'Z').translate(
125
- {ord(i):None for i in ':-'})
185
+ class NullAuth(requests.auth.AuthBase): # pylint: disable=too-few-public-methods
186
+ """
187
+ Authentication class used to cause requests to ignore any ~/.netrc
188
+ file. The CDAS web services do not support authentication and
189
+ a cdaweb (ftps) entry will cause CdasWs requests to fail with
190
+ a 401 error. See <https://github.com/psf/requests/issues/2773>.
191
+ """
192
+ def __call__(self, r):
193
+ return r
126
194
 
127
195
 
128
- # pylint: disable=too-many-instance-attributes
129
196
  class CdasWs:
130
197
  """
131
198
  Class representing the web service interface to NASA's
@@ -138,9 +205,17 @@ class CdasWs:
138
205
  it is configured with a NullHandler. Users of this class may configure
139
206
  the logger to aid in diagnosing problems.
140
207
  """
208
+ # pylint: disable=too-many-instance-attributes
141
209
  # pylint: disable=too-many-arguments
142
- def __init__(self, endpoint=None, timeout=None, proxy=None,
143
- ca_certs=None, disable_ssl_certificate_validation=False):
210
+ def __init__(
211
+ self,
212
+ endpoint=None,
213
+ timeout=None,
214
+ proxy=None,
215
+ ca_certs=None,
216
+ disable_ssl_certificate_validation=False,
217
+ user_agent=None,
218
+ disable_cache=False):
144
219
  """
145
220
  Creates an object representing the CDAS web services.
146
221
 
@@ -152,42 +227,62 @@ class CdasWs:
152
227
  timeout
153
228
  Number of seconds to wait for a response from the server.
154
229
  proxy
155
- HTTP proxy information. For example,
230
+ HTTP proxy information. For example,<pre>
156
231
  proxies = {
157
232
  'http': 'http://10.10.1.10:3128',
158
233
  'https': 'http://10.10.1.10:1080',
159
- }
234
+ }</pre>
160
235
  Proxy information can also be set with environment variables.
161
- For example,
236
+ For example,<pre>
162
237
  $ export HTTP_PROXY="http://10.10.1.10:3128"
163
- $ export HTTPS_PROXY="http://10.10.1.10:1080"
238
+ $ export HTTPS_PROXY="http://10.10.1.10:1080"</pre>
164
239
  ca_certs
165
240
  Path to certificate authority (CA) certificates that will
166
241
  override the default bundle.
167
242
  disable_ssl_certificate_validation
168
243
  Flag indicating whether to validate the SSL certificate.
244
+ user_agent
245
+ A value that is appended to the HTTP User-Agent value.
246
+ disable_cache
247
+ Flag indicating whether to disable HTTP caching.
169
248
  """
170
249
 
171
250
  self.logger = logging.getLogger(type(self).__name__)
172
251
  self.logger.addHandler(logging.NullHandler())
173
252
 
253
+ self.logger.debug('endpoint = %s', endpoint)
254
+ self.logger.debug('ca_certs = %s', ca_certs)
255
+ self.logger.debug('disable_ssl_certificate_validation = %s',
256
+ disable_ssl_certificate_validation)
257
+ self.logger.debug('disable_cache = %s', disable_cache)
258
+
174
259
  if endpoint is None:
175
260
  self._endpoint = 'https://cdaweb.gsfc.nasa.gov/WS/cdasr/1/dataviews/sp_phys/'
176
261
  else:
177
262
  self._endpoint = endpoint
178
263
 
179
- self._user_agent = 'CdasWsExample.py (' + \
264
+ self._user_agent = 'cdasws/' + __version__ + ' (' + \
180
265
  platform.python_implementation() + ' ' \
181
266
  + platform.python_version() + '; ' + platform.platform() + ')'
182
267
 
268
+ if user_agent is not None:
269
+ self._user_agent += ' (' + user_agent + ')'
270
+
183
271
  self._request_headers = {
184
- 'Content-Type' : 'application/json',
185
- 'Accept' : 'application/json',
272
+ #'Content-Type' : 'application/json',
273
+ 'Content-Type' : 'application/xml',
274
+ 'Accept' : 'application/xml',
186
275
  'User-Agent' : self._user_agent,
187
276
  #'Accept-Encoding' : 'gzip' # only beneficial for icdfml responses
188
277
  }
189
- self._session = requests.Session()
278
+ if CACHE_AVAILABLE and disable_cache is not True:
279
+ self._session = requests_cache.CachedSession('cdasws_cache',
280
+ cache_control=True)
281
+ else:
282
+ self._session = requests.Session()
283
+
190
284
  self._session.headers.update(self._request_headers)
285
+ self._session.auth = NullAuth()
191
286
 
192
287
  if ca_certs is not None:
193
288
  self._session.verify = ca_certs
@@ -200,6 +295,10 @@ class CdasWs:
200
295
 
201
296
  self._timeout = timeout
202
297
 
298
+ endpoint_components = urlparse(self._endpoint)
299
+ self._hdp_registry = endpoint_components.scheme + '://' + \
300
+ endpoint_components.netloc + '/registry/hdp/SscId.xql'
301
+
203
302
  # pylint: enable=too-many-arguments
204
303
 
205
304
 
@@ -219,14 +318,21 @@ class CdasWs:
219
318
  self._session.close()
220
319
 
221
320
 
222
- def get_observatory_groups(self, **keywords) -> List[Dict]:
321
+ def get_observatory_groups(
322
+ self,
323
+ **keywords: str
324
+ ) -> List[Dict]:
223
325
  """
224
326
  Gets descriptions of the observatory groups from the server.
225
327
 
226
328
  Parameters
227
329
  ----------
228
330
  keywords
229
- instrumentType value.
331
+ optional keyword parameters as follows:<br>
332
+ <b>instrumentType</b> - an instrument type value from those
333
+ returned by `CdasWs.get_instrument_types`. Omitting
334
+ this parameter indicates that no observatories are eliminated
335
+ based upon their instrumentType value.
230
336
  Returns
231
337
  -------
232
338
  List
@@ -252,27 +358,49 @@ class CdasWs:
252
358
  self.logger.info('response.text: %s', response.text)
253
359
  return []
254
360
 
255
- observatory_groups = response.json()
256
-
257
361
  if self.logger.level <= logging.DEBUG:
258
- self.logger.debug('observatory_groups: %s',
259
- json.dumps(observatory_groups,
260
- indent=4, sort_keys=True))
362
+ self.logger.debug('response.text = %s', response.text)
261
363
 
262
- if not observatory_groups:
263
- return []
364
+ observatory_response = ET.fromstring(response.text)
365
+
366
+ observatory_group_descriptions = []
367
+ for description in observatory_response.findall(\
368
+ 'cdas:ObservatoryGroupDescription', namespaces=NS):
369
+
370
+ observatory_ids = []
371
+ for observatory_id in description.findall(\
372
+ 'cdas:ObservatoryId', namespaces=NS):
373
+
374
+ observatory_ids.append(observatory_id.text)
264
375
 
265
- return observatory_groups['ObservatoryGroupDescription']
376
+ observatory_group_descriptions.append({
377
+ 'Name': description.find(\
378
+ 'cdas:Name', namespaces=NS).text,
379
+ 'ObservatoryId': observatory_ids
380
+ })
266
381
 
382
+ return observatory_group_descriptions
267
383
 
268
- def get_instrument_types(self, **keywords) -> List[Dict]:
384
+
385
+ def get_instrument_types(
386
+ self,
387
+ **keywords: str
388
+ ) -> List[Dict]:
269
389
  """
270
390
  Gets descriptions of the instrument types from the server.
271
391
 
272
392
  Parameters
273
393
  ----------
274
394
  keywords
275
- observatory or observatoryGroup value.
395
+ optional keyword parameters as follows:<br>
396
+ <b>observatory</b> - an observatory value from those returned
397
+ by `CdasWs.get_observatories`. Omitting this parameter
398
+ indicates that no instrumentTypes are eliminated based upon
399
+ their observatory value.<br>
400
+ <b>observatoryGroup</b> - an observatory group value from
401
+ those returned by `CdasWs.get_observatory_groups`. Omitting
402
+ this parameter indicates that no instrumentTypes are
403
+ eliminated based upon their observatoryGroup value.<br>
276
404
  Returns
277
405
  -------
278
406
  List
@@ -301,27 +429,45 @@ class CdasWs:
301
429
  self.logger.info('response.text: %s', response.text)
302
430
  return []
303
431
 
304
- instrument_types = response.json()
432
+ if self.logger.level <= logging.DEBUG:
433
+ self.logger.debug('response.text = %s', response.text)
434
+
435
+ instrument_response = ET.fromstring(response.text)
305
436
 
306
437
  if self.logger.level <= logging.DEBUG:
307
- self.logger.debug('instrument_types = %s',
308
- json.dumps(instrument_types, indent=4,
309
- sort_keys=True))
438
+ self.logger.debug('instrument_response = %s',
439
+ ET.tostring(instrument_response))
310
440
 
311
- if not instrument_types:
312
- return []
441
+ instrument_types = []
442
+ for description in instrument_response.findall(\
443
+ 'cdas:InstrumentTypeDescription', namespaces=NS):
313
444
 
314
- return instrument_types['InstrumentTypeDescription']
445
+ instrument_types.append({
446
+ 'Name': description.find('cdas:Name',
447
+ namespaces=NS).text
448
+ })
449
+ return instrument_types
315
450
 
316
451
 
317
- def get_instruments(self, **keywords) -> List[Dict]:
452
+ def get_instruments(
453
+ self,
454
+ **keywords: str
455
+ ) -> List[Dict]:
318
456
  """
319
457
  Gets descriptions of the instruments from the server.
320
458
 
321
459
  Parameters
322
460
  ----------
323
461
  keywords
324
- observatory or instrumentType value.
462
+ optional keyword parameters as follows:<br>
463
+ <b>observatory</b> - an observatory value from those returned
464
+ by `CdasWs.get_observatories`. Omitting this parameter
465
+ indicates that no instruments are eliminated based upon their
466
+ observatory value.<br>
467
+ <b>instrumentType</b> - an instrument type value from those
468
+ returned by `CdasWs.get_instrument_types`. Omitting this
469
+ parameter indicates that no instruments are eliminated based
470
+ upon their instrument type.<br>
325
471
  Returns
326
472
  -------
327
473
  List
@@ -350,27 +496,50 @@ class CdasWs:
350
496
  self.logger.info('response.text: %s', response.text)
351
497
  return []
352
498
 
353
- instruments = response.json()
499
+ if self.logger.level <= logging.DEBUG:
500
+ self.logger.debug('response.text = %s', response.text)
501
+
502
+ instruments_response = ET.fromstring(response.text)
354
503
 
355
504
  if self.logger.level <= logging.DEBUG:
356
- self.logger.debug('instruments = %s',
357
- json.dumps(instruments, indent=4,
358
- sort_keys=True))
505
+ self.logger.debug('instruments = %s', response.text)
506
+ #ET.indent(instruments_response, space=' '))
359
507
 
360
- if not instruments:
361
- return []
508
+ instruments = []
509
+ for instrument_description in instruments_response.findall(\
510
+ 'cdas:InstrumentDescription', namespaces=NS):
511
+
512
+ instruments.append({
513
+ 'Name': instrument_description.find(\
514
+ 'cdas:Name', namespaces=NS).text,
515
+ 'ShortDescription': instrument_description.find(\
516
+ 'cdas:ShortDescription', namespaces=NS).text,
517
+ 'LongDescription': instrument_description.find(\
518
+ 'cdas:LongDescription', namespaces=NS).text
519
+ })
362
520
 
363
- return instruments['InstrumentDescription']
521
+ return instruments
364
522
 
365
523
 
366
- def get_observatories(self, **keywords) -> List[Dict]:
524
+ def get_observatories(
525
+ self,
526
+ **keywords: str
527
+ ) -> List[Dict]:
367
528
  """
368
529
  Gets descriptions of the observatories from the server.
369
530
 
370
531
  Parameters
371
532
  ----------
372
533
  keywords
373
- instrument or instrumentType value.
534
+ optional keyword parameters as follows:<br>
535
+ <b>instrument</b> - an instrument value from those returned
536
+ by `CdasWs.get_instruments`. Omitting this parameter
537
+ indicates that no observatories are eliminated based upon
538
+ their instrument value.<br>
539
+ <b>instrumentType</b> - an instrument type value from those
540
+ returned by `CdasWs.get_instrument_types`. Omitting this
541
+ parameter indicates that no observatories are eliminated
542
+ based upon their instrumentType value.<br>
374
543
  Returns
375
544
  -------
376
545
  List
@@ -399,20 +568,34 @@ class CdasWs:
399
568
  self.logger.info('response.text: %s', response.text)
400
569
  return []
401
570
 
402
- observatories = response.json()
571
+ if self.logger.level <= logging.DEBUG:
572
+ self.logger.debug('response.text = %s', response.text)
573
+
574
+ observatory_response = ET.fromstring(response.text)
403
575
 
404
576
  if self.logger.level <= logging.DEBUG:
405
- self.logger.debug('observatories = %s',
406
- json.dumps(observatories, indent=4,
407
- sort_keys=True))
577
+ self.logger.debug('observatories = %s', response.text)
408
578
 
409
- if not observatories:
410
- return []
579
+ observatories = []
580
+
581
+ for observatory in observatory_response.findall(\
582
+ 'cdas:ObservatoryDescription', namespaces=NS):
583
+ observatories.append({
584
+ 'Name': observatory.find(\
585
+ 'cdas:Name', namespaces=NS).text,
586
+ 'ShortDescription': observatory.find(\
587
+ 'cdas:ShortDescription', namespaces=NS).text,
588
+ 'LongDescription': observatory.find(\
589
+ 'cdas:LongDescription', namespaces=NS).text
590
+ })
411
591
 
412
- return observatories['ObservatoryDescription']
592
+ return observatories
413
593
 
414
594
 
415
- def get_observatory_groups_and_instruments(self, **keywords) -> List[Dict]:
595
+ def get_observatory_groups_and_instruments(
596
+ self,
597
+ **keywords: str
598
+ ) -> List[Dict]:
416
599
  """
417
600
  Gets descriptions of the observatory groups (and associated
418
601
  instruments) from the server.
@@ -420,7 +603,11 @@ class CdasWs:
420
603
  Parameters
421
604
  ----------
422
605
  keywords
423
- instrumentType value.
606
+ optional keyword parameters as follows:<br>
607
+ <b>instrumentType</b> - an instrument type value from those
608
+ returned by `CdasWs.get_instrument_types`. Omitting this
609
+ parameter indicates that no observatories are eliminated
610
+ based upon their instrumentType value.<br>
424
611
  Returns
425
612
  -------
426
613
  List
@@ -447,54 +634,153 @@ class CdasWs:
447
634
  self.logger.info('response.text: %s', response.text)
448
635
  return []
449
636
 
450
- observatories = response.json()
451
-
452
637
  if self.logger.level <= logging.DEBUG:
453
- self.logger.debug('observatories = %s',
454
- json.dumps(observatories, indent=4,
455
- sort_keys=True))
638
+ self.logger.debug('response.text = %s', response.text)
456
639
 
457
- if not observatories:
458
- return []
640
+ observatories_response = ET.fromstring(response.text)
459
641
 
460
- return observatories['ObservatoryGroupInstrumentDescription']
642
+ if self.logger.level <= logging.DEBUG:
643
+ self.logger.debug('observatories = %s', response.text)
644
+
645
+ o_g_i_ds = []
646
+
647
+ for o_g_i_d in observatories_response.findall(\
648
+ 'cdas:ObservatoryGroupInstrumentDescription',\
649
+ namespaces=NS):
650
+
651
+ o_g_i_d_name = o_g_i_d.find('cdas:Name',
652
+ namespaces=NS).text
653
+ o_is = []
654
+ for o_i in o_g_i_d.findall('cdas:ObservatoryInstruments',
655
+ namespaces=NS):
656
+
657
+ o_i_name = o_i.find('cdas:Name',
658
+ namespaces=NS).text
659
+ i_ds = []
660
+ for i_d in o_i.findall('cdas:InstrumentDescription',
661
+ namespaces=NS):
662
+ i_d_name = i_d.find('cdas:Name',
663
+ namespaces=NS).text
664
+ i_d_short_description = \
665
+ i_d.find('cdas:ShortDescription',
666
+ namespaces=NS).text
667
+ i_d_long_description = \
668
+ i_d.find('cdas:LongDescription',
669
+ namespaces=NS).text
670
+ i_ds.append({
671
+ 'Name': i_d_name,
672
+ 'ShortDescription': i_d_short_description,
673
+ 'LongDescription': i_d_long_description
674
+ })
675
+ o_is.append({
676
+ 'Name': o_i_name,
677
+ 'InstrumentDescription': i_ds
678
+ })
679
+
680
+ o_g_i_ds.append({
681
+ 'Name': o_g_i_d_name,
682
+ 'ObservatoryInstruments': o_is
683
+ })
684
+
685
+ return o_g_i_ds
461
686
 
462
687
 
463
- def get_datasets(self, **keywords) -> List[Dict]:
688
+ # pylint: disable=too-many-branches
689
+ def get_datasets(
690
+ self,
691
+ **keywords: str
692
+ ) -> List[Dict]:
464
693
  """
465
694
  Gets descriptions of the specified datasets from the server.
466
695
 
467
696
  Parameters
468
697
  ----------
469
698
  keywords
470
- observatoryGroup, instrumentType, observatory,
471
- instrument, startDate, stopDate, idPattern, labelPattern,
472
- and/or notesPattern value(s).
699
+ optional keyword parameters as follows:<br>
700
+ <b>observatoryGroup</b> - an observatory group value from those
701
+ returned by `CdasWs.get_observatory_groups`. Omitting this
702
+ parameter
703
+ indicates that no datasets are eliminated based upon their
704
+ observatoryGroup value.<br>
705
+ <b>instrumentType</b> - an instrument type value from those
706
+ returned by `CdasWs.get_instrument_types`. Omitting this
707
+ parameter indicates that no datasets are eliminated based
708
+ upon their instrumentType value.<br>
709
+ <b>observatory</b> - an observatory name value from those
710
+ returned by `CdasWs.get_observatories`. Omitting this
711
+ parameter indicates that no datasets are eliminated based
712
+ upon their observatory value.<br>
713
+ <b>instrument</b> - an instrument value from those returned by
714
+ `CdasWs.get_instruments`. Omitting this parameter indicates
715
+ that no datasets are eliminated based upon their instrument
716
+ value.<br>
717
+ <b>startDate</b> - a datetime specifying the start of a time
718
+ interval. See module note about timezone value. If this
719
+ parameter is omitted, the time interval will begin infinitely
720
+ in the past.<br>
721
+ <b>stopDate</b> - a datetime specifying the end of a time
722
+ interval. See module note about timezone value. If this
723
+ parameter is omitted, the time interval will end infinitely
724
+ in the future.<br>
725
+ <b>id</b> - a dataset identifier. The value may be a CDAS
726
+ (e.g., AC_H2_MFI), DOI (e.g., 10.48322/fh85-fj47), or SPASE
727
+ [ResourceID] (e.g., spase://NASA/NumericalData/ACE/MAG/L2/PT1H)
728
+ identifier. If specified, all other keywords are ignored.<br>
729
+ <b>idPattern</b> - a java.util.regex compatible regular
730
+ expression that must match the dataset's CDAS identifier value.
731
+ Omitting this parameter is equivalent to `.*`.<br>
732
+ <b>labelPattern</b> - a java.util.regex compatible regular
733
+ expression that must match the dataset's CDAS label text.
734
+ Omitting this parameter is equivalent to `.*`. Embedded
735
+ matching flag expressions (e.g., `(?i)` for case insensitive
736
+ match mode) are supported and likely to be useful in this
737
+ case.<br>
738
+ <b>notesPattern</b> - a java.util.regex compatible regular
739
+ expression that must match the dataset's CDAS notes text.
740
+ Omitting this parameter is equivalent to `.*`. Embedded
741
+ matching flag expressions (e.g., `(?s)` for dotall match mode)
742
+ are supported and likely to be useful in this case.<br>
473
743
  Returns
474
744
  -------
475
745
  List
476
- A dictionary containing descriptions of the datasets
746
+ A list of dictionaries containing descriptions of the datasets
477
747
  requested. The dictionary structure is defined by the
478
748
  DatasetDescription element in
479
749
  <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>.
480
750
  """
481
751
  url = self._endpoint + 'datasets?'
482
752
 
483
- if 'observatoryGroup' in keywords:
484
- url = url + 'observatoryGroup=' \
485
- + urllib.parse.quote(keywords['observatoryGroup']) + '&'
486
-
487
- if 'instrumentType' in keywords:
488
- url = url + 'instrumentType=' \
489
- + urllib.parse.quote(keywords['instrumentType']) + '&'
490
-
491
- if 'observatory' in keywords:
492
- url = url + 'observatory=' \
493
- + urllib.parse.quote(keywords['observatory']) + '&'
494
-
495
- if 'instrument' in keywords:
496
- url = url + 'instrument=' \
497
- + urllib.parse.quote(keywords['instrument']) + '&'
753
+ observatory_groups = keywords.get('observatoryGroup', None)
754
+ if observatory_groups is not None:
755
+ if isinstance(observatory_groups, str):
756
+ observatory_groups = [observatory_groups]
757
+ for observatory_group in observatory_groups:
758
+ url = url + 'observatoryGroup=' \
759
+ + urllib.parse.quote(observatory_group) + '&'
760
+
761
+ instrument_types = keywords.get('instrumentType', None)
762
+ if instrument_types is not None:
763
+ if isinstance(instrument_types, str):
764
+ instrument_types = [instrument_types]
765
+ for instrument_type in instrument_types:
766
+ url = url + 'instrumentType=' \
767
+ + urllib.parse.quote(instrument_type) + '&'
768
+
769
+ observatories = keywords.get('observatory', None)
770
+ if observatories is not None:
771
+ if isinstance(observatories, str):
772
+ observatories = [observatories]
773
+ for observatory in observatories:
774
+ url = url + 'observatory=' \
775
+ + urllib.parse.quote(observatory) + '&'
776
+
777
+ instruments = keywords.get('instrument', None)
778
+ if instruments is not None:
779
+ if isinstance(instruments, str):
780
+ instruments = [instruments]
781
+ for instrument in instruments:
782
+ url = url + 'instrument=' \
783
+ + urllib.parse.quote(instrument) + '&'
498
784
 
499
785
  if 'startDate' in keywords:
500
786
  url = url + 'startDate=' \
@@ -504,6 +790,10 @@ class CdasWs:
504
790
  url = url + 'stopDate=' \
505
791
  + urllib.parse.quote(keywords['stopDate']) + '&'
506
792
 
793
+ if 'id' in keywords:
794
+ url = url + 'id=' \
795
+ + urllib.parse.quote(keywords['id']) + '&'
796
+
507
797
  if 'idPattern' in keywords:
508
798
  url = url + 'idPattern=' \
509
799
  + urllib.parse.quote(keywords['idPattern']) + '&'
@@ -527,21 +817,153 @@ class CdasWs:
527
817
  self.logger.info('response.text: %s', response.text)
528
818
  return []
529
819
 
530
- datasets = response.json()
820
+ if self.logger.level <= logging.DEBUG:
821
+ self.logger.debug('response.text = %s', response.text)
822
+
823
+ dss = ET.fromstring(response.text)
531
824
 
532
825
  if self.logger.level <= logging.DEBUG:
533
- self.logger.debug('datasets = %s',
534
- json.dumps(datasets, indent=4, sort_keys=True))
826
+ self.logger.debug('datasets = %s', response.text)
827
+
828
+ datasets = []
829
+ for ds in dss.findall('cdas:DatasetDescription',
830
+ namespaces=NS):
831
+
832
+ observatory_groups = []
833
+ for o_g in ds.findall('cdas:ObservatoryGroup',
834
+ namespaces=NS):
835
+ observatory_groups.append(o_g.text)
836
+
837
+ instrument_types = []
838
+ for i_t in ds.findall('cdas:InstrumentType',
839
+ namespaces=NS):
840
+ instrument_types.append(i_t.text)
841
+
842
+ dataset_links = []
843
+ for d_l in ds.findall('cdas:DatasetLink',
844
+ namespaces=NS):
845
+ dataset_links.append({
846
+ 'Title': d_l.find('cdas:Title',
847
+ namespaces=NS).text,
848
+ 'Text': d_l.find('cdas:Text',
849
+ namespaces=NS).text,
850
+ 'Url': d_l.find('cdas:Url',
851
+ namespaces=NS).text,
852
+ })
853
+
854
+ observatories = []
855
+ for obs_elem in ds.findall('cdas:Observatory',
856
+ namespaces=NS):
857
+ observatories.append(obs_elem.text)
858
+
859
+ instruments = []
860
+ for instr_elem in ds.findall('cdas:Instrument',
861
+ namespaces=NS):
862
+ instruments.append(instr_elem.text)
863
+
864
+ dataset = {
865
+ 'Id': ds.find('cdas:Id', namespaces=NS).text,
866
+ 'Observatory': observatories,
867
+ 'Instrument': instruments,
868
+ 'ObservatoryGroup': observatory_groups,
869
+ 'InstrumentType': instrument_types,
870
+ 'Label': ds.find('cdas:Label',
871
+ namespaces=NS).text,
872
+ 'TimeInterval': {
873
+ 'Start': ds.find('cdas:TimeInterval/cdas:Start',
874
+ namespaces=NS).text,
875
+ 'End': ds.find('cdas:TimeInterval/cdas:End',
876
+ namespaces=NS).text
877
+ },
878
+ 'PiName': ds.find('cdas:PiName',
879
+ namespaces=NS).text,
880
+ 'PiAffiliation': ds.find('cdas:PiAffiliation',
881
+ namespaces=NS).text,
882
+ 'Notes': ds.find('cdas:Notes',
883
+ namespaces=NS).text,
884
+ 'DatasetLink': dataset_links
885
+ }
886
+ doi = ds.find('cdas:Doi', namespaces=NS)
887
+ if doi is not None:
888
+ dataset['Doi'] = doi.text
889
+
890
+ spase_resource_id = ds.find('cdas:SpaseResourceId',
891
+ namespaces=NS)
892
+ if spase_resource_id is not None:
893
+ dataset['SpaseResourceId'] = spase_resource_id.text
894
+
895
+ additional_metadata = []
896
+ for add_meta in ds.findall('cdas:AdditionalMetadata',
897
+ namespaces=NS):
898
+ meta_type = add_meta.attrib['Type']
899
+ value = add_meta.text
900
+ additional_metadata.append({
901
+ 'Type': meta_type,
902
+ 'value': value
903
+ })
904
+
905
+ if len(additional_metadata) > 0:
906
+ dataset['AdditionalMetadata'] = additional_metadata
907
+
908
+ datasets.append(dataset)
909
+
910
+ return sorted(datasets, key=itemgetter('Id'))
911
+ # pylint: enable=too-many-branches
535
912
 
536
- if not datasets:
537
- return []
538
913
 
539
- return sorted(datasets['DatasetDescription'], key=itemgetter('Id'))
914
+ @staticmethod
915
+ def get_doi_landing_page_url(
916
+ doi: str
917
+ ) -> str:
918
+ """
919
+ Returns a URL to the given Digital Object Identifier's landing
920
+ page (metadata for the DOI).
921
+
922
+ Parameters
923
+ ----------
924
+ doi
925
+ digital object identifier.
926
+ Returns
927
+ -------
928
+ str
929
+ A URL to the DOI's landing page.
930
+ """
931
+
932
+ if not doi.startswith('http'):
933
+ return 'https://doi.org/' + doi
934
+ return doi
935
+
936
+
937
+ @staticmethod
938
+ def get_citation(
939
+ doi: str
940
+ ) -> str:
941
+ """
942
+ Returns the citation from doi.org for the given DOI.
943
+
944
+ Parameters
945
+ ----------
946
+ doi
947
+ digital object identifier.
948
+ Returns
949
+ -------
950
+ str
951
+ The citation from doi.org for the given DOI.
952
+ """
953
+
954
+ url = 'https://doi.org/' + doi
955
+ headers = {'Accept': 'text/x-bibliography; style=apa'}
956
+ response = requests.get(url, headers=headers,
957
+ timeout=30)
540
958
 
959
+ return response.text
541
960
 
542
961
 
543
- def get_inventory(self, identifier: str, **keywords
544
- ) -> List[TimeInterval]:
962
+ def get_inventory(
963
+ self,
964
+ identifier: str,
965
+ **keywords: str
966
+ ) -> List[TimeInterval]:
545
967
  """
546
968
  Gets a description of the specified dataset's data inventory.
547
969
 
@@ -550,14 +972,18 @@ class CdasWs:
550
972
  identifier
551
973
  dataset identifier of data inventory to get.
552
974
  keywords
553
- time interval value.
975
+ optional keyword parameters as follows:<br>
976
+ <b>timeInterval</b> - `timeinterval.TimeInterval` to restrict
977
+ returned inventory.
554
978
  Returns
555
979
  -------
556
980
  List
557
- An array of TimeIntervals when data is available.
981
+ An array of `timeinterval.TimeInterval`s when data is
982
+ available.
558
983
  """
559
984
 
560
- url = self._endpoint + 'datasets/' + identifier + '/inventory'
985
+ url = self._endpoint + 'datasets/' + \
986
+ urllib.parse.quote(identifier, safe='') + '/inventory'
561
987
 
562
988
  if 'timeInterval' in keywords:
563
989
  time_interval_keyword = keywords['timeInterval']
@@ -577,31 +1003,67 @@ class CdasWs:
577
1003
  self.logger.info('response.text: %s', response.text)
578
1004
  return []
579
1005
 
580
- inventory = response.json()
581
-
582
1006
  if self.logger.level <= logging.DEBUG:
583
- self.logger.debug('inventory = %s',
584
- json.dumps(inventory, indent=4, sort_keys=True))
1007
+ self.logger.debug('response.text = %s', response.text)
585
1008
 
1009
+ inventory = ET.fromstring(response.text)
586
1010
  intervals = []
587
-
588
- data_intervals = inventory['InventoryDescription'][0]
589
-
590
- if 'TimeInterval' in data_intervals:
591
-
592
- for time_interval in data_intervals['TimeInterval']:
593
-
1011
+ for inventory_desc in inventory.findall(\
1012
+ 'cdas:InventoryDescription',
1013
+ namespaces=NS):
1014
+ for time_interval in inventory_desc.findall(\
1015
+ 'cdas:TimeInterval',
1016
+ namespaces=NS):
594
1017
  intervals.append(
595
1018
  TimeInterval(
596
- time_interval['Start'],
597
- time_interval['End']
1019
+ time_interval.find('cdas:Start',
1020
+ namespaces=NS).text,
1021
+ time_interval.find('cdas:End',
1022
+ namespaces=NS).text
598
1023
  )
599
1024
  )
600
1025
 
601
1026
  return intervals
602
1027
 
603
1028
 
604
- def get_variables(self, identifier: str) -> List[Dict]:
1029
+ def get_example_time_interval(
1030
+ self,
1031
+ identifier: str,
1032
+ ) -> TimeInterval:
1033
+ """
1034
+ Gets a small example time interval for the specified dataset. The
1035
+ interval is near the end of the dataset's data inventory. The
1036
+ returned interval is not guaranteed to have non-fill data for any
1037
+ specific variable.
1038
+
1039
+ Parameters
1040
+ ----------
1041
+ identifier
1042
+ dataset identifier of data inventory to get.
1043
+ Returns
1044
+ -------
1045
+ timeinterval.TimeInterval
1046
+ An small example time interval that is likely, but not
1047
+ guaranteed, to have data or None if an interval cannot be
1048
+ found.
1049
+ """
1050
+
1051
+ time_intervals = self.get_inventory(identifier)
1052
+ if len(time_intervals) < 1:
1053
+ return None
1054
+ example_interval = time_intervals[-1]
1055
+ if re.search('MMS[1-4]_.+_BRST_.+', identifier):
1056
+ time_delta = timedelta(seconds=1)
1057
+ else:
1058
+ time_delta = timedelta(hours=2)
1059
+ example_interval.start = example_interval.end - time_delta
1060
+ return example_interval
1061
+
1062
+
1063
+ def get_variables(
1064
+ self,
1065
+ identifier: str
1066
+ ) -> List[Dict]:
605
1067
  """
606
1068
  Gets a description of the variables in the specified dataset.
607
1069
 
@@ -612,13 +1074,14 @@ class CdasWs:
612
1074
  Returns
613
1075
  -------
614
1076
  List
615
- A dictionary containing descriptions of the variables in
1077
+ A List of dictionary descriptions of the variables in
616
1078
  the specified dataset. The dictionary structure is defined by
617
1079
  the VariableDescription element in
618
1080
  <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>.
619
1081
  """
620
1082
 
621
- url = self._endpoint + 'datasets/' + identifier + '/variables'
1083
+ url = self._endpoint + 'datasets/' + \
1084
+ urllib.parse.quote(identifier, safe='') + '/variables'
622
1085
 
623
1086
  response = self._session.get(url, timeout=self._timeout)
624
1087
 
@@ -629,24 +1092,306 @@ class CdasWs:
629
1092
  self.logger.info('response.text: %s', response.text)
630
1093
  return []
631
1094
 
632
- variables = response.json()
1095
+ if self.logger.level <= logging.DEBUG:
1096
+ self.logger.debug('response.text = %s', response.text)
1097
+
1098
+ var_descriptions = ET.fromstring(response.text)
1099
+
1100
+ variables = []
1101
+ for var_description in var_descriptions.findall(\
1102
+ 'cdas:VariableDescription',
1103
+ namespaces=NS):
1104
+ name = var_description.find('cdas:Name',
1105
+ namespaces=NS).text
1106
+ short_description = var_description.find(\
1107
+ 'cdas:ShortDescription',
1108
+ namespaces=NS).text
1109
+ if short_description is None:
1110
+ short_description = ''
1111
+
1112
+ long_description = var_description.find(\
1113
+ 'cdas:LongDescription',
1114
+ namespaces=NS).text
1115
+ if long_description is None:
1116
+ long_description = ''
1117
+
1118
+ variables.append({
1119
+ 'Name': name,
1120
+ 'ShortDescription': short_description,
1121
+ 'LongDescription': long_description
1122
+ })
1123
+
1124
+ return variables
1125
+
1126
+
1127
+ def get_variable_names(
1128
+ self,
1129
+ identifier: str
1130
+ ) -> List[str]:
1131
+ """
1132
+ Gets the names of the variables in the specified dataset. This
1133
+ method is like the get_variables method except that it only returns
1134
+ the variable names and not the other metadata.
633
1135
 
634
- if not variables:
635
- return []
1136
+ Parameters
1137
+ ----------
1138
+ identifier
1139
+ dataset identifier of data to get.
1140
+ Returns
1141
+ -------
1142
+ List
1143
+ A List of the names of the variables in the specified dataset.
1144
+ """
636
1145
 
637
- return variables['VariableDescription']
1146
+ variable_names = []
1147
+ for variable in self.get_variables(identifier):
1148
+ variable_names.append(variable['Name'])
638
1149
 
1150
+ return variable_names
639
1151
 
640
- # pylint: disable=too-many-locals
641
- # pylint: disable=too-many-return-statements
642
- # pylint: disable=too-many-statements
643
- # pylint: disable=too-many-branches
644
- def get_data(self, dataset: str, variables: List[str],
645
- start: Union[datetime, str], end: Union[datetime, str],
646
- **keywords
647
- ) -> Tuple[int, spdm.SpaceData]:
1152
+
1153
+ @staticmethod
1154
+ def _get_thumbnail_description_dict(
1155
+ file_description_elem: ET.Element
1156
+ ) -> Dict:
648
1157
  """
649
- Gets the specified data from the server.
1158
+ Gets ThumbnailDescription dictionary representation from the
1159
+ given FileDescription element.
1160
+
1161
+ Parameters
1162
+ ----------
1163
+ file_description_elem
1164
+ a FileDescription Element.
1165
+ Returns
1166
+ -------
1167
+ Dict
1168
+ a Dictionary representation of the ThumbnailDescription
1169
+ contained in the given FileDescription element.
1170
+ """
1171
+ thumbnail_desc = file_description_elem.find(\
1172
+ 'cdas:ThumbnailDescription',
1173
+ namespaces=NS)
1174
+ if thumbnail_desc is not None:
1175
+ time_interval = thumbnail_desc.find('cdas:TimeInterval',
1176
+ namespaces=NS)
1177
+ start = time_interval.find('cdas:Start',
1178
+ namespaces=NS).text
1179
+ end = time_interval.find('cdas:End',
1180
+ namespaces=NS).text
1181
+ return {
1182
+ 'Name': thumbnail_desc.find('cdas:Name',
1183
+ namespaces=NS).text,
1184
+ 'Dataset': thumbnail_desc.find('cdas:Dataset',
1185
+ namespaces=NS).text,
1186
+ 'TimeInterval': {
1187
+ 'Start': start,
1188
+ 'End': end
1189
+ },
1190
+ 'VarName': thumbnail_desc.find('cdas:VarName',
1191
+ namespaces=NS).text,
1192
+ 'Options': int(thumbnail_desc.find(\
1193
+ 'cdas:Options',
1194
+ namespaces=NS).text),
1195
+ 'NumFrames': int(thumbnail_desc.find(\
1196
+ 'cdas:NumFrames',
1197
+ namespaces=NS).text),
1198
+ 'NumRows': int(thumbnail_desc.find(\
1199
+ 'cdas:NumRows',
1200
+ namespaces=NS).text),
1201
+ 'NumCols': int(thumbnail_desc.find(\
1202
+ 'cdas:NumCols',
1203
+ namespaces=NS).text),
1204
+ 'TitleHeight': int(thumbnail_desc.find(\
1205
+ 'cdas:TitleHeight',
1206
+ namespaces=NS).text),
1207
+ 'ThumbnailHeight': int(thumbnail_desc.find(\
1208
+ 'cdas:ThumbnailHeight',
1209
+ namespaces=NS).text),
1210
+ 'ThumbnailWidth': int(thumbnail_desc.find(\
1211
+ 'cdas:ThumbnailWidth',
1212
+ namespaces=NS).text),
1213
+ 'StartRecord': int(thumbnail_desc.find(\
1214
+ 'cdas:StartRecord',
1215
+ namespaces=NS).text),
1216
+ 'MyScale': float(thumbnail_desc.find(\
1217
+ 'cdas:MyScale',
1218
+ namespaces=NS).text),
1219
+ 'XyStep': float(thumbnail_desc.find(\
1220
+ 'cdas:XyStep',
1221
+ namespaces=NS).text)
1222
+ }
1223
+ return None
1224
+
1225
+
1226
+ @staticmethod
1227
+ def _get_data_result_dict(
1228
+ xml_data_result: str
1229
+ ) -> Dict:
1230
+ """
1231
+ Gets DataResult dictionary representation from the
1232
+ given XML DataResult element.
1233
+
1234
+ Parameters
1235
+ ----------
1236
+ xml_data_result
1237
+ XML representation of a DataResult.
1238
+ Returns
1239
+ -------
1240
+ Dict
1241
+ a Dictionary representation of the given XML representation
1242
+ of a DataResult.
1243
+ """
1244
+ data_result = ET.fromstring(xml_data_result)
1245
+ file_descriptions = []
1246
+ for file_description in data_result.findall(\
1247
+ 'cdas:FileDescription', namespaces=NS):
1248
+
1249
+ dict_file_description = {
1250
+ 'Name': file_description.find('cdas:Name',
1251
+ namespaces=NS).text,
1252
+ 'MimeType': file_description.find(\
1253
+ 'cdas:MimeType',
1254
+ namespaces=NS).text,
1255
+ 'StartTime': file_description.find(\
1256
+ 'cdas:StartTime',
1257
+ namespaces=NS).text,
1258
+ 'EndTime': file_description.find(\
1259
+ 'cdas:EndTime',
1260
+ namespaces=NS).text,
1261
+ 'Length': int(file_description.find(\
1262
+ 'cdas:Length',
1263
+ namespaces=NS).text),
1264
+ 'LastModified': file_description.find(\
1265
+ 'cdas:LastModified',
1266
+ namespaces=NS).text
1267
+ }
1268
+ thumbnail_dict = CdasWs._get_thumbnail_description_dict(\
1269
+ file_description)
1270
+ if thumbnail_dict is not None:
1271
+ dict_file_description['ThumbnailDescription'] = \
1272
+ thumbnail_dict
1273
+
1274
+ thumbnail_id_elem = file_description.find(\
1275
+ 'cdas:ThumbnailId',
1276
+ namespaces=NS)
1277
+ if thumbnail_id_elem is not None:
1278
+ dict_file_description['ThumbnailId'] = \
1279
+ thumbnail_id_elem.text
1280
+
1281
+ file_descriptions.append(dict_file_description)
1282
+
1283
+ if len(file_descriptions) > 0:
1284
+ return {
1285
+ 'FileDescription': file_descriptions
1286
+ }
1287
+ return None
1288
+
1289
+
1290
+ def get_data_result(
1291
+ self,
1292
+ data_request: DataRequest,
1293
+ progress_callback: Callable[[float, str, Any], int],
1294
+ progress_user_value: Any
1295
+ ) -> Tuple[int, Dict]:
1296
+ """
1297
+ Submits the given request to the server and returns the result.
1298
+ This is a relatively low-level method and most callers should
1299
+ probably use a higher-level method such as get_data.
1300
+
1301
+ Parameters
1302
+ ----------
1303
+ data_request
1304
+ data request.
1305
+ progress_callback
1306
+ function that is called repeatedly to report the progress
1307
+ of getting the data. The function should return 0 if it
1308
+ wants to continue getting data. If it returns a non-0 value,
1309
+ getting the data will be aborted and the get_data() function
1310
+ will immediately return (204, None). The float parameter
1311
+ is a value between 0.0 and 1.0 to indicate progress and
1312
+ the str parameter will contain a text message indicating
1313
+ the progress of this call.
1314
+ progressUserValue
1315
+ value that is passsed to the progressCallback function.
1316
+ Returns
1317
+ -------
1318
+ Tuple
1319
+ [0] contains the int HTTP status code. 200 when
1320
+ successful.<br>
1321
+ [1] contains a dictionary representing the DataResult from
1322
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
1323
+ or None.
1324
+ See Also
1325
+ --------
1326
+ CdasWs.get_data
1327
+ """
1328
+
1329
+ #self.logger.debug('data_request = %s', data_request.json())
1330
+ self.logger.debug('data_request = %s', data_request.xml_str())
1331
+
1332
+ url = self._endpoint + 'datasets'
1333
+
1334
+ for retries in range(RETRY_LIMIT):
1335
+ #response = self._session.post(url, data=data_request.json(),
1336
+ response = self._session.post(url, data=data_request.xml_str(),
1337
+ timeout=self._timeout)
1338
+
1339
+ if response.status_code == 200:
1340
+
1341
+ data_result = CdasWs._get_data_result_dict(response.text)
1342
+
1343
+ if not data_result:
1344
+ return (response.status_code, None)
1345
+
1346
+ return (response.status_code, data_result)
1347
+
1348
+ if response.status_code == 429 or \
1349
+ response.status_code == 503 and \
1350
+ 'Retry-After' in response.headers:
1351
+
1352
+ retry_after = response.headers['Retry-After']
1353
+
1354
+ self.logger.debug('429/503 status with Retry-After header: %s',
1355
+ retry_after)
1356
+
1357
+ if progress_callback is not None:
1358
+ if progress_callback(0.2, 'Waiting ' + retry_after + \
1359
+ 's before making server request.',
1360
+ progress_user_value) != 0:
1361
+ return (204, None)
1362
+
1363
+ retry_after = int(retry_after)
1364
+
1365
+ self.logger.info('Sleeping %d seconds before making request',
1366
+ retry_after)
1367
+ time.sleep(retry_after)
1368
+
1369
+ else:
1370
+ self.logger.info('%s failed with http code %d', url,
1371
+ response.status_code)
1372
+ self.logger.info('data_request = %s', data_request)
1373
+ self.logger.info('response.text: %s', response.text)
1374
+ return (response.status_code, None)
1375
+
1376
+ self.logger.info('%s failed with http code %d after %d retries',
1377
+ url, response.status_code, retries + 1)
1378
+ self.logger.info('data_request = %s', data_request)
1379
+ self.logger.info('response.text: %s', response.text)
1380
+ return (response.status_code, None)
1381
+
1382
+
1383
+ def get_data_file(
1384
+ self,
1385
+ dataset: str,
1386
+ variables: List[str],
1387
+ start: Union[datetime, str], end: Union[datetime, str],
1388
+ **keywords: Union[
1389
+ Dict,
1390
+ Callable[[float, str, Any], int],
1391
+ Any]
1392
+ ) -> Tuple[int, Dict]:
1393
+ """
1394
+ Gets the specified data file from the server.
650
1395
 
651
1396
  Parameters
652
1397
  ----------
@@ -655,163 +1400,133 @@ class CdasWs:
655
1400
  variables
656
1401
  array containing names of variables to get.
657
1402
  start
658
- start time of data to get.
1403
+ start time of data to get. See module note about timezone.
659
1404
  end
660
- end time of data to get.
1405
+ end time of data to get. See module note about timezone.
661
1406
  keywords
662
- optional keyword parameters as follows<br>
663
- binData - indicates that uniformly spaced values should be
664
- computed for scaler/vector/spectrogram data according to
665
- the given binning parameter values. binData may contain
666
- the following keys: interval, interpolateMissingValues,
667
- and/or sigmaMultiplier with values that override the
1407
+ optional keyword parameters as follows:<br>
1408
+ <b>binData</b> - indicates that uniformly spaced values should
1409
+ be computed for scaler/vector/spectrogram data according to
1410
+ the given binning parameter values. binData is a Dict that
1411
+ may contain the following keys: interval,
1412
+ interpolateMissingValues, sigmaMultiplier, and/or
1413
+ overrideDefaultBinning with values that override the
668
1414
  defaults.<br>
669
- progressCallback - is a
670
- typing.Callable[[float, str, typing.Any], int]
1415
+ <b>progressCallback</b> - is a
1416
+ Callable[[float, str, typing.Any], int]
671
1417
  function that is called repeatedly to report the progress
672
1418
  of getting the data. The function should return 0 if it
673
1419
  wants to continue getting data. If it returns non-0 value,
674
- getting the data will be aborted and the get_data() function
675
- will immediately return (204, None). The float parameter
676
- is a value between 0.0 and 1.0 to indicate progress and
677
- the str parameter will contain a text message indicating
1420
+ getting the data will be aborted and the get_data_file()
1421
+ function will immediately return (204, None). The float
1422
+ parameter is a value between 0.0 and 1.0 to indicate progress
1423
+ and the str parameter will contain a text message indicating
678
1424
  the progress of this call.<br>
679
- progressUserValue - is a typing.Any value that is passsed
1425
+ <b>progressUserValue</b> - is an Any value that is passsed
680
1426
  to the progressCallback function.<br>
681
1427
  Returns
682
1428
  -------
683
1429
  Tuple
684
- [0] contains a dictionary of HTTP and CDAS status information.
685
- When successful, ['http']['status_code'] will be 200.<br>
686
- [1] contains the requested data (SpaceData object) or None.
1430
+ [0] contains the int HTTP status code. 200 when
1431
+ successful.<br>
1432
+ [1] contains a dictionary representing the DataResult from
1433
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
1434
+ or None.
687
1435
  Raises
688
1436
  ------
689
1437
  ValueError
690
1438
  If the given start/end datetime values are invalid.
1439
+ See Also
1440
+ --------
1441
+ CdasWs.get_data : In addition to what get_data_file does,
1442
+ get_data also downloads and reads the data file into memory
1443
+ (SpaceData or xarray.Dataset object).
691
1444
  """
1445
+ # pylint: disable=too-many-locals
1446
+ # pylint: disable=too-many-return-statements
1447
+ # pylint: enable=too-many-statements
1448
+ # pylint: disable=too-many-branches
692
1449
 
693
- if isinstance(start, datetime):
694
- start_datetime = start
695
- elif isinstance(start, str):
696
- start_datetime = dateutil.parser.parse(start)
697
- else:
698
- raise ValueError('unrecognized start datetime value')
699
-
700
- if isinstance(end, datetime):
701
- end_datetime = end
702
- elif isinstance(end, str):
703
- end_datetime = dateutil.parser.parse(end)
704
- else:
705
- raise ValueError('unrecognized end datetime value')
1450
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
1451
+ end)
706
1452
 
707
- data_request = {
708
- 'CdfRequest': {
709
- 'CdfFormat': 'Cdf',
710
- 'TimeInterval': {
711
- 'Start': start_datetime.isoformat(),
712
- 'End': end_datetime.isoformat()
713
- },
714
- 'DatasetRequest': {
715
- 'DatasetId': dataset,
716
- 'VariableName': variables
717
- }
718
- }
719
- }
720
- if 'binData' in keywords:
721
- bin_data_kw = keywords['binData']
722
- data_request['CdfRequest']['BinData'] = {}
723
- if 'interval' in bin_data_kw:
724
- data_request['CdfRequest']['BinData']['Interval'] = \
725
- bin_data_kw['interval']
726
- if 'interpolateMissingValues' in bin_data_kw:
727
- data_request['CdfRequest']['BinData']['InterpolateMissingValues'] = \
728
- bin_data_kw['interpolateMissingValues']
729
- if 'sigmaMultiplier' in bin_data_kw:
730
- data_request['CdfRequest']['BinData']['SigmaMultiplier'] = \
731
- bin_data_kw['sigmaMultiplier']
1453
+ data_request = CdfRequest(dataset, variables,
1454
+ TimeInterval(start_datetime,
1455
+ end_datetime),
1456
+ 3, CdfFormat.BINARY,
1457
+ **keywords.get('binData', {}))
732
1458
 
733
1459
  progress_callback = keywords.get('progressCallback', None)
734
1460
  progress_user_value = keywords.get('progressUserValue', None)
735
1461
 
736
1462
  self.logger.debug('data_request = %s', data_request)
737
1463
 
738
- status = {
739
- 'http': {
740
- 'status_code': 204
741
- },
742
- 'cdas': {
743
- 'status': [],
744
- 'message': [],
745
- 'warning': [],
746
- 'error': []
747
- }
748
- }
749
1464
  if progress_callback is not None:
750
- if progress_callback(0.1, 'Making initial server request.',
1465
+ if progress_callback(0.1, 'Making server request.',
751
1466
  progress_user_value) != 0:
752
- return (status, None)
753
-
754
- url = self._endpoint + 'datasets'
755
-
756
- response = self._session.post(url, data=json.dumps(data_request),
757
- timeout=self._timeout)
1467
+ return (204, None)
758
1468
 
759
- status['http']['status_code'] = response.status_code
1469
+ status_code, data_result = self.get_data_result(data_request,
1470
+ progress_callback,
1471
+ progress_user_value)
760
1472
 
761
1473
  if progress_callback is not None:
762
- if progress_callback(0.2, 'Initial server request complete.',
1474
+ if progress_callback(1.0, 'Initial server request complete.',
763
1475
  progress_user_value) != 0:
764
- return (status, None)
765
-
766
- try:
767
- data_result = response.json()
768
- if 'Status' in data_result:
769
- status['cdas']['status'] = data_result['Status']
770
- if 'Message' in data_result:
771
- status['cdas']['message'] = data_result['Message']
772
- if 'Warning' in data_result:
773
- status['cdas']['warning'] = data_result['Warning']
774
- if 'Error' in data_result:
775
- status['cdas']['error'] = data_result['Error']
776
- except ValueError:
777
- # for example, a 503 from apache will not be json
778
- self.logger.debug('Non-JSON response: %s', response.text)
779
- status['http']['error_body'] = response.text
780
-
781
- if response.status_code != 200:
782
-
783
- self.logger.info('%s failed with http code %d', url,
784
- response.status_code)
785
- self.logger.info('data_request = %s', data_request)
786
- self.logger.info('response.text: %s', response.text)
787
- return (status, None)
1476
+ return (status_code, None)
788
1477
 
789
- if not data_result:
790
- return (status, None)
1478
+ return (status_code, data_result)
791
1479
 
792
- if self.logger.level <= logging.DEBUG:
793
- self.logger.debug('data_result = %s',
794
- json.dumps(data_result, indent=4,
795
- sort_keys=True))
796
1480
 
797
- if progress_callback is not None:
798
- if progress_callback(0.3, 'Beginning download of data.',
799
- progress_user_value) != 0:
800
- return (status, None)
1481
+ def download(
1482
+ self,
1483
+ url: str,
1484
+ size: int = 0,
1485
+ **keywords
1486
+ ) -> str:
1487
+ """
1488
+ Downloads the file specified by the given URL to a temporary
1489
+ file without reading all of it into memory. This method
1490
+ utilizes the connection pool and persistent HTTP connection
1491
+ to the CdasWs server.
801
1492
 
802
- file_descriptions = data_result['FileDescription']
1493
+ Parameters
1494
+ ----------
1495
+ url
1496
+ URL of file to download.
1497
+ size
1498
+ number of bytes in file to download.
1499
+ keywords
1500
+ optional keyword parameters as follows:<br>
1501
+ <b>progressCallback</b> - is a
1502
+ typing.Callable[[float, str, typing.Any], int]
1503
+ function that is called repeatedly to report the progress
1504
+ of getting the data. The function should return 0 if it
1505
+ wants to continue getting data. If it returns a non-0 value,
1506
+ getting the data will be aborted and this download() function
1507
+ will immediately return None. The float parameter
1508
+ is a value between 0.0 and 1.0 to indicate progress and
1509
+ the str parameter will contain a text message indicating
1510
+ the progress of this call.<br>
1511
+ <b>progressUserValue</b> - is a typing.Any value that is
1512
+ passsed to the progressCallback function.<br>
1513
+ Returns
1514
+ -------
1515
+ str
1516
+ name of tempory file or None if there was an error.
1517
+ """
1518
+ # pylint: disable=too-many-locals
803
1519
 
804
- data_url = file_descriptions[0]['Name']
805
- data_length = file_descriptions[0]['Length']
1520
+ progress_callback = keywords.get('progressCallback', None)
1521
+ progress_user_value = keywords.get('progressUserValue', None)
806
1522
 
807
- self.logger.debug('data_url = %s, data_length = %d',
808
- data_url, data_length)
1523
+ suffix = os.path.splitext(urlparse(url).path)[1]
809
1524
 
810
- file_descriptor, tmp_filename = mkstemp(suffix='.cdf')
1525
+ file_descriptor, tmp_filename = mkstemp(suffix=suffix)
811
1526
 
812
1527
  download_bytes = 0
813
1528
  next_progress_report = 0.1
814
- with self._session.get(data_url, stream=True,
1529
+ with self._session.get(url, stream=True,
815
1530
  timeout=self._timeout) as response:
816
1531
 
817
1532
  file = open(tmp_filename, 'wb')
@@ -821,33 +1536,831 @@ class CdasWs:
821
1536
  # file.flush()
822
1537
  if progress_callback is not None:
823
1538
  download_bytes += len(chunk)
824
- download_progress = float(download_bytes) / data_length
1539
+ if size == 0:
1540
+ download_progress = 0.0
1541
+ else:
1542
+ download_progress = float(download_bytes) / size
825
1543
  if download_progress > next_progress_report:
826
1544
  next_progress_report += download_progress
827
- if progress_callback(0.3 + 0.1 * download_progress,
828
- 'Continuing download of data.',
1545
+ if progress_callback(download_progress,\
1546
+ 'Continuing download of data.',
829
1547
  progress_user_value) != 0:
1548
+
830
1549
  file.close()
831
1550
  os.close(file_descriptor)
832
- return (status, None)
1551
+ return None
833
1552
  file.close()
834
1553
  os.close(file_descriptor)
835
1554
 
836
1555
  if progress_callback is not None:
837
- if progress_callback(0.4, 'Data download complete. Reading data.',
1556
+ if progress_callback(0.4,
1557
+ 'Data download complete. Reading data.',
838
1558
  progress_user_value) != 0:
839
- return (status, None)
1559
+ return None
840
1560
 
841
- data = spdm.fromCDF(tmp_filename)
842
- if progress_callback is not None:
843
- if progress_callback(1.0, 'Finished reading data.',
844
- progress_user_value) != 0:
1561
+ return tmp_filename
1562
+
1563
+
1564
+ @staticmethod
1565
+ def read_data(
1566
+ filename: str,
1567
+ data_representation: DataRepresentation
1568
+ ) -> Union['spacepy.datamodel', 'xr.Dataset']:
1569
+ """
1570
+ Reads the data from the given file.
1571
+
1572
+ Parameters
1573
+ ----------
1574
+ filename
1575
+ Name of file to read.
1576
+ data_representation
1577
+ Requested data representation.
1578
+ Returns
1579
+ -------
1580
+ spacepy.datamodel or xr.Dataset
1581
+ Data from file.
1582
+ Raises
1583
+ ------
1584
+ Exception
1585
+ If an Exception is raise by either the spdm.fromCDF() or
1586
+ cdflib.cdf_to_xarray() functions.
1587
+ ModuleNotFoundError
1588
+ If the required spacepy.datamodel or the cdflib and xarray
1589
+ modules are not installed.
1590
+ """
1591
+ if data_representation is None:
1592
+ if SPDM_AVAILABLE:
1593
+ return spdm.fromCDF(filename)
1594
+ if CDF_XARRAY_AVAILABLE:
1595
+ return cdf_to_xarray(filename, to_datetime=True,
1596
+ fillval_to_nan=True)
1597
+ raise ModuleNotFoundError(
1598
+ 'neither the spacepy.datamodel nor the cdflib and '
1599
+ 'xarray modules are installed')
1600
+
1601
+ if data_representation is DataRepresentation.SPACEPY and \
1602
+ not SPDM_AVAILABLE:
1603
+ raise ModuleNotFoundError('spacepy module must be installed')
1604
+ if data_representation is DataRepresentation.XARRAY and \
1605
+ not CDF_XARRAY_AVAILABLE:
1606
+ raise ModuleNotFoundError('cdflib and xarray modules must be installed')
1607
+
1608
+ if data_representation is DataRepresentation.SPACEPY:
1609
+ return spdm.fromCDF(filename)
1610
+ if data_representation is DataRepresentation.XARRAY:
1611
+ return cdf_to_xarray(filename, to_datetime=True,
1612
+ fillval_to_nan=True)
1613
+ return None
1614
+
1615
+
1616
+ def get_data(
1617
+ self,
1618
+ dataset: str,
1619
+ variables: List[str],
1620
+ time0: Union[TimeInterval, List[TimeInterval], datetime, str],
1621
+ time1: Union[datetime, str] = None,
1622
+ **keywords: Union[
1623
+ Dict,
1624
+ DataRepresentation,
1625
+ Callable[[float, str, Any], int],
1626
+ Any]
1627
+ ) -> Tuple[Dict, 'spdm.SpaceData', 'xarray']:
1628
+ """
1629
+ Gets the specified data from the server. The representation
1630
+ of the returned data is determined as follows:<br>
1631
+ 1. If a dataRepresentation keyword parameter is given, its
1632
+ value will determine the representation of the returned
1633
+ data. If no dataRepresenation keyword parameter is
1634
+ given, then<br>
1635
+ 2. If the presence of spacepy.datamodel is found, then the data
1636
+ is returned in the spacepy.datamodel representation.<br>
1637
+ 3. If the presence of the cdflib and xarray modules are found,
1638
+ then the data is returned in an xarray.Dataset.
1639
+
1640
+ Parameters
1641
+ ----------
1642
+ dataset
1643
+ dataset identifier of data to get.
1644
+ variables
1645
+ array containing names of variables to get. The value
1646
+ ALL-VARIABLES may be used instead of specifying all the
1647
+ individual variable names.
1648
+ time0
1649
+ TimeInterval(s) or start time of data to get. See module
1650
+ note about timezone.
1651
+ time1
1652
+ when time0 is not one or more TimeInterval(s), the end time
1653
+ of data to get. See module note about timezone.
1654
+ keywords
1655
+ optional keyword parameters as follows:<br>
1656
+ <b>binData</b> - indicates that uniformly spaced values should
1657
+ be computed for scaler/vector/spectrogram data according to
1658
+ the given binning parameter values. See
1659
+ <https://cdaweb.gsfc.nasa.gov/CDAWeb_Binning_readme.html>
1660
+ for more details. binData is a Dict that
1661
+ may contain the following keys: interval,
1662
+ interpolateMissingValues, sigmaMultiplier, and/or
1663
+ overrideDefaultBinning with values that override the
1664
+ defaults.<br>
1665
+ <b>dataRepresentation</b> - specifies the representation of
1666
+ the returned data as one of
1667
+ `datarepresentation.DataRepresentation`.<br>
1668
+ <b>progressCallback</b> - is a
1669
+ Callable[[float, str, typing.Any], int]
1670
+ function that is called repeatedly to report the progress
1671
+ of getting the data. The function should return 0 if it
1672
+ wants to continue getting data. If it returns non-0 value,
1673
+ getting the data will be aborted and the get_data() function
1674
+ will immediately return (204, None). The float parameter
1675
+ is a value between 0.0 and 1.0 to indicate progress and
1676
+ the str parameter will contain a text message indicating
1677
+ the progress of this call.<br>
1678
+ <b>progressUserValue</b> - is an Any value that is passsed
1679
+ to the progressCallback function.<br>
1680
+ Returns
1681
+ -------
1682
+ Tuple
1683
+ [0] contains a dictionary of HTTP and CDAS status information.
1684
+ When successful, ['http']['status_code'] will be 200.<br>
1685
+ [1] contains the requested data (SpaceData or xarray.Dataset
1686
+ object) or None.
1687
+ Raises
1688
+ ------
1689
+ ValueError
1690
+ If no variables are given or if the given start/end datetime
1691
+ values are invalid.
1692
+ """
1693
+ # pylint: disable=too-many-locals
1694
+ # pylint: disable=too-many-return-statements
1695
+ # pylint: disable=too-many-statements
1696
+ # pylint: disable=too-many-branches
1697
+ # pylint: disable=import-outside-toplevel
1698
+
1699
+ #import spacepy.datamodel as spdm # type: ignore
1700
+
1701
+ if len(variables) < 1:
1702
+ raise ValueError('at least one variable name is required')
1703
+
1704
+ if isinstance(time0, (str, datetime)):
1705
+ if isinstance(time1, (str, datetime)):
1706
+ time_intervals = [TimeInterval(time0, time1)]
1707
+ else:
1708
+ raise ValueError('time1 must be str/datetime')
1709
+ elif isinstance(time0, TimeInterval):
1710
+ time_intervals = [time0]
1711
+ elif isinstance(time0, list) and len(time0) > 0 and\
1712
+ isinstance(time0[0], TimeInterval):
1713
+ time_intervals = time0
1714
+ else:
1715
+ raise ValueError('invalid time0 type')
1716
+
1717
+ data_request = CdfRequest(dataset, variables,
1718
+ time_intervals,
1719
+ 3, CdfFormat.BINARY,
1720
+ binData=keywords.get('binData', {}))
1721
+
1722
+ data_rep = keywords.get('dataRepresentation', None)
1723
+ progress_callback = keywords.get('progressCallback', None)
1724
+ progress_user_value = keywords.get('progressUserValue', None)
1725
+
1726
+ self.logger.debug('data_request = %s', data_request)
1727
+
1728
+ status = {
1729
+ 'http': {
1730
+ 'status_code': 204
1731
+ },
1732
+ 'cdas': {
1733
+ 'status': [],
1734
+ 'message': [],
1735
+ 'warning': [],
1736
+ 'error': []
1737
+ }
1738
+ }
1739
+
1740
+ if progress_callback is not None:
1741
+ if progress_callback(0.1, 'Making initial server request.',
1742
+ progress_user_value) != 0:
1743
+ return (status, None)
1744
+
1745
+ status_code, data_result = self.get_data_result(data_request,
1746
+ progress_callback,
1747
+ progress_user_value)
1748
+
1749
+ status['http']['status_code'] = status_code
1750
+
1751
+ if progress_callback is not None:
1752
+ if progress_callback(0.3, 'Initial server request complete.',
1753
+ progress_user_value) != 0:
845
1754
  return (status, None)
846
- os.remove(tmp_filename)
1755
+
1756
+ if status_code != 200:
1757
+
1758
+ self.logger.info('get_data_result failed with http code %d',
1759
+ status_code)
1760
+ self.logger.info('data_request = %s', data_request)
1761
+ return (status, None)
1762
+
1763
+ if not data_result:
1764
+ return (status, None)
1765
+
1766
+ if 'Status' in data_result:
1767
+ status['cdas']['status'] = data_result['Status']
1768
+ if 'Message' in data_result:
1769
+ status['cdas']['message'] = data_result['Message']
1770
+ if 'Warning' in data_result:
1771
+ status['cdas']['warning'] = data_result['Warning']
1772
+ if 'Error' in data_result:
1773
+ status['cdas']['error'] = data_result['Error']
1774
+
1775
+ if progress_callback is not None:
1776
+ if progress_callback(0.4, 'Beginning download of data.',
1777
+ progress_user_value) != 0:
1778
+ return (status, None)
1779
+
1780
+ file_descriptions = data_result['FileDescription']
1781
+
1782
+ data_url = file_descriptions[0]['Name']
1783
+ data_length = file_descriptions[0]['Length']
1784
+
1785
+ self.logger.debug('data_url = %s, data_length = %d',
1786
+ data_url, data_length)
1787
+
1788
+ sub_progress_control = {
1789
+ 'progressCallback': progress_callback,
1790
+ 'progressUserValue': progress_user_value,
1791
+ 'progressStart': 0.4,
1792
+ 'progressFraction': 0.1
1793
+ }
1794
+
1795
+ tmp_filename = self.download(data_url, data_length,
1796
+ progressCallback=_get_data_progress,
1797
+ progressUserValue=sub_progress_control)
1798
+
1799
+ try:
1800
+ data = self.read_data(tmp_filename, data_rep)
1801
+ os.remove(tmp_filename)
1802
+ if progress_callback is not None:
1803
+ if progress_callback(1.0, 'Finished reading data.',
1804
+ progress_user_value) != 0:
1805
+ return (status, None)
1806
+ except:
1807
+ self.logger.error('Exception from read_data(%s): %s, %s',
1808
+ tmp_filename, sys.exc_info()[0],
1809
+ sys.exc_info()[1])
1810
+ self.logger.error('CDF file has been retained.')
1811
+ raise
847
1812
  return (status, data)
848
- # pylint: enable=too-many-locals
849
- # pylint: enable=too-many-return-statements
850
- # pylint: enable=too-many-statements
851
- # pylint: enable=too-many-branches
852
1813
 
853
- # pylint: enable=too-many-instance-attributes
1814
+
1815
+ # pylint: disable=too-many-arguments
1816
+ def get_graph(
1817
+ self,
1818
+ dataset: str,
1819
+ variables: List[str],
1820
+ start: Union[datetime, str],
1821
+ end: Union[datetime, str],
1822
+ options: GraphOptions = None,
1823
+ image_format: List[ImageFormat] = None,
1824
+ **keywords
1825
+ ) -> Tuple[int, Dict]:
1826
+ """
1827
+ Gets a graphical representation of the specified data from the
1828
+ server.
1829
+
1830
+ Parameters
1831
+ ----------
1832
+ dataset
1833
+ dataset identifier of data to get.
1834
+ variables
1835
+ array containing names of variables to get.
1836
+ start
1837
+ start time of data to get. See module note about timezone.
1838
+ end
1839
+ end time of data to get. See module note about timezone.
1840
+ options
1841
+ graph options.
1842
+ image_format
1843
+ image format. If None, then [ImageFormat.PNG].
1844
+ keywords
1845
+ optional keyword parameters as follows:<br>
1846
+ <b>binData</b> - indicates that uniformly spaced values should
1847
+ be computed for scaler/vector/spectrogram data according to
1848
+ the given binning parameter values. binData is a Dict that
1849
+ may contain the following keys: interval,
1850
+ interpolateMissingValues, sigmaMultiplier, and/or
1851
+ overrideDefaultBinning with values that override the
1852
+ defaults.<br>
1853
+ <b>progressCallback</b> - is a
1854
+ typing.Callable[[float, str, typing.Any], int]
1855
+ function that is called repeatedly to report the progress
1856
+ of getting the data. The function should return 0 if it
1857
+ wants to continue getting data. If it returns non-0 value,
1858
+ getting the data will be aborted and the get_data() function
1859
+ will immediately return (204, None). The float parameter
1860
+ is a value between 0.0 and 1.0 to indicate progress and
1861
+ the str parameter will contain a text message indicating
1862
+ the progress of this call.<br>
1863
+ <b>progressUserValue</b> - is a typing.Any value that is
1864
+ passsed to the progressCallback function.<br>
1865
+ Returns
1866
+ -------
1867
+ Tuple
1868
+ [0] contains the HTTP status code value (200 when successful).<br>
1869
+ [1] contains a dictionary representation of a
1870
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
1871
+ DataResult object or None.<br>
1872
+ Raises
1873
+ ------
1874
+ ValueError
1875
+ If the given start/end datetime values are invalid.
1876
+ """
1877
+ # pylint: disable=too-many-locals
1878
+ # pylint: disable=too-many-return-statements
1879
+ # pylint: enable=too-many-statements
1880
+ # pylint: disable=too-many-branches
1881
+
1882
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
1883
+ end)
1884
+
1885
+ request = GraphRequest(dataset, variables,
1886
+ TimeInterval(start_datetime, end_datetime),
1887
+ options, image_format,
1888
+ **keywords)
1889
+
1890
+ progress_callback = keywords.get('progressCallback', None)
1891
+ progress_user_value = keywords.get('progressUserValue', None)
1892
+
1893
+ self.logger.debug('request = %s', request)
1894
+
1895
+ if progress_callback is not None:
1896
+ if progress_callback(0.1, 'Making server request.',
1897
+ progress_user_value) != 0:
1898
+ return (204, None)
1899
+
1900
+ status_code, result = self.get_data_result(request, progress_callback, progress_user_value)
1901
+
1902
+ if progress_callback is not None:
1903
+ if progress_callback(1.0, 'Server request complete.',
1904
+ progress_user_value) != 0:
1905
+ return (status_code, None)
1906
+
1907
+ if status_code != 200:
1908
+
1909
+ self.logger.info('get_result failed with http code %d',
1910
+ status_code)
1911
+ self.logger.info('request = %s', request)
1912
+ return (status_code, None)
1913
+
1914
+ return (status_code, result)
1915
+ # pylint: enable=too-many-arguments
1916
+
1917
+
1918
+ # pylint: disable=too-many-arguments
1919
+ def get_thumbnail(
1920
+ self,
1921
+ dataset: str,
1922
+ variables: List[str],
1923
+ start: Union[datetime, str],
1924
+ end: Union[datetime, str],
1925
+ identifier: str,
1926
+ thumbnail: int = 1,
1927
+ **keywords
1928
+ ) -> Tuple[int, Dict]:
1929
+ """
1930
+ Gets a graphical representation of the specified data from the
1931
+ server.
1932
+
1933
+ Parameters
1934
+ ----------
1935
+ dataset
1936
+ dataset identifier of data to get.
1937
+ variables
1938
+ array containing names of variables to get.
1939
+ start
1940
+ start time of data to get. See module note about timezone.
1941
+ end
1942
+ end time of data to get. See module note about timezone.
1943
+ identifier
1944
+ thumbnail identifier (returned in a previous get_graph
1945
+ result).
1946
+ thumbnail
1947
+ number of thumbnail whose full size image is being requested.
1948
+ Thumbnail images are counted beginning at one (not zero).
1949
+ keywords
1950
+ optional keyword parameters as follows:<br>
1951
+ <b>progressCallback</b> - is a
1952
+ typing.Callable[[float, str, typing.Any], int]
1953
+ function that is called repeatedly to report the progress
1954
+ of getting the data. The function should return 0 if it
1955
+ wants to continue getting data. If it returns non-0 value,
1956
+ getting the data will be aborted and the get_data() function
1957
+ will immediately return (204, None). The float parameter
1958
+ is a value between 0.0 and 1.0 to indicate progress and
1959
+ the str parameter will contain a text message indicating
1960
+ the progress of this call.<br>
1961
+ <b>progressUserValue</b> - is a typing.Any value that is
1962
+ passsed to the progressCallback function.<br>
1963
+ Returns
1964
+ -------
1965
+ Tuple
1966
+ [0] contains the HTTP status code value (200 when successful).<br>
1967
+ [1] contains a dictionary representation of a
1968
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
1969
+ DataResult object or None.<br>
1970
+ Raises
1971
+ ------
1972
+ ValueError
1973
+ If the given start/end datetime values are invalid.
1974
+ """
1975
+ # pylint: disable=too-many-locals
1976
+ # pylint: disable=too-many-return-statements
1977
+ # pylint: enable=too-many-statements
1978
+ # pylint: disable=too-many-branches
1979
+
1980
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
1981
+ end)
1982
+
1983
+ request = ThumbnailRequest(dataset, variables,
1984
+ TimeInterval(start_datetime, end_datetime),
1985
+ identifier, thumbnail)
1986
+
1987
+ progress_callback = keywords.get('progressCallback', None)
1988
+ progress_user_value = keywords.get('progressUserValue', None)
1989
+
1990
+ self.logger.debug('request = %s', request)
1991
+
1992
+ if progress_callback is not None:
1993
+ if progress_callback(0.1, 'Making server request.',
1994
+ progress_user_value) != 0:
1995
+ return (204, None)
1996
+
1997
+ status_code, result = self.get_data_result(request,
1998
+ progress_callback,
1999
+ progress_user_value)
2000
+
2001
+ if progress_callback is not None:
2002
+ if progress_callback(1.0, 'Server request complete.',
2003
+ progress_user_value) != 0:
2004
+ return (status_code, None)
2005
+
2006
+ if status_code != 200:
2007
+
2008
+ self.logger.info('get_result failed with http code %d',
2009
+ status_code)
2010
+ self.logger.info('request = %s', request)
2011
+ return (status_code, None)
2012
+
2013
+ return (status_code, result)
2014
+ # pylint: enable=too-many-arguments
2015
+
2016
+
2017
+ # pylint: disable=too-many-arguments
2018
+ def get_text(
2019
+ self,
2020
+ dataset: str,
2021
+ variables: List[str],
2022
+ start: Union[datetime, str],
2023
+ end: Union[datetime, str],
2024
+ compression: Compression = Compression.UNCOMPRESSED,
2025
+ text_format: TextFormat = TextFormat.PLAIN,
2026
+ **keywords
2027
+ ) -> Tuple[int, Dict]:
2028
+ """
2029
+ Gets a textual representation of the specified data from the
2030
+ server.
2031
+
2032
+ Parameters
2033
+ ----------
2034
+ dataset
2035
+ dataset identifier of data to get.
2036
+ variables
2037
+ array containing names of variables to get.
2038
+ start
2039
+ start time of data to get. See module note about timezone.
2040
+ end
2041
+ end time of data to get. See module note about timezone.
2042
+ compression
2043
+ file compression.
2044
+ text_format
2045
+ text format.
2046
+ keywords
2047
+ optional keyword parameters as follows:<br>
2048
+ <b>binData</b> - indicates that uniformly spaced values should
2049
+ be computed for scaler/vector/spectrogram data according to
2050
+ the given binning parameter values. binData is a Dict that
2051
+ may contain the following keys: interval,
2052
+ interpolateMissingValues, sigmaMultiplier, and/or
2053
+ overrideDefaultBinning with values that override the
2054
+ defaults.<br>
2055
+ <b>progressCallback</b> - is a
2056
+ typing.Callable[[float, str, typing.Any], int]
2057
+ function that is called repeatedly to report the progress
2058
+ of getting the data. The function should return 0 if it
2059
+ wants to continue getting data. If it returns non-0 value,
2060
+ getting the data will be aborted and the get_data() function
2061
+ will immediately return (204, None). The float parameter
2062
+ is a value between 0.0 and 1.0 to indicate progress and
2063
+ the str parameter will contain a text message indicating
2064
+ the progress of this call.<br>
2065
+ <b>progressUserValue</b> - is a typing.Any value that is
2066
+ passsed to the progressCallback function.<br>
2067
+ Returns
2068
+ -------
2069
+ Tuple
2070
+ [0] contains the HTTP status code value (200 when successful).<br>
2071
+ [1] contains a dictionary representation of a
2072
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
2073
+ DataResult object or None.<br>
2074
+ Raises
2075
+ ------
2076
+ ValueError
2077
+ If the given start/end datetime values are invalid.
2078
+ """
2079
+ # pylint: disable=too-many-locals
2080
+ # pylint: disable=too-many-return-statements
2081
+ # pylint: enable=too-many-statements
2082
+ # pylint: disable=too-many-branches
2083
+
2084
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
2085
+ end)
2086
+
2087
+ request = TextRequest(dataset, variables,
2088
+ TimeInterval(start_datetime, end_datetime),
2089
+ compression, text_format,
2090
+ **keywords)
2091
+
2092
+ progress_callback = keywords.get('progressCallback', None)
2093
+ progress_user_value = keywords.get('progressUserValue', None)
2094
+
2095
+ self.logger.debug('request = %s', request)
2096
+
2097
+ if progress_callback is not None:
2098
+ if progress_callback(0.1, 'Making server request.',
2099
+ progress_user_value) != 0:
2100
+ return (204, None)
2101
+
2102
+ status_code, result = self.get_data_result(request,
2103
+ progress_callback,
2104
+ progress_user_value)
2105
+
2106
+ if progress_callback is not None:
2107
+ if progress_callback(1.0, 'Server request complete.',
2108
+ progress_user_value) != 0:
2109
+ return (status_code, None)
2110
+
2111
+ if status_code != 200:
2112
+
2113
+ self.logger.info('get_result failed with http code %d',
2114
+ status_code)
2115
+ self.logger.info('request = %s', request)
2116
+ return (status_code, None)
2117
+
2118
+ return (status_code, result)
2119
+ # pylint: enable=too-many-arguments
2120
+
2121
+
2122
+ def get_audio(
2123
+ self,
2124
+ dataset: str,
2125
+ variables: List[str],
2126
+ start: Union[datetime, str],
2127
+ end: Union[datetime, str],
2128
+ **keywords
2129
+ ) -> Tuple[int, Dict]:
2130
+ """
2131
+ Gets an audio representation of the specified data from the
2132
+ server.
2133
+
2134
+ Parameters
2135
+ ----------
2136
+ dataset
2137
+ dataset identifier of data to get.
2138
+ variables
2139
+ array containing names of variables to get.
2140
+ start
2141
+ start time of data to get. See module note about timezone.
2142
+ end
2143
+ end time of data to get. See module note about timezone.
2144
+ keywords
2145
+ optional keyword parameters as follows:<br>
2146
+ <b>binData</b> - indicates that uniformly spaced values should
2147
+ be computed for scaler/vector/spectrogram data according to
2148
+ the given binning parameter values. binData is a Dict that
2149
+ may contain the following keys: interval,
2150
+ interpolateMissingValues, sigmaMultiplier, and/or
2151
+ overrideDefaultBinning with values that override the
2152
+ defaults.<br>
2153
+ <b>progressCallback</b> - is a
2154
+ typing.Callable[[float, str, typing.Any], int]
2155
+ function that is called repeatedly to report the progress
2156
+ of getting the data. The function should return 0 if it
2157
+ wants to continue getting data. If it returns non-0 value,
2158
+ getting the data will be aborted and the get_data() function
2159
+ will immediately return (204, None). The float parameter
2160
+ is a value between 0.0 and 1.0 to indicate progress and
2161
+ the str parameter will contain a text message indicating
2162
+ the progress of this call.<br>
2163
+ <b>progressUserValue</b> - is a typing.Any value that is
2164
+ passsed to the progressCallback function.<br>
2165
+ Returns
2166
+ -------
2167
+ Tuple
2168
+ [0] contains the HTTP status code value (200 when successful).<br>
2169
+ [1] contains a dictionary representation of a
2170
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
2171
+ DataResult object or None.<br>
2172
+ Raises
2173
+ ------
2174
+ ValueError
2175
+ If the given start/end datetime values are invalid.
2176
+ """
2177
+ # pylint: disable=too-many-locals
2178
+ # pylint: disable=too-many-return-statements
2179
+ # pylint: enable=too-many-statements
2180
+ # pylint: disable=too-many-branches
2181
+
2182
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
2183
+ end)
2184
+
2185
+ request = AudioRequest(dataset, variables,
2186
+ TimeInterval(start_datetime, end_datetime),
2187
+ **keywords)
2188
+
2189
+ progress_callback = keywords.get('progressCallback', None)
2190
+ progress_user_value = keywords.get('progressUserValue', None)
2191
+
2192
+ self.logger.debug('request = %s', request)
2193
+
2194
+ if progress_callback is not None:
2195
+ if progress_callback(0.1, 'Making server request.',
2196
+ progress_user_value) != 0:
2197
+ return (204, None)
2198
+
2199
+ status_code, result = self.get_data_result(request,
2200
+ progress_callback,
2201
+ progress_user_value)
2202
+
2203
+ if progress_callback is not None:
2204
+ if progress_callback(1.0, 'Server request complete.',
2205
+ progress_user_value) != 0:
2206
+ return (status_code, None)
2207
+
2208
+ if status_code != 200:
2209
+
2210
+ self.logger.info('get_result failed with http code %d',
2211
+ status_code)
2212
+ self.logger.info('request = %s', request)
2213
+ return (status_code, None)
2214
+
2215
+ return (status_code, result)
2216
+
2217
+
2218
+ def get_original_files(
2219
+ self,
2220
+ dataset: str,
2221
+ start: Union[datetime, str],
2222
+ end: Union[datetime, str],
2223
+ **keywords
2224
+ ) -> Tuple[int, Dict]:
2225
+ """
2226
+ Gets original data files from a dataset. Original data files
2227
+ lack updated meta-data and virtual variable values contained
2228
+ in files obtained from the `CdasWs.get_data`. Most callers
2229
+ should probably use `CdasWs.get_data` instead of this function.
2230
+
2231
+ Parameters
2232
+ ----------
2233
+ dataset
2234
+ dataset identifier of data to get.
2235
+ start
2236
+ start time of data to get. See module note about timezone.
2237
+ end
2238
+ end time of data to get. See module note about timezone.
2239
+ keywords
2240
+ optional keyword parameters as follows:<br>
2241
+ <b>progressCallback</b> - is a
2242
+ typing.Callable[[float, str, typing.Any], int]
2243
+ function that is called repeatedly to report the progress
2244
+ of getting the data. The function should return 0 if it
2245
+ wants to continue getting data. If it returns non-0 value,
2246
+ getting the data will be aborted and the get_data() function
2247
+ will immediately return (204, None). The float parameter
2248
+ is a value between 0.0 and 1.0 to indicate progress and
2249
+ the str parameter will contain a text message indicating
2250
+ the progress of this call.<br>
2251
+ <b>progressUserValue</b> - is a typing.Any value that is
2252
+ passsed to the progressCallback function.<br>
2253
+ Returns
2254
+ -------
2255
+ Tuple
2256
+ [0] contains the HTTP status code value (200 when successful).<br>
2257
+ [1] array of dictionary representations of a
2258
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
2259
+ FileDescription objects or None.<br>
2260
+ Raises
2261
+ ------
2262
+ ValueError
2263
+ If the given start/end datetime values are invalid.
2264
+ See Also
2265
+ --------
2266
+ CdasWs.get_data
2267
+ """
2268
+ # pylint: disable=too-many-locals
2269
+ # pylint: disable=too-many-return-statements
2270
+ # pylint: enable=too-many-statements
2271
+ # pylint: disable=too-many-branches
2272
+
2273
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
2274
+ end)
2275
+
2276
+ request = CdfRequest(dataset, [],
2277
+ TimeInterval(start_datetime, end_datetime))
2278
+
2279
+ progress_callback = keywords.get('progressCallback', None)
2280
+ progress_user_value = keywords.get('progressUserValue', None)
2281
+
2282
+ self.logger.debug('request = %s', request)
2283
+
2284
+ if progress_callback is not None:
2285
+ if progress_callback(0.1, 'Making server request.',
2286
+ progress_user_value) != 0:
2287
+ return (204, None)
2288
+
2289
+ status_code, result = self.get_data_result(request,
2290
+ progress_callback,
2291
+ progress_user_value)
2292
+
2293
+ if progress_callback is not None:
2294
+ if progress_callback(1.0, 'Server request complete.',
2295
+ progress_user_value) != 0:
2296
+ return (status_code, None)
2297
+
2298
+ if status_code != 200:
2299
+
2300
+ self.logger.info('get_result failed with http code %d',
2301
+ status_code)
2302
+ self.logger.info('request = %s', request)
2303
+ return (status_code, None)
2304
+
2305
+ return (status_code, result['FileDescription'])
2306
+
2307
+
2308
+ def get_ssc_id(
2309
+ self,
2310
+ dataset: str
2311
+ ) -> Tuple[int, Union[str, List[str]]]:
2312
+ """
2313
+ Gets the Satellite Situation Center (SSC)
2314
+ <https://sscweb.gsfc.nasa.gov/> observatory identifier(s)
2315
+ associated with the given cdaweb dataset identifier.
2316
+
2317
+ Notes
2318
+ -----
2319
+ This method relies upon the Heliophysics Data Portal's
2320
+ <https://heliophysicsdata.gsfc.nasa.gov/> metadata. That metadata
2321
+ may be incomplete. Also, cdaweb has datasets for which SSC has
2322
+ no corresponding observatory (for example, ground observatory
2323
+ data). Callers should be prepared for negative results (200, None)
2324
+ from this method.
2325
+
2326
+ Parameters
2327
+ ----------
2328
+ dataset
2329
+ cdaweb dataset identifier.
2330
+ Returns
2331
+ -------
2332
+ Tuple
2333
+ [0] contains the HTTP status code value (200 when successful).<br>
2334
+ [1] the SSC observatory identifier(s) associated with the given
2335
+ cdaweb dataset identifier or None if none is found.
2336
+ """
2337
+ url = self._hdp_registry + '?cdawebId=' + dataset
2338
+
2339
+ self.logger.debug('request url = %s', url)
2340
+
2341
+ response = self._session.get(url, timeout=self._timeout)
2342
+
2343
+ if response.status_code != 200:
2344
+
2345
+ self.logger.info('%s failed with http code %d', url,
2346
+ response.status_code)
2347
+ self.logger.info('response.text: %s', response.text)
2348
+ return (response.status_code, None)
2349
+
2350
+ if self.logger.level <= logging.DEBUG:
2351
+ self.logger.debug('response.text = %s', response.text)
2352
+
2353
+ results = ET.fromstring(response.text)
2354
+
2355
+ ssc_id = []
2356
+ for ssc_id_elem in results.findall('SscId'):
2357
+ ssc_id.append(ssc_id_elem.text)
2358
+
2359
+ if len(ssc_id) == 0:
2360
+ result = None
2361
+ elif len(ssc_id) == 1:
2362
+ result = ssc_id[0]
2363
+ else:
2364
+ result = ssc_id
2365
+
2366
+ return (response.status_code, result)