cdasws 1.8.11__py3-none-any.whl → 1.8.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cdasws/cdasws.py CHANGED
@@ -24,108 +24,175 @@
24
24
  #
25
25
  # NOSA HEADER END
26
26
  #
27
- # Copyright (c) 2018-2019 United States Government as represented by
27
+ # Copyright (c) 2018-2026 United States Government as represented by
28
28
  # the National Aeronautics and Space Administration. No copyright is
29
29
  # claimed in the United States under Title 17, U.S.Code. All Other
30
30
  # Rights Reserved.
31
31
  #
32
32
 
33
+
33
34
  """
34
- Package for accessing the Coordinate Data Analysis System (CDAS)
35
+ Module for accessing the Coordinate Data Analysis System (CDAS)
35
36
  web services <https://cdaweb.gsfc.nasa.gov/WebServices/REST/>.<br>
36
37
 
37
- Copyright &copy; 2018-2019 United States Government as represented by the
38
+ Copyright &copy; 2018-2026 United States Government as represented by the
38
39
  National Aeronautics and Space Administration. No copyright is claimed in
39
40
  the United States under Title 17, U.S.Code. All Other Rights Reserved.
41
+
42
+ Notes
43
+ -----
44
+ <ul>
45
+ <li>Due to rate limiting implemented by the CDAS web services, an
46
+ attempt to make simultaneous requests from many threads is likely
47
+ to actually reduce performance. At this time, it is best to make
48
+ calls from five or fewer threads.</li>
49
+ <li>Since CDAS data has datetime values with a UTC timezone, all
50
+ client provided datetime values should have a timezone of UTC.
51
+ If a given value's timezone is not UTC, the value is adjusted to
52
+ UTC. If a given value has no timezone (is naive), a UTC timezone
53
+ is set.</li>
54
+ </ul>
40
55
  """
41
56
 
57
+
58
+ import sys
42
59
  import os
43
60
  import platform
44
61
  import logging
62
+ import re
63
+ #from importlib.util import find_spec
45
64
  import urllib.parse
46
- import json
65
+ from urllib.parse import urlparse
66
+ #import json
47
67
  from operator import itemgetter
48
- from datetime import datetime, timezone
68
+ import time
69
+ from datetime import datetime, timedelta
70
+ import xml.etree.ElementTree as ET
49
71
  from tempfile import mkstemp
50
- from typing import Dict, List, Tuple, Union
51
- import requests
52
- import dateutil.parser
53
- import spacepy.datamodel as spdm # type: ignore
72
+ from typing import Any, Callable, Dict, List, Tuple, Union
54
73
 
55
-
56
-
57
- class TimeInterval:
74
+ import requests
75
+ #import dateutil.parser
76
+
77
+ from cdasws.datarepresentation import DataRepresentation
78
+ from cdasws.datarequest import AudioRequest, DataRequest
79
+ from cdasws.datarequest import CdfFormat, CdfRequest, Compression
80
+ from cdasws.datarequest import ImageFormat, GraphOptions, GraphRequest
81
+ from cdasws.datarequest import TextFormat, TextRequest, ThumbnailRequest
82
+ from cdasws.timeinterval import TimeInterval
83
+ from cdasws.doi import get_doi_landing_page_url
84
+ from cdasws import __version__, RETRY_LIMIT, NAMESPACES as NS
85
+
86
+
87
+ # requires python >= 3.4
88
+ #if find_spec('spacepy.datamodel') is not None:
89
+ # import spacepy.datamodel as spdm # type: ignore
90
+ # SPDM_AVAILABLE = True
91
+ #else:
92
+ # SPDM_AVAILABLE = False
93
+ # python < 3.4
94
+ try:
95
+ import spacepy.datamodel as spdm # type: ignore
96
+ SPDM_AVAILABLE = True
97
+ except ImportError:
98
+ SPDM_AVAILABLE = False
99
+
100
+ try:
101
+ from cdflib.xarray import cdf_to_xarray
102
+ import xarray as xr
103
+ CDF_XARRAY_AVAILABLE = True
104
+ except ImportError:
105
+ try:
106
+ import cdflib as cdf
107
+ import xarray as xr # pylint: disable=ungrouped-imports
108
+ CDF_XARRAY_AVAILABLE = True
109
+ def cdf_to_xarray(filename, to_datetime=False, to_unixtime=False,
110
+ fillval_to_nan=False):
111
+ """
112
+ Reads a CDF into an xarray.dataset. This function exists
113
+ to provide compatibility with cdflib >= 1.0.1 for older
114
+ releases of cdflib.
115
+
116
+ Parameters:
117
+ -----------
118
+ filename
119
+ The path to the CDF file to read.
120
+ to_datetime
121
+ Whether or not to convert CDF_EPOCH/EPOCH_16/TT2000 to
122
+ datetime, or leave them as is.
123
+ to_unixtime
124
+ Whether or not to convert CDF_EPOCH/EPOCH_16/TT2000 to
125
+ unixtime, or leave them as is.
126
+ fillval_to_nan
127
+ If True, any data values that match the FILLVAL
128
+ attribute for a variable will be set to NaN.
129
+
130
+ Returns
131
+ -------
132
+ xarray.dataset
133
+ An XArray Dataset object.
134
+ """
135
+ return cdf.cdf_to_xarray(filename, to_datetime=to_datetime, # pylint: disable=no-member
136
+ to_unixtime=to_unixtime,
137
+ fillval_to_nan=fillval_to_nan)
138
+ except ImportError:
139
+ CDF_XARRAY_AVAILABLE = False
140
+
141
+
142
+ try:
143
+ import requests_cache
144
+ CACHE_AVAILABLE = True
145
+ except ImportError:
146
+ CACHE_AVAILABLE = False
147
+
148
+
149
+ def _get_data_progress(
150
+ progress: float,
151
+ msg: str,
152
+ value: Dict) -> int:
58
153
  """
59
- A time interval constisting of a start and end datetime.
154
+ A get_data progress callback which adjusts the progress value for
155
+ the download portion of a larger operation and then calls the
156
+ "real" progress callback function with this adjusted progress value.
60
157
 
61
- Attributes
158
+ Parameters
62
159
  ----------
63
- start
64
- Start time of interval.
65
- end
66
- End time of interval.
160
+ progress
161
+ Measure of progress.
162
+ msg
163
+ Message describing progress of get_data call.
164
+ value
165
+ Dictionary containing the function to call and values for
166
+ computing the adjusted progress value.
167
+ Returns
168
+ -------
169
+ int
170
+ Flag indicating whether to continue with getting the data.
171
+ 0 to continue. 1 to abort getting the data.
67
172
  """
68
- def __init__(self, start: Union[datetime, str],
69
- end: Union[datetime, str]):
70
- """
71
- Constructs a TimeInterval object.
72
-
73
- Parameters
74
- ----------
75
- start
76
- Start time of interval.
77
- end
78
- End time of interval.
79
- Raises
80
- ------
81
- ValueError
82
- If the given start/end datetime values are invalid.
83
- """
84
-
85
- if isinstance(start, datetime):
86
- self.start = start
87
- elif isinstance(start, str):
88
- self.start = dateutil.parser.parse(start)
89
- else:
90
- raise ValueError('unrecognized datetime value')
91
-
92
- self.start.astimezone(timezone.utc)
93
-
94
- if isinstance(end, datetime):
95
- self.end = end
96
- elif isinstance(end, str):
97
- self.end = dateutil.parser.parse(end)
98
- else:
99
- raise ValueError('unrecognized datetime value')
100
-
101
- self.end.astimezone(timezone.utc)
173
+ progress_callback = value.get('progressCallback', None)
174
+ progress_user_value = value.get('progressUserValue', None)
175
+ adjusted_progress = value['progressStart'] + \
176
+ value['progressFraction'] * progress
102
177
 
103
- def __str__(self):
104
- return self.start.isoformat() + ' ' + self.end.isoformat()
178
+ if progress_callback is not None:
105
179
 
106
- def __eq__(self, other):
107
- return self.start == other.start and self.end == other.end
180
+ return progress_callback(adjusted_progress, msg,
181
+ progress_user_value)
182
+ return 0
108
183
 
109
- @staticmethod
110
- def basic_iso_format(value: datetime) -> str:
111
- """
112
- Produces the basic (minimal) ISO 8601 format of the given
113
- datetime.
114
184
 
115
- Parameters
116
- ----------
117
- value
118
- datetime value to convert to string.
119
- Returns
120
- -------
121
- str
122
- Basic ISO 8601 format time string.
123
- """
124
- return value.isoformat().replace('+00:00', 'Z').translate(
125
- {ord(i):None for i in ':-'})
185
+ class NullAuth(requests.auth.AuthBase): # pylint: disable=too-few-public-methods
186
+ """
187
+ Authentication class used to cause requests to ignore any ~/.netrc
188
+ file. The CDAS web services do not support authentication and
189
+ a cdaweb (ftps) entry will cause CdasWs requests to fail with
190
+ a 401 error. See <https://github.com/psf/requests/issues/2773>.
191
+ """
192
+ def __call__(self, r):
193
+ return r
126
194
 
127
195
 
128
- # pylint: disable=too-many-instance-attributes
129
196
  class CdasWs:
130
197
  """
131
198
  Class representing the web service interface to NASA's
@@ -138,9 +205,17 @@ class CdasWs:
138
205
  it is configured with a NullHandler. Users of this class may configure
139
206
  the logger to aid in diagnosing problems.
140
207
  """
208
+ # pylint: disable=too-many-instance-attributes
141
209
  # pylint: disable=too-many-arguments
142
- def __init__(self, endpoint=None, timeout=None, proxy=None,
143
- ca_certs=None, disable_ssl_certificate_validation=False):
210
+ def __init__(
211
+ self,
212
+ endpoint=None,
213
+ timeout=None,
214
+ proxy=None,
215
+ ca_certs=None,
216
+ disable_ssl_certificate_validation=False,
217
+ user_agent=None,
218
+ disable_cache=False):
144
219
  """
145
220
  Creates an object representing the CDAS web services.
146
221
 
@@ -152,42 +227,62 @@ class CdasWs:
152
227
  timeout
153
228
  Number of seconds to wait for a response from the server.
154
229
  proxy
155
- HTTP proxy information. For example,
230
+ HTTP proxy information. For example,<pre>
156
231
  proxies = {
157
232
  'http': 'http://10.10.1.10:3128',
158
233
  'https': 'http://10.10.1.10:1080',
159
- }
234
+ }</pre>
160
235
  Proxy information can also be set with environment variables.
161
- For example,
236
+ For example,<pre>
162
237
  $ export HTTP_PROXY="http://10.10.1.10:3128"
163
- $ export HTTPS_PROXY="http://10.10.1.10:1080"
238
+ $ export HTTPS_PROXY="http://10.10.1.10:1080"</pre>
164
239
  ca_certs
165
240
  Path to certificate authority (CA) certificates that will
166
241
  override the default bundle.
167
242
  disable_ssl_certificate_validation
168
243
  Flag indicating whether to validate the SSL certificate.
244
+ user_agent
245
+ A value that is appended to the HTTP User-Agent value.
246
+ disable_cache
247
+ Flag indicating whether to disable HTTP caching.
169
248
  """
170
249
 
171
250
  self.logger = logging.getLogger(type(self).__name__)
172
251
  self.logger.addHandler(logging.NullHandler())
173
252
 
253
+ self.logger.debug('endpoint = %s', endpoint)
254
+ self.logger.debug('ca_certs = %s', ca_certs)
255
+ self.logger.debug('disable_ssl_certificate_validation = %s',
256
+ disable_ssl_certificate_validation)
257
+ self.logger.debug('disable_cache = %s', disable_cache)
258
+
174
259
  if endpoint is None:
175
260
  self._endpoint = 'https://cdaweb.gsfc.nasa.gov/WS/cdasr/1/dataviews/sp_phys/'
176
261
  else:
177
262
  self._endpoint = endpoint
178
263
 
179
- self._user_agent = 'CdasWsExample.py (' + \
264
+ self._user_agent = 'cdasws/' + __version__ + ' (' + \
180
265
  platform.python_implementation() + ' ' \
181
266
  + platform.python_version() + '; ' + platform.platform() + ')'
182
267
 
268
+ if user_agent is not None:
269
+ self._user_agent += ' (' + user_agent + ')'
270
+
183
271
  self._request_headers = {
184
- 'Content-Type' : 'application/json',
185
- 'Accept' : 'application/json',
272
+ #'Content-Type' : 'application/json',
273
+ 'Content-Type' : 'application/xml',
274
+ 'Accept' : 'application/xml',
186
275
  'User-Agent' : self._user_agent,
187
276
  #'Accept-Encoding' : 'gzip' # only beneficial for icdfml responses
188
277
  }
189
- self._session = requests.Session()
278
+ if CACHE_AVAILABLE and disable_cache is not True:
279
+ self._session = requests_cache.CachedSession('cdasws_cache',
280
+ cache_control=True)
281
+ else:
282
+ self._session = requests.Session()
283
+
190
284
  self._session.headers.update(self._request_headers)
285
+ self._session.auth = NullAuth()
191
286
 
192
287
  if ca_certs is not None:
193
288
  self._session.verify = ca_certs
@@ -200,6 +295,10 @@ class CdasWs:
200
295
 
201
296
  self._timeout = timeout
202
297
 
298
+ endpoint_components = urlparse(self._endpoint)
299
+ self._hdp_registry = endpoint_components.scheme + '://' + \
300
+ endpoint_components.netloc + '/registry/hdp/SscId.xql'
301
+
203
302
  # pylint: enable=too-many-arguments
204
303
 
205
304
 
@@ -219,14 +318,21 @@ class CdasWs:
219
318
  self._session.close()
220
319
 
221
320
 
222
- def get_observatory_groups(self, **keywords) -> List[Dict]:
321
+ def get_observatory_groups(
322
+ self,
323
+ **keywords: str
324
+ ) -> List[Dict]:
223
325
  """
224
326
  Gets descriptions of the observatory groups from the server.
225
327
 
226
328
  Parameters
227
329
  ----------
228
330
  keywords
229
- instrumentType value.
331
+ optional keyword parameters as follows:<br>
332
+ <b>instrumentType</b> - an instrument type value from those
333
+ returned by `CdasWs.get_instrument_types`. Omitting
334
+ this parameter indicates that no observatories are eliminated
335
+ based upon their instrumentType value.
230
336
  Returns
231
337
  -------
232
338
  List
@@ -252,27 +358,49 @@ class CdasWs:
252
358
  self.logger.info('response.text: %s', response.text)
253
359
  return []
254
360
 
255
- observatory_groups = response.json()
256
-
257
361
  if self.logger.level <= logging.DEBUG:
258
- self.logger.debug('observatory_groups: %s',
259
- json.dumps(observatory_groups,
260
- indent=4, sort_keys=True))
362
+ self.logger.debug('response.text = %s', response.text)
261
363
 
262
- if not observatory_groups:
263
- return []
364
+ observatory_response = ET.fromstring(response.text)
365
+
366
+ observatory_group_descriptions = []
367
+ for description in observatory_response.findall(\
368
+ 'cdas:ObservatoryGroupDescription', namespaces=NS):
369
+
370
+ observatory_ids = []
371
+ for observatory_id in description.findall(\
372
+ 'cdas:ObservatoryId', namespaces=NS):
264
373
 
265
- return observatory_groups['ObservatoryGroupDescription']
374
+ observatory_ids.append(observatory_id.text)
266
375
 
376
+ observatory_group_descriptions.append({
377
+ 'Name': description.find(\
378
+ 'cdas:Name', namespaces=NS).text,
379
+ 'ObservatoryId': observatory_ids
380
+ })
267
381
 
268
- def get_instrument_types(self, **keywords) -> List[Dict]:
382
+ return observatory_group_descriptions
383
+
384
+
385
+ def get_instrument_types(
386
+ self,
387
+ **keywords: str
388
+ ) -> List[Dict]:
269
389
  """
270
390
  Gets descriptions of the instrument types from the server.
271
391
 
272
392
  Parameters
273
393
  ----------
274
394
  keywords
275
- observatory or observatoryGroup value.
395
+ optional keyword parameters as follows:<br>
396
+ <b>observatory</b> - an observatory value from those returned
397
+ by `CdasWs.get_observatories`. Omitting this parameter
398
+ indicates that no instrumentTypes are eliminated based upon
399
+ their observatory value.<br>
400
+ <b>observatoryGroup</b> - an observatory group value from
401
+ those returned by `CdasWs.get_observatory_groups`. Omitting
402
+ this parameter indicates that no instrumentTypes are
403
+ eliminated based upon their observatoryGroup value.<br>
276
404
  Returns
277
405
  -------
278
406
  List
@@ -301,27 +429,45 @@ class CdasWs:
301
429
  self.logger.info('response.text: %s', response.text)
302
430
  return []
303
431
 
304
- instrument_types = response.json()
432
+ if self.logger.level <= logging.DEBUG:
433
+ self.logger.debug('response.text = %s', response.text)
434
+
435
+ instrument_response = ET.fromstring(response.text)
305
436
 
306
437
  if self.logger.level <= logging.DEBUG:
307
- self.logger.debug('instrument_types = %s',
308
- json.dumps(instrument_types, indent=4,
309
- sort_keys=True))
438
+ self.logger.debug('instrument_response = %s',
439
+ ET.tostring(instrument_response))
310
440
 
311
- if not instrument_types:
312
- return []
441
+ instrument_types = []
442
+ for description in instrument_response.findall(\
443
+ 'cdas:InstrumentTypeDescription', namespaces=NS):
313
444
 
314
- return instrument_types['InstrumentTypeDescription']
445
+ instrument_types.append({
446
+ 'Name': description.find('cdas:Name',
447
+ namespaces=NS).text
448
+ })
449
+ return instrument_types
315
450
 
316
451
 
317
- def get_instruments(self, **keywords) -> List[Dict]:
452
+ def get_instruments(
453
+ self,
454
+ **keywords: str
455
+ ) -> List[Dict]:
318
456
  """
319
457
  Gets descriptions of the instruments from the server.
320
458
 
321
459
  Parameters
322
460
  ----------
323
461
  keywords
324
- observatory or instrumentType value.
462
+ optional keyword parameters as follows:<br>
463
+ <b>observatory</b> - an observatory value from those returned
464
+ by `CdasWs.get_observatories`. Omitting this parameter
465
+ indicates that no instruments are eliminated based upon their
466
+ observatory value.<br>
467
+ <b>instrumentType</b> - an instrument type value from those
468
+ returned by `CdasWs.get_instrument_types`. Omitting this
469
+ parameter indicates that no instruments are eliminated based
470
+ upon their instrument type.<br>
325
471
  Returns
326
472
  -------
327
473
  List
@@ -350,27 +496,50 @@ class CdasWs:
350
496
  self.logger.info('response.text: %s', response.text)
351
497
  return []
352
498
 
353
- instruments = response.json()
499
+ if self.logger.level <= logging.DEBUG:
500
+ self.logger.debug('response.text = %s', response.text)
501
+
502
+ instruments_response = ET.fromstring(response.text)
354
503
 
355
504
  if self.logger.level <= logging.DEBUG:
356
- self.logger.debug('instruments = %s',
357
- json.dumps(instruments, indent=4,
358
- sort_keys=True))
505
+ self.logger.debug('instruments = %s', response.text)
506
+ #ET.indent(instruments_response, space=' '))
359
507
 
360
- if not instruments:
361
- return []
508
+ instruments = []
509
+ for instrument_description in instruments_response.findall(\
510
+ 'cdas:InstrumentDescription', namespaces=NS):
362
511
 
363
- return instruments['InstrumentDescription']
512
+ instruments.append({
513
+ 'Name': instrument_description.find(\
514
+ 'cdas:Name', namespaces=NS).text,
515
+ 'ShortDescription': instrument_description.find(\
516
+ 'cdas:ShortDescription', namespaces=NS).text,
517
+ 'LongDescription': instrument_description.find(\
518
+ 'cdas:LongDescription', namespaces=NS).text
519
+ })
364
520
 
521
+ return instruments
365
522
 
366
- def get_observatories(self, **keywords) -> List[Dict]:
523
+
524
+ def get_observatories(
525
+ self,
526
+ **keywords: str
527
+ ) -> List[Dict]:
367
528
  """
368
529
  Gets descriptions of the observatories from the server.
369
530
 
370
531
  Parameters
371
532
  ----------
372
533
  keywords
373
- instrument or instrumentType value.
534
+ optional keyword parameters as follows:<br>
535
+ <b>instrument</b> - an instrument value from those returned
536
+ by `CdasWs.get_instruments`. Omitting this parameter
537
+ indicates that no observatories are eliminated based upon
538
+ their instrument value.<br>
539
+ <b>instrumentType</b> - an instrument type value from those
540
+ returned by `CdasWs.get_instrument_types`. Omitting this
541
+ parameter indicates that no observatories are eliminated
542
+ based upon their instrumentType value.<br>
374
543
  Returns
375
544
  -------
376
545
  List
@@ -399,20 +568,34 @@ class CdasWs:
399
568
  self.logger.info('response.text: %s', response.text)
400
569
  return []
401
570
 
402
- observatories = response.json()
571
+ if self.logger.level <= logging.DEBUG:
572
+ self.logger.debug('response.text = %s', response.text)
573
+
574
+ observatory_response = ET.fromstring(response.text)
403
575
 
404
576
  if self.logger.level <= logging.DEBUG:
405
- self.logger.debug('observatories = %s',
406
- json.dumps(observatories, indent=4,
407
- sort_keys=True))
577
+ self.logger.debug('observatories = %s', response.text)
408
578
 
409
- if not observatories:
410
- return []
579
+ observatories = []
411
580
 
412
- return observatories['ObservatoryDescription']
581
+ for observatory in observatory_response.findall(\
582
+ 'cdas:ObservatoryDescription', namespaces=NS):
583
+ observatories.append({
584
+ 'Name': observatory.find(\
585
+ 'cdas:Name', namespaces=NS).text,
586
+ 'ShortDescription': observatory.find(\
587
+ 'cdas:ShortDescription', namespaces=NS).text,
588
+ 'LongDescription': observatory.find(\
589
+ 'cdas:LongDescription', namespaces=NS).text
590
+ })
413
591
 
592
+ return observatories
414
593
 
415
- def get_observatory_groups_and_instruments(self, **keywords) -> List[Dict]:
594
+
595
+ def get_observatory_groups_and_instruments(
596
+ self,
597
+ **keywords: str
598
+ ) -> List[Dict]:
416
599
  """
417
600
  Gets descriptions of the observatory groups (and associated
418
601
  instruments) from the server.
@@ -420,7 +603,11 @@ class CdasWs:
420
603
  Parameters
421
604
  ----------
422
605
  keywords
423
- instrumentType value.
606
+ optional keyword parameters as follows:<br>
607
+ <b>instrumentType</b> - an instrument type value from those
608
+ returned by `CdasWs.get_instrument_types`. Omitting this
609
+ parameter indicates that no observatories are eliminated
610
+ based upon their instrumentType value.<br>
424
611
  Returns
425
612
  -------
426
613
  List
@@ -447,54 +634,153 @@ class CdasWs:
447
634
  self.logger.info('response.text: %s', response.text)
448
635
  return []
449
636
 
450
- observatories = response.json()
451
-
452
637
  if self.logger.level <= logging.DEBUG:
453
- self.logger.debug('observatories = %s',
454
- json.dumps(observatories, indent=4,
455
- sort_keys=True))
638
+ self.logger.debug('response.text = %s', response.text)
456
639
 
457
- if not observatories:
458
- return []
640
+ observatories_response = ET.fromstring(response.text)
459
641
 
460
- return observatories['ObservatoryGroupInstrumentDescription']
642
+ if self.logger.level <= logging.DEBUG:
643
+ self.logger.debug('observatories = %s', response.text)
644
+
645
+ o_g_i_ds = []
646
+
647
+ for o_g_i_d in observatories_response.findall(\
648
+ 'cdas:ObservatoryGroupInstrumentDescription',\
649
+ namespaces=NS):
650
+
651
+ o_g_i_d_name = o_g_i_d.find('cdas:Name',
652
+ namespaces=NS).text
653
+ o_is = []
654
+ for o_i in o_g_i_d.findall('cdas:ObservatoryInstruments',
655
+ namespaces=NS):
656
+
657
+ o_i_name = o_i.find('cdas:Name',
658
+ namespaces=NS).text
659
+ i_ds = []
660
+ for i_d in o_i.findall('cdas:InstrumentDescription',
661
+ namespaces=NS):
662
+ i_d_name = i_d.find('cdas:Name',
663
+ namespaces=NS).text
664
+ i_d_short_description = \
665
+ i_d.find('cdas:ShortDescription',
666
+ namespaces=NS).text
667
+ i_d_long_description = \
668
+ i_d.find('cdas:LongDescription',
669
+ namespaces=NS).text
670
+ i_ds.append({
671
+ 'Name': i_d_name,
672
+ 'ShortDescription': i_d_short_description,
673
+ 'LongDescription': i_d_long_description
674
+ })
675
+ o_is.append({
676
+ 'Name': o_i_name,
677
+ 'InstrumentDescription': i_ds
678
+ })
679
+
680
+ o_g_i_ds.append({
681
+ 'Name': o_g_i_d_name,
682
+ 'ObservatoryInstruments': o_is
683
+ })
684
+
685
+ return o_g_i_ds
461
686
 
462
687
 
463
- def get_datasets(self, **keywords) -> List[Dict]:
688
+ # pylint: disable=too-many-branches
689
+ def get_datasets(
690
+ self,
691
+ **keywords: str
692
+ ) -> List[Dict]:
464
693
  """
465
694
  Gets descriptions of the specified datasets from the server.
466
695
 
467
696
  Parameters
468
697
  ----------
469
698
  keywords
470
- observatoryGroup, instrumentType, observatory,
471
- instrument, startDate, stopDate, idPattern, labelPattern,
472
- and/or notesPattern value(s).
699
+ optional keyword parameters as follows:<br>
700
+ <b>observatoryGroup</b> - an observatory group value from those
701
+ returned by `CdasWs.get_observatory_groups`. Omitting this
702
+ parameter
703
+ indicates that no datasets are eliminated based upon their
704
+ observatoryGroup value.<br>
705
+ <b>instrumentType</b> - an instrument type value from those
706
+ returned by `CdasWs.get_instrument_types`. Omitting this
707
+ parameter indicates that no datasets are eliminated based
708
+ upon their instrumentType value.<br>
709
+ <b>observatory</b> - an observatory name value from those
710
+ returned by `CdasWs.get_observatories`. Omitting this
711
+ parameter indicates that no datasets are eliminated based
712
+ upon their observatory value.<br>
713
+ <b>instrument</b> - an instrument value from those returned by
714
+ `CdasWs.get_instruments`. Omitting this parameter indicates
715
+ that no datasets are eliminated based upon their instrument
716
+ value.<br>
717
+ <b>startDate</b> - a datetime specifying the start of a time
718
+ interval. See module note about timezone value. If this
719
+ parameter is omitted, the time interval will begin infinitely
720
+ in the past.<br>
721
+ <b>stopDate</b> - a datetime specifying the end of a time
722
+ interval. See module note about timezone value. If this
723
+ parameter is omitted, the time interval will end infinitely
724
+ in the future.<br>
725
+ <b>id</b> - a dataset identifier. The value may be a CDAS
726
+ (e.g., AC_H2_MFI), DOI (e.g., 10.48322/fh85-fj47), or SPASE
727
+ [ResourceID] (e.g., spase://NASA/NumericalData/ACE/MAG/L2/PT1H)
728
+ identifier. If specified, all other keywords are ignored.<br>
729
+ <b>idPattern</b> - a java.util.regex compatible regular
730
+ expression that must match the dataset's CDAS identifier value.
731
+ Omitting this parameter is equivalent to `.*`.<br>
732
+ <b>labelPattern</b> - a java.util.regex compatible regular
733
+ expression that must match the dataset's CDAS label text.
734
+ Omitting this parameter is equivalent to `.*`. Embedded
735
+ matching flag expressions (e.g., `(?i)` for case insensitive
736
+ match mode) are supported and likely to be useful in this
737
+ case.<br>
738
+ <b>notesPattern</b> - a java.util.regex compatible regular
739
+ expression that must match the dataset's CDAS notes text.
740
+ Omitting this parameter is equivalent to `.*`. Embedded
741
+ matching flag expressions (e.g., `(?s)` for dotall match mode)
742
+ are supported and likely to be useful in this case.<br>
473
743
  Returns
474
744
  -------
475
745
  List
476
- A dictionary containing descriptions of the datasets
746
+ A list of dictionaries containing descriptions of the datasets
477
747
  requested. The dictionary structure is defined by the
478
748
  DatasetDescription element in
479
749
  <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>.
480
750
  """
481
751
  url = self._endpoint + 'datasets?'
482
752
 
483
- if 'observatoryGroup' in keywords:
484
- url = url + 'observatoryGroup=' \
485
- + urllib.parse.quote(keywords['observatoryGroup']) + '&'
486
-
487
- if 'instrumentType' in keywords:
488
- url = url + 'instrumentType=' \
489
- + urllib.parse.quote(keywords['instrumentType']) + '&'
490
-
491
- if 'observatory' in keywords:
492
- url = url + 'observatory=' \
493
- + urllib.parse.quote(keywords['observatory']) + '&'
494
-
495
- if 'instrument' in keywords:
496
- url = url + 'instrument=' \
497
- + urllib.parse.quote(keywords['instrument']) + '&'
753
+ observatory_groups = keywords.get('observatoryGroup', None)
754
+ if observatory_groups is not None:
755
+ if isinstance(observatory_groups, str):
756
+ observatory_groups = [observatory_groups]
757
+ for observatory_group in observatory_groups:
758
+ url = url + 'observatoryGroup=' \
759
+ + urllib.parse.quote(observatory_group) + '&'
760
+
761
+ instrument_types = keywords.get('instrumentType', None)
762
+ if instrument_types is not None:
763
+ if isinstance(instrument_types, str):
764
+ instrument_types = [instrument_types]
765
+ for instrument_type in instrument_types:
766
+ url = url + 'instrumentType=' \
767
+ + urllib.parse.quote(instrument_type) + '&'
768
+
769
+ observatories = keywords.get('observatory', None)
770
+ if observatories is not None:
771
+ if isinstance(observatories, str):
772
+ observatories = [observatories]
773
+ for observatory in observatories:
774
+ url = url + 'observatory=' \
775
+ + urllib.parse.quote(observatory) + '&'
776
+
777
+ instruments = keywords.get('instrument', None)
778
+ if instruments is not None:
779
+ if isinstance(instruments, str):
780
+ instruments = [instruments]
781
+ for instrument in instruments:
782
+ url = url + 'instrument=' \
783
+ + urllib.parse.quote(instrument) + '&'
498
784
 
499
785
  if 'startDate' in keywords:
500
786
  url = url + 'startDate=' \
@@ -504,6 +790,10 @@ class CdasWs:
504
790
  url = url + 'stopDate=' \
505
791
  + urllib.parse.quote(keywords['stopDate']) + '&'
506
792
 
793
+ if 'id' in keywords:
794
+ url = url + 'id=' \
795
+ + urllib.parse.quote(keywords['id']) + '&'
796
+
507
797
  if 'idPattern' in keywords:
508
798
  url = url + 'idPattern=' \
509
799
  + urllib.parse.quote(keywords['idPattern']) + '&'
@@ -527,21 +817,154 @@ class CdasWs:
527
817
  self.logger.info('response.text: %s', response.text)
528
818
  return []
529
819
 
530
- datasets = response.json()
820
+ if self.logger.level <= logging.DEBUG:
821
+ self.logger.debug('response.text = %s', response.text)
822
+
823
+ dss = ET.fromstring(response.text)
531
824
 
532
825
  if self.logger.level <= logging.DEBUG:
533
- self.logger.debug('datasets = %s',
534
- json.dumps(datasets, indent=4, sort_keys=True))
826
+ self.logger.debug('datasets = %s', response.text)
827
+
828
+ datasets = []
829
+ for ds in dss.findall('cdas:DatasetDescription',
830
+ namespaces=NS):
831
+
832
+ observatory_groups = []
833
+ for o_g in ds.findall('cdas:ObservatoryGroup',
834
+ namespaces=NS):
835
+ observatory_groups.append(o_g.text)
836
+
837
+ instrument_types = []
838
+ for i_t in ds.findall('cdas:InstrumentType',
839
+ namespaces=NS):
840
+ instrument_types.append(i_t.text)
841
+
842
+ dataset_links = []
843
+ for d_l in ds.findall('cdas:DatasetLink',
844
+ namespaces=NS):
845
+ dataset_links.append({
846
+ 'Title': d_l.find('cdas:Title',
847
+ namespaces=NS).text,
848
+ 'Text': d_l.find('cdas:Text',
849
+ namespaces=NS).text,
850
+ 'Url': d_l.find('cdas:Url',
851
+ namespaces=NS).text,
852
+ })
853
+
854
+ observatories = []
855
+ for obs_elem in ds.findall('cdas:Observatory',
856
+ namespaces=NS):
857
+ observatories.append(obs_elem.text)
858
+
859
+ instruments = []
860
+ for instr_elem in ds.findall('cdas:Instrument',
861
+ namespaces=NS):
862
+ instruments.append(instr_elem.text)
863
+
864
+ dataset = {
865
+ 'Id': ds.find('cdas:Id', namespaces=NS).text,
866
+ 'Observatory': observatories,
867
+ 'Instrument': instruments,
868
+ 'ObservatoryGroup': observatory_groups,
869
+ 'InstrumentType': instrument_types,
870
+ 'Label': ds.find('cdas:Label',
871
+ namespaces=NS).text,
872
+ 'TimeInterval': {
873
+ 'Start': ds.find('cdas:TimeInterval/cdas:Start',
874
+ namespaces=NS).text,
875
+ 'End': ds.find('cdas:TimeInterval/cdas:End',
876
+ namespaces=NS).text
877
+ },
878
+ 'PiName': ds.find('cdas:PiName',
879
+ namespaces=NS).text,
880
+ 'PiAffiliation': ds.find('cdas:PiAffiliation',
881
+ namespaces=NS).text,
882
+ 'Notes': ds.find('cdas:Notes',
883
+ namespaces=NS).text,
884
+ 'DatasetLink': dataset_links
885
+ }
886
+ doi = ds.find('cdas:Doi', namespaces=NS)
887
+ if doi is not None:
888
+ dataset['Doi'] = doi.text
889
+
890
+ spase_resource_id = ds.find('cdas:SpaseResourceId',
891
+ namespaces=NS)
892
+ if spase_resource_id is not None:
893
+ dataset['SpaseResourceId'] = spase_resource_id.text
894
+
895
+ additional_metadata = []
896
+ for add_meta in ds.findall('cdas:AdditionalMetadata',
897
+ namespaces=NS):
898
+ meta_type = add_meta.attrib['Type']
899
+ value = add_meta.text
900
+ additional_metadata.append({
901
+ 'Type': meta_type,
902
+ 'value': value
903
+ })
904
+
905
+ if len(additional_metadata) > 0:
906
+ dataset['AdditionalMetadata'] = additional_metadata
907
+
908
+ datasets.append(dataset)
909
+
910
+ return sorted(datasets, key=itemgetter('Id'))
911
+ # pylint: enable=too-many-branches
535
912
 
536
- if not datasets:
537
- return []
538
913
 
539
- return sorted(datasets['DatasetDescription'], key=itemgetter('Id'))
914
+ @staticmethod
915
+ def get_doi_landing_page_url(
916
+ doi: str
917
+ ) -> str:
918
+ """
919
+ Returns a URL to the given Digital Object Identifier's landing
920
+ page (metadata for the DOI).
921
+
922
+ Note: this method is deprecated. You should call
923
+ doi.get_doi_landing_page_url directly.
924
+
925
+ Parameters
926
+ ----------
927
+ doi
928
+ digital object identifier.
929
+ Returns
930
+ -------
931
+ str
932
+ A URL to the DOI's landing page.
933
+ """
934
+
935
+ return get_doi_landing_page_url(doi)
936
+
937
+
938
+ @staticmethod
939
+ def get_citation(
940
+ doi: str
941
+ ) -> str:
942
+ """
943
+ Returns the citation from doi.org for the given DOI.
944
+
945
+ Parameters
946
+ ----------
947
+ doi
948
+ digital object identifier.
949
+ Returns
950
+ -------
951
+ str
952
+ The citation from doi.org for the given DOI.
953
+ """
954
+
955
+ url = 'https://doi.org/' + doi
956
+ headers = {'Accept': 'text/x-bibliography; style=apa'}
957
+ response = requests.get(url, headers=headers,
958
+ timeout=30)
540
959
 
960
+ return response.text
541
961
 
542
962
 
543
- def get_inventory(self, identifier: str, **keywords
544
- ) -> List[TimeInterval]:
963
+ def get_inventory(
964
+ self,
965
+ identifier: str,
966
+ **keywords: str
967
+ ) -> List[TimeInterval]:
545
968
  """
546
969
  Gets a description of the specified dataset's data inventory.
547
970
 
@@ -550,14 +973,18 @@ class CdasWs:
550
973
  identifier
551
974
  dataset identifier of data inventory to get.
552
975
  keywords
553
- time interval value.
976
+ optional keyword parameters as follows:<br>
977
+ <b>timeInterval</b> - `timeinterval.TimeInterval` to restrict
978
+ returned inventory.
554
979
  Returns
555
980
  -------
556
981
  List
557
- An array of TimeIntervals when data is available.
982
+ An array of `timeinterval.TimeInterval`s when data is
983
+ available.
558
984
  """
559
985
 
560
- url = self._endpoint + 'datasets/' + identifier + '/inventory'
986
+ url = self._endpoint + 'datasets/' + \
987
+ urllib.parse.quote(identifier, safe='') + '/inventory'
561
988
 
562
989
  if 'timeInterval' in keywords:
563
990
  time_interval_keyword = keywords['timeInterval']
@@ -577,31 +1004,67 @@ class CdasWs:
577
1004
  self.logger.info('response.text: %s', response.text)
578
1005
  return []
579
1006
 
580
- inventory = response.json()
581
-
582
1007
  if self.logger.level <= logging.DEBUG:
583
- self.logger.debug('inventory = %s',
584
- json.dumps(inventory, indent=4, sort_keys=True))
1008
+ self.logger.debug('response.text = %s', response.text)
585
1009
 
1010
+ inventory = ET.fromstring(response.text)
586
1011
  intervals = []
587
-
588
- data_intervals = inventory['InventoryDescription'][0]
589
-
590
- if 'TimeInterval' in data_intervals:
591
-
592
- for time_interval in data_intervals['TimeInterval']:
593
-
1012
+ for inventory_desc in inventory.findall(\
1013
+ 'cdas:InventoryDescription',
1014
+ namespaces=NS):
1015
+ for time_interval in inventory_desc.findall(\
1016
+ 'cdas:TimeInterval',
1017
+ namespaces=NS):
594
1018
  intervals.append(
595
1019
  TimeInterval(
596
- time_interval['Start'],
597
- time_interval['End']
1020
+ time_interval.find('cdas:Start',
1021
+ namespaces=NS).text,
1022
+ time_interval.find('cdas:End',
1023
+ namespaces=NS).text
598
1024
  )
599
1025
  )
600
1026
 
601
1027
  return intervals
602
1028
 
603
1029
 
604
- def get_variables(self, identifier: str) -> List[Dict]:
1030
def get_example_time_interval(
        self,
        identifier: str,
) -> TimeInterval:
    """
    Gets a small example time interval for the specified dataset.
    The interval ends where the last interval of the dataset's data
    inventory ends.  It is not guaranteed to contain non-fill data
    for any specific variable.

    Parameters
    ----------
    identifier
        dataset identifier of data inventory to get.
    Returns
    -------
    timeinterval.TimeInterval
        A small example time interval that is likely, but not
        guaranteed, to have data, or None if the dataset's inventory
        is empty.
    """

    inventory = self.get_inventory(identifier)
    if not inventory:
        return None

    # Identifiers matching the MMS ..._BRST_... pattern get a one
    # second example; every other dataset gets two hours.
    duration = (timedelta(seconds=1)
                if re.search('MMS[1-4]_.+_BRST_.+', identifier)
                else timedelta(hours=2))

    # The last inventory interval is shortened in place and returned;
    # its start may end up earlier than the interval's original start
    # when that interval is shorter than the example duration.
    last_interval = inventory[-1]
    last_interval.start = last_interval.end - duration

    return last_interval
1062
+
1063
+
1064
+ def get_variables(
1065
+ self,
1066
+ identifier: str
1067
+ ) -> List[Dict]:
605
1068
  """
606
1069
  Gets a description of the variables in the specified dataset.
607
1070
 
@@ -612,13 +1075,14 @@ class CdasWs:
612
1075
  Returns
613
1076
  -------
614
1077
  List
615
- A dictionary containing descriptions of the variables in
1078
+ A List of dictionary descriptions of the variables in
616
1079
  the specified dataset. The dictionary structure is defined by
617
1080
  the VariableDescription element in
618
1081
  <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>.
619
1082
  """
620
1083
 
621
- url = self._endpoint + 'datasets/' + identifier + '/variables'
1084
+ url = self._endpoint + 'datasets/' + \
1085
+ urllib.parse.quote(identifier, safe='') + '/variables'
622
1086
 
623
1087
  response = self._session.get(url, timeout=self._timeout)
624
1088
 
@@ -629,24 +1093,306 @@ class CdasWs:
629
1093
  self.logger.info('response.text: %s', response.text)
630
1094
  return []
631
1095
 
632
- variables = response.json()
1096
+ if self.logger.level <= logging.DEBUG:
1097
+ self.logger.debug('response.text = %s', response.text)
1098
+
1099
+ var_descriptions = ET.fromstring(response.text)
1100
+
1101
+ variables = []
1102
+ for var_description in var_descriptions.findall(\
1103
+ 'cdas:VariableDescription',
1104
+ namespaces=NS):
1105
+ name = var_description.find('cdas:Name',
1106
+ namespaces=NS).text
1107
+ short_description = var_description.find(\
1108
+ 'cdas:ShortDescription',
1109
+ namespaces=NS).text
1110
+ if short_description is None:
1111
+ short_description = ''
1112
+
1113
+ long_description = var_description.find(\
1114
+ 'cdas:LongDescription',
1115
+ namespaces=NS).text
1116
+ if long_description is None:
1117
+ long_description = ''
1118
+
1119
+ variables.append({
1120
+ 'Name': name,
1121
+ 'ShortDescription': short_description,
1122
+ 'LongDescription': long_description
1123
+ })
1124
+
1125
+ return variables
1126
+
1127
+
1128
def get_variable_names(
        self,
        identifier: str
) -> List[str]:
    """
    Gets the names of the variables in the specified dataset.  This
    is get_variables reduced to the 'Name' entry of each variable
    description; the descriptions themselves are discarded.

    Parameters
    ----------
    identifier
        dataset identifier of data to get.
    Returns
    -------
    List
        A List of the names of the variables in the specified
        dataset, in the order get_variables reports them.
    """

    return [description['Name']
            for description in self.get_variables(identifier)]
639
1152
 
640
- # pylint: disable=too-many-locals
641
- # pylint: disable=too-many-return-statements
642
- # pylint: disable=too-many-statements
643
- # pylint: disable=too-many-branches
644
- def get_data(self, dataset: str, variables: List[str],
645
- start: Union[datetime, str], end: Union[datetime, str],
646
- **keywords
647
- ) -> Tuple[int, spdm.SpaceData]:
1153
+
1154
@staticmethod
def _get_thumbnail_description_dict(
        file_description_elem: ET.Element
) -> Dict:
    """
    Builds the dictionary representation of the ThumbnailDescription
    contained in the given FileDescription element.

    Parameters
    ----------
    file_description_elem
        a FileDescription Element.
    Returns
    -------
    Dict
        a Dictionary representation of the ThumbnailDescription
        child of the given element, or None when the element has no
        ThumbnailDescription child.
    """
    thumbnail_elem = file_description_elem.find(
        'cdas:ThumbnailDescription', namespaces=NS)
    if thumbnail_elem is None:
        return None

    def text_of(parent, tag):
        # Text of parent's first child element named tag in the cdas
        # namespace.  A missing child is not tolerated (the .text
        # access fails on None).
        return parent.find('cdas:' + tag, namespaces=NS).text

    interval_elem = thumbnail_elem.find('cdas:TimeInterval',
                                        namespaces=NS)
    start = text_of(interval_elem, 'Start')
    end = text_of(interval_elem, 'End')

    description = {
        'Name': text_of(thumbnail_elem, 'Name'),
        'Dataset': text_of(thumbnail_elem, 'Dataset'),
        'TimeInterval': {
            'Start': start,
            'End': end
        },
        'VarName': text_of(thumbnail_elem, 'VarName')
    }

    # The remaining entries use the element name as the dictionary
    # key and are converted from text to numbers.
    integer_tags = ('Options', 'NumFrames', 'NumRows', 'NumCols',
                    'TitleHeight', 'ThumbnailHeight',
                    'ThumbnailWidth', 'StartRecord')
    for tag in integer_tags:
        description[tag] = int(text_of(thumbnail_elem, tag))

    for tag in ('MyScale', 'XyStep'):
        description[tag] = float(text_of(thumbnail_elem, tag))

    return description
1225
+
1226
+
1227
@staticmethod
def _get_data_result_dict(
        xml_data_result: str
) -> Dict:
    """
    Gets the dictionary representation of the given XML DataResult.

    Parameters
    ----------
    xml_data_result
        XML representation of a DataResult.
    Returns
    -------
    Dict
        a Dictionary representation of the given XML DataResult, or
        None when it contains no FileDescription element.  The
        dictionary always has a 'FileDescription' list.  It also has
        a 'Status', 'Message', 'Warning' and/or 'Error' list of
        strings for each of those elements that is present with
        text in the DataResult.
    """
    data_result = ET.fromstring(xml_data_result)

    def text_of(parent, tag):
        # Text of parent's first child element named tag in the cdas
        # namespace.
        return parent.find('cdas:' + tag, namespaces=NS).text

    file_descriptions = []
    for file_description in data_result.findall(
            'cdas:FileDescription', namespaces=NS):

        dict_file_description = {
            'Name': text_of(file_description, 'Name'),
            'MimeType': text_of(file_description, 'MimeType'),
            'StartTime': text_of(file_description, 'StartTime'),
            'EndTime': text_of(file_description, 'EndTime'),
            'Length': int(text_of(file_description, 'Length')),
            'LastModified': text_of(file_description, 'LastModified')
        }

        # ThumbnailDescription and ThumbnailId are optional children.
        thumbnail_dict = CdasWs._get_thumbnail_description_dict(
            file_description)
        if thumbnail_dict is not None:
            dict_file_description['ThumbnailDescription'] = \
                thumbnail_dict

        thumbnail_id_elem = file_description.find('cdas:ThumbnailId',
                                                  namespaces=NS)
        if thumbnail_id_elem is not None:
            dict_file_description['ThumbnailId'] = \
                thumbnail_id_elem.text

        file_descriptions.append(dict_file_description)

    # Callers index result['FileDescription'] without checking, so a
    # result without any file is still reported as None.
    if not file_descriptions:
        return None

    result = {
        'FileDescription': file_descriptions
    }

    # get_data copies these keys into its status dictionary, so they
    # are carried over from the XML when the server supplies them.
    for tag in ('Status', 'Message', 'Warning', 'Error'):
        texts = [elem.text
                 for elem in data_result.findall('cdas:' + tag,
                                                 namespaces=NS)
                 if elem.text]
        if texts:
            result[tag] = texts

    return result
1289
+
1290
+
1291
def get_data_result(
        self,
        data_request: DataRequest,
        progress_callback: Callable[[float, str, Any], int],
        progress_user_value: Any
) -> Tuple[int, Dict]:
    """
    Submits the given request to the server and returns the result.
    This is a relatively low-level method and most callers should
    probably use a higher-level method such as get_data.

    A 429 or 503 response that carries an integer Retry-After header
    is retried after sleeping that many seconds, up to RETRY_LIMIT
    attempts in total.  Any other unsuccessful response ends the
    call immediately.

    Parameters
    ----------
    data_request
        data request.
    progress_callback
        function that is called before each retry wait to report
        progress, or None.  The function should return 0 if it
        wants to continue getting data.  If it returns a non-0 value,
        getting the data will be aborted and this method will
        immediately return (204, None).  The float parameter
        is a value between 0.0 and 1.0 to indicate progress and
        the str parameter will contain a text message indicating
        the progress of this call.
    progress_user_value
        value that is passed to the progress_callback function.
    Returns
    -------
    Tuple
        [0] contains the int HTTP status code.  200 when
            successful.<br>
        [1] contains a dictionary representing the DataResult from
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            or None.
    See Also
    --------
    CdasWs.get_data
    """

    # Serialize once; the same document is logged and sent on every
    # attempt.
    request_xml = data_request.xml_str()
    self.logger.debug('data_request = %s', request_xml)

    url = self._endpoint + 'datasets'

    for retries in range(RETRY_LIMIT):
        response = self._session.post(url, data=request_xml,
                                      timeout=self._timeout)

        if response.status_code == 200:
            data_result = CdasWs._get_data_result_dict(response.text)
            return (response.status_code, data_result or None)

        # The header test must apply to both status codes.  Written
        # as "429 or 503 and header" it only guarded the 503 case,
        # and a 429 without the header raised KeyError.
        retry_delay = None
        if response.status_code in (429, 503) and \
           'Retry-After' in response.headers:

            retry_after = response.headers['Retry-After']

            self.logger.debug('429/503 status with Retry-After header: %s',
                              retry_after)
            try:
                retry_delay = max(0, int(retry_after))
            except ValueError:
                # Retry-After may also be an HTTP-date, which is not
                # supported here; treat it as a non-retryable failure
                # instead of letting ValueError escape.
                self.logger.info('unsupported Retry-After value: %s',
                                 retry_after)

        if retry_delay is None:
            self.logger.info('%s failed with http code %d', url,
                             response.status_code)
            self.logger.info('data_request = %s', data_request)
            self.logger.info('response.text: %s', response.text)
            return (response.status_code, None)

        if progress_callback is not None:
            if progress_callback(0.2, 'Waiting ' + retry_after + \
                                 's before making server request.',
                                 progress_user_value) != 0:
                return (204, None)

        self.logger.info('Sleeping %d seconds before making request',
                         retry_delay)
        time.sleep(retry_delay)

    # Every attempt was answered with a retryable status.
    self.logger.info('%s failed with http code %d after %d retries',
                     url, response.status_code, retries + 1)
    self.logger.info('data_request = %s', data_request)
    self.logger.info('response.text: %s', response.text)
    return (response.status_code, None)
1382
+
1383
+
1384
+ def get_data_file(
1385
+ self,
1386
+ dataset: str,
1387
+ variables: List[str],
1388
+ start: Union[datetime, str], end: Union[datetime, str],
1389
+ **keywords: Union[
1390
+ Dict,
1391
+ Callable[[float, str, Any], int],
1392
+ Any]
1393
+ ) -> Tuple[int, Dict]:
648
1394
  """
649
- Gets the specified data from the server.
1395
+ Gets the specified data file from the server.
650
1396
 
651
1397
  Parameters
652
1398
  ----------
@@ -655,163 +1401,133 @@ class CdasWs:
655
1401
  variables
656
1402
  array containing names of variables to get.
657
1403
  start
658
- start time of data to get.
1404
+ start time of data to get. See module note about timezone.
659
1405
  end
660
- end time of data to get.
1406
+ end time of data to get. See module note about timezone.
661
1407
  keywords
662
- optional keyword parameters as follows<br>
663
- binData - indicates that uniformly spaced values should be
664
- computed for scaler/vector/spectrogram data according to
665
- the given binning parameter values. binData may contain
666
- the following keys: interval, interpolateMissingValues,
667
- and/or sigmaMultiplier with values that override the
1408
+ optional keyword parameters as follows:<br>
1409
+ <b>binData</b> - indicates that uniformly spaced values should
1410
+ be computed for scaler/vector/spectrogram data according to
1411
+ the given binning parameter values. binData is a Dict that
1412
+ may contain the following keys: interval,
1413
+ interpolateMissingValues, sigmaMultiplier, and/or
1414
+ overrideDefaultBinning with values that override the
668
1415
  defaults.<br>
669
- progressCallback - is a
670
- typing.Callable[[float, str, typing.Any], int]
1416
+ <b>progressCallback</b> - is a
1417
+ Callable[[float, str, typing.Any], int]
671
1418
  function that is called repeatedly to report the progress
672
1419
  of getting the data. The function should return 0 if it
673
1420
  wants to continue getting data. If it returns non-0 value,
674
- getting the data will be aborted and the get_data() function
675
- will immediately return (204, None). The float parameter
676
- is a value between 0.0 and 1.0 to indicate progress and
677
- the str parameter will contain a text message indicating
1421
+ getting the data will be aborted and the get_data_file()
1422
+ function will immediately return (204, None). The float
1423
+ parameter is a value between 0.0 and 1.0 to indicate progress
1424
+ and the str parameter will contain a text message indicating
678
1425
  the progress of this call.<br>
679
- progressUserValue - is a typing.Any value that is passsed
1426
+ <b>progressUserValue</b> - is an Any value that is passed
680
1427
  to the progressCallback function.<br>
681
1428
  Returns
682
1429
  -------
683
1430
  Tuple
684
- [0] contains a dictionary of HTTP and CDAS status information.
685
- When successful, ['http']['status_code'] will be 200.<br>
686
- [1] contains the requested data (SpaceData object) or None.
1431
+ [0] contains the int HTTP status code. 200 when
1432
+ successful.<br>
1433
+ [1] contains a dictionary representing the DataResult from
1434
+ <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
1435
+ or None.
687
1436
  Raises
688
1437
  ------
689
1438
  ValueError
690
1439
  If the given start/end datetime values are invalid.
1440
+ See Also
1441
+ --------
1442
+ CdasWs.get_data : In addition to what get_data_file does,
1443
+ get_data also downloads and reads the data file into memory
1444
+ (SpaceData or xarray.Dataset object).
691
1445
  """
1446
+ # pylint: disable=too-many-locals
1447
+ # pylint: disable=too-many-return-statements
1448
+ # pylint: enable=too-many-statements
1449
+ # pylint: disable=too-many-branches
692
1450
 
693
- if isinstance(start, datetime):
694
- start_datetime = start
695
- elif isinstance(start, str):
696
- start_datetime = dateutil.parser.parse(start)
697
- else:
698
- raise ValueError('unrecognized start datetime value')
699
-
700
- if isinstance(end, datetime):
701
- end_datetime = end
702
- elif isinstance(end, str):
703
- end_datetime = dateutil.parser.parse(end)
704
- else:
705
- raise ValueError('unrecognized end datetime value')
1451
+ start_datetime, end_datetime = TimeInterval.get_datetimes(start,
1452
+ end)
706
1453
 
707
- data_request = {
708
- 'CdfRequest': {
709
- 'CdfFormat': 'Cdf',
710
- 'TimeInterval': {
711
- 'Start': start_datetime.isoformat(),
712
- 'End': end_datetime.isoformat()
713
- },
714
- 'DatasetRequest': {
715
- 'DatasetId': dataset,
716
- 'VariableName': variables
717
- }
718
- }
719
- }
720
- if 'binData' in keywords:
721
- bin_data_kw = keywords['binData']
722
- data_request['CdfRequest']['BinData'] = {}
723
- if 'interval' in bin_data_kw:
724
- data_request['CdfRequest']['BinData']['Interval'] = \
725
- bin_data_kw['interval']
726
- if 'interpolateMissingValues' in bin_data_kw:
727
- data_request['CdfRequest']['BinData']['InterpolateMissingValues'] = \
728
- bin_data_kw['interpolateMissingValues']
729
- if 'sigmaMultiplier' in bin_data_kw:
730
- data_request['CdfRequest']['BinData']['SigmaMultiplier'] = \
731
- bin_data_kw['sigmaMultiplier']
1454
+ data_request = CdfRequest(dataset, variables,
1455
+ TimeInterval(start_datetime,
1456
+ end_datetime),
1457
+ 3, CdfFormat.BINARY,
1458
+ **keywords.get('binData', {}))
732
1459
 
733
1460
  progress_callback = keywords.get('progressCallback', None)
734
1461
  progress_user_value = keywords.get('progressUserValue', None)
735
1462
 
736
1463
  self.logger.debug('data_request = %s', data_request)
737
1464
 
738
- status = {
739
- 'http': {
740
- 'status_code': 204
741
- },
742
- 'cdas': {
743
- 'status': [],
744
- 'message': [],
745
- 'warning': [],
746
- 'error': []
747
- }
748
- }
749
1465
  if progress_callback is not None:
750
- if progress_callback(0.1, 'Making initial server request.',
1466
+ if progress_callback(0.1, 'Making server request.',
751
1467
  progress_user_value) != 0:
752
- return (status, None)
753
-
754
- url = self._endpoint + 'datasets'
755
-
756
- response = self._session.post(url, data=json.dumps(data_request),
757
- timeout=self._timeout)
1468
+ return (204, None)
758
1469
 
759
- status['http']['status_code'] = response.status_code
1470
+ status_code, data_result = self.get_data_result(data_request,
1471
+ progress_callback,
1472
+ progress_user_value)
760
1473
 
761
1474
  if progress_callback is not None:
762
- if progress_callback(0.2, 'Initial server request complete.',
1475
+ if progress_callback(1.0, 'Initial server request complete.',
763
1476
  progress_user_value) != 0:
764
- return (status, None)
765
-
766
- try:
767
- data_result = response.json()
768
- if 'Status' in data_result:
769
- status['cdas']['status'] = data_result['Status']
770
- if 'Message' in data_result:
771
- status['cdas']['message'] = data_result['Message']
772
- if 'Warning' in data_result:
773
- status['cdas']['warning'] = data_result['Warning']
774
- if 'Error' in data_result:
775
- status['cdas']['error'] = data_result['Error']
776
- except ValueError:
777
- # for example, a 503 from apache will not be json
778
- self.logger.debug('Non-JSON response: %s', response.text)
779
- status['http']['error_body'] = response.text
780
-
781
- if response.status_code != 200:
782
-
783
- self.logger.info('%s failed with http code %d', url,
784
- response.status_code)
785
- self.logger.info('data_request = %s', data_request)
786
- self.logger.info('response.text: %s', response.text)
787
- return (status, None)
1477
+ return (status_code, None)
788
1478
 
789
- if not data_result:
790
- return (status, None)
1479
+ return (status_code, data_result)
791
1480
 
792
- if self.logger.level <= logging.DEBUG:
793
- self.logger.debug('data_result = %s',
794
- json.dumps(data_result, indent=4,
795
- sort_keys=True))
796
1481
 
797
- if progress_callback is not None:
798
- if progress_callback(0.3, 'Beginning download of data.',
799
- progress_user_value) != 0:
800
- return (status, None)
1482
+ def download(
1483
+ self,
1484
+ url: str,
1485
+ size: int = 0,
1486
+ **keywords
1487
+ ) -> str:
1488
+ """
1489
+ Downloads the file specified by the given URL to a temporary
1490
+ file without reading all of it into memory. This method
1491
+ utilizes the connection pool and persistent HTTP connection
1492
+ to the CdasWs server.
801
1493
 
802
- file_descriptions = data_result['FileDescription']
1494
+ Parameters
1495
+ ----------
1496
+ url
1497
+ URL of file to download.
1498
+ size
1499
+ number of bytes in file to download.
1500
+ keywords
1501
+ optional keyword parameters as follows:<br>
1502
+ <b>progressCallback</b> - is a
1503
+ typing.Callable[[float, str, typing.Any], int]
1504
+ function that is called repeatedly to report the progress
1505
+ of getting the data. The function should return 0 if it
1506
+ wants to continue getting data. If it returns a non-0 value,
1507
+ getting the data will be aborted and this download() function
1508
+ will immediately return None. The float parameter
1509
+ is a value between 0.0 and 1.0 to indicate progress and
1510
+ the str parameter will contain a text message indicating
1511
+ the progress of this call.<br>
1512
+ <b>progressUserValue</b> - is a typing.Any value that is
1513
+ passed to the progressCallback function.<br>
1514
+ Returns
1515
+ -------
1516
+ str
1517
+ name of temporary file or None if there was an error.
1518
+ """
1519
+ # pylint: disable=too-many-locals
803
1520
 
804
- data_url = file_descriptions[0]['Name']
805
- data_length = file_descriptions[0]['Length']
1521
+ progress_callback = keywords.get('progressCallback', None)
1522
+ progress_user_value = keywords.get('progressUserValue', None)
806
1523
 
807
- self.logger.debug('data_url = %s, data_length = %d',
808
- data_url, data_length)
1524
+ suffix = os.path.splitext(urlparse(url).path)[1]
809
1525
 
810
- file_descriptor, tmp_filename = mkstemp(suffix='.cdf')
1526
+ file_descriptor, tmp_filename = mkstemp(suffix=suffix)
811
1527
 
812
1528
  download_bytes = 0
813
1529
  next_progress_report = 0.1
814
- with self._session.get(data_url, stream=True,
1530
+ with self._session.get(url, stream=True,
815
1531
  timeout=self._timeout) as response:
816
1532
 
817
1533
  file = open(tmp_filename, 'wb')
@@ -821,33 +1537,831 @@ class CdasWs:
821
1537
  # file.flush()
822
1538
  if progress_callback is not None:
823
1539
  download_bytes += len(chunk)
824
- download_progress = float(download_bytes) / data_length
1540
+ if size == 0:
1541
+ download_progress = 0.0
1542
+ else:
1543
+ download_progress = float(download_bytes) / size
825
1544
  if download_progress > next_progress_report:
826
1545
  next_progress_report += download_progress
827
- if progress_callback(0.3 + 0.1 * download_progress,
828
- 'Continuing download of data.',
1546
+ if progress_callback(download_progress,\
1547
+ 'Continuing download of data.',
829
1548
  progress_user_value) != 0:
1549
+
830
1550
  file.close()
831
1551
  os.close(file_descriptor)
832
- return (status, None)
1552
+ return None
833
1553
  file.close()
834
1554
  os.close(file_descriptor)
835
1555
 
836
1556
  if progress_callback is not None:
837
- if progress_callback(0.4, 'Data download complete. Reading data.',
1557
+ if progress_callback(0.4,
1558
+ 'Data download complete. Reading data.',
838
1559
  progress_user_value) != 0:
839
- return (status, None)
1560
+ return None
1561
+
1562
+ return tmp_filename
1563
+
1564
+
1565
@staticmethod
def read_data(
        filename: str,
        data_representation: DataRepresentation
) -> Union['spacepy.datamodel', 'xr.Dataset']:
    """
    Reads the data from the given file into the requested
    representation.  When no representation is requested,
    spacepy.datamodel is used if it is available and otherwise
    cdflib/xarray.

    Parameters
    ----------
    filename
        Name of file to read.
    data_representation
        Requested data representation, or None to use whichever
        supported representation is available.
    Returns
    -------
    spacepy.datamodel or xr.Dataset
        Data from file.  None if data_representation is neither
        None, DataRepresentation.SPACEPY nor
        DataRepresentation.XARRAY.
    Raises
    ------
    Exception
        If an Exception is raised by either the spdm.fromCDF() or
        cdflib.cdf_to_xarray() functions.
    ModuleNotFoundError
        If the required spacepy.datamodel or the cdflib and xarray
        modules are not installed.
    """
    if data_representation is None:
        # No preference given: spacepy first, then cdflib/xarray.
        if SPDM_AVAILABLE:
            return spdm.fromCDF(filename)
        if CDF_XARRAY_AVAILABLE:
            return cdf_to_xarray(filename, to_datetime=True,
                                 fillval_to_nan=True)
        raise ModuleNotFoundError(
            'neither the spacepy.datamodel nor the cdflib and '
            'xarray modules are installed')

    if data_representation is DataRepresentation.SPACEPY:
        if not SPDM_AVAILABLE:
            raise ModuleNotFoundError('spacepy module must be installed')
        return spdm.fromCDF(filename)

    if data_representation is DataRepresentation.XARRAY:
        if not CDF_XARRAY_AVAILABLE:
            raise ModuleNotFoundError('cdflib and xarray modules must be installed')
        return cdf_to_xarray(filename, to_datetime=True,
                             fillval_to_nan=True)

    return None
1615
+
1616
+
1617
+ def get_data(
1618
+ self,
1619
+ dataset: str,
1620
+ variables: List[str],
1621
+ time0: Union[TimeInterval, List[TimeInterval], datetime, str],
1622
+ time1: Union[datetime, str] = None,
1623
+ **keywords: Union[
1624
+ Dict,
1625
+ DataRepresentation,
1626
+ Callable[[float, str, Any], int],
1627
+ Any]
1628
+ ) -> Tuple[Dict, 'spdm.SpaceData', 'xarray']:
1629
+ """
1630
+ Gets the specified data from the server. The representation
1631
+ of the returned data is determined as follows:<br>
1632
+ 1. If a dataRepresentation keyword parameter is given, its
1633
+ value will determine the representation of the returned
1634
+ data. If no dataRepresentation keyword parameter is
1635
+ given, then<br>
1636
+ 2. If the presence of spacepy.datamodel is found, then the data
1637
+ is returned in the spacepy.datamodel representation.<br>
1638
+ 3. If the presence of the cdflib and xarray modules are found,
1639
+ then the data is returned in an xarray.Dataset.
1640
+
1641
+ Parameters
1642
+ ----------
1643
+ dataset
1644
+ dataset identifier of data to get.
1645
+ variables
1646
+ array containing names of variables to get. The value
1647
+ ALL-VARIABLES may be used instead of specifying all the
1648
+ individual variable names.
1649
+ time0
1650
+ TimeInterval(s) or start time of data to get. See module
1651
+ note about timezone.
1652
+ time1
1653
+ when time0 is not one or more TimeInterval(s), the end time
1654
+ of data to get. See module note about timezone.
1655
+ keywords
1656
+ optional keyword parameters as follows:<br>
1657
+ <b>binData</b> - indicates that uniformly spaced values should
1658
+ be computed for scalar/vector/spectrogram data according to
1659
+ the given binning parameter values. See
1660
+ <https://cdaweb.gsfc.nasa.gov/CDAWeb_Binning_readme.html>
1661
+ for more details. binData is a Dict that
1662
+ may contain the following keys: interval,
1663
+ interpolateMissingValues, sigmaMultiplier, and/or
1664
+ overrideDefaultBinning with values that override the
1665
+ defaults.<br>
1666
+ <b>dataRepresentation</b> - specifies the representation of
1667
+ the returned data as one of
1668
+ `datarepresentation.DataRepresentation`.<br>
1669
+ <b>progressCallback</b> - is a
1670
+ Callable[[float, str, typing.Any], int]
1671
+ function that is called repeatedly to report the progress
1672
+ of getting the data. The function should return 0 if it
1673
+ wants to continue getting data. If it returns non-0 value,
1674
+ getting the data will be aborted and the get_data() function
1675
+ will immediately return (204, None). The float parameter
1676
+ is a value between 0.0 and 1.0 to indicate progress and
1677
+ the str parameter will contain a text message indicating
1678
+ the progress of this call.<br>
1679
+ <b>progressUserValue</b> - is an Any value that is passed
1680
+ to the progressCallback function.<br>
1681
+ Returns
1682
+ -------
1683
+ Tuple
1684
+ [0] contains a dictionary of HTTP and CDAS status information.
1685
+ When successful, ['http']['status_code'] will be 200.<br>
1686
+ [1] contains the requested data (SpaceData or xarray.Dataset
1687
+ object) or None.
1688
+ Raises
1689
+ ------
1690
+ ValueError
1691
+ If no variables are given or if the given start/end datetime
1692
+ values are invalid.
1693
+ """
1694
+ # pylint: disable=too-many-locals
1695
+ # pylint: disable=too-many-return-statements
1696
+ # pylint: disable=too-many-statements
1697
+ # pylint: disable=too-many-branches
1698
+ # pylint: disable=import-outside-toplevel
1699
+
1700
+ #import spacepy.datamodel as spdm # type: ignore
1701
+
1702
+ if len(variables) < 1:
1703
+ raise ValueError('at least one variable name is required')
1704
+
1705
+ if isinstance(time0, (str, datetime)):
1706
+ if isinstance(time1, (str, datetime)):
1707
+ time_intervals = [TimeInterval(time0, time1)]
1708
+ else:
1709
+ raise ValueError('time1 must be str/datetime')
1710
+ elif isinstance(time0, TimeInterval):
1711
+ time_intervals = [time0]
1712
+ elif isinstance(time0, list) and len(time0) > 0 and\
1713
+ isinstance(time0[0], TimeInterval):
1714
+ time_intervals = time0
1715
+ else:
1716
+ raise ValueError('invalid time0 type')
1717
+
1718
+ data_request = CdfRequest(dataset, variables,
1719
+ time_intervals,
1720
+ 3, CdfFormat.BINARY,
1721
+ binData=keywords.get('binData', {}))
1722
+
1723
+ data_rep = keywords.get('dataRepresentation', None)
1724
+ progress_callback = keywords.get('progressCallback', None)
1725
+ progress_user_value = keywords.get('progressUserValue', None)
1726
+
1727
+ self.logger.debug('data_request = %s', data_request)
1728
+
1729
+ status = {
1730
+ 'http': {
1731
+ 'status_code': 204
1732
+ },
1733
+ 'cdas': {
1734
+ 'status': [],
1735
+ 'message': [],
1736
+ 'warning': [],
1737
+ 'error': []
1738
+ }
1739
+ }
840
1740
 
841
- data = spdm.fromCDF(tmp_filename)
842
1741
  if progress_callback is not None:
843
- if progress_callback(1.0, 'Finished reading data.',
1742
+ if progress_callback(0.1, 'Making initial server request.',
844
1743
  progress_user_value) != 0:
845
1744
  return (status, None)
846
- os.remove(tmp_filename)
1745
+
1746
+ status_code, data_result = self.get_data_result(data_request,
1747
+ progress_callback,
1748
+ progress_user_value)
1749
+
1750
+ status['http']['status_code'] = status_code
1751
+
1752
+ if progress_callback is not None:
1753
+ if progress_callback(0.3, 'Initial server request complete.',
1754
+ progress_user_value) != 0:
1755
+ return (status, None)
1756
+
1757
+ if status_code != 200:
1758
+
1759
+ self.logger.info('get_data_result failed with http code %d',
1760
+ status_code)
1761
+ self.logger.info('data_request = %s', data_request)
1762
+ return (status, None)
1763
+
1764
+ if not data_result:
1765
+ return (status, None)
1766
+
1767
+ if 'Status' in data_result:
1768
+ status['cdas']['status'] = data_result['Status']
1769
+ if 'Message' in data_result:
1770
+ status['cdas']['message'] = data_result['Message']
1771
+ if 'Warning' in data_result:
1772
+ status['cdas']['warning'] = data_result['Warning']
1773
+ if 'Error' in data_result:
1774
+ status['cdas']['error'] = data_result['Error']
1775
+
1776
+ if progress_callback is not None:
1777
+ if progress_callback(0.4, 'Beginning download of data.',
1778
+ progress_user_value) != 0:
1779
+ return (status, None)
1780
+
1781
+ file_descriptions = data_result['FileDescription']
1782
+
1783
+ data_url = file_descriptions[0]['Name']
1784
+ data_length = file_descriptions[0]['Length']
1785
+
1786
+ self.logger.debug('data_url = %s, data_length = %d',
1787
+ data_url, data_length)
1788
+
1789
+ sub_progress_control = {
1790
+ 'progressCallback': progress_callback,
1791
+ 'progressUserValue': progress_user_value,
1792
+ 'progressStart': 0.4,
1793
+ 'progressFraction': 0.1
1794
+ }
1795
+
1796
+ tmp_filename = self.download(data_url, data_length,
1797
+ progressCallback=_get_data_progress,
1798
+ progressUserValue=sub_progress_control)
1799
+
1800
+ try:
1801
+ data = self.read_data(tmp_filename, data_rep)
1802
+ os.remove(tmp_filename)
1803
+ if progress_callback is not None:
1804
+ if progress_callback(1.0, 'Finished reading data.',
1805
+ progress_user_value) != 0:
1806
+ return (status, None)
1807
+ except:
1808
+ self.logger.error('Exception from read_data(%s): %s, %s',
1809
+ tmp_filename, sys.exc_info()[0],
1810
+ sys.exc_info()[1])
1811
+ self.logger.error('CDF file has been retained.')
1812
+ raise
847
1813
  return (status, data)
848
- # pylint: enable=too-many-locals
849
- # pylint: enable=too-many-return-statements
850
- # pylint: enable=too-many-statements
851
- # pylint: enable=too-many-branches
852
1814
 
853
- # pylint: enable=too-many-instance-attributes
1815
+
1816
+ # pylint: disable=too-many-arguments
1817
def get_graph(
        self,
        dataset: str,
        variables: List[str],
        start: Union[datetime, str],
        end: Union[datetime, str],
        options: GraphOptions = None,
        image_format: List[ImageFormat] = None,
        **keywords
) -> Tuple[int, Dict]:
    """
    Gets a graphical representation of the specified data from the
    server.

    Parameters
    ----------
    dataset
        dataset identifier of data to get.
    variables
        array containing names of variables to get.
    start
        start time of data to get.  See module note about timezone.
    end
        end time of data to get.  See module note about timezone.
    options
        graph options.
    image_format
        image format.  If None, then [ImageFormat.PNG].
    keywords
        optional keyword parameters as follows:<br>
        <b>binData</b> - indicates that uniformly spaced values should
        be computed for scalar/vector/spectrogram data according to
        the given binning parameter values.  binData is a Dict that
        may contain the following keys: interval,
        interpolateMissingValues, sigmaMultiplier, and/or
        overrideDefaultBinning with values that override the
        defaults.<br>
        <b>progressCallback</b> - is a
        typing.Callable[[float, str, typing.Any], int]
        function that is called to report the progress of this
        call (once before and once after the server request).  It
        should return 0 to continue.  A non-0 value aborts the call:
        (204, None) is returned when aborting before the server
        request and (HTTP status code, None) when aborting after
        it.  The float parameter is a value between 0.0 and 1.0
        indicating progress and the str parameter is a text message
        describing the progress.<br>
        <b>progressUserValue</b> - is a typing.Any value that is
        passed to the progressCallback function.<br>
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] contains a dictionary representation of a
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            DataResult object or None.<br>
    Raises
    ------
    ValueError
        If the given start/end datetime values are invalid.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: enable=too-many-statements
    # pylint: disable=too-many-branches

    begin, finish = TimeInterval.get_datetimes(start, end)
    interval = TimeInterval(begin, finish)

    # The full keyword set (including the progress entries) is handed to
    # the request object, exactly as the caller supplied it.
    graph_request = GraphRequest(dataset, variables, interval,
                                 options, image_format, **keywords)

    notify = keywords.get('progressCallback', None)
    notify_arg = keywords.get('progressUserValue', None)

    self.logger.debug('request = %s', graph_request)

    # Give the caller a chance to cancel before any network traffic.
    if notify is not None and \
       notify(0.1, 'Making server request.', notify_arg) != 0:
        return (204, None)

    status_code, result = self.get_data_result(graph_request,
                                               notify, notify_arg)

    # A cancel at this point discards the result but keeps the status.
    if notify is not None and \
       notify(1.0, 'Server request complete.', notify_arg) != 0:
        return (status_code, None)

    if status_code == 200:
        return (status_code, result)

    self.logger.info('get_result failed with http code %d',
                     status_code)
    self.logger.info('request = %s', graph_request)
    return (status_code, None)
1916
+ # pylint: enable=too-many-arguments
1917
+
1918
+
1919
+ # pylint: disable=too-many-arguments
1920
def get_thumbnail(
        self,
        dataset: str,
        variables: List[str],
        start: Union[datetime, str],
        end: Union[datetime, str],
        identifier: str,
        thumbnail: int = 1,
        **keywords
) -> Tuple[int, Dict]:
    """
    Gets the full size image for a thumbnail of a graphical
    representation of the specified data from the server.

    Parameters
    ----------
    dataset
        dataset identifier of data to get.
    variables
        array containing names of variables to get.
    start
        start time of data to get.  See module note about timezone.
    end
        end time of data to get.  See module note about timezone.
    identifier
        thumbnail identifier (returned in a previous get_graph
        result).
    thumbnail
        number of thumbnail whose full size image is being requested.
        Thumbnail images are counted beginning at one (not zero).
    keywords
        optional keyword parameters as follows:<br>
        <b>progressCallback</b> - is a
        typing.Callable[[float, str, typing.Any], int]
        function that is called to report the progress of this
        call (once before and once after the server request).  It
        should return 0 to continue.  A non-0 value aborts the call:
        (204, None) is returned when aborting before the server
        request and (HTTP status code, None) when aborting after
        it.  The float parameter is a value between 0.0 and 1.0
        indicating progress and the str parameter is a text message
        describing the progress.<br>
        <b>progressUserValue</b> - is a typing.Any value that is
        passed to the progressCallback function.<br>
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] contains a dictionary representation of a
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            DataResult object or None.<br>
    Raises
    ------
    ValueError
        If the given start/end datetime values are invalid.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: enable=too-many-statements
    # pylint: disable=too-many-branches

    begin, finish = TimeInterval.get_datetimes(start, end)
    interval = TimeInterval(begin, finish)

    # Only the progress entries of keywords are used by this method.
    thumbnail_request = ThumbnailRequest(dataset, variables, interval,
                                         identifier, thumbnail)

    notify = keywords.get('progressCallback', None)
    notify_arg = keywords.get('progressUserValue', None)

    self.logger.debug('request = %s', thumbnail_request)

    # Give the caller a chance to cancel before any network traffic.
    if notify is not None and \
       notify(0.1, 'Making server request.', notify_arg) != 0:
        return (204, None)

    status_code, result = self.get_data_result(thumbnail_request,
                                               notify, notify_arg)

    # A cancel at this point discards the result but keeps the status.
    if notify is not None and \
       notify(1.0, 'Server request complete.', notify_arg) != 0:
        return (status_code, None)

    if status_code == 200:
        return (status_code, result)

    self.logger.info('get_result failed with http code %d',
                     status_code)
    self.logger.info('request = %s', thumbnail_request)
    return (status_code, None)
2015
+ # pylint: enable=too-many-arguments
2016
+
2017
+
2018
+ # pylint: disable=too-many-arguments
2019
def get_text(
        self,
        dataset: str,
        variables: List[str],
        start: Union[datetime, str],
        end: Union[datetime, str],
        compression: Compression = Compression.UNCOMPRESSED,
        text_format: TextFormat = TextFormat.PLAIN,
        **keywords
) -> Tuple[int, Dict]:
    """
    Gets a textual representation of the specified data from the
    server.

    Parameters
    ----------
    dataset
        dataset identifier of data to get.
    variables
        array containing names of variables to get.
    start
        start time of data to get.  See module note about timezone.
    end
        end time of data to get.  See module note about timezone.
    compression
        file compression.
    text_format
        text format.
    keywords
        optional keyword parameters as follows:<br>
        <b>binData</b> - indicates that uniformly spaced values should
        be computed for scalar/vector/spectrogram data according to
        the given binning parameter values.  binData is a Dict that
        may contain the following keys: interval,
        interpolateMissingValues, sigmaMultiplier, and/or
        overrideDefaultBinning with values that override the
        defaults.<br>
        <b>progressCallback</b> - is a
        typing.Callable[[float, str, typing.Any], int]
        function that is called to report the progress of this
        call (once before and once after the server request).  It
        should return 0 to continue.  A non-0 value aborts the call:
        (204, None) is returned when aborting before the server
        request and (HTTP status code, None) when aborting after
        it.  The float parameter is a value between 0.0 and 1.0
        indicating progress and the str parameter is a text message
        describing the progress.<br>
        <b>progressUserValue</b> - is a typing.Any value that is
        passed to the progressCallback function.<br>
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] contains a dictionary representation of a
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            DataResult object or None.<br>
    Raises
    ------
    ValueError
        If the given start/end datetime values are invalid.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: enable=too-many-statements
    # pylint: disable=too-many-branches

    begin, finish = TimeInterval.get_datetimes(start, end)
    interval = TimeInterval(begin, finish)

    # The full keyword set (including the progress entries) is handed to
    # the request object, exactly as the caller supplied it.
    text_request = TextRequest(dataset, variables, interval,
                               compression, text_format, **keywords)

    notify = keywords.get('progressCallback', None)
    notify_arg = keywords.get('progressUserValue', None)

    self.logger.debug('request = %s', text_request)

    # Give the caller a chance to cancel before any network traffic.
    if notify is not None and \
       notify(0.1, 'Making server request.', notify_arg) != 0:
        return (204, None)

    status_code, result = self.get_data_result(text_request,
                                               notify, notify_arg)

    # A cancel at this point discards the result but keeps the status.
    if notify is not None and \
       notify(1.0, 'Server request complete.', notify_arg) != 0:
        return (status_code, None)

    if status_code == 200:
        return (status_code, result)

    self.logger.info('get_result failed with http code %d',
                     status_code)
    self.logger.info('request = %s', text_request)
    return (status_code, None)
2120
+ # pylint: enable=too-many-arguments
2121
+
2122
+
2123
def get_audio(
        self,
        dataset: str,
        variables: List[str],
        start: Union[datetime, str],
        end: Union[datetime, str],
        **keywords
) -> Tuple[int, Dict]:
    """
    Gets an audio representation of the specified data from the
    server.

    Parameters
    ----------
    dataset
        dataset identifier of data to get.
    variables
        array containing names of variables to get.
    start
        start time of data to get.  See module note about timezone.
    end
        end time of data to get.  See module note about timezone.
    keywords
        optional keyword parameters as follows:<br>
        <b>binData</b> - indicates that uniformly spaced values should
        be computed for scalar/vector/spectrogram data according to
        the given binning parameter values.  binData is a Dict that
        may contain the following keys: interval,
        interpolateMissingValues, sigmaMultiplier, and/or
        overrideDefaultBinning with values that override the
        defaults.<br>
        <b>progressCallback</b> - is a
        typing.Callable[[float, str, typing.Any], int]
        function that is called to report the progress of this
        call (once before and once after the server request).  It
        should return 0 to continue.  A non-0 value aborts the call:
        (204, None) is returned when aborting before the server
        request and (HTTP status code, None) when aborting after
        it.  The float parameter is a value between 0.0 and 1.0
        indicating progress and the str parameter is a text message
        describing the progress.<br>
        <b>progressUserValue</b> - is a typing.Any value that is
        passed to the progressCallback function.<br>
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] contains a dictionary representation of a
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            DataResult object or None.<br>
    Raises
    ------
    ValueError
        If the given start/end datetime values are invalid.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: enable=too-many-statements
    # pylint: disable=too-many-branches

    begin, finish = TimeInterval.get_datetimes(start, end)
    interval = TimeInterval(begin, finish)

    # The full keyword set (including the progress entries) is handed to
    # the request object, exactly as the caller supplied it.
    audio_request = AudioRequest(dataset, variables, interval,
                                 **keywords)

    notify = keywords.get('progressCallback', None)
    notify_arg = keywords.get('progressUserValue', None)

    self.logger.debug('request = %s', audio_request)

    # Give the caller a chance to cancel before any network traffic.
    if notify is not None and \
       notify(0.1, 'Making server request.', notify_arg) != 0:
        return (204, None)

    status_code, result = self.get_data_result(audio_request,
                                               notify, notify_arg)

    # A cancel at this point discards the result but keeps the status.
    if notify is not None and \
       notify(1.0, 'Server request complete.', notify_arg) != 0:
        return (status_code, None)

    if status_code == 200:
        return (status_code, result)

    self.logger.info('get_result failed with http code %d',
                     status_code)
    self.logger.info('request = %s', audio_request)
    return (status_code, None)
2217
+
2218
+
2219
def get_original_files(
        self,
        dataset: str,
        start: Union[datetime, str],
        end: Union[datetime, str],
        **keywords
) -> Tuple[int, Dict]:
    """
    Gets original data files from a dataset.  Original data files
    lack updated meta-data and virtual variable values contained
    in files obtained from the `CdasWs.get_data`.  Most callers
    should probably use `CdasWs.get_data` instead of this function.

    Parameters
    ----------
    dataset
        dataset identifier of data to get.
    start
        start time of data to get.  See module note about timezone.
    end
        end time of data to get.  See module note about timezone.
    keywords
        optional keyword parameters as follows:<br>
        <b>progressCallback</b> - is a
        typing.Callable[[float, str, typing.Any], int]
        function that is called to report the progress of this
        call (once before and once after the server request).  It
        should return 0 to continue.  A non-0 value aborts the call:
        (204, None) is returned when aborting before the server
        request and (HTTP status code, None) when aborting after
        it.  The float parameter is a value between 0.0 and 1.0
        indicating progress and the str parameter is a text message
        describing the progress.<br>
        <b>progressUserValue</b> - is a typing.Any value that is
        passed to the progressCallback function.<br>
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] array of dictionary representations of a
            <https://cdaweb.gsfc.nasa.gov/WebServices/REST/CDAS.xsd>
            FileDescription objects or None.  None is also returned
            when a successful response carries no result or no
            FileDescription entry.<br>
    Raises
    ------
    ValueError
        If the given start/end datetime values are invalid.
    See Also
    --------
    CdasWs.get_data
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-return-statements
    # pylint: enable=too-many-statements
    # pylint: disable=too-many-branches

    start_datetime, end_datetime = TimeInterval.get_datetimes(start,
                                                              end)

    # NOTE(review): the empty variable list appears to be what asks the
    # server for original files -- confirm against CdfRequest.
    request = CdfRequest(dataset, [],
                         TimeInterval(start_datetime, end_datetime))

    progress_callback = keywords.get('progressCallback', None)
    progress_user_value = keywords.get('progressUserValue', None)

    self.logger.debug('request = %s', request)

    # Give the caller a chance to cancel before any network traffic.
    if progress_callback is not None:
        if progress_callback(0.1, 'Making server request.',
                             progress_user_value) != 0:
            return (204, None)

    status_code, result = self.get_data_result(request,
                                               progress_callback,
                                               progress_user_value)

    # A cancel at this point discards the result but keeps the status.
    if progress_callback is not None:
        if progress_callback(1.0, 'Server request complete.',
                             progress_user_value) != 0:
            return (status_code, None)

    if status_code != 200:

        self.logger.info('get_result failed with http code %d',
                         status_code)
        self.logger.info('request = %s', request)
        return (status_code, None)

    # A 200 response can still come back without a usable result;
    # report that as "no files" instead of raising TypeError/KeyError.
    if not result:
        return (status_code, None)

    return (status_code, result.get('FileDescription'))
2307
+
2308
+
2309
def get_ssc_id(
        self,
        dataset: str
) -> Tuple[int, Union[str, List[str]]]:
    """
    Gets the Satellite Situation Center (SSC)
    <https://sscweb.gsfc.nasa.gov/> observatory identifier(s)
    associated with the given cdaweb dataset identifier.

    Notes
    -----
    This method relies upon the Heliophysics Data Portal's
    <https://heliophysicsdata.gsfc.nasa.gov/> metadata.  That metadata
    may be incomplete.  Also, cdaweb has datasets for which SSC has
    no corresponding observatory (for example, ground observatory
    data).  Callers should be prepared for negative results (200, None)
    from this method.

    Parameters
    ----------
    dataset
        cdaweb dataset identifier.  The value is percent-encoded
        before being placed in the request URL.
    Returns
    -------
    Tuple
        [0] contains the HTTP status code value (200 when successful).<br>
        [1] the SSC observatory identifier(s) associated with the given
            cdaweb dataset identifier or None if none is found.  A
            single match is returned as a str and multiple matches
            as a list of str.
    """
    # pylint: disable=import-outside-toplevel
    from urllib.parse import quote

    # Percent-encode the identifier so that characters such as '&', '#'
    # or a space cannot change the structure of the query string.
    url = self._hdp_registry + '?cdawebId=' + quote(dataset, safe='')

    self.logger.debug('request url = %s', url)

    response = self._session.get(url, timeout=self._timeout)

    if response.status_code != 200:

        self.logger.info('%s failed with http code %d', url,
                         response.status_code)
        self.logger.info('response.text: %s', response.text)
        return (response.status_code, None)

    # isEnabledFor() honours a level inherited from a parent logger,
    # so response.text is only evaluated when it will really be logged.
    if self.logger.isEnabledFor(logging.DEBUG):
        self.logger.debug('response.text = %s', response.text)

    results = ET.fromstring(response.text)

    # Only direct 'SscId' children of the root element are considered.
    ssc_ids = [elem.text for elem in results.findall('SscId')]

    if not ssc_ids:
        return (response.status_code, None)
    if len(ssc_ids) == 1:
        return (response.status_code, ssc_ids[0])
    return (response.status_code, ssc_ids)