hdpws 0.6.25__tar.gz → 0.6.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: hdpws
3
- Version: 0.6.25
3
+ Version: 0.6.26
4
4
  Summary: NASA's Heliophysics Data Portal Web Service Client Library
5
5
  Home-page: https://heliophysicsdata.gsfc.nasa.gov/WebServices
6
6
  Author: Bernie Harris
@@ -32,6 +32,7 @@ Dynamic: description-content-type
32
32
  Dynamic: home-page
33
33
  Dynamic: keywords
34
34
  Dynamic: license
35
+ Dynamic: license-file
35
36
  Dynamic: requires-dist
36
37
  Dynamic: summary
37
38
 
@@ -41,7 +41,7 @@ the United States under Title 17, U.S.Code. All Other Rights Reserved.
41
41
  """
42
42
 
43
43
 
44
- __version__ = "0.6.25"
44
+ __version__ = "0.6.26"
45
45
 
46
46
 
47
47
  #
@@ -47,6 +47,7 @@ from dateutil import parser
47
47
  from hdpws import __version__, NAMESPACES as NS
48
48
  from hdpws.dateinterval import DateInterval
49
49
  from hdpws.resourcetype import ResourceType
50
+ from hdpws.spasehtmlparser import SpaseHtmlParser
50
51
 
51
52
 
52
53
  #
@@ -594,7 +595,7 @@ class HdpWs:
594
595
  resource_id: str,
595
596
  **keywords: Union[
596
597
  datetime]
597
- ) -> str:
598
+ ) -> Dict:
598
599
  """
599
600
  Gets an HTML representation of the specified SPASE document
600
601
  from HDP.
@@ -658,6 +659,58 @@ class HdpWs:
658
659
  return result
659
660
 
660
661
 
662
+ def get_spase_json_ld(
663
+ self,
664
+ resource_id: str,
665
+ **keywords: Union[
666
+ datetime]
667
+ ) -> Dict:
668
+ """
669
+ Gets a JSON-LD representation of the specified SPASE document
670
+ from HDP.
671
+
672
+ Parameters
673
+ ----------
674
+ resource_id
675
+ SPASE ResourceID value of the document to get.
676
+ keywords
677
+ Optional keyword parameters as follows:<br>
678
+ <b>if_modified_since</b> - conditional GET If-Modified-Since
679
+ datetime value.<br>
680
+
681
+ Returns
682
+ -------
683
+ Dict
684
+ Dictionary containing a 'Result' key whose value is the JSON-LD
685
+ representation of the specified SPASE document with the
686
+ addition of the following key/values:<br>
687
+ - HttpStatus: with the value of the HTTP status code.
688
+ Successful == 200.<br>
689
+ - Last-Modified: the value of the HTTP Last-Modified header
690
+ when available.<br>
691
+ When HttpStatus != 200:<br>
692
+ - HttpText: containing a string representation of the HTTP
693
+ entity body.<br>
694
+ When HttpText is a standard HDP WS error entity body the
695
+ following key/values (convenience to avoid parsing
696
+ HttpText):<br>
697
+ - ErrorMessage: value from HttpText.<br>
698
+ - ErrorDescription: value from HttpText.<br>
699
+ """
700
+
701
+ response = self.get_spase_html(resource_id, **keywords)
702
+
703
+ if response['HttpStatus'] != 200:
704
+ return response
705
+
706
+ spase_html_parser = SpaseHtmlParser()
707
+ spase_html_parser.feed(response['Result'])
708
+
709
+ response['Result'] = spase_html_parser.get_json_ld()
710
+
711
+ return response
712
+
713
+
661
714
  def get_spase_data(
662
715
  self,
663
716
  resource_types: List[ResourceType],
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+
3
+ #
4
+ # NOSA HEADER START
5
+ #
6
+ # The contents of this file are subject to the terms of the NASA Open
7
+ # Source Agreement (NOSA), Version 1.3 only (the "Agreement"). You may
8
+ # not use this file except in compliance with the Agreement.
9
+ #
10
+ # You can obtain a copy of the agreement at
11
+ # docs/NASA_Open_Source_Agreement_1.3.txt
12
+ # or
13
+ # https://cdaweb.gsfc.nasa.gov/WebServices/NASA_Open_Source_Agreement_1.3.txt.
14
+ #
15
+ # See the Agreement for the specific language governing permissions
16
+ # and limitations under the Agreement.
17
+ #
18
+ # When distributing Covered Code, include this NOSA HEADER in each
19
+ # file and include the Agreement file at
20
+ # docs/NASA_Open_Source_Agreement_1.3.txt. If applicable, add the
21
+ # following below this NOSA HEADER, with the fields enclosed by
22
+ # brackets "[]" replaced with your own identifying information:
23
+ # Portions Copyright [yyyy] [name of copyright owner]
24
+ #
25
+ # NOSA HEADER END
26
+ #
27
+ # Copyright (c) 2025 United States Government as represented by
28
+ # the National Aeronautics and Space Administration. No copyright is
29
+ # claimed in the United States under Title 17, U.S.Code. All Other
30
+ # Rights Reserved.
31
+ #
32
+
33
+
34
+ """
35
+ Module defining a class to represent a SPASE HTML parser. At present,
36
+ its main function is to extract the JSON-LD embedded in an HTML
37
+ representation of a SPASE XML document. Any other information should
38
+ be obtained from the original SPASE XML document.<br>
39
+
40
+ Copyright &copy; 2025 United States Government as represented by the
41
+ National Aeronautics and Space Administration. No copyright is claimed in
42
+ the United States under Title 17, U.S.Code. All Other Rights Reserved.
43
+ """
44
+
45
+
46
+ from html.parser import HTMLParser
47
+
48
+
49
+
50
+ class SpaseHtmlParser(HTMLParser):
51
+ """
52
+ A class representing a SPASE HTML parser.
53
+
54
+ Attributes
55
+ ----------
56
+ json_ld_element
57
+ Flag indicating that we have encountered the json-ld element.
58
+ The value is set back to false when we encounter the end tag.
59
+ json_ld
60
+ The json-ld "data" extracted from the json-ld element.
61
+ """
62
+ def __init__(self):
63
+
64
+ super().__init__()
65
+
66
+ self._json_ld_element = False
67
+ self._json_ld = None
68
+
69
+
70
+ def handle_starttag(self, tag, attrs):
71
+ """
72
+ This method is called to handle the start tag of an element.
73
+
74
+ Parameters
75
+ ----------
76
+ tag
77
+ The name of the tag encountered.
78
+ attrs
79
+ List of (name, value) pairs containing the attributes found
80
+ inside the tag’s &lt;&gt; brackets.
81
+ """
82
+
83
+ if tag == 'script':
84
+ for attr in attrs:
85
+ if attr[0] == 'type' and attr[1] == 'application/ld+json':
86
+ self._json_ld_element = True
87
+
88
+
89
+ def handle_endtag(self, tag):
90
+ """
91
+ This method is called to handle the end tag of an element.
92
+
93
+ Parameters
94
+ ----------
95
+ tag
96
+ The name of the tag encountered.
97
+ """
98
+
99
+ self._json_ld_element = False
100
+
101
+
102
+ def handle_data(self, data):
103
+ """
104
+ This method is called to process arbitrary data (e.g. text nodes
105
+ and the content of &lt;script&gt;...&lt;/script&gt; and
106
+ &lt;style&gt;...&lt;/style&gt;).
107
+
108
+ Parameters
109
+ ----------
110
+ data
111
+ The content of the element.
112
+ """
113
+
114
+ if self._json_ld_element:
115
+
116
+ self._json_ld = data
117
+
118
+
119
+ def get_json_ld(self) -> str:
120
+ """
121
+ Gets the JSON-LD from the SPASE HTML fed to this parser.
122
+
123
+ Returns
124
+ -------
125
+ str
126
+ JSON-LD from the SPASE HTML or None.
127
+ """
128
+
129
+ return self._json_ld
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: hdpws
3
- Version: 0.6.25
3
+ Version: 0.6.26
4
4
  Summary: NASA's Heliophysics Data Portal Web Service Client Library
5
5
  Home-page: https://heliophysicsdata.gsfc.nasa.gov/WebServices
6
6
  Author: Bernie Harris
@@ -32,6 +32,7 @@ Dynamic: description-content-type
32
32
  Dynamic: home-page
33
33
  Dynamic: keywords
34
34
  Dynamic: license
35
+ Dynamic: license-file
35
36
  Dynamic: requires-dist
36
37
  Dynamic: summary
37
38
 
@@ -7,6 +7,7 @@ hdpws/dateinterval.py
7
7
  hdpws/hdpws.py
8
8
  hdpws/resourcetype.py
9
9
  hdpws/spase.py
10
+ hdpws/spasehtmlparser.py
10
11
  hdpws.egg-info/PKG-INFO
11
12
  hdpws.egg-info/SOURCES.txt
12
13
  hdpws.egg-info/dependency_links.txt
@@ -10,7 +10,7 @@ README = (HERE / "README.md").read_text()
10
10
  # This call to setup() does all the work
11
11
  setup(
12
12
  name="hdpws",
13
- version="0.6.25",
13
+ version="0.6.26",
14
14
  description="NASA's Heliophysics Data Portal Web Service Client Library",
15
15
  long_description=README,
16
16
  long_description_content_type="text/markdown",
@@ -41,6 +41,7 @@ the United States under Title 17, U.S.Code. All Other Rights Reserved.
41
41
  import unittest
42
42
  import datetime
43
43
  import xml.etree.ElementTree as ET
44
+ import json
44
45
 
45
46
  from context import hdpws # pylint: disable=unused-import
46
47
 
@@ -316,6 +317,39 @@ class TestHdpWs(unittest.TestCase):
316
317
  '{' + XHTML_NS + '}html')
317
318
 
318
319
 
320
+ def test_get_spase_json_ld(self):
321
+ """
322
+ Test for get_spase_json_ld function.
323
+ """
324
+
325
+ resource_id = 'spase://NASA/NumericalData/Wind/MFI/PT03S'
326
+
327
+ result = self._hdp.get_spase_json_ld(resource_id)
328
+
329
+ self.assertEqual(result['HttpStatus'], 200)
330
+
331
+ last_modified = result['Last-Modified']
332
+ self.assertIsNotNone(last_modified)
333
+
334
+ result = self._hdp.get_spase_json_ld(resource_id,
335
+ if_modified_since=last_modified)
336
+
337
+ self.assertEqual(result['HttpStatus'], 304)
338
+
339
+ last_modified -= datetime.timedelta(seconds=5)
340
+ result = self._hdp.get_spase_json_ld(resource_id,
341
+ if_modified_since=last_modified)
342
+
343
+ self.assertEqual(result['HttpStatus'], 200)
344
+ json_ld = json.loads(result['Result'])
345
+ self.assertEqual(json_ld['@context'], 'https://schema.org/')
346
+ self.assertEqual(json_ld['@type'], 'Dataset')
347
+ # print(result['Result'])
348
+ # result_element = ET.fromstring(result['Result'])
349
+ # self.assertEqual(result_element.tag,
350
+ # '{' + XHTML_NS + '}html')
351
+
352
+
319
353
  def test_get_spase_data(self):
320
354
  """
321
355
  Test for get_spase_data function.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes