casaconfig 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ # Copyright 2023 AUI, Inc. Washington DC, USA
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
def get_available_files(urlstr, pattern):
    """
    Returns a sorted list of all files found at the URL given by
    urlstr that match the pattern.

    This function does the work for measures_available and data_available.
    The appropriate pattern is set in each of those functions. This function
    does not check on the correctness of pattern (e.g. "tar" should be included
    in the pattern in all cases).

    The file names are the href values of the <a> tags found in the page at
    urlstr. An href that has no value is ignored and each name appears only
    once in the returned list.

    In addition, this function excludes any file that ends with ".md5" from
    the returned list. That is only relevant for casarundata but since it may
    happen in the future for some other sites, just exclude it here.

    Parameters
       - urlstr (str) - The URL to be used when finding the files.
       - pattern (str) - A regular expression. Any files where re.search finds this pattern are returned (excluding files ending in .md5).

    Returns
       list - the sorted list of file names found at urlstr matching the criteria

    Raises
       - casaconfig.NoNetwork - Raised when there is no network seen, can not continue.
       - urllib.error.URLError - Raised when there is an error fetching some remote content for some reason other than no network.
       - Exception - Unexpected exception while getting the list of available tarfiles.
    """

    import html.parser
    import urllib.request
    import ssl
    import certifi
    import re

    from casaconfig import NoNetwork

    from .have_network import have_network

    if not have_network():
        raise NoNetwork("No network, can not find the list of available data.")

    # compile once, handle_starttag is called for every start tag in the page
    compiled_pattern = re.compile(pattern)

    class LinkParser(html.parser.HTMLParser):
        """Collects the unique href values of <a> tags that match the pattern."""

        def reset(self):
            # HTMLParser.__init__ calls reset, so the list exists before anything is fed
            super().reset()
            self.rundataList = []

        def handle_starttag(self, tag, attrs):
            if tag != 'a':
                return
            for (name, value) in attrs:
                # an attribute with no value (e.g. <a href>) is reported with a value of None,
                # there is nothing to match in that case
                if name != 'href' or value is None:
                    continue
                # only care if the pattern can be found in value and value doesn't end in ".md5"
                if value.endswith('.md5') or not compiled_pattern.search(value):
                    continue
                # only add it to the list if it's not already there
                if value not in self.rundataList:
                    self.rundataList.append(value)

    # don't look for any exceptions here, this will raise urllib.error.URLError for most URL errors
    # other exceptions are unexpected but should be watched for upstream
    context = ssl.create_default_context(cafile=certifi.where())
    with urllib.request.urlopen(urlstr, context=context, timeout=400) as urlstream:
        parser = LinkParser()
        encoding = urlstream.headers.get_content_charset() or 'UTF-8'
        for line in urlstream:
            parser.feed(line.decode(encoding))

    # return the sorted list, earliest versions are first, newest is last
    return sorted(parser.rundataList)
90
+
91
+
92
+
93
+
94
+
@@ -35,7 +35,10 @@ def get_data_info(path=None, logger=None, type=None):
35
35
  was installed. These values are taken from the readme.txt file for each type.
36
36
  For 'casarundata' an additional field of 'manifest' is present which is
37
37
  the list of files that have been installed for that specific version (this will
38
- be empty for an unknown or invalid version).
38
+ be empty for an unknown or invalid version). The measures dictionary also contains
39
+ a 'site' field which holds the URL of the site that supplied that version. For
40
+ older readme files that lack any site information the site is assumed to be
41
+ the astron site.
39
42
 
40
43
  The 'release' dictionary comes from the release_data_readme.txt file which is copied
41
44
  into place when a modular CASA is built. It consists of 'casarundata' and 'measures'
@@ -93,6 +96,7 @@ def get_data_info(path=None, logger=None, type=None):
93
96
  from .read_readme import read_readme
94
97
 
95
98
  from casaconfig import UnsetMeasurespath
99
+ from casaconfig import BadReadme
96
100
 
97
101
 
98
102
  currentTime = time.time()
@@ -126,14 +130,18 @@ def get_data_info(path=None, logger=None, type=None):
126
130
  if os.path.exists(datareadme_path):
127
131
  # the readme exists, get the info
128
132
  result['casarundata'] = {'version':'error', 'date':'', 'manifest':[], 'age':None}
129
- readmeContents = read_readme(datareadme_path)
130
- if readmeContents is not None:
131
- currentAge = (currentTime - os.path.getmtime(datareadme_path)) / secondsPerDay
132
- currentVersion = readmeContents['version']
133
- currentDate = readmeContents['date']
134
- # the manifest ('extra') must exist with at least 1 entry, otherwise this is no a valid readme file and the version should be 'error'
135
- if len(readmeContents['extra']) > 0:
136
- result['casarundata'] = {'version':currentVersion, 'date':currentDate, 'manifest':readmeContents['extra'], 'age':currentAge}
133
try:
    # a BadReadme raised while reading the readme is reported through the version field
    # in the except clause below instead of being allowed to propagate
    readmeContents = read_readme(datareadme_path)
    if readmeContents is not None:
        # age is the time since the readme file was last modified, in days
        currentAge = (currentTime - os.path.getmtime(datareadme_path)) / secondsPerDay
        currentVersion = readmeContents['version']
        currentDate = readmeContents['date']
        # the manifest ('extra') must exist with at least 1 entry, otherwise this is not a valid readme file and the version should be 'error'
        if len(readmeContents['extra']) > 0:
            result['casarundata'] = {'version':currentVersion, 'date':currentDate, 'manifest':readmeContents['extra'], 'age':currentAge}
except BadReadme as exc:
    # put the exception string into the version so it shows up in the summary
    result['casarundata']['version'] = "error: " + str(exc)
137
145
  else:
138
146
  # does it look like it's probably casarundata?
139
147
  expected_dirs = ['alma','catalogs','demo','ephemerides','geodetic','gui','nrao']
@@ -153,25 +161,32 @@ def get_data_info(path=None, logger=None, type=None):
153
161
  measuresreadme_path = os.path.join(path,'geodetic/readme.txt')
154
162
  if os.path.exists(measuresreadme_path):
155
163
  # the readme exists, get the info
156
- result['measures'] = {'version':'error', 'date':'', 'age':None}
157
- readmeContents = read_readme(measuresreadme_path)
158
- if readmeContents is not None:
159
- currentVersion = readmeContents['version']
160
- currentDate = readmeContents['date']
161
- currentAge = (currentTime - os.path.getmtime(measuresreadme_path)) / secondsPerDay
162
- result['measures'] = {'version':currentVersion,'date':currentDate,'age':currentAge}
164
+ result['measures'] = {'site':'', 'version':'error', 'date':'', 'age':None}
165
+ try:
166
+ readmeContents = read_readme(measuresreadme_path)
167
+ if readmeContents is not None:
168
+ currentSite = readmeContents['site']
169
+ if currentSite is None:
170
+ currentSite = 'https://www.astron.nl/iers/'
171
+ currentVersion = readmeContents['version']
172
+ currentDate = readmeContents['date']
173
+ currentAge = (currentTime - os.path.getmtime(measuresreadme_path)) / secondsPerDay
174
+ result['measures'] = {'site':currentSite,'version':currentVersion,'date':currentDate,'age':currentAge}
175
+ except BadReadme as exc:
176
+ # put the exception string into the version so it shows up in the summary
177
+ result['measures']['version'] = "error: " + str(exc)
163
178
  else:
164
179
  # does it look like it's probably measuresdata?
165
180
  # path should have ephemerides and geodetic directories
166
181
  if os.path.isdir(os.path.join(path,'ephemerides')) and os.path.isdir(os.path.join(path,'geodetic')):
167
- result['measures'] = {'version':'unknown', 'date':'', 'age':None}
182
+ result['measures'] = {'site':'unknown', 'version':'unknown', 'date':'', 'age':None}
168
183
  else:
169
184
  # probably not measuresdata
170
- result['measures'] = {'version':'invalid', 'date':'', 'age':None}
185
+ result['measures'] = {'site':'', 'version':'invalid', 'date':'', 'age':None}
171
186
 
172
187
  if type is None or type=='release':
173
188
  # release data versions
174
- if importlib.resources.is_resource('casaconfig','release_data_readme.txt'):
189
+ if importlib.resources.files('casaconfig').joinpath('release_data_readme.txt').is_file():
175
190
  try:
176
191
  casarundataVersion = None
177
192
  measuresVersion = None
@@ -194,7 +209,7 @@ def get_data_info(path=None, logger=None, type=None):
194
209
  elif lineType == 'measures':
195
210
  if measuresVersion is not None:
196
211
  ok = False
197
- reason = "duplicate measures lins"
212
+ reason = "duplicate measures line"
198
213
  break
199
214
  measuresVersion = lineVers
200
215
  else:
@@ -15,77 +15,228 @@
15
15
  this module will be included in the api
16
16
  """
17
17
 
18
- def measures_available():
18
+ import traceback
19
+
20
def measures_available(measures_site=None, logger=None):
    """
    Return a list of available measures versions at measures_site.

    This returns a list of the measures versions available at measures_site.
    If measures_site is None, then the measures_site config value is used.

    The list of available measures is sorted using the date and time fields of
    the file name so that the most recent file appears at the end of the list.

    The measures_site may be a single string value or a list (or tuple) of
    strings where each element is a URL where measures tar files are found.

    If measures_site is a list then the elements are used in order until
    an appropriate list of available measures is found (see more below).

    The first element of the returned list is always the measures site URL
    used to populate the list.

    The measures_site_interval config value may be used when determining which
    site to use if measures_site is a list. When stepping through all the
    elements of measures_site, if the most recent measures tar file at that
    site has a date that is less than or equal to measures_site_interval
    days before the current date then that list of measures tar files from
    that site is returned. If none of the sites satisfy that criteria for
    the most recent file then the list having the most recent tar file is
    returned.

    If the returned list is older than measures_site_interval days before
    the current date then a warning is handed to print_log_messages along
    with logger and the casaconfig_verbose config value. When measures_site
    is a single site that warning is only produced when logger is not None.

    When comparing the date of the most recent tar file with that of
    the current date, the date value (excluding the time) found in the
    tar file name is compared with the current date without any correction
    for time zone differences. This is an approximate age of the most recent
    tar file and is primarily intended to identify a site that may not be
    updating regularly (daily) as expected so that casaconfig can use another
    in the list of measures_site automatically.

    The list of available measures versions is the list of files at
    measures_site that follow the pattern of *_Measures_YYYYMMDD-HHMMSS.*tar*,
    excluding files that end in ".md5", where each character of YYYYMMDD and
    HHMMSS is a single digit (0 through 9).

    Note that the version parameter in measures_update must be an element in
    a list returned by measures_available so that measures_update can find
    the expected version. Note that measures tar file names will usually
    not be the same at different sites.

    Parameters
       - measures_site (str or list of str = None) - Each value is a URL where measures tar files are found. If measures_site is a list then the elements are used in order until a list can be assembled. Default None uses config.measures_site.
       - logger (casatools.logsink=None) - Instance of the casalogger to use for writing messages. It is passed to print_log_messages together with config.casaconfig_verbose. The logger is only used if the last file in the returned list is more than config.measures_site_interval days before the current date.

    Returns
       list - version names returned as list of strings, the first element of this list is the site used. The file names are sorted by date and time as found in the name with the most recent name appearing at the end of the list.

    Raises
       - casaconfig.NoNetwork - Raised where there is no network seen, can not continue
       - casaconfig.RemoteError - Raised when there is an error fetching some remote content for some reason other than no network
       - ValueError - Raised when config.measures_site_interval can not be used as an int
       - Exception - Unexpected exception while getting list of available measures versions

    """
    from casaconfig import NoNetwork, RemoteError
    import urllib.error
    import re
    from datetime import date

    from .get_available_files import get_available_files
    from .print_log_messages import print_log_messages

    from .. import config as _config

    verbose = _config.casaconfig_verbose

    if measures_site is None:
        measures_site = _config.measures_site

    # this makes sure that measures_site_interval can be used as an int
    # this raises a ValueError if there's a problem
    measures_site_interval = int(_config.measures_site_interval)

    def measuresFileAge(measuresFileName):
        # return the age of a measuresFile, in days before today
        # do not attempt to correct for time zones
        # only uses the year, month, and day fields
        date_pattern = r".*_Measures_(\d{4})(\d{2})(\d{2})-.*"
        dateMatch = re.search(date_pattern, measuresFileName)
        year, month, day = (int(field) for field in dateMatch.groups())
        return (date.today() - date(year, month, day)).days

    def sort_by_timestamp(text):
        # sort on the name starting at "_Measures_YYYYMMDD-HHMMSS." so that whatever comes
        # before it (a prefix or path, which may itself contain "Measures") has no effect.
        # every name in files_list was selected by a pattern that contains this one, so the
        # search always finds a match
        stamp = re.search(r"_Measures_\d{8}-\d{6}\.", text)
        return text[stamp.start():]

    if isinstance(measures_site, (list, tuple)):
        # the most recent exception seen, cleared whenever a site yields a usable list
        saved_exc = None
        # this list is only used if all of the sites in the list are out of date
        # and it is necessary to find the one that's least out of date.
        # each element is a tuple of (age, file_list) where age is the age, in days, of
        # the last entry in file_list and file_list is the list of files at that
        # site
        file_age_list = []

        for this_site in measures_site:
            try:
                # turn off the logger here so that only this initial call can produce a logged message warning
                result = measures_available(this_site, logger=None)
                if len(result) > 1:
                    siteAge = measuresFileAge(result[-1])
                    # something useful can be returned, unset saved_exc
                    saved_exc = None
                    if siteAge <= measures_site_interval:
                        return result
                    # the only way to get here is when the last file in result is more than measures_site_interval days from today
                    file_age_list.append((siteAge, result))
            except RemoteError as exc:
                # save it, if it's still set when the loop exits, reraise it, only the last exception is reraised
                saved_exc = exc
            except NoNetwork:
                # reraise this, there's no recovering from it
                raise
            except Exception as exc:
                # save it, if it's still set when the loop exits, reraise it, only the last exception is reraised
                # the site is formatted rather than concatenated so that a non-string element can not
                # raise a TypeError here and hide the original exception
                print(f"exception when trying : {this_site}")
                print(str(exc))
                print(str(type(exc)))
                saved_exc = exc

        # if it gets here, either none of the sites had any result to use or
        # file_age_list has at least one entry or there was an exception

        if file_age_list:
            # something can be returned, return the one with the smallest age
            # (min returns the first of equally aged lists, so earlier sites win ties)
            _, result = min(file_age_list, key=lambda age_and_files: age_and_files[0])
            # and log that that's what's going on, not an exception
            msgs = [
                "Warning: the most recent measures tar file at each of the sites was older than config.measures_site_interval",
                "%s had the most recent measures tar file, returning that list" % result[0],
            ]
            print_log_messages(msgs, logger, False, verbose)
            return result

        if saved_exc is None:
            # nothing was found and nothing was raised, e.g. measures_site is an empty list
            raise RemoteError("Unable to retrieve list of available measures versions, measures_site value may be an empty list or no files were found at any site, check and try again.")

        # saved exception, this is the most recent if multiple were raised, reraise it here
        raise saved_exc from None

    # a single site, make sure it's used as a string
    measures_site = str(measures_site)

    # this pattern matches "<anything>_Measures_YYYYMMDD-HHMMSS.<anything>tar<anything>
    # where YYYY MM DD HH MM SS are all digits having exactly that number of digits.
    # "tar" must appear somewhere after the "." following the digits. This allows for
    # different compression schemes to be used and signaled in the name, so long as the
    # tarfile module can understand that compression. Note that get_available_files
    # also excludes files that end in "md5". That is currently only relevant for casarundata
    # but it may be an issue in some future site so that excludes those files.
    pattern = r".*_Measures_\d{8}-\d{6}\..*tar.*"

    try:
        files_list = get_available_files(measures_site, pattern)

        if len(files_list) == 0:
            # nothing found there, RemoteError
            # no exception, so the site must have no files found
            raise RemoteError("Unable to retrieve list of available measures versions, measures_site value may be an empty list or no files were found at measures_site, check and try again.")

        # because the prefix changed during the development of the NRAO/casa measures
        # tarballs this list needs to be sorted excluding everything before "_Measures_".
        # it's probably not a bad idea in general, that keeps things in time sorted order
        result = sorted(files_list, key=sort_by_timestamp)

        # and prepend the measures_site to the list
        result.insert(0, measures_site)

        # if the logger is set, check if the site appears to be too old and
        # log a warning if it is
        if logger is not None:
            siteAge = measuresFileAge(result[-1])
            if siteAge > measures_site_interval:
                msgs = ["Warning: the most recent tar file at %s was older than config.measures_site_interval" % measures_site]
                print_log_messages(msgs, logger, False, verbose)

        return result

    except (RemoteError, NoNetwork):
        # reraise these as is
        raise
    except urllib.error.URLError as urlerr:
        raise RemoteError("Unable to retrieve list of available measures versions : " + str(urlerr)) from None
    except UnicodeError as unicodeErr:
        raise RemoteError("Unable to retrieve list of available measures versions because of a UnicodeError, likely the site name is incorrect. Site : " + str(measures_site) + " error: " + str(unicodeErr)) from None
    except Exception as exc:
        msg = "Unexpected exception while getting list of available measures versions : " + str(exc)
        raise Exception(msg)