pynmrstar 3.3.5__pp310-pypy310_pp73-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pynmrstar might be problematic. Click here for more details.
- cnmrstar.pypy310-pp73-darwin.so +0 -0
- pynmrstar/__init__.py +55 -0
- pynmrstar/_internal.py +292 -0
- pynmrstar/definitions.py +32 -0
- pynmrstar/entry.py +970 -0
- pynmrstar/exceptions.py +43 -0
- pynmrstar/loop.py +1197 -0
- pynmrstar/parser.py +287 -0
- pynmrstar/reference_files/comments.str +538 -0
- pynmrstar/reference_files/data_types.csv +24 -0
- pynmrstar/reference_files/schema.csv +6726 -0
- pynmrstar/saveframe.py +1015 -0
- pynmrstar/schema.py +367 -0
- pynmrstar/utils.py +134 -0
- pynmrstar-3.3.5.dist-info/LICENSE +21 -0
- pynmrstar-3.3.5.dist-info/METADATA +59 -0
- pynmrstar-3.3.5.dist-info/RECORD +19 -0
- pynmrstar-3.3.5.dist-info/WHEEL +5 -0
- pynmrstar-3.3.5.dist-info/top_level.txt +2 -0
|
Binary file
|
pynmrstar/__init__.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env python3

"""This module provides :py:class:`pynmrstar.Entry`, :py:class:`pynmrstar.Saveframe`,
:py:class:`pynmrstar.Loop`, and :py:class:`pynmrstar.Schema` objects.

It also provides some utility functions in :py:obj:`pynmrstar.utils`

Use python's built in help function for documentation."""

import decimal as _decimal
import logging
import os

# Look for a top-level cnmrstar module first, then for one inside this package.
# When the READTHEDOCS environment variable is set a missing extension is
# tolerated (cnmrstar becomes None); otherwise the import failure is fatal.
try:
    import cnmrstar
except ImportError:
    try:
        import pynmrstar.cnmrstar as cnmrstar
    except ImportError:
        if os.environ.get('READTHEDOCS'):
            cnmrstar = None
        else:
            raise ImportError('Could not import cnmrstar sub-module! Your installation appears to be broken.')

from pynmrstar import utils
from pynmrstar._internal import __version__, min_cnmrstar_version
from pynmrstar.entry import Entry
from pynmrstar.loop import Loop
from pynmrstar.parser import Parser as _Parser
from pynmrstar.saveframe import Saveframe
from pynmrstar.schema import Schema
import pynmrstar.definitions as definitions


def _version_tuple(version) -> tuple:
    """Convert a dotted version string such as "3.10.0" into a tuple of ints.

    Only the leading ASCII digits of each dot-separated component are used (a
    component without leading digits counts as 0) and trailing zero components
    are dropped, so "3.2" and "3.2.0" compare equal. Comparing the resulting
    tuples orders versions numerically, whereas comparing the raw strings
    orders them lexicographically ("3.10.0" < "3.2.0").
    """

    numbers = []
    for piece in str(version).split("."):
        digits = ""
        for character in piece:
            if character not in "0123456789":
                break
            digits += character
        numbers.append(int(digits) if digits else 0)
    while numbers and numbers[-1] == 0:
        numbers.pop()
    return tuple(numbers)


# Verify that the compiled extension (when present) is recent enough
if cnmrstar:
    if "version" not in dir(cnmrstar):
        raise ImportError(f"Could not determine the version of cnmrstar installed, and version {min_cnmrstar_version} or "
                          "greater is required.")
    # Compare numerically, component by component - not as plain strings
    if _version_tuple(cnmrstar.version()) < _version_tuple(min_cnmrstar_version):
        raise ImportError("The version of the cnmrstar module installed does not meet the requirements. As this should be "
                          f"handled automatically, there may be an issue with your installation. Version installed: "
                          f"{cnmrstar.version()}. Version required: {min_cnmrstar_version}")

# Set up logging
logger = logging.getLogger('pynmrstar')

# This makes sure that when decimals are printed a lower case "e" is used
_decimal.getcontext().capitals = 0

# Remove the submodule names from the package namespace; the classes imported
# from them above remain available.
del loop
del entry
del saveframe
del schema
del parser

__all__ = ['Loop', 'Saveframe', 'Entry', 'Schema', 'definitions', 'utils', '__version__', 'exceptions', 'cnmrstar']
|
pynmrstar/_internal.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import decimal
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
import zlib
|
|
7
|
+
from datetime import date
|
|
8
|
+
from gzip import GzipFile
|
|
9
|
+
from io import StringIO, BytesIO
|
|
10
|
+
from typing import Dict, Union, IO, List, Tuple
|
|
11
|
+
from urllib.error import HTTPError, URLError
|
|
12
|
+
from urllib.request import urlopen, Request
|
|
13
|
+
|
|
14
|
+
import pynmrstar
|
|
15
|
+
|
|
16
|
+
__version__: str = "3.3.5"
|
|
17
|
+
min_cnmrstar_version: str = "3.2.0"
|
|
18
|
+
|
|
19
|
+
# If we have requests, open a session to reuse for the duration of the program run
|
|
20
|
+
try:
|
|
21
|
+
from requests import session as _requests_session
|
|
22
|
+
# This replaces the urllib HTTPError if we have requests
|
|
23
|
+
from requests.exceptions import HTTPError, ConnectionError
|
|
24
|
+
_session = _requests_session()
|
|
25
|
+
except ModuleNotFoundError:
|
|
26
|
+
_session = None
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger('pynmrstar')
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# noinspection PyDefaultArgument
|
|
32
|
+
def _get_comments(_comment_cache: Dict[str, Dict[str, str]] = {}) -> Dict[str, Dict[str, str]]:
|
|
33
|
+
""" Loads the comments that should be placed in written files.
|
|
34
|
+
|
|
35
|
+
The default argument is mutable on purpose, as it is used as a cache for memoization."""
|
|
36
|
+
|
|
37
|
+
# Comment dictionary already exists
|
|
38
|
+
if _comment_cache:
|
|
39
|
+
return _comment_cache
|
|
40
|
+
|
|
41
|
+
file_to_load = os.path.join(os.path.dirname(os.path.realpath(__file__)))
|
|
42
|
+
file_to_load = os.path.join(file_to_load, "reference_files/comments.str")
|
|
43
|
+
|
|
44
|
+
# The import needs to be here to avoid import errors due to circular imports
|
|
45
|
+
from pynmrstar.entry import Entry
|
|
46
|
+
try:
|
|
47
|
+
comment_entry = Entry.from_file(file_to_load)
|
|
48
|
+
except IOError:
|
|
49
|
+
# Load the comments from Github if we can't find them locally
|
|
50
|
+
try:
|
|
51
|
+
logger.warning('Could not load comments from disk. Loading from web...')
|
|
52
|
+
comment_entry = Entry.from_file(_interpret_file(pynmrstar.definitions.COMMENT_URL))
|
|
53
|
+
except Exception:
|
|
54
|
+
logger.exception('Could not load comments from web. No comments will be shown.')
|
|
55
|
+
# No comments will be printed
|
|
56
|
+
return {}
|
|
57
|
+
|
|
58
|
+
# Load the comments
|
|
59
|
+
comment_records = comment_entry[0][0].get_tag(["category", "comment", "every_flag"])
|
|
60
|
+
comment_map = {'N': False, 'Y': True}
|
|
61
|
+
for comment in comment_records:
|
|
62
|
+
if comment[1] != ".":
|
|
63
|
+
_comment_cache[comment[0]] = {'comment': comment[1].rstrip() + "\n\n",
|
|
64
|
+
'every_flag': comment_map[comment[2]]}
|
|
65
|
+
|
|
66
|
+
return _comment_cache
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _json_serialize(obj: object) -> str:
|
|
70
|
+
"""JSON serializer for objects not serializable by default json code"""
|
|
71
|
+
|
|
72
|
+
# Serialize datetime.date objects by calling str() on them
|
|
73
|
+
if isinstance(obj, (date, decimal.Decimal)):
|
|
74
|
+
return str(obj)
|
|
75
|
+
raise TypeError("Type not serializable: %s" % type(obj))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _get_url_reliably(url: str, wait_time: float = 10, raw: bool = False, timeout: int = 10, retries: int = 2):
    """ Attempts to load data from a URL, retrying the specified number of times with an exponential
    backoff if rate limited. Fails immediately on 4xx errors that are not 403.

    :param url: The URL to fetch.
    :param wait_time: Seconds to sleep before the next attempt after a 403. Doubled on every retry.
    :param raw: If True return the response body as bytes, otherwise as decoded text.
    :param timeout: Timeout, in seconds, passed to the underlying HTTP call.
    :param retries: How many more times to retry after receiving a 403.
    :return: The response body (bytes if raw, else str).
    :raises KeyError: If the server returned 404.
    :raises HTTPError: If 403 is still returned once the retries are used up, on other HTTP
                       errors, or (when using requests) if the connection fails twice in a row.
    """

    global _session

    # If using Requests
    if _session:
        request_headers = {'Application': f'PyNMRSTAR {__version__}'}
        try:
            response = _session.get(url, timeout=timeout, headers=request_headers)
        except ConnectionError:
            # Try once more with a brand new session before giving up
            _session = _requests_session()
            try:
                response = _session.get(url, timeout=timeout, headers=request_headers)
            except ConnectionError:
                raise HTTPError("A ConnectionError was thrown during an attempt to load the entry.")

        # We are rate limited - sleep and try again
        if response.status_code == 403:
            if retries > 0:
                logger.warning(f'We were rate limited. Sleeping for {wait_time} seconds.')
                time.sleep(wait_time)
                return _get_url_reliably(url, wait_time=wait_time * 2, raw=raw, timeout=timeout,
                                         retries=retries - 1)
            else:
                raise HTTPError("Continued to receive 403 (forbidden, due to rate limit) after multiple wait times.") \
                    from None
        if response.status_code == 404:
            raise KeyError("Server returned 404.") from None
        response.raise_for_status()
        if raw:
            return response.content
        else:
            return response.text
    else:
        # Use the built in library
        try:
            req = Request(url)
            req.add_header('Application', f'PyNMRSTAR {__version__}')
            # The context manager closes the response even if read() raises
            with urlopen(req, timeout=timeout) as url_request:
                serialized_ent = url_request.read()

        except HTTPError as err:
            if err.code == 404:
                raise KeyError("Server returned 404.") from None
            # We are rate limited - sleep and try again
            elif err.code == 403:
                if retries > 0:
                    logger.warning(f'We were rate limited. Sleeping for {wait_time} seconds.')
                    time.sleep(wait_time)
                    return _get_url_reliably(url, wait_time=wait_time * 2, raw=raw, timeout=timeout,
                                             retries=retries - 1)
                else:
                    # This branch only runs when there is no requests session, so HTTPError is
                    # urllib.error.HTTPError here. Its constructor needs (url, code, msg, hdrs, fp);
                    # passing only a message would raise TypeError instead of the intended error.
                    raise HTTPError(url, err.code,
                                    "Continued to receive 403 (forbidden, due to rate limit) after multiple wait "
                                    "times.", err.headers, None) from None
            else:
                raise err
        if raw:
            return serialized_ent
        else:
            return serialized_ent.decode()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _get_entry_from_database(entry_num: Union[str, int],
                             convert_data_types: bool = False,
                             schema: 'pynmrstar.Schema' = None) -> 'pynmrstar.Entry':
    """ Downloads an entry, first from the API and - when that fails - from
    the FTP site. Used by Entry.from_database().

    :param entry_num: The entry ID. It is lower-cased and a leading "bmr" is removed.
    :param convert_data_types: If True, every tag and loop value is passed through
                               schema.convert_tag().
    :param schema: Handed to pynmrstar.utils.get_schema() when converting data types.
    :return: The entry, with the source of the entry, its saveframes and their loops
             set to "from_database(<entry_num>)".
    :raises IOError: If the entry could not be loaded.
    """

    entry_num = str(entry_num).lower()
    if entry_num.startswith("bmr"):
        entry_num = entry_num[3:]

    # First choice: the zlib-compressed JSON representation served by the API
    entry_url: str = (pynmrstar.definitions.API_URL + "/entry/%s?format=zlib") % entry_num

    try:
        compressed_json = _get_url_reliably(entry_url, raw=True, retries=2)
        json_data = json.loads(zlib.decompress(compressed_json).decode())
        if "error" in json_data:
            raise RuntimeError('Something wrong with API response.')
        ent = pynmrstar.Entry.from_json(json_data)
    except (HTTPError, ConnectionError, RuntimeError):
        # Can't fall back to FTP for chemcomps
        if entry_num.startswith("chemcomp"):
            raise IOError("Unable to load that chemcomp from the API.")

        # We're going to try again from the FTP
        logger.warning('Failed to download entry from the API, trying again from the FTP site.')
        ftp_root = pynmrstar.definitions.FTP_URL
        if any(marker in entry_num for marker in ("bmse", "bmst")):
            url = f"{ftp_root}/metabolomics/entry_directories/{entry_num}/{entry_num}.str"
        else:
            url = f"{ftp_root}/entry_directories/bmr{entry_num}/bmr{entry_num}_3.str"
        try:
            # The FTP download gets a longer timeout
            entry_content = _get_url_reliably(url, raw=False, timeout=20, retries=1)
            ent = pynmrstar.Entry.from_string(entry_content)
        except HTTPError:
            raise IOError(f"Entry {entry_num} does not exist in the public database.") from None
        except URLError:
            raise IOError("You don't appear to have an active internet connection. Cannot fetch entry.") from None

    except KeyError:
        raise IOError(f"Entry {entry_num} does not exist in the public database.") from None

    # Update the entry source
    source_label = f"from_database({entry_num})"
    ent.source = source_label
    for frame in ent:
        frame.source = ent.source
        for frame_loop in frame:
            frame_loop.source = ent.source

    if convert_data_types:
        schema = pynmrstar.utils.get_schema(schema)
        for frame in ent:
            # Tags are [name, value] pairs that are updated in place
            for tag in frame.tags:
                tag[1] = schema.convert_tag(frame.tag_prefix + "." + tag[0], tag[1])
            for loop in frame:
                for row in loop.data:
                    for pos, value in enumerate(row):
                        full_tag = loop.category + "." + loop.tags[pos]
                        row[pos] = schema.convert_tag(full_tag, value)

    return ent
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _interpret_file(the_file: Union[str, IO]) -> StringIO:
|
|
210
|
+
"""Helper method returns some sort of object with a read() method.
|
|
211
|
+
the_file could be a URL, a file location, a file object, or a
|
|
212
|
+
gzipped version of any of the above."""
|
|
213
|
+
|
|
214
|
+
if hasattr(the_file, 'read'):
|
|
215
|
+
read_data: Union[bytes, str] = the_file.read()
|
|
216
|
+
if type(read_data) == bytes:
|
|
217
|
+
buffer: BytesIO = BytesIO(read_data)
|
|
218
|
+
elif type(read_data) == str:
|
|
219
|
+
buffer = BytesIO(read_data.encode())
|
|
220
|
+
else:
|
|
221
|
+
raise IOError("What did your file object return when .read() was called on it?")
|
|
222
|
+
elif isinstance(the_file, str):
|
|
223
|
+
if the_file.startswith("http://") or the_file.startswith("https://") or the_file.startswith("ftp://"):
|
|
224
|
+
buffer = BytesIO(_get_url_reliably(the_file, raw=True, retries=0))
|
|
225
|
+
else:
|
|
226
|
+
with open(the_file, 'rb') as read_file:
|
|
227
|
+
buffer = BytesIO(read_file.read())
|
|
228
|
+
else:
|
|
229
|
+
raise ValueError("Cannot figure out how to interpret the file you passed.")
|
|
230
|
+
|
|
231
|
+
# Decompress the buffer if we are looking at a gzipped file
|
|
232
|
+
try:
|
|
233
|
+
gzip_buffer = GzipFile(fileobj=buffer)
|
|
234
|
+
gzip_buffer.readline()
|
|
235
|
+
gzip_buffer.seek(0)
|
|
236
|
+
buffer = BytesIO(gzip_buffer.read())
|
|
237
|
+
# Apparently we are not looking at a gzipped file
|
|
238
|
+
except (IOError, AttributeError, UnicodeDecodeError):
|
|
239
|
+
pass
|
|
240
|
+
|
|
241
|
+
buffer.seek(0)
|
|
242
|
+
return StringIO(buffer.read().decode().replace("\r\n", "\n").replace("\r", "\n"))
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_clean_tag_list(item: Union[str, List[str], Tuple[str, ...]]) -> List[Dict[str, str]]:
    """ Converts the provided item to a list of dictionaries of
    {
        formatted -> just the lower case tag name (category stripped)
        original -> whatever was provided, completely unmodified
    }

    :param item: A single tag name, or a list or tuple of tag names.
    :return: One dictionary per tag, in the order the tags were provided.
    :raises ValueError: If item is not a str, list or tuple, or if formatting
                        one of the contained tags raises AttributeError.
    """

    if not isinstance(item, (str, list, tuple)):
        raise ValueError('Invalid object provided. Only a tag name (str), or list of tags (list or tuple)'
                         ' are valid inputs to this function.')

    # A lone tag name is handled as a one-element list
    tag_list = [item] if isinstance(item, str) else item

    try:
        return [{"formatted": pynmrstar.utils.format_tag_lc(_), "original": _} for _ in tag_list]
    except AttributeError:
        raise ValueError('Your list or tuple may only contain tag names expressed as strings.')
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def write_to_file(nmrstar_object: Union['pynmrstar.Entry', 'pynmrstar.Saveframe'],
                  file_name: str,
                  format_: str = "nmrstar",
                  show_comments: bool = True,
                  skip_empty_loops: bool = False,
                  skip_empty_tags: bool = False):
    """ Writes the object to the specified file in NMR-STAR or JSON format.

    :param nmrstar_object: The object to write. Its format() method is used for
                           "nmrstar" output and its get_json() method for "json" output.
    :param file_name: Path of the file to write. It is opened in "w" mode.
    :param format_: Either "nmrstar" or "json".
    :param show_comments: Passed to format(). Only used for "nmrstar" output.
    :param skip_empty_loops: Passed to format(). Only used for "nmrstar" output.
    :param skip_empty_tags: Passed to format(). Only used for "nmrstar" output.
    :raises ValueError: If format_ is not one of the supported formats.
    """

    if format_ not in ["nmrstar", "json"]:
        raise ValueError("Invalid output format.")

    # Render before opening the file, so a failure here does not touch the file
    if format_ == "nmrstar":
        data_to_write = nmrstar_object.format(show_comments=show_comments,
                                              skip_empty_loops=skip_empty_loops,
                                              skip_empty_tags=skip_empty_tags)
    else:
        data_to_write = nmrstar_object.get_json()

    # The context manager closes the file even if write() raises
    with open(file_name, "w") as out_file:
        out_file.write(data_to_write)
|
pynmrstar/definitions.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/python3

""" NMR-STAR definitions and other module parameters live here. Technically
you can edit them, but you should really know what you're doing.

Adding key->value pairs to STR_CONVERSION_DICT will automatically convert tags
whose value matches "key" to the string "value" when printing. This allows you
to set the default conversion value for Booleans or other objects.

WARNINGS:
 * STR_CONVERSION_DICT cannot contain both booleans and arithmetic types.
   Attempting to use both will cause an issue since boolean True == 1 in python
   and False == 0.

 * You must call utils.quote_value.clear_cache() after changing the
   STR_CONVERSION_DICT or else your changes won't take effect due to caching!

   The only exception is if you set STR_CONVERSION_DICT before performing any
   actions which would call quote_value() - which include calling __str__ or
   format() on Entry, Saveframe, and Loop objects.
"""

# --- Parsing / formatting parameters ---

# NOTE(review): presumably the values that are treated as "no value" - confirm against the users
NULL_VALUES: list = ["", ".", "?", None]
# Space, tab, newline and vertical tab
WHITESPACE: str = " \t\n\v"
# NOTE(review): presumably the NMR-STAR keywords that need special handling - confirm
RESERVED_KEYWORDS: list = ["data_", "save_", "loop_", "stop_", "global_"]
# See the module docstring; by default only None is converted (to ".")
STR_CONVERSION_DICT: dict = {None: "."}

# --- Remote resources ---

# Base URL of the API that entries are fetched from
API_URL: str = "https://api.bmrb.io/v2"
SCHEMA_URL: str = "https://raw.githubusercontent.com/uwbmrb/nmr-star-dictionary/master/xlschem_ann.csv"
# Web fallback that is used when the comment file cannot be read from disk
COMMENT_URL: str = "https://raw.githubusercontent.com/uwbmrb/PyNMRSTAR/v3/reference_files/comments.str"
TYPES_URL: str = "https://raw.githubusercontent.com/uwbmrb/PyNMRSTAR/v3/pynmrstar/reference_files/data_types.csv"
# Base URL used to fetch entries when loading them from the API fails
FTP_URL: str = "https://bmrb.io/ftp/pub/bmrb"
|