pynmrstar-3.3.5-cp39-cp39-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pynmrstar might be problematic.
- cnmrstar.cpython-39-x86_64-linux-gnu.so +0 -0
- pynmrstar/__init__.py +55 -0
- pynmrstar/_internal.py +292 -0
- pynmrstar/definitions.py +32 -0
- pynmrstar/entry.py +970 -0
- pynmrstar/exceptions.py +43 -0
- pynmrstar/loop.py +1197 -0
- pynmrstar/parser.py +287 -0
- pynmrstar/reference_files/comments.str +538 -0
- pynmrstar/reference_files/data_types.csv +24 -0
- pynmrstar/reference_files/schema.csv +6726 -0
- pynmrstar/saveframe.py +1015 -0
- pynmrstar/schema.py +367 -0
- pynmrstar/utils.py +134 -0
- pynmrstar-3.3.5.dist-info/LICENSE +21 -0
- pynmrstar-3.3.5.dist-info/METADATA +59 -0
- pynmrstar-3.3.5.dist-info/RECORD +19 -0
- pynmrstar-3.3.5.dist-info/WHEEL +5 -0
- pynmrstar-3.3.5.dist-info/top_level.txt +2 -0
pynmrstar/entry.py
ADDED
@@ -0,0 +1,970 @@
import hashlib
import json
import logging
import warnings
from io import StringIO
from typing import TextIO, BinaryIO, Union, List, Optional, Dict, Any, Tuple

from pynmrstar import definitions, utils, loop as loop_mod, parser as parser_mod, saveframe as saveframe_mod
from pynmrstar._internal import _json_serialize, _interpret_file, _get_entry_from_database, write_to_file
from pynmrstar.exceptions import InvalidStateError
from pynmrstar.schema import Schema

logger = logging.getLogger('pynmrstar')


class Entry(object):
    """An object oriented representation of a BMRB entry. You can initialize this
    object several ways; (e.g. from a file, from the official database,
    from scratch) see the class methods below. """

    def __contains__(self, item: Any):
        """ Check if the given item is present in the entry. """

        # Prepare for processing
        if isinstance(item, (list, tuple)):
            to_process: List[Union[str, saveframe_mod.Saveframe, loop_mod.Loop]] = list(item)
        elif isinstance(item, (loop_mod.Loop, saveframe_mod.Saveframe, str)):
            to_process = [item]
        else:
            return False

        for item in to_process:
            if isinstance(item, saveframe_mod.Saveframe):
                if item not in self._frame_list:
                    return False
            elif isinstance(item, (loop_mod.Loop, str)):
                found = False
                for saveframe in self._frame_list:
                    if item in saveframe:
                        found = True
                        break
                if not found:
                    return False
            else:
                return False
        return True

    def __delitem__(self, item: Union['saveframe_mod.Saveframe', int, str]) -> None:
        """Remove the indicated saveframe."""

        if isinstance(item, int):
            try:
                del self._frame_list[item]
            except IndexError:
                raise IndexError(f'Index out of range: no saveframe at index: {item}')
        else:
            self.remove_saveframe(item)

    def __eq__(self, other) -> bool:
        """Returns True if this entry is equal to another entry, false
        if it is not equal."""

        if not isinstance(other, Entry):
            return False

        return (self.entry_id, self._frame_list) == (other.entry_id, other._frame_list)

    def __getitem__(self, item: Union[int, str]) -> 'saveframe_mod.Saveframe':
        """Get the indicated saveframe."""

        try:
            return self._frame_list[item]
        except TypeError:
            return self.get_saveframe_by_name(item)

    def __init__(self, **kwargs) -> None:
        """ You should not directly instantiate an Entry using this method.
            Instead use the class methods:

              :py:meth:`Entry.from_database`, :py:meth:`Entry.from_file`,
              :py:meth:`Entry.from_string`, :py:meth:`Entry.from_scratch`,
              :py:meth:`Entry.from_json`, and :py:meth:`Entry.from_template`"""

        # Default initializations
        self._entry_id: Union[str, int] = 0
        self._frame_list: List[saveframe_mod.Saveframe] = []
        self.source: Optional[str] = None

        # They initialized us wrong
        if len(kwargs) == 0:
            raise ValueError("You should not directly instantiate an Entry using this method. Instead use the "
                             "class methods: Entry.from_database(), Entry.from_file(), Entry.from_string(), "
                             "Entry.from_scratch(), and Entry.from_json().")

        if 'the_string' in kwargs:
            # Parse from a string by wrapping it in StringIO
            star_buffer: StringIO = StringIO(kwargs['the_string'])
            self.source = "from_string()"
        elif 'file_name' in kwargs:
            star_buffer = _interpret_file(kwargs['file_name'])
            self.source = f"from_file('{kwargs['file_name']}')"
        # Creating from template (schema)
        elif 'all_tags' in kwargs:
            self._entry_id = kwargs['entry_id']

            saveframe_categories: dict = {}
            schema = utils.get_schema(kwargs['schema'])
            schema_obj = schema.schema
            for tag in [schema_obj[x.lower()] for x in schema.schema_order]:
                category = tag['SFCategory']
                if category not in saveframe_categories:
                    saveframe_categories[category] = True
                    templated_saveframe = saveframe_mod.Saveframe.from_template(category, category + "_1",
                                                                                entry_id=self._entry_id,
                                                                                all_tags=kwargs['all_tags'],
                                                                                default_values=kwargs['default_values'],
                                                                                schema=schema)
                    self._frame_list.append(templated_saveframe)
            entry_saveframe = self.get_saveframes_by_category('entry_information')[0]
            entry_saveframe['NMR_STAR_version'] = schema.version
            entry_saveframe['Original_NMR_STAR_version'] = schema.version
            return
        else:
            # Initialize a blank entry
            self._entry_id = kwargs['entry_id']
            self.source = "from_scratch()"
            return

        # Load the BMRB entry from the file
        parser: parser_mod.Parser = parser_mod.Parser(entry_to_parse_into=self)
        parser.parse(star_buffer.read(), source=self.source, convert_data_types=kwargs.get('convert_data_types', False),
                     raise_parse_warnings=kwargs.get('raise_parse_warnings', False))

    def __iter__(self) -> saveframe_mod.Saveframe:
        """ Yields each of the saveframes contained within the entry. """

        for saveframe in self._frame_list:
            yield saveframe

    def __len__(self) -> int:
        """ Returns the number of saveframes in the entry."""

        return len(self._frame_list)

    def __repr__(self) -> str:
        """Returns a description of the entry."""

        return f"<pynmrstar.Entry '{self._entry_id}' {self.source}>"

    def __setitem__(self, key: Union[int, str], item: 'saveframe_mod.Saveframe') -> None:
        """Set the indicated saveframe."""

        # It is a saveframe
        if isinstance(item, saveframe_mod.Saveframe):
            # Add by ordinal
            if isinstance(key, int):
                self._frame_list[key] = item

            # TODO: Consider stripping this behavior out - it isn't clear it is useful
            else:
                # Add by key
                contains_frame: bool = False
                for pos, frame in enumerate(self._frame_list):
                    if frame.name == key:
                        if contains_frame:
                            raise ValueError(f"Cannot replace the saveframe with the name '{frame.name} "
                                             f"because multiple saveframes in the entry have the same name. "
                                             f'This library does not allow that normally, as it is '
                                             f'invalid NMR-STAR. Did you manually edit the Entry.frame_list '
                                             f'object? Please use the Entry.add_saveframe() method instead to '
                                             f'add new saveframes.')
                        self._frame_list[pos] = item
                        contains_frame = True

                if not contains_frame:
                    raise ValueError(f"Saveframe with name '{key}' does not exist and therefore cannot be "
                                     f"written to. Use the add_saveframe() method to add new saveframes.")
        else:
            raise ValueError("You can only assign a saveframe to an entry splice. You attempted to assign: "
                             f"'{repr(item)}'")

    def __str__(self, skip_empty_loops: bool = False, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
        """Returns the entire entry in STAR format as a string."""

        sf_strings = []
        seen_saveframes = {}
        for saveframe_obj in self:
            if saveframe_obj.category in seen_saveframes:
                sf_strings.append(saveframe_obj.format(skip_empty_loops=skip_empty_loops,
                                                       skip_empty_tags=skip_empty_tags, show_comments=False))
            else:
                sf_strings.append(saveframe_obj.format(skip_empty_loops=skip_empty_loops,
                                                       skip_empty_tags=skip_empty_tags, show_comments=show_comments))
                seen_saveframes[saveframe_obj.category] = True

        return f"data_{self.entry_id}\n\n" + "\n".join(sf_strings)

    @property
    def category_list(self) -> List[str]:
        """ Returns a list of the unique categories present in the entry. """

        category_list = []
        for saveframe in self._frame_list:
            category = saveframe.category
            if category and category not in category_list:
                category_list.append(category)
        return list(category_list)

    @property
    def empty(self) -> bool:
        """ Check if the entry has no data. Ignore the structural tags."""

        for saveframe in self._frame_list:
            if not saveframe.empty:
                return False

        return True

    @property
    def entry_id(self) -> Union[str, int]:
        """ When read, fetches the entry ID.

        When set, updates the entry ID for the Entry, and updates all the tags which
        are foreign keys of the Entry_ID. (For example, Entry.ID and
        Citation.Entry_ID will be updated, if present.)
        """
        return self._entry_id

    @entry_id.setter
    def entry_id(self, value: Union[str, int]) -> None:
        self._entry_id = value

        schema = utils.get_schema()
        for saveframe in self._frame_list:
            for tag in saveframe.tags:
                fqtn = (saveframe.tag_prefix + "." + tag[0]).lower()

                try:
                    if schema.schema[fqtn]['entryIdFlg'] == 'Y':
                        tag[1] = self._entry_id
                except KeyError:
                    pass

            for loop in saveframe.loops:
                for tag in loop.tags:
                    fqtn = (loop.category + "." + tag).lower()
                    try:
                        if schema.schema[fqtn]['entryIdFlg'] == 'Y':
                            loop[tag] = [self._entry_id] * len(loop[tag])
                    except KeyError:
                        pass

    @property
    def frame_dict(self) -> Dict[str, 'saveframe_mod.Saveframe']:
        """Returns a dictionary of saveframe name -> saveframe object mappings."""

        fast_dict = dict((frame.name, frame) for frame in self._frame_list)

        # If there are no duplicates then continue
        if len(fast_dict) == len(self._frame_list):
            return fast_dict

        # Figure out where the duplicate is
        frame_dict = {}

        for frame in self._frame_list:
            if frame.name in frame_dict:
                raise InvalidStateError("The entry has multiple saveframes with the same name. That is not allowed in "
                                        "the NMR-STAR format. Please remove or rename one. Duplicate name: "
                                        f"'{frame.name}'. Furthermore, please use Entry.add_saveframe() and "
                                        f"Entry.remove_saveframe() rather than manually editing the Entry.frame_list "
                                        f"list, which will prevent this state from existing in the future.")
            frame_dict[frame.name] = frame

        return frame_dict

    @property
    def frame_list(self) -> List['saveframe_mod.Saveframe']:
        return self._frame_list

    @classmethod
    def from_database(cls,
                      entry_num: Union[str, int],
                      convert_data_types: bool = False,
                      schema: Schema = None):
        """Create an entry corresponding to the most up to date entry on
        the public BMRB server. (Requires ability to initiate outbound
        HTTP connections.)

        Setting convert_data_types to True will automatically convert
        the data loaded from the file into the corresponding python type as
        determined by loading the standard BMRB schema. This would mean that
        all floats will be represented as decimal.Decimal objects, all integers
        will be python int objects, strings and vars will remain strings, and
        dates will become datetime.date objects. When printing str() is called
        on all objects. Other that converting uppercase "E"s in scientific
        notation floats to lowercase "e"s this should not cause any change in
        the way re-printed NMR-STAR objects are displayed. Specify a custom
        schema object to use using the schema parameter."""

        return _get_entry_from_database(entry_num,
                                        convert_data_types=convert_data_types,
                                        schema=schema)

    @classmethod
    def from_file(cls,
                  the_file: Union[str, TextIO, BinaryIO],
                  convert_data_types: bool = False,
                  raise_parse_warnings: bool = False,
                  schema: Schema = None):
        """Create an entry by loading in a file. If the_file starts with
        http://, https://, or ftp:// then we will use those protocols to
        attempt to open the file.

        Setting convert_data_types to True will automatically convert
        the data loaded from the file into the corresponding python type as
        determined by loading the standard BMRB schema. This would mean that
        all floats will be represented as decimal.Decimal objects, all integers
        will be python int objects, strings and vars will remain strings, and
        dates will become datetime.date objects. When printing str() is called
        on all objects. Other that converting uppercase "E"s in scientific
        notation floats to lowercase "e"s this should not cause any change in
        the way re-printed NMR-STAR objects are displayed. Specify a custom
        schema object to use using the schema parameter.

        Setting raise_parse_warnings to True will result in the raising of a
        ParsingError rather than logging a warning when non-valid (but
        ignorable) issues are found. """

        return cls(file_name=the_file,
                   convert_data_types=convert_data_types,
                   raise_parse_warnings=raise_parse_warnings,
                   schema=schema)

    @classmethod
    def from_json(cls, json_dict: Union[dict, str]):
        """Create an entry from JSON (serialized or unserialized JSON)."""

        # If they provided a string, try to load it using JSON
        if not isinstance(json_dict, dict):
            try:
                json_dict = json.loads(json_dict)
            except (TypeError, ValueError):
                raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.")

        # Make sure it has the correct keys
        if "saveframes" not in json_dict:
            raise ValueError("The JSON you provide must be a hash and must contain the key 'saveframes' - even if the "
                             "key points to 'None'.")
        if "entry_id" not in json_dict and "bmrb_id" not in json_dict:
            raise ValueError("The JSON you provide must be a hash and must contain the key 'entry_id' - even if the"
                             " key points to 'None'.")
        # Until the migration is complete, 'bmrb_id' is a synonym for
        # 'entry_id'
        if 'entry_id' not in json_dict:
            json_dict['entry_id'] = json_dict['bmrb_id']

        # Create an entry from scratch and populate it
        ret = Entry.from_scratch(json_dict['entry_id'])
        ret._frame_list = [saveframe_mod.Saveframe.from_json(x) for x in json_dict['saveframes']]
        ret.source = "from_json()"

        # Return the new loop
        return ret

    @classmethod
    def from_string(cls,
                    the_string: str,
                    convert_data_types: bool = False,
                    raise_parse_warnings: bool = False,
                    schema: Schema = None):
        """Create an entry by parsing a string.


        Setting convert_data_types to True will automatically convert
        the data loaded from the file into the corresponding python type as
        determined by loading the standard BMRB schema. This would mean that
        all floats will be represented as decimal.Decimal objects, all integers
        will be python int objects, strings and vars will remain strings, and
        dates will become datetime.date objects. When printing str() is called
        on all objects. Other that converting uppercase "E"s in scientific
        notation floats to lowercase "e"s this should not cause any change in
        the way re-printed NMR-STAR objects are displayed. Specify a custom
        schema object to use using the schema parameter.

        Setting raise_parse_warnings to True will result in the raising of a
        ParsingError rather than logging a warning when non-valid (but
        ignorable) issues are found."""

        return cls(the_string=the_string,
                   convert_data_types=convert_data_types,
                   raise_parse_warnings=raise_parse_warnings,
                   schema=schema)

    @classmethod
    def from_scratch(cls, entry_id: Union[str, int]):
        """Create an empty entry that you can programmatically add to.
        You must pass a value corresponding to the Entry ID.
        (The unique identifier "xxx" from "data_xxx".)"""

        return cls(entry_id=entry_id)

    @classmethod
    def from_template(cls,
                      entry_id,
                      all_tags=False,
                      default_values=False,
                      schema=None) -> 'Entry':
        """ Create an entry that has all of the saveframes and loops from the
        schema present. No values will be assigned. Specify the entry
        ID when calling this method.

        The optional argument 'all_tags' forces all tags to be included
        rather than just the mandatory tags.

        The optional argument 'default_values' will insert the default
        values from the schema.

        The optional argument 'schema' allows providing a custom schema."""

        schema = utils.get_schema(schema)
        entry = cls(entry_id=entry_id, all_tags=all_tags, default_values=default_values, schema=schema)
        entry.source = f"from_template({schema.version})"
        return entry

    def add_saveframe(self, frame) -> None:
        """Add a saveframe to the entry."""

        if not isinstance(frame, saveframe_mod.Saveframe):
            raise ValueError("You can only add instances of saveframes using this method. You attempted to add "
                             f"the object: '{repr(frame)}'.")

        # Do not allow the addition of saveframes with the same name
        # as a saveframe which already exists in the entry
        if frame.name in self.frame_dict:
            raise ValueError(f"Cannot add a saveframe with name '{frame.name}' since a saveframe with that "
                             f"name already exists in the entry.")

        self._frame_list.append(frame)

    def compare(self, other) -> List[str]:
        """Returns the differences between two entries as a list.
        Non-equal entries will always be detected, but specific differences
        detected depends on order of entries."""

        diffs = []
        if self is other:
            return []
        if isinstance(other, str):
            if str(self) == other:
                return []
            else:
                return ['String was not exactly equal to entry.']
        elif not isinstance(other, Entry):
            return ['Other object is not of class Entry.']
        try:
            if str(self.entry_id) != str(other.entry_id):
                diffs.append(f"Entry ID does not match between entries: '{self.entry_id}' vs '{other.entry_id}'.")
            if len(self._frame_list) != len(other.frame_list):
                diffs.append(f"The number of saveframes in the entries are not equal: '{len(self._frame_list)}' vs "
                             f"'{len(other.frame_list)}'.")
            for frame in self._frame_list:
                other_frame_dict = other.frame_dict
                if frame.name not in other_frame_dict:
                    diffs.append(f"No saveframe with name '{frame.name}' in other entry.")
                else:
                    comp = frame.compare(other_frame_dict[frame.name])
                    if len(comp) > 0:
                        diffs.append(f"Saveframes do not match: '{frame.name}'.")
                        diffs.extend(comp)

        except AttributeError as err:
            diffs.append(f"An exception occurred while comparing: '{err}'.")

        return diffs

    def add_missing_tags(self, schema: Schema = None, all_tags: bool = False) -> None:
        """ Automatically adds any missing tags (according to the schema)
        to all saveframes and loops and sorts the tags. """

        for saveframe in self._frame_list:
            saveframe.add_missing_tags(schema=schema, all_tags=all_tags)

    def delete_empty_saveframes(self) -> None:
        """ Deprecated. Please use `py:meth:pynmrstar.Entry.remove_empty_saveframes`. """

        warnings.warn('Deprecated. Please use remove_empty_saveframes() instead.', DeprecationWarning)
        return self.remove_empty_saveframes()

    def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
        """ The same as calling str(Entry), except that you can pass options
        to customize how the entry is printed.

        skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
        skip_empty_tags will omit tags in the saveframes and loops which have no non-null values.
        show_comments will show the standard comments before a saveframe."""

        return self.__str__(skip_empty_loops=skip_empty_loops, skip_empty_tags=skip_empty_tags,
                            show_comments=show_comments)

    def get_json(self, serialize: bool = True) -> Union[dict, str]:
        """ Returns the entry in JSON format. If serialize is set to
        False a dictionary representation of the entry that is
        serializeable is returned instead."""

        frames = [x.get_json(serialize=False) for x in self._frame_list]

        entry_dict = {
            "entry_id": self.entry_id,
            "saveframes": frames
        }

        if serialize:
            return json.dumps(entry_dict, default=_json_serialize)
        else:
            return entry_dict

    def get_loops_by_category(self, value: str) -> List['loop_mod.Loop']:
        """Allows fetching loops by category."""

        value = utils.format_category(value).lower()

        results = []
        for frame in self._frame_list:
            for one_loop in frame.loops:
                if one_loop.category.lower() == value:
                    results.append(one_loop)
        return results

    def get_saveframe_by_name(self, saveframe_name: str) -> 'saveframe_mod.Saveframe':
        """Allows fetching a saveframe by name."""

        frames = self.frame_dict
        if saveframe_name in frames:
            return frames[saveframe_name]
        else:
            raise KeyError(f"No saveframe with name '{saveframe_name}'")

    def get_saveframes_by_category(self, value: str) -> List['saveframe_mod.Saveframe']:
        """Allows fetching saveframes by category."""

        return self.get_saveframes_by_tag_and_value("sf_category", value)

    def get_saveframes_by_tag_and_value(self, tag_name: str, value: Any) -> List['saveframe_mod.Saveframe']:
        """Allows fetching saveframe(s) by tag and tag value."""

        ret_frames = []

        for frame in self._frame_list:
            results = frame.get_tag(tag_name)
            if results != [] and results[0] == value:
                ret_frames.append(frame)

        return ret_frames

    def get_tag(self, tag: str, whole_tag: bool = False) -> list:
        """ Given a tag (E.g. _Assigned_chem_shift_list.Data_file_name)
        return a list of all values for that tag. Specify whole_tag=True
        and the [tag_name, tag_value] pair will be returned."""

        if "." not in str(tag):
            raise ValueError("You must provide the tag category to call this method at the entry level. For "
                             "example, you must provide 'Entry.Title' rather than 'Title' as the tag if calling"
                             " this at the Entry level. You can call Saveframe.get_tag('Title') without issue.")

        results = []
        for frame in self._frame_list:
            results.extend(frame.get_tag(tag, whole_tag=whole_tag))

        return results

    def get_tags(self, tags: list) -> Dict[str, list]:
        """ Given a list of tags, get all of the tags and return the
        results in a dictionary."""

        # All tags
        if tags is None or not isinstance(tags, list):
            raise ValueError("Please provide a list of tags.")

        results = {}
        for tag in tags:
            results[tag] = self.get_tag(tag)

        return results

    def normalize(self, schema: Optional[Schema] = None) -> None:
        """ Sorts saveframes, loops, and tags according to the schema
        provided (or BMRB default if none provided).

        Also re-assigns ID tag values and updates tag links to ID values."""

        # Assign all the ID tags, and update all links to ID tags
        my_schema = utils.get_schema(schema)

        # Sort the saveframes according to ID, if an ID exists. Otherwise, still sort by category
        ordering = my_schema.category_order

        def sf_key(_: saveframe_mod.Saveframe) -> [int, Union[int, float]]:
            """ Helper function to sort the saveframes.
            Returns (category order, saveframe order) """

            # If not a real category, generate an artificial but stable order > the real saveframes
            try:
                category_order = ordering.index(_.tag_prefix)
            except (ValueError, KeyError):
                if _.category is None:
                    category_order = float('infinity')
                else:
                    category_order = len(ordering) + abs(int(hashlib.sha1(str(_.category).encode()).hexdigest(), 16))

            # See if there is an ID tag, and it is a number
            saveframe_id = float('infinity')
            try:
                saveframe_id = int(_.get_tag("ID")[0])
            except (ValueError, KeyError, IndexError, TypeError):
                # Either there is no ID, or it is not a number. By default it will sort at the end of saveframes of its
                # category. Note that the entry_information ID tag has a different meaning, but since there should
                # only ever be one saveframe of that category, the sort order for it can be any value.
                pass

            return category_order, saveframe_id

        def loop_key(_) -> Union[int, float]:
            """ Helper function to sort the loops."""

            try:
                return ordering.index(_.category)
            except ValueError:
                # Generate an arbitrary sort order for loops that aren't in the schema but make sure that they
                # always come after loops in the schema
                return len(ordering) + abs(int(hashlib.sha1(str(_.category).encode()).hexdigest(), 16))

        # Go through all the saveframes
        for each_frame in self._frame_list:
            each_frame.sort_tags(schema=my_schema)
            # Iterate through the loops
            for each_loop in each_frame:
                each_loop.sort_tags(schema=my_schema)

                # See if we can sort the rows (in addition to tags)
                try:
                    each_loop.sort_rows("Ordinal")
                except ValueError:
                    pass
            each_frame.loops.sort(key=loop_key)
        self._frame_list.sort(key=sf_key)

        # Calculate all the categories present
        categories: set = set()
        for each_frame in self._frame_list:
            categories.add(each_frame.category)

        # tag_prefix -> tag -> original value -> mapped value
        mapping: dict = {}

        # Reassign the ID tags first
        for each_category in categories:

            # First in the saveframe tags
            id_counter: int = 1
            for each_frame in self.get_saveframes_by_category(each_category):
                for tag in each_frame.tags:
                    tag_schema = my_schema.schema.get(f"{each_frame.tag_prefix}.{tag[0]}".lower())
                    if not tag_schema:
                        continue

                    # Make sure the capitalization of the tag is correct
                    tag[0] = tag_schema['Tag field']

                    if tag_schema['lclSfIdFlg'] == 'Y':
                        # If it's an Entry_ID tag, set it that way
                        if tag_schema['entryIdFlg'] == 'Y':
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = self._entry_id
                            tag[1] = self._entry_id
                        # Must be an integer to avoid renumbering the chem_comp ID, for example
                        elif tag_schema['BMRB data type'] == "int":
                            prev_tag = tag[1]
                            if isinstance(tag[1], str):
                                tag[1] = str(id_counter)
                                mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = str(id_counter)
                            else:
                                tag[1] = id_counter
                                mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = id_counter
                        # We need to still store all the other tag values too
                        else:
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]
                    else:
                        mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]

                # Then in the loop
                for loop in each_frame:
                    for x, tag in enumerate(loop.tags):
                        tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                        if not tag_schema:
                            continue

                        # Make sure the tags have the proper capitalization
                        loop.tags[x] = tag_schema['Tag field']

                        for row in loop.data:
                            # We don't re-map loop IDs, but we should still store them
                            mapping[f'{loop.category[1:]}.{tag}.{row[x]}'] = row[x]

                            if tag_schema['lclSfIdFlg'] == 'Y':
                                # If it's an Entry_ID tag, set it that way
                                if tag_schema['entryIdFlg'] == 'Y':
                                    row[x] = self._entry_id
                                # Must be an integer to avoid renumbering the chem_comp ID, for example
                                elif tag_schema['BMRB data type'] == "int":
                                    if row[x] in definitions.NULL_VALUES:
                                        if isinstance(row[x], str):
                                            row[x] = str(id_counter)
                                        else:
                                            row[x] = id_counter
                                    # Handle chem_comp and it's ilk
                                    else:
                                        parent_id_tag = f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}"
                                        parent_id_value = each_frame.get_tag(parent_id_tag)[0]
                                        if isinstance(row[x], str):
                                            row[x] = str(parent_id_value)
                                        else:
                                            row[x] = parent_id_value
                id_counter += 1

        # Now fix any other references
        for saveframe in self:
            for tag in saveframe.tags:
                tag_schema = my_schema.schema.get(f"{saveframe.tag_prefix}.{tag[0]}".lower())
                if not tag_schema:
                    continue
                if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':

                    if tag[1] in definitions.NULL_VALUES:
                        if tag_schema['Nullable']:
                            continue
                        else:
                            logger.warning("A foreign key tag that is not nullable was set to "
                                           f"a null value. Tag: {saveframe.tag_prefix}.{tag[1]} Primary key: "
                                           f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                           f"Value: {tag[1]}")

                    try:
                        tag[1] = mapping[f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{tag[1]}"]
                    except KeyError:
                        logger.warning(f'The tag {saveframe.tag_prefix}.{tag[0]} has value {tag[1]} '
                                       f'but there is no valid primary key.')

            # Now apply the remapping to loops...
            for loop in saveframe:
                for x, tag in enumerate(loop.tags):
                    tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                    if not tag_schema:
                        continue
                    if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':
                        for row in loop.data:
                            if row[x] in definitions.NULL_VALUES:
                                if tag_schema['Nullable']:
                                    continue
                                else:
                                    logger.warning("A foreign key reference tag that is not nullable was set to "
                                                   f"a null value. Tag: {loop.category}.{tag} Foreign key: "
                                                   f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                                   f"Value: {row[x]}")
                            try:
                                row[x] = mapping[
                                    f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{row[x]}"]
                            except KeyError:
                                if (loop.category == '_Atom_chem_shift' or loop.category == '_Entity_comp_index') and \
                                        (tag == 'Atom_ID' or tag == 'Comp_ID'):
                                    continue
                                logger.warning(f'The tag {loop.category}.{tag} has value {row[x]} '
                                               f'but there is no valid primary key '
                                               f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                               f"with the tag value.")

                    # If there is both a label tag and an ID tag, do the reassignment

                    # We found a framecode reference
                    if tag_schema['Foreign Table'] and tag_schema['Foreign Column'] == 'Sf_framecode':

                        # Check if there is a tag pointing to the 'ID' tag
                        for conditional_tag in loop.tags:
                            conditional_tag_schema = my_schema.schema.get(f"{loop.category}.{conditional_tag}".lower())
                            if not conditional_tag_schema:
                                continue
                            if conditional_tag_schema['Foreign Table'] == tag_schema['Foreign Table'] and \
                                    conditional_tag_schema['Foreign Column'] == 'ID' and \
                                    conditional_tag_schema['entryIdFlg'] != 'Y':
                                # We found the matching tag
                                tag_pos = loop.tag_index(conditional_tag)

                                for row in loop.data:
                                    # Check if the tag is null
                                    if row[x] in definitions.NULL_VALUES:
                                        if tag_schema['Nullable']:
                                            continue
                                        else:
                                            logger.warning(f"A foreign saveframe reference tag that is not nullable was"
                                                           f" set to a null value. Tag: {loop.category}.{tag} "
                                                           f"Foreign saveframe: {tag_schema['Foreign Table']}"
                                                           f".{tag_schema['Foreign Column']}")
                                            continue
                                    try:
                                        row[tag_pos] = self.get_saveframe_by_name(row[x][1:]).get_tag('ID')[0]
                                    except KeyError:
                                        logger.warning(f"Missing frame of type {tag} pointed to by {conditional_tag}")

        # Renumber the 'ID' column in a loop
        for each_frame in self._frame_list:
            for loop in each_frame.loops:
                if loop.tag_index('ID') is not None and loop.category != '_Experiment':
                    loop.renumber_rows('ID')

    def print_tree(self) -> None:
        """Prints a summary, tree style, of the frames and loops in
        the entry."""

        print(repr(self))
        frame: saveframe_mod.Saveframe
        for pos, frame in enumerate(self):
            print(f"\t[{pos}] {repr(frame)}")
            for pos2, one_loop in enumerate(frame):
                print(f"\t\t[{pos2}] {repr(one_loop)}")

    def remove_empty_saveframes(self) -> None:
        """ This method will remove all empty saveframes in an entry
        (the loops in the saveframe must also be empty for the saveframe
        to be deleted). "Empty" means no values in tags, not no tags present."""

        self._frame_list = [_ for _ in self._frame_list if not _.empty]

    def remove_saveframe(self, item: Union[str, List[str], Tuple[str], 'saveframe_mod.Saveframe',
                                           List['saveframe_mod.Saveframe'], Tuple['saveframe_mod.Saveframe']]) -> None:
        """ Removes one or more saveframes from the entry. You can remove saveframes either by passing the saveframe
        object itself, the saveframe name (as a string), or a list or tuple of either."""

        parsed_list: list
        if isinstance(item, tuple):
            parsed_list = list(item)
        elif isinstance(item, list):
            parsed_list = item
        elif isinstance(item, (str, saveframe_mod.Saveframe)):
            parsed_list = [item]
        else:
            raise ValueError('The item you provided was not one or more saveframe objects or saveframe names (strings).'
                             f' Item type: {type(item)}')

        frames_to_remove = []
        for saveframe in parsed_list:
            if isinstance(saveframe, str):
                try:
                    frames_to_remove.append(self.frame_dict[saveframe])
                except KeyError:
                    raise ValueError('At least one saveframe specified to remove was not found in this saveframe. '
                                     f'First missing saveframe: {saveframe}')
            elif isinstance(saveframe, saveframe_mod.Saveframe):
                if saveframe not in self._frame_list:
                    raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
                                     f'missing loop: {saveframe}')
                frames_to_remove.append(saveframe)
            else:
                raise ValueError('One of the items you provided was not a saveframe object or saveframe name '
                                 f'(string). Item: {repr(saveframe)}')

        self._frame_list = [_ for _ in self._frame_list if _ not in frames_to_remove]

    def rename_saveframe(self, original_name: str, new_name: str) -> None:
        """ Renames a saveframe and updates all pointers to that
        saveframe in the entry with the new name."""

        # Strip off the starting $ in the names
        if original_name.startswith("$"):
            original_name = original_name[1:]
        if new_name.startswith("$"):
            new_name = new_name[1:]

        # Make sure there is no saveframe called what the new name is
        if [x.name for x in self._frame_list].count(new_name) > 0:
            raise ValueError(f"Cannot rename the saveframe '{original_name}' as '{new_name}' because a "
                             f"saveframe with that name already exists in the entry.")

        # This can raise a ValueError, but no point catching it since it really is a ValueError if they provide a name
        # of a saveframe that doesn't exist in the entry.
        change_frame = self.get_saveframe_by_name(original_name)

        # Update the saveframe
        change_frame.name = new_name

        # What the new references should look like
        old_reference = "$" + original_name
        new_reference = "$" + new_name

        # Go through all the saveframes
        for each_frame in self:
            # Iterate through the tags
            for each_tag in each_frame.tags:
                if each_tag[1] == old_reference:
                    each_tag[1] = new_reference
            # Iterate through the loops
            for each_loop in each_frame:
                for each_row in each_loop:
                    for pos, val in enumerate(each_row):
                        if val == old_reference:
                            each_row[pos] = new_reference

    def validate(self, validate_schema: bool = True, schema: Schema = None,
                 validate_star: bool = True) -> List[str]:
        """Validate an entry in a variety of ways. Returns a list of
        errors found. 0-length list indicates no errors found. By
        default all validation modes are enabled.

        validate_schema - Determines if the entry is validated against
        the NMR-STAR schema. You can pass your own custom schema if desired,
        otherwise the cached schema will be used.

        validate_star - Determines if the STAR syntax checks are ran."""

        errors = []

        # They should validate for something...
        if not validate_star and not validate_schema:
            errors.append("Validate() should be called with at least one validation method enabled.")

        if validate_star:

            # Check for saveframes with same name
            saveframe_names = sorted(x.name for x in self)
            for ordinal in range(0, len(saveframe_names) - 2):
                if saveframe_names[ordinal] == saveframe_names[ordinal + 1]:
                    errors.append(f"Multiple saveframes with same name: '{saveframe_names[ordinal]}'")

            # Check for dangling references
            fdict = self.frame_dict

            for each_frame in self:
                # Iterate through the tags
                for each_tag in each_frame.tags:
                    tag_copy = str(each_tag[1])
                    if (tag_copy.startswith("$")
                            and tag_copy[1:] not in fdict):
                        errors.append(f"Dangling saveframe reference '{each_tag[1]}' in "
                                      f"tag '{each_frame.tag_prefix}.{each_tag[0]}'")

                # Iterate through the loops
                for each_loop in each_frame:
                    for each_row in each_loop:
                        for pos, val in enumerate(each_row):
                            val = str(val)
                            if val.startswith("$") and val[1:] not in fdict:
                                errors.append(f"Dangling saveframe reference '{val}' in tag "
                                              f"'{each_loop.category}.{each_loop.tags[pos]}'")

        # Ask the saveframes to check themselves for errors
        for frame in self:
            errors.extend(frame.validate(validate_schema=validate_schema, schema=schema, validate_star=validate_star))

        return errors

    def write_to_file(self, file_name: str, format_: str = "nmrstar", show_comments: bool = True,
                      skip_empty_loops: bool = False, skip_empty_tags: bool = False) -> None:
        """ Writes the entry to the specified file in NMR-STAR format.

        Optionally specify:
        show_comments=False to disable the comments that are by default inserted. Ignored when writing json.
        skip_empty_loops=False to force printing loops with no tags at all (loops with null tags are still printed)
        skip_empty_tags=True will omit tags in the saveframes and loops which have no non-null values.
        format_=json to write to the file in JSON format."""

        write_to_file(self, file_name=file_name, format_=format_, show_comments=show_comments,
                      skip_empty_loops=skip_empty_loops, skip_empty_tags=skip_empty_tags)
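
The class methods and accessors above are the public entry points into this class. As a rough illustration of how they fit together, the sketch below parses a small NMR-STAR document from a string, reads a tag back out at the entry level, validates, and writes the result to disk. It is a minimal sketch, not code from the package: the entry ID "demo", the sample STAR text, and the output path "demo.str" are illustrative assumptions.

import pynmrstar

# A tiny NMR-STAR document (illustrative data, not taken from the package).
star_text = """data_demo

save_entry_information
   _Entry.Sf_category   entry_information
   _Entry.Sf_framecode  entry_information
   _Entry.ID            demo
   _Entry.Title         'Example entry'
save_
"""

# Entry.from_string() wraps the text in StringIO and hands it to the parser.
entry = pynmrstar.Entry.from_string(star_text)

# Entry-level get_tag() requires the fully qualified name (category.tag).
print(entry.get_tag('Entry.Title'))                            # expected: ['Example entry']
print(entry.get_saveframes_by_category('entry_information'))   # the one saveframe above

# validate() returns a list of problems; an empty list means no errors found.
print(entry.validate())

# write_to_file() serializes the entry; format_='json' would write JSON instead.
entry.write_to_file('demo.str')

Entry.from_file('some_file.str') or Entry.from_database(<accession number>) could be used in place of from_string() when the data lives on disk or on the public BMRB server; both paths ultimately feed the same parser shown in __init__ above.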