pynmrstar 3.3.6__pp311-pypy311_pp73-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pynmrstar might be problematic. Click here for more details.

pynmrstar/entry.py ADDED
@@ -0,0 +1,972 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ import warnings
5
+ from io import StringIO
6
+ from pathlib import Path
7
+ from typing import TextIO, BinaryIO, Union, List, Optional, Dict, Any, Tuple
8
+
9
+ from pynmrstar import definitions, utils, loop as loop_mod, parser as parser_mod, saveframe as saveframe_mod
10
+ from pynmrstar._internal import _json_serialize, _interpret_file, _get_entry_from_database, write_to_file
11
+ from pynmrstar.exceptions import InvalidStateError
12
+ from pynmrstar.schema import Schema
13
+
14
+ logger = logging.getLogger('pynmrstar')
15
+
16
+
17
+ class Entry(object):
18
+ """An object oriented representation of a BMRB entry. You can initialize this
19
+ object several ways; (e.g. from a file, from the official database,
20
+ from scratch) see the class methods below. """
21
+
22
+ def __contains__(self, item: Any):
23
+ """ Check if the given item is present in the entry. """
24
+
25
+ # Prepare for processing
26
+ if isinstance(item, (list, tuple)):
27
+ to_process: List[Union[str, saveframe_mod.Saveframe, loop_mod.Loop]] = list(item)
28
+ elif isinstance(item, (loop_mod.Loop, saveframe_mod.Saveframe, str)):
29
+ to_process = [item]
30
+ else:
31
+ return False
32
+
33
+ for item in to_process:
34
+ if isinstance(item, saveframe_mod.Saveframe):
35
+ if item not in self._frame_list:
36
+ return False
37
+ elif isinstance(item, (loop_mod.Loop, str)):
38
+ found = False
39
+ for saveframe in self._frame_list:
40
+ if item in saveframe:
41
+ found = True
42
+ break
43
+ if not found:
44
+ return False
45
+ else:
46
+ return False
47
+ return True
48
+
49
+ def __delitem__(self, item: Union['saveframe_mod.Saveframe', int, str]) -> None:
50
+ """Remove the indicated saveframe."""
51
+
52
+ if isinstance(item, int):
53
+ try:
54
+ del self._frame_list[item]
55
+ except IndexError:
56
+ raise IndexError(f'Index out of range: no saveframe at index: {item}')
57
+ else:
58
+ self.remove_saveframe(item)
59
+
60
+ def __eq__(self, other) -> bool:
61
+ """Returns True if this entry is equal to another entry, false
62
+ if it is not equal."""
63
+
64
+ if not isinstance(other, Entry):
65
+ return False
66
+
67
+ return (self.entry_id, self._frame_list) == (other.entry_id, other._frame_list)
68
+
69
+ def __getitem__(self, item: Union[int, str]) -> 'saveframe_mod.Saveframe':
70
+ """Get the indicated saveframe."""
71
+
72
+ try:
73
+ return self._frame_list[item]
74
+ except TypeError:
75
+ return self.get_saveframe_by_name(item)
76
+
77
+ def __init__(self, **kwargs) -> None:
78
+ """ You should not directly instantiate an Entry using this method.
79
+ Instead use the class methods:
80
+
81
+ :py:meth:`Entry.from_database`, :py:meth:`Entry.from_file`,
82
+ :py:meth:`Entry.from_string`, :py:meth:`Entry.from_scratch`,
83
+ :py:meth:`Entry.from_json`, and :py:meth:`Entry.from_template`"""
84
+
85
+ # Default initializations
86
+ self._entry_id: Union[str, int] = 0
87
+ self._frame_list: List[saveframe_mod.Saveframe] = []
88
+ self.source: Optional[str] = None
89
+
90
+ # They initialized us wrong
91
+ if len(kwargs) == 0:
92
+ raise ValueError("You should not directly instantiate an Entry using this method. Instead use the "
93
+ "class methods: Entry.from_database(), Entry.from_file(), Entry.from_string(), "
94
+ "Entry.from_scratch(), and Entry.from_json().")
95
+
96
+ if 'the_string' in kwargs:
97
+ # Parse from a string by wrapping it in StringIO
98
+ star_buffer: StringIO = StringIO(kwargs['the_string'])
99
+ self.source = "from_string()"
100
+ elif 'file_name' in kwargs:
101
+ star_buffer = _interpret_file(kwargs['file_name'])
102
+ self.source = f"from_file('{kwargs['file_name']}')"
103
+ # Creating from template (schema)
104
+ elif 'all_tags' in kwargs:
105
+ self._entry_id = kwargs['entry_id']
106
+
107
+ saveframe_categories: dict = {}
108
+ schema = utils.get_schema(kwargs['schema'])
109
+ schema_obj = schema.schema
110
+ for tag in [schema_obj[x.lower()] for x in schema.schema_order]:
111
+ category = tag['SFCategory']
112
+ if category not in saveframe_categories:
113
+ saveframe_categories[category] = True
114
+ templated_saveframe = saveframe_mod.Saveframe.from_template(category, category + "_1",
115
+ entry_id=self._entry_id,
116
+ all_tags=kwargs['all_tags'],
117
+ default_values=kwargs['default_values'],
118
+ schema=schema)
119
+ self._frame_list.append(templated_saveframe)
120
+ entry_saveframe = self.get_saveframes_by_category('entry_information')[0]
121
+ entry_saveframe['NMR_STAR_version'] = schema.version
122
+ entry_saveframe['Original_NMR_STAR_version'] = schema.version
123
+ return
124
+ else:
125
+ # Initialize a blank entry
126
+ self._entry_id = kwargs['entry_id']
127
+ self.source = "from_scratch()"
128
+ return
129
+
130
+ # Load the BMRB entry from the file
131
+ parser: parser_mod.Parser = parser_mod.Parser(entry_to_parse_into=self)
132
+ parser.parse(star_buffer.read(), source=self.source, convert_data_types=kwargs.get('convert_data_types', False),
133
+ raise_parse_warnings=kwargs.get('raise_parse_warnings', False))
134
+
135
+ def __iter__(self) -> saveframe_mod.Saveframe:
136
+ """ Yields each of the saveframes contained within the entry. """
137
+
138
+ for saveframe in self._frame_list:
139
+ yield saveframe
140
+
141
+ def __len__(self) -> int:
142
+ """ Returns the number of saveframes in the entry."""
143
+
144
+ return len(self._frame_list)
145
+
146
+ def __repr__(self) -> str:
147
+ """Returns a description of the entry."""
148
+
149
+ return f"<pynmrstar.Entry '{self._entry_id}' {self.source}>"
150
+
151
def __setitem__(self, key: Union[int, str], item: 'saveframe_mod.Saveframe') -> None:
    """Replace an existing saveframe, addressed by position or by name.

    :param key: An integer position in the saveframe list, or the name of
                a saveframe already present in the entry.
    :param item: The saveframe to store at that location.
    :raises ValueError: If ``item`` is not a saveframe, if no saveframe
                        has the given name, or if several saveframes share
                        that name. The entry is left unmodified in all of
                        these cases."""

    if not isinstance(item, saveframe_mod.Saveframe):
        raise ValueError("You can only assign a saveframe to an entry splice. You attempted to assign: "
                         f"'{repr(item)}'")

    # Replace by ordinal
    if isinstance(key, int):
        self._frame_list[key] = item
        return

    # TODO: Consider stripping this behavior out - it isn't clear it is useful
    # Replace by name. Collect every match first so that an ambiguous name
    # is rejected before anything in the entry has been changed.
    matches = [pos for pos, frame in enumerate(self._frame_list) if frame.name == key]

    if not matches:
        raise ValueError(f"Saveframe with name '{key}' does not exist and therefore cannot be "
                         f"written to. Use the add_saveframe() method to add new saveframes.")
    if len(matches) > 1:
        raise ValueError(f"Cannot replace the saveframe with the name '{key}' "
                         f"because multiple saveframes in the entry have the same name. "
                         f'This library does not allow that normally, as it is '
                         f'invalid NMR-STAR. Did you manually edit the Entry.frame_list '
                         f'object? Please use the Entry.add_saveframe() method instead to '
                         f'add new saveframes.')

    self._frame_list[matches[0]] = item
182
+
183
+ def __str__(self, skip_empty_loops: bool = False, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
184
+ """Returns the entire entry in STAR format as a string."""
185
+
186
+ sf_strings = []
187
+ seen_saveframes = {}
188
+ for saveframe_obj in self:
189
+ if saveframe_obj.category in seen_saveframes:
190
+ sf_strings.append(saveframe_obj.format(skip_empty_loops=skip_empty_loops,
191
+ skip_empty_tags=skip_empty_tags, show_comments=False))
192
+ else:
193
+ sf_strings.append(saveframe_obj.format(skip_empty_loops=skip_empty_loops,
194
+ skip_empty_tags=skip_empty_tags, show_comments=show_comments))
195
+ seen_saveframes[saveframe_obj.category] = True
196
+
197
+ return f"data_{self.entry_id}\n\n" + "\n".join(sf_strings)
198
+
199
+ @property
200
+ def category_list(self) -> List[str]:
201
+ """ Returns a list of the unique categories present in the entry. """
202
+
203
+ category_list = []
204
+ for saveframe in self._frame_list:
205
+ category = saveframe.category
206
+ if category and category not in category_list:
207
+ category_list.append(category)
208
+ return list(category_list)
209
+
210
+ @property
211
+ def empty(self) -> bool:
212
+ """ Check if the entry has no data. Ignore the structural tags."""
213
+
214
+ for saveframe in self._frame_list:
215
+ if not saveframe.empty:
216
+ return False
217
+
218
+ return True
219
+
220
+ @property
221
+ def entry_id(self) -> Union[str, int]:
222
+ """ When read, fetches the entry ID.
223
+
224
+ When set, updates the entry ID for the Entry, and updates all the tags which
225
+ are foreign keys of the Entry_ID. (For example, Entry.ID and
226
+ Citation.Entry_ID will be updated, if present.)
227
+ """
228
+ return self._entry_id
229
+
230
@entry_id.setter
def entry_id(self, value: Union[str, int]) -> None:
    # Store the new ID, then propagate it to every tag the default schema
    # flags as holding the entry ID (entryIdFlg == 'Y').
    self._entry_id = value

    schema = utils.get_schema()
    for frame in self._frame_list:
        # Saveframe-level tags are [name, value] pairs
        for tag_pair in frame.tags:
            qualified_name = ".".join((frame.tag_prefix, tag_pair[0])).lower()

            try:
                if schema.schema[qualified_name]['entryIdFlg'] == 'Y':
                    tag_pair[1] = self._entry_id
            except KeyError:
                # Tag is not described by the schema - leave it alone
                pass

        # Loop columns: overwrite the whole column with the new ID
        for one_loop in frame.loops:
            for column in one_loop.tags:
                qualified_name = ".".join((one_loop.category, column)).lower()
                try:
                    if schema.schema[qualified_name]['entryIdFlg'] == 'Y':
                        one_loop[column] = [self._entry_id] * len(one_loop[column])
                except KeyError:
                    pass
253
+
254
+ @property
255
+ def frame_dict(self) -> Dict[str, 'saveframe_mod.Saveframe']:
256
+ """Returns a dictionary of saveframe name -> saveframe object mappings."""
257
+
258
+ fast_dict = dict((frame.name, frame) for frame in self._frame_list)
259
+
260
+ # If there are no duplicates then continue
261
+ if len(fast_dict) == len(self._frame_list):
262
+ return fast_dict
263
+
264
+ # Figure out where the duplicate is
265
+ frame_dict = {}
266
+
267
+ for frame in self._frame_list:
268
+ if frame.name in frame_dict:
269
+ raise InvalidStateError("The entry has multiple saveframes with the same name. That is not allowed in "
270
+ "the NMR-STAR format. Please remove or rename one. Duplicate name: "
271
+ f"'{frame.name}'. Furthermore, please use Entry.add_saveframe() and "
272
+ f"Entry.remove_saveframe() rather than manually editing the Entry.frame_list "
273
+ f"list, which will prevent this state from existing in the future.")
274
+ frame_dict[frame.name] = frame
275
+
276
+ return frame_dict
277
+
278
+ @property
279
+ def frame_list(self) -> List['saveframe_mod.Saveframe']:
280
+ return self._frame_list
281
+
282
@classmethod
def from_database(cls,
                  entry_num: Union[str, int],
                  convert_data_types: bool = False,
                  schema: Schema = None):
    """Fetch an entry from the public BMRB server (outbound HTTP access
    is required) and return it.

    convert_data_types: when True, the loaded values are converted to the
    Python type the schema prescribes for each tag (decimal.Decimal for
    floats, int for integers, datetime.date for dates; strings stay
    strings). Values are passed through str() when printed, so apart from
    an uppercase "E" in scientific notation becoming "e", the re-printed
    NMR-STAR text should be unchanged.

    schema: an optional custom schema object to use instead of the
    default one."""

    lookup_options = {'convert_data_types': convert_data_types, 'schema': schema}
    return _get_entry_from_database(entry_num, **lookup_options)
305
+
306
@classmethod
def from_file(cls,
              the_file: Union[str, Path, TextIO, BinaryIO],
              convert_data_types: bool = False,
              raise_parse_warnings: bool = False,
              schema: Schema = None):
    """Create an entry by parsing a file.

    the_file may be a path string, a pathlib.Path, or an open file
    handle. A string starting with http://, https://, or ftp:// is
    opened using that protocol.

    convert_data_types: when True, the loaded values are converted to the
    Python type the schema prescribes for each tag (decimal.Decimal for
    floats, int for integers, datetime.date for dates; strings stay
    strings). Values are passed through str() when printed, so apart from
    an uppercase "E" in scientific notation becoming "e", the re-printed
    NMR-STAR text should be unchanged.

    raise_parse_warnings: when True, non-valid but ignorable issues raise
    a ParsingError instead of being logged as warnings.

    schema: an optional custom schema object to use. """

    constructor_args = {
        'file_name': the_file,
        'convert_data_types': convert_data_types,
        'raise_parse_warnings': raise_parse_warnings,
        'schema': schema,
    }
    return cls(**constructor_args)
336
+
337
+ @classmethod
338
+ def from_json(cls, json_dict: Union[dict, str]):
339
+ """Create an entry from JSON (serialized or unserialized JSON)."""
340
+
341
+ # If they provided a string, try to load it using JSON
342
+ if not isinstance(json_dict, dict):
343
+ try:
344
+ json_dict = json.loads(json_dict)
345
+ except (TypeError, ValueError):
346
+ raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.")
347
+
348
+ # Make sure it has the correct keys
349
+ if "saveframes" not in json_dict:
350
+ raise ValueError("The JSON you provide must be a hash and must contain the key 'saveframes' - even if the "
351
+ "key points to 'None'.")
352
+ if "entry_id" not in json_dict and "bmrb_id" not in json_dict:
353
+ raise ValueError("The JSON you provide must be a hash and must contain the key 'entry_id' - even if the"
354
+ " key points to 'None'.")
355
+ # Until the migration is complete, 'bmrb_id' is a synonym for
356
+ # 'entry_id'
357
+ if 'entry_id' not in json_dict:
358
+ json_dict['entry_id'] = json_dict['bmrb_id']
359
+
360
+ # Create an entry from scratch and populate it
361
+ ret = Entry.from_scratch(json_dict['entry_id'])
362
+ ret._frame_list = [saveframe_mod.Saveframe.from_json(x) for x in json_dict['saveframes']]
363
+ ret.source = "from_json()"
364
+
365
+ # Return the new loop
366
+ return ret
367
+
368
@classmethod
def from_string(cls,
                the_string: str,
                convert_data_types: bool = False,
                raise_parse_warnings: bool = False,
                schema: Schema = None):
    """Create an entry by parsing NMR-STAR text held in a string.

    convert_data_types: when True, the loaded values are converted to the
    Python type the schema prescribes for each tag (decimal.Decimal for
    floats, int for integers, datetime.date for dates; strings stay
    strings). Values are passed through str() when printed, so apart from
    an uppercase "E" in scientific notation becoming "e", the re-printed
    NMR-STAR text should be unchanged.

    raise_parse_warnings: when True, non-valid but ignorable issues raise
    a ParsingError instead of being logged as warnings.

    schema: an optional custom schema object to use."""

    constructor_args = {
        'the_string': the_string,
        'convert_data_types': convert_data_types,
        'raise_parse_warnings': raise_parse_warnings,
        'schema': schema,
    }
    return cls(**constructor_args)
396
+
397
+ @classmethod
398
+ def from_scratch(cls, entry_id: Union[str, int]):
399
+ """Create an empty entry that you can programmatically add to.
400
+ You must pass a value corresponding to the Entry ID.
401
+ (The unique identifier "xxx" from "data_xxx".)"""
402
+
403
+ return cls(entry_id=entry_id)
404
+
405
@classmethod
def from_template(cls,
                  entry_id,
                  all_tags=False,
                  default_values=False,
                  schema=None) -> 'Entry':
    """ Create an entry pre-populated with every saveframe and loop the
    schema defines, without assigning values. The entry ID must be given.

    all_tags: include every tag rather than only the mandatory ones.

    default_values: fill in the default values from the schema.

    schema: a custom schema to use instead of the default one."""

    resolved_schema = utils.get_schema(schema)
    templated_entry = cls(entry_id=entry_id,
                          all_tags=all_tags,
                          default_values=default_values,
                          schema=resolved_schema)
    # Record which schema version the template came from
    templated_entry.source = f"from_template({resolved_schema.version})"
    return templated_entry
427
+
428
def add_saveframe(self, frame) -> None:
    """Append a saveframe to the entry.

    Raises ValueError if the object is not a saveframe, or if the entry
    already holds a saveframe with the same name."""

    is_saveframe = isinstance(frame, saveframe_mod.Saveframe)
    if not is_saveframe:
        raise ValueError("You can only add instances of saveframes using this method. You attempted to add "
                         f"the object: '{repr(frame)}'.")

    # Saveframe names must stay unique within an entry
    existing_names = self.frame_dict
    if frame.name in existing_names:
        raise ValueError(f"Cannot add a saveframe with name '{frame.name}' since a saveframe with that "
                         f"name already exists in the entry.")

    self._frame_list.append(frame)
442
+
443
+ def compare(self, other) -> List[str]:
444
+ """Returns the differences between two entries as a list.
445
+ Non-equal entries will always be detected, but specific differences
446
+ detected depends on order of entries."""
447
+
448
+ diffs = []
449
+ if self is other:
450
+ return []
451
+ if isinstance(other, str):
452
+ if str(self) == other:
453
+ return []
454
+ else:
455
+ return ['String was not exactly equal to entry.']
456
+ elif not isinstance(other, Entry):
457
+ return ['Other object is not of class Entry.']
458
+ try:
459
+ if str(self.entry_id) != str(other.entry_id):
460
+ diffs.append(f"Entry ID does not match between entries: '{self.entry_id}' vs '{other.entry_id}'.")
461
+ if len(self._frame_list) != len(other.frame_list):
462
+ diffs.append(f"The number of saveframes in the entries are not equal: '{len(self._frame_list)}' vs "
463
+ f"'{len(other.frame_list)}'.")
464
+ for frame in self._frame_list:
465
+ other_frame_dict = other.frame_dict
466
+ if frame.name not in other_frame_dict:
467
+ diffs.append(f"No saveframe with name '{frame.name}' in other entry.")
468
+ else:
469
+ comp = frame.compare(other_frame_dict[frame.name])
470
+ if len(comp) > 0:
471
+ diffs.append(f"Saveframes do not match: '{frame.name}'.")
472
+ diffs.extend(comp)
473
+
474
+ except AttributeError as err:
475
+ diffs.append(f"An exception occurred while comparing: '{err}'.")
476
+
477
+ return diffs
478
+
479
+ def add_missing_tags(self, schema: Schema = None, all_tags: bool = False) -> None:
480
+ """ Automatically adds any missing tags (according to the schema)
481
+ to all saveframes and loops and sorts the tags. """
482
+
483
+ for saveframe in self._frame_list:
484
+ saveframe.add_missing_tags(schema=schema, all_tags=all_tags)
485
+
486
+ def delete_empty_saveframes(self) -> None:
487
+ """ Deprecated. Please use `py:meth:pynmrstar.Entry.remove_empty_saveframes`. """
488
+
489
+ warnings.warn('Deprecated. Please use remove_empty_saveframes() instead.', DeprecationWarning)
490
+ return self.remove_empty_saveframes()
491
+
492
+ def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
493
+ """ The same as calling str(Entry), except that you can pass options
494
+ to customize how the entry is printed.
495
+
496
+ skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
497
+ skip_empty_tags will omit tags in the saveframes and loops which have no non-null values.
498
+ show_comments will show the standard comments before a saveframe."""
499
+
500
+ return self.__str__(skip_empty_loops=skip_empty_loops, skip_empty_tags=skip_empty_tags,
501
+ show_comments=show_comments)
502
+
503
+ def get_json(self, serialize: bool = True) -> Union[dict, str]:
504
+ """ Returns the entry in JSON format. If serialize is set to
505
+ False a dictionary representation of the entry that is
506
+ serializeable is returned instead."""
507
+
508
+ frames = [x.get_json(serialize=False) for x in self._frame_list]
509
+
510
+ entry_dict = {
511
+ "entry_id": self.entry_id,
512
+ "saveframes": frames
513
+ }
514
+
515
+ if serialize:
516
+ return json.dumps(entry_dict, default=_json_serialize)
517
+ else:
518
+ return entry_dict
519
+
520
def get_loops_by_category(self, value: str) -> List['loop_mod.Loop']:
    """Return every loop in the entry whose category matches the given
    one. The comparison is case-insensitive and made after the requested
    category has been passed through utils.format_category()."""

    wanted = utils.format_category(value).lower()

    return [one_loop
            for frame in self._frame_list
            for one_loop in frame.loops
            if one_loop.category.lower() == wanted]
531
+
532
+ def get_saveframe_by_name(self, saveframe_name: str) -> 'saveframe_mod.Saveframe':
533
+ """Allows fetching a saveframe by name."""
534
+
535
+ frames = self.frame_dict
536
+ if saveframe_name in frames:
537
+ return frames[saveframe_name]
538
+ else:
539
+ raise KeyError(f"No saveframe with name '{saveframe_name}'")
540
+
541
+ def get_saveframes_by_category(self, value: str) -> List['saveframe_mod.Saveframe']:
542
+ """Allows fetching saveframes by category."""
543
+
544
+ return self.get_saveframes_by_tag_and_value("sf_category", value)
545
+
546
+ def get_saveframes_by_tag_and_value(self, tag_name: str, value: Any) -> List['saveframe_mod.Saveframe']:
547
+ """Allows fetching saveframe(s) by tag and tag value."""
548
+
549
+ ret_frames = []
550
+
551
+ for frame in self._frame_list:
552
+ results = frame.get_tag(tag_name)
553
+ if results != [] and results[0] == value:
554
+ ret_frames.append(frame)
555
+
556
+ return ret_frames
557
+
558
+ def get_tag(self, tag: str, whole_tag: bool = False) -> list:
559
+ """ Given a tag (E.g. _Assigned_chem_shift_list.Data_file_name)
560
+ return a list of all values for that tag. Specify whole_tag=True
561
+ and the [tag_name, tag_value] pair will be returned."""
562
+
563
+ if "." not in str(tag):
564
+ raise ValueError("You must provide the tag category to call this method at the entry level. For "
565
+ "example, you must provide 'Entry.Title' rather than 'Title' as the tag if calling"
566
+ " this at the Entry level. You can call Saveframe.get_tag('Title') without issue.")
567
+
568
+ results = []
569
+ for frame in self._frame_list:
570
+ results.extend(frame.get_tag(tag, whole_tag=whole_tag))
571
+
572
+ return results
573
+
574
+ def get_tags(self, tags: list) -> Dict[str, list]:
575
+ """ Given a list of tags, get all of the tags and return the
576
+ results in a dictionary."""
577
+
578
+ # All tags
579
+ if tags is None or not isinstance(tags, list):
580
+ raise ValueError("Please provide a list of tags.")
581
+
582
+ results = {}
583
+ for tag in tags:
584
+ results[tag] = self.get_tag(tag)
585
+
586
+ return results
587
+
588
def normalize(self, schema: Optional[Schema] = None) -> None:
    """ Sorts saveframes, loops, and tags according to the schema
    provided (or BMRB default if none provided).

    Also re-assigns ID tag values and updates tag links to ID values.

    The work is done in place, in this order: sort tags/rows/loops and
    saveframes; renumber saveframe ID tags (recording old -> new values);
    rewrite foreign-key tags using that record; synchronise ID columns
    that accompany saveframe-name ("Sf_framecode") references; finally
    renumber the 'ID' column of loops.

    :param schema: Schema to use; resolved through utils.get_schema()."""

    # Assign all the ID tags, and update all links to ID tags
    my_schema = utils.get_schema(schema)

    # Sort the saveframes according to ID, if an ID exists. Otherwise, still sort by category
    ordering = my_schema.category_order

    def sf_key(_: saveframe_mod.Saveframe) -> Tuple[Union[int, float], Union[int, float]]:
        """ Helper function to sort the saveframes.
        Returns (category order, saveframe order) """

        # If not a real category, generate an artificial but stable order > the real saveframes
        try:
            category_order = ordering.index(_.tag_prefix)
        except (ValueError, KeyError):
            if _.category is None:
                category_order = float('infinity')
            else:
                category_order = len(ordering) + abs(int(hashlib.sha1(str(_.category).encode()).hexdigest(), 16))

        # See if there is an ID tag, and it is a number
        saveframe_id = float('infinity')
        try:
            saveframe_id = int(_.get_tag("ID")[0])
        except (ValueError, KeyError, IndexError, TypeError):
            # Either there is no ID, or it is not a number. By default it will sort at the end of saveframes of its
            # category. Note that the entry_information ID tag has a different meaning, but since there should
            # only ever be one saveframe of that category, the sort order for it can be any value.
            pass

        return category_order, saveframe_id

    def loop_key(_) -> Union[int, float]:
        """ Helper function to sort the loops."""

        try:
            return ordering.index(_.category)
        except ValueError:
            # Generate an arbitrary sort order for loops that aren't in the schema but make sure that they
            # always come after loops in the schema
            return len(ordering) + abs(int(hashlib.sha1(str(_.category).encode()).hexdigest(), 16))

    # Go through all the saveframes
    for each_frame in self._frame_list:
        each_frame.sort_tags(schema=my_schema)
        # Iterate through the loops
        for each_loop in each_frame:
            each_loop.sort_tags(schema=my_schema)

            # See if we can sort the rows (in addition to tags)
            try:
                each_loop.sort_rows("Ordinal")
            except ValueError:
                pass
        each_frame.loops.sort(key=loop_key)
    self._frame_list.sort(key=sf_key)

    # Calculate all the categories present
    categories: set = set()
    for each_frame in self._frame_list:
        categories.add(each_frame.category)

    # Flat lookup keyed by "<category without first character>.<tag>.<original value>" -> mapped value
    mapping: dict = {}

    # Reassign the ID tags first
    for each_category in categories:

        # First in the saveframe tags
        id_counter: int = 1
        for each_frame in self.get_saveframes_by_category(each_category):
            for tag in each_frame.tags:
                tag_schema = my_schema.schema.get(f"{each_frame.tag_prefix}.{tag[0]}".lower())
                if not tag_schema:
                    continue

                # Make sure the capitalization of the tag is correct
                tag[0] = tag_schema['Tag field']

                if tag_schema['lclSfIdFlg'] == 'Y':
                    # If it's an Entry_ID tag, set it that way
                    if tag_schema['entryIdFlg'] == 'Y':
                        mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = self._entry_id
                        tag[1] = self._entry_id
                    # Must be an integer to avoid renumbering the chem_comp ID, for example
                    elif tag_schema['BMRB data type'] == "int":
                        prev_tag = tag[1]
                        # Keep the value's type: strings stay strings
                        if isinstance(tag[1], str):
                            tag[1] = str(id_counter)
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = str(id_counter)
                        else:
                            tag[1] = id_counter
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = id_counter
                    # We need to still store all the other tag values too
                    else:
                        mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]
                else:
                    mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]

            # Then in the loop
            for loop in each_frame:
                for x, tag in enumerate(loop.tags):
                    tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                    if not tag_schema:
                        continue

                    # Make sure the tags have the proper capitalization
                    loop.tags[x] = tag_schema['Tag field']

                    for row in loop.data:
                        # We don't re-map loop IDs, but we should still store them
                        mapping[f'{loop.category[1:]}.{tag}.{row[x]}'] = row[x]

                        if tag_schema['lclSfIdFlg'] == 'Y':
                            # If it's an Entry_ID tag, set it that way
                            if tag_schema['entryIdFlg'] == 'Y':
                                row[x] = self._entry_id
                            # Must be an integer to avoid renumbering the chem_comp ID, for example
                            elif tag_schema['BMRB data type'] == "int":
                                if row[x] in definitions.NULL_VALUES:
                                    if isinstance(row[x], str):
                                        row[x] = str(id_counter)
                                    else:
                                        row[x] = id_counter
                            # Handle chem_comp and it's ilk
                            else:
                                parent_id_tag = f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}"
                                parent_id_value = each_frame.get_tag(parent_id_tag)[0]
                                if isinstance(row[x], str):
                                    row[x] = str(parent_id_value)
                                else:
                                    row[x] = parent_id_value
            # One ID per saveframe within the category
            id_counter += 1

    # Now fix any other references
    for saveframe in self:
        for tag in saveframe.tags:
            tag_schema = my_schema.schema.get(f"{saveframe.tag_prefix}.{tag[0]}".lower())
            if not tag_schema:
                continue
            if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':

                if tag[1] in definitions.NULL_VALUES:
                    if tag_schema['Nullable']:
                        continue
                    else:
                        # tag[0] is the tag name, tag[1] its (null) value
                        logger.warning("A foreign key tag that is not nullable was set to "
                                       f"a null value. Tag: {saveframe.tag_prefix}.{tag[0]} Primary key: "
                                       f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                       f"Value: {tag[1]}")

                try:
                    tag[1] = mapping[f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{tag[1]}"]
                except KeyError:
                    logger.warning(f'The tag {saveframe.tag_prefix}.{tag[0]} has value {tag[1]} '
                                   f'but there is no valid primary key.')

        # Now apply the remapping to loops...
        for loop in saveframe:
            for x, tag in enumerate(loop.tags):
                tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                if not tag_schema:
                    continue
                if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':
                    for row in loop.data:
                        if row[x] in definitions.NULL_VALUES:
                            if tag_schema['Nullable']:
                                continue
                            else:
                                logger.warning("A foreign key reference tag that is not nullable was set to "
                                               f"a null value. Tag: {loop.category}.{tag} Foreign key: "
                                               f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                               f"Value: {row[x]}")
                        try:
                            row[x] = mapping[
                                f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{row[x]}"]
                        except KeyError:
                            # These two loop/tag combinations are deliberately not reported
                            if (loop.category == '_Atom_chem_shift' or loop.category == '_Entity_comp_index') and \
                                    (tag == 'Atom_ID' or tag == 'Comp_ID'):
                                continue
                            logger.warning(f'The tag {loop.category}.{tag} has value {row[x]} '
                                           f'but there is no valid primary key '
                                           f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                           f"with the tag value.")

                # If there is both a label tag and an ID tag, do the reassignment

                # We found a framecode reference
                if tag_schema['Foreign Table'] and tag_schema['Foreign Column'] == 'Sf_framecode':

                    # Check if there is a tag pointing to the 'ID' tag
                    for conditional_tag in loop.tags:
                        conditional_tag_schema = my_schema.schema.get(f"{loop.category}.{conditional_tag}".lower())
                        if not conditional_tag_schema:
                            continue
                        if conditional_tag_schema['Foreign Table'] == tag_schema['Foreign Table'] and \
                                conditional_tag_schema['Foreign Column'] == 'ID' and \
                                conditional_tag_schema['entryIdFlg'] != 'Y':
                            # We found the matching tag
                            tag_pos = loop.tag_index(conditional_tag)

                            for row in loop.data:
                                # Check if the tag is null
                                if row[x] in definitions.NULL_VALUES:
                                    if tag_schema['Nullable']:
                                        continue
                                    else:
                                        logger.warning(f"A foreign saveframe reference tag that is not nullable was"
                                                       f" set to a null value. Tag: {loop.category}.{tag} "
                                                       f"Foreign saveframe: {tag_schema['Foreign Table']}"
                                                       f".{tag_schema['Foreign Column']}")
                                        continue
                                try:
                                    # row[x][1:] drops the first character of the reference before the name lookup
                                    row[tag_pos] = self.get_saveframe_by_name(row[x][1:]).get_tag('ID')[0]
                                except KeyError:
                                    logger.warning(f"Missing frame of type {tag} pointed to by {conditional_tag}")

    # Renumber the 'ID' column in a loop
    for each_frame in self._frame_list:
        for loop in each_frame.loops:
            if loop.tag_index('ID') is not None and loop.category != '_Experiment':
                loop.renumber_rows('ID')
815
+
816
def print_tree(self) -> None:
    """Print a tree-style overview of the entry to standard output.

    The repr of the entry itself is printed first. Each saveframe obtained
    by iterating over the entry follows on its own line, indented by one
    tab, and each loop obtained by iterating over that saveframe is
    printed beneath it, indented by two tabs. Every child line starts with
    its zero-based position in square brackets."""

    print(repr(self))
    for frame_number, each_frame in enumerate(self):
        print(f"\t[{frame_number}] {each_frame!r}")
        # Loop positions restart at zero for every saveframe
        loop_number = 0
        for each_loop in each_frame:
            print(f"\t\t[{loop_number}] {each_loop!r}")
            loop_number += 1
826
+
827
def remove_empty_saveframes(self) -> None:
    """ Discard every saveframe in the entry that reports itself as empty.

    Emptiness is decided by each saveframe's own `empty` attribute: a
    saveframe (including its loops) counts as empty when none of its tags
    hold a value, even if the tags themselves are present. The relative
    order of the remaining saveframes is preserved."""

    surviving_frames = []
    for candidate in self._frame_list:
        if candidate.empty:
            continue
        surviving_frames.append(candidate)

    # Rebind to a fresh list rather than mutating the existing one in place
    self._frame_list = surviving_frames
833
+
834
def remove_saveframe(self, item: Union[str, List[str], Tuple[str], 'saveframe_mod.Saveframe',
                                       List['saveframe_mod.Saveframe'], Tuple['saveframe_mod.Saveframe']]) -> None:
    """ Removes one or more saveframes from the entry. You can remove saveframes either by passing the saveframe
    object itself, the saveframe name (as a string), or a list or tuple of either.

    All requested saveframes are resolved before anything is removed, so if any of them cannot be found the
    entry is left unchanged.

    Raises ValueError if `item` (or one of its members) is neither a saveframe object nor a saveframe name,
    or if a requested saveframe is not present in this entry."""

    # Normalize the argument into a list of names and/or saveframe objects
    parsed_list: list
    if isinstance(item, (list, tuple)):
        parsed_list = list(item)
    elif isinstance(item, (str, saveframe_mod.Saveframe)):
        parsed_list = [item]
    else:
        raise ValueError('The item you provided was not one or more saveframe objects or saveframe names (strings).'
                         f' Item type: {type(item)}')

    # Resolve every request to a saveframe object first, so that a bad request aborts before any removal
    frames_to_remove = []
    for saveframe in parsed_list:
        if isinstance(saveframe, str):
            try:
                frames_to_remove.append(self.frame_dict[saveframe])
            except KeyError:
                # The KeyError adds nothing beyond the message below, so drop it from the traceback
                raise ValueError('At least one saveframe specified to remove was not found in this entry. '
                                 f'First missing saveframe: {saveframe}') from None
        elif isinstance(saveframe, saveframe_mod.Saveframe):
            if saveframe not in self._frame_list:
                raise ValueError('At least one saveframe specified to remove was not found in this entry. '
                                 f'First missing saveframe: {saveframe}')
            frames_to_remove.append(saveframe)
        else:
            raise ValueError('One of the items you provided was not a saveframe object or saveframe name '
                             f'(string). Item: {repr(saveframe)}')

    self._frame_list = [_ for _ in self._frame_list if _ not in frames_to_remove]
868
+
869
def rename_saveframe(self, original_name: str, new_name: str) -> None:
    """ Give a saveframe a new name and rewrite every "$name" pointer
    to it, in saveframe tags and in loop data, throughout the entry.

    Either name may be supplied with or without the leading "$".

    Raises ValueError if a saveframe named `new_name` is already present
    in the entry. Whatever exception the saveframe lookup raises for an
    unknown `original_name` is allowed to propagate."""

    # Work with bare names; the "$" only belongs on references
    original_name = original_name[1:] if original_name.startswith("$") else original_name
    new_name = new_name[1:] if new_name.startswith("$") else new_name

    # Refuse to create a second saveframe with the same name
    if any(existing.name == new_name for existing in self._frame_list):
        raise ValueError(f"Cannot rename the saveframe '{original_name}' as '{new_name}' because a "
                         f"saveframe with that name already exists in the entry.")

    # No point catching a failed lookup here: asking to rename a
    # saveframe that is not in the entry is a genuine caller error.
    target_frame = self.get_saveframe_by_name(original_name)
    target_frame.name = new_name

    # The pointer text to search for and what to put in its place
    stale_pointer = f"${original_name}"
    fresh_pointer = f"${new_name}"

    for saveframe in self:
        # Saveframe-level tags are [name, value] pairs
        for tag_pair in saveframe.tags:
            if tag_pair[1] == stale_pointer:
                tag_pair[1] = fresh_pointer

        # Loop data is rewritten row by row, in place
        for loop in saveframe:
            for row in loop:
                for column, value in enumerate(row):
                    if value == stale_pointer:
                        row[column] = fresh_pointer
907
+
908
def validate(self, validate_schema: bool = True, schema: Optional['Schema'] = None,
             validate_star: bool = True) -> List[str]:
    """Validate an entry in a variety of ways. Returns a list of
    errors found. 0-length list indicates no errors found. By
    default all validation modes are enabled.

    validate_schema - Determines if the entry is validated against
    the NMR-STAR schema. You can pass your own custom schema if desired,
    otherwise the cached schema will be used.

    schema - Optional custom schema, handed unchanged to each
    saveframe's own validate() call.

    validate_star - Determines if the STAR syntax checks are ran.

    The entry-level STAR checks look for saveframes sharing a name (each
    duplicated name is reported once) and for "$name" references that do
    not match any saveframe in the entry. The errors returned by each
    saveframe's validate() are appended afterwards."""

    errors = []

    # They should validate for something...
    if not validate_star and not validate_schema:
        errors.append("Validate() should be called with at least one validation method enabled.")

    if validate_star:

        # Check for saveframes with same name. Sorting puts identical
        # names side by side, so comparing each name with its successor
        # (including the final pair) finds every duplicate.
        saveframe_names = sorted(x.name for x in self)
        already_reported = set()
        for name, following in zip(saveframe_names, saveframe_names[1:]):
            if name == following and name not in already_reported:
                already_reported.add(name)
                errors.append(f"Multiple saveframes with same name: '{name}'")

        # Check for dangling references
        fdict = self.frame_dict

        for each_frame in self:
            # Iterate through the tags
            for each_tag in each_frame.tags:
                # Values are stringified before the "$" test
                tag_copy = str(each_tag[1])
                if tag_copy.startswith("$") and tag_copy[1:] not in fdict:
                    errors.append(f"Dangling saveframe reference '{each_tag[1]}' in "
                                  f"tag '{each_frame.tag_prefix}.{each_tag[0]}'")

            # Iterate through the loops
            for each_loop in each_frame:
                for each_row in each_loop:
                    for pos, val in enumerate(each_row):
                        val = str(val)
                        if val.startswith("$") and val[1:] not in fdict:
                            errors.append(f"Dangling saveframe reference '{val}' in tag "
                                          f"'{each_loop.category}.{each_loop.tags[pos]}'")

    # Ask the saveframes to check themselves for errors
    for frame in self:
        errors.extend(frame.validate(validate_schema=validate_schema, schema=schema, validate_star=validate_star))

    return errors
960
+
961
def write_to_file(self, file_name: Union[str, Path], format_: str = "nmrstar", show_comments: bool = True,
                  skip_empty_loops: bool = False, skip_empty_tags: bool = False) -> None:
    """ Write this entry to `file_name`, in NMR-STAR format unless told otherwise.

    The work is handed to the module-level write_to_file helper, which
    receives this entry together with every option below unchanged.

    Options:
      format_ - "nmrstar" (the default) or "json" to write JSON instead.
      show_comments - pass False to leave out the comments that are
        inserted by default. Ignored when writing json.
      skip_empty_loops - leave as False to force printing of loops that
        have no tags at all (loops with null tags are still printed).
      skip_empty_tags - pass True to omit tags in the saveframes and
        loops which have no non-null values."""

    output_options = {
        'file_name': file_name,
        'format_': format_,
        'show_comments': show_comments,
        'skip_empty_loops': skip_empty_loops,
        'skip_empty_tags': skip_empty_tags,
    }
    write_to_file(self, **output_options)