pynmrstar 3.3.5__pp39-pypy39_pp73-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pynmrstar might be problematic. Click here for more details.

pynmrstar/entry.py ADDED
@@ -0,0 +1,970 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ import warnings
5
+ from io import StringIO
6
+ from typing import TextIO, BinaryIO, Union, List, Optional, Dict, Any, Tuple
7
+
8
+ from pynmrstar import definitions, utils, loop as loop_mod, parser as parser_mod, saveframe as saveframe_mod
9
+ from pynmrstar._internal import _json_serialize, _interpret_file, _get_entry_from_database, write_to_file
10
+ from pynmrstar.exceptions import InvalidStateError
11
+ from pynmrstar.schema import Schema
12
+
13
+ logger = logging.getLogger('pynmrstar')
14
+
15
+
16
+ class Entry(object):
17
+ """An object oriented representation of a BMRB entry. You can initialize this
18
+ object several ways; (e.g. from a file, from the official database,
19
+ from scratch) see the class methods below. """
20
+
21
def __contains__(self, item: Any):
    """ Report whether everything in ``item`` is present in the entry.

    ``item`` may be a single saveframe, loop, or string, or a list/tuple
    of those. Saveframes are looked up in the entry's frame list; loops
    and strings are delegated to the membership test of each saveframe.
    Any other type is reported as absent. """

    # Flatten the argument into a list of candidates to check
    if isinstance(item, (list, tuple)):
        candidates: List[Union[str, saveframe_mod.Saveframe, loop_mod.Loop]] = list(item)
    elif isinstance(item, (loop_mod.Loop, saveframe_mod.Saveframe, str)):
        candidates = [item]
    else:
        return False

    for candidate in candidates:
        if isinstance(candidate, saveframe_mod.Saveframe):
            present = candidate in self._frame_list
        elif isinstance(candidate, (loop_mod.Loop, str)):
            present = any(candidate in frame for frame in self._frame_list)
        else:
            present = False

        # A single miss is enough to answer "no"
        if not present:
            return False
    return True
47
+
48
def __delitem__(self, item: Union['saveframe_mod.Saveframe', int, str]) -> None:
    """Delete a saveframe from the entry.

    An integer is treated as a position in the frame list; anything else
    is handed to remove_saveframe()."""

    if not isinstance(item, int):
        self.remove_saveframe(item)
        return

    try:
        del self._frame_list[item]
    except IndexError:
        raise IndexError(f'Index out of range: no saveframe at index: {item}')
58
+
59
def __eq__(self, other) -> bool:
    """Compare two entries.

    Entries are equal when both the entry ID and the list of saveframes
    compare equal. Anything that is not an Entry is never equal."""

    if isinstance(other, Entry):
        mine = (self.entry_id, self._frame_list)
        theirs = (other.entry_id, other._frame_list)
        return mine == theirs

    return False
67
+
68
def __getitem__(self, item: Union[int, str]) -> 'saveframe_mod.Saveframe':
    """Fetch a saveframe by position or, failing that, by name."""

    try:
        found = self._frame_list[item]
    except TypeError:
        # Not usable as a list index, so treat it as a saveframe name
        found = self.get_saveframe_by_name(item)
    return found
75
+
76
def __init__(self, **kwargs) -> None:
    """ Internal constructor - do not call it directly.
    Use one of the class methods instead:

    :py:meth:`Entry.from_database`, :py:meth:`Entry.from_file`,
    :py:meth:`Entry.from_string`, :py:meth:`Entry.from_scratch`,
    :py:meth:`Entry.from_json`, and :py:meth:`Entry.from_template`

    The keyword arguments select the construction mode: 'the_string'
    parses text, 'file_name' parses a file, 'all_tags' builds a template
    from a schema, and anything else creates a blank entry that only
    carries 'entry_id'."""

    # Start out as an empty, unnamed entry
    self._entry_id: Union[str, int] = 0
    self._frame_list: List[saveframe_mod.Saveframe] = []
    self.source: Optional[str] = None

    # Calling Entry() without arguments is always a mistake
    if not kwargs:
        raise ValueError("You should not directly instantiate an Entry using this method. Instead use the "
                         "class methods: Entry.from_database(), Entry.from_file(), Entry.from_string(), "
                         "Entry.from_scratch(), and Entry.from_json().")

    if 'the_string' in kwargs:
        # Wrap the text in a buffer so it is read the same way a file is
        star_buffer: StringIO = StringIO(kwargs['the_string'])
        self.source = "from_string()"
    elif 'file_name' in kwargs:
        star_buffer = _interpret_file(kwargs['file_name'])
        self.source = f"from_file('{kwargs['file_name']}')"
    elif 'all_tags' in kwargs:
        # Template mode: one saveframe for every category the schema knows
        self._entry_id = kwargs['entry_id']

        schema = utils.get_schema(kwargs['schema'])
        tag_definitions = schema.schema
        ordered_tags = [tag_definitions[name.lower()] for name in schema.schema_order]

        categories_done: set = set()
        for tag_definition in ordered_tags:
            category = tag_definition['SFCategory']
            if category in categories_done:
                continue
            categories_done.add(category)
            self._frame_list.append(
                saveframe_mod.Saveframe.from_template(category, category + "_1",
                                                      entry_id=self._entry_id,
                                                      all_tags=kwargs['all_tags'],
                                                      default_values=kwargs['default_values'],
                                                      schema=schema))

        # Record which schema version the template was generated from
        entry_saveframe = self.get_saveframes_by_category('entry_information')[0]
        entry_saveframe['NMR_STAR_version'] = schema.version
        entry_saveframe['Original_NMR_STAR_version'] = schema.version
        return
    else:
        # Blank entry: nothing to parse
        self._entry_id = kwargs['entry_id']
        self.source = "from_scratch()"
        return

    # Only the string and file modes get here; parse the buffered text into this entry
    star_parser: parser_mod.Parser = parser_mod.Parser(entry_to_parse_into=self)
    star_parser.parse(star_buffer.read(),
                      source=self.source,
                      convert_data_types=kwargs.get('convert_data_types', False),
                      raise_parse_warnings=kwargs.get('raise_parse_warnings', False))
133
+
134
def __iter__(self) -> saveframe_mod.Saveframe:
    """ Iterate over the saveframes of the entry, in list order. """

    yield from self._frame_list
139
+
140
def __len__(self) -> int:
    """ The length of an entry is its number of saveframes."""

    frame_count = len(self._frame_list)
    return frame_count
144
+
145
def __repr__(self) -> str:
    """Short description of the entry: its ID and where it was loaded from."""

    entry_id, origin = self._entry_id, self.source
    return f"<pynmrstar.Entry '{entry_id}' {origin}>"
149
+
150
def __setitem__(self, key: Union[int, str], item: 'saveframe_mod.Saveframe') -> None:
    """Replace an existing saveframe, addressed by position or by name.

    :param key: An integer position in the frame list, or the name of a
                saveframe that is already in the entry.
    :param item: The saveframe to store at that location.
    :raises ValueError: If ``item`` is not a saveframe, if no saveframe
                        is named ``key``, or if more than one saveframe
                        is named ``key``.

    New saveframes cannot be added this way; use add_saveframe()."""

    if not isinstance(item, saveframe_mod.Saveframe):
        raise ValueError("You can only assign a saveframe to an entry splice. You attempted to assign: "
                         f"'{repr(item)}'")

    # Replace by ordinal
    if isinstance(key, int):
        self._frame_list[key] = item
        return

    # TODO: Consider stripping this behavior out - it isn't clear it is useful
    # Replace by name. Find every match before touching the list, so that an
    # entry with duplicated names is reported without being half-modified.
    positions = [pos for pos, frame in enumerate(self._frame_list) if frame.name == key]

    if not positions:
        raise ValueError(f"Saveframe with name '{key}' does not exist and therefore cannot be "
                         f"written to. Use the add_saveframe() method to add new saveframes.")
    if len(positions) > 1:
        raise ValueError(f"Cannot replace the saveframe with the name '{key}' "
                         f"because multiple saveframes in the entry have the same name. "
                         f'This library does not allow that normally, as it is '
                         f'invalid NMR-STAR. Did you manually edit the Entry.frame_list '
                         f'object? Please use the Entry.add_saveframe() method instead to '
                         f'add new saveframes.')

    self._frame_list[positions[0]] = item
181
+
182
def __str__(self, skip_empty_loops: bool = False, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
    """Render the whole entry as NMR-STAR text.

    The standard comments are only requested for the first saveframe of
    each category; later saveframes of that category are formatted with
    show_comments=False."""

    rendered = []
    categories_seen = set()
    for frame in self:
        first_of_category = frame.category not in categories_seen
        categories_seen.add(frame.category)
        rendered.append(frame.format(skip_empty_loops=skip_empty_loops,
                                     skip_empty_tags=skip_empty_tags,
                                     show_comments=show_comments if first_of_category else False))

    return f"data_{self.entry_id}\n\n" + "\n".join(rendered)
197
+
198
@property
def category_list(self) -> List[str]:
    """ The distinct saveframe categories of the entry, in order of first
    appearance. Saveframes without a category are ignored. """

    categories = (frame.category for frame in self._frame_list)
    # dict keys keep insertion order and drop the repeats
    return list(dict.fromkeys(name for name in categories if name))
208
+
209
@property
def empty(self) -> bool:
    """ True when every saveframe in the entry reports itself as empty
    (which is also the case for an entry without saveframes)."""

    return all(frame.empty for frame in self._frame_list)
218
+
219
@property
def entry_id(self) -> Union[str, int]:
    """ The ID of the entry.

    Assigning a new ID also rewrites every saveframe tag and loop column
    that the default schema flags as an Entry ID (for example Entry.ID
    and Citation.Entry_ID, when present).
    """
    return self._entry_id

@entry_id.setter
def entry_id(self, value: Union[str, int]) -> None:
    self._entry_id = value

    schema = utils.get_schema()
    for frame in self._frame_list:
        # Saveframe-level tags are [name, value] pairs
        for tag_pair in frame.tags:
            qualified_name = (frame.tag_prefix + "." + tag_pair[0]).lower()

            try:
                if schema.schema[qualified_name]['entryIdFlg'] == 'Y':
                    tag_pair[1] = self._entry_id
            except KeyError:
                # Not described by the schema - leave the value untouched
                pass

        # Loop tags are plain names; the whole column is replaced at once
        for one_loop in frame.loops:
            for column in one_loop.tags:
                qualified_name = (one_loop.category + "." + column).lower()
                try:
                    if schema.schema[qualified_name]['entryIdFlg'] == 'Y':
                        one_loop[column] = [self._entry_id] * len(one_loop[column])
                except KeyError:
                    pass
252
+
253
@property
def frame_dict(self) -> Dict[str, 'saveframe_mod.Saveframe']:
    """A freshly built mapping of saveframe name -> saveframe object.

    Raises InvalidStateError if two saveframes share a name."""

    by_name = {frame.name: frame for frame in self._frame_list}

    # Same size means no name was overwritten, so there are no duplicates
    if len(by_name) == len(self._frame_list):
        return by_name

    # Walk the list again to name the duplicate in the error message
    checked = {}
    for frame in self._frame_list:
        if frame.name in checked:
            raise InvalidStateError("The entry has multiple saveframes with the same name. That is not allowed in "
                                    "the NMR-STAR format. Please remove or rename one. Duplicate name: "
                                    f"'{frame.name}'. Furthermore, please use Entry.add_saveframe() and "
                                    f"Entry.remove_saveframe() rather than manually editing the Entry.frame_list "
                                    f"list, which will prevent this state from existing in the future.")
        checked[frame.name] = frame

    return checked
276
+
277
@property
def frame_list(self) -> List['saveframe_mod.Saveframe']:
    """The entry's list of saveframes (the list object itself, not a copy)."""

    frames = self._frame_list
    return frames
280
+
281
@classmethod
def from_database(cls,
                  entry_num: Union[str, int],
                  convert_data_types: bool = False,
                  schema: Schema = None):
    """Create an entry by fetching the most up to date version of it
    from the public BMRB server. (Requires the ability to initiate
    outbound HTTP connections.)

    With convert_data_types=True the loaded values are converted into
    the corresponding python types, as determined by the schema: floats
    become decimal.Decimal objects, integers become python ints, strings
    and vars stay strings, and dates become datetime.date objects.
    Printing calls str() on all objects, so other than uppercase "E"s in
    scientific notation floats becoming lowercase "e"s, re-printed
    NMR-STAR should look unchanged.

    Use the schema parameter to specify a custom schema object."""

    fetch_options = dict(convert_data_types=convert_data_types, schema=schema)
    return _get_entry_from_database(entry_num, **fetch_options)
304
+
305
@classmethod
def from_file(cls,
              the_file: Union[str, TextIO, BinaryIO],
              convert_data_types: bool = False,
              raise_parse_warnings: bool = False,
              schema: Schema = None):
    """Create an entry by loading a file. If the_file starts with
    http://, https://, or ftp:// then those protocols will be used to
    attempt to open the file.

    With convert_data_types=True the loaded values are converted into
    the corresponding python types, as determined by the schema: floats
    become decimal.Decimal objects, integers become python ints, strings
    and vars stay strings, and dates become datetime.date objects.
    Printing calls str() on all objects, so other than uppercase "E"s in
    scientific notation floats becoming lowercase "e"s, re-printed
    NMR-STAR should look unchanged. Use the schema parameter to specify
    a custom schema object.

    With raise_parse_warnings=True a ParsingError is raised, rather than
    a warning logged, when non-valid (but ignorable) issues are found. """

    init_options = dict(file_name=the_file,
                        convert_data_types=convert_data_types,
                        raise_parse_warnings=raise_parse_warnings,
                        schema=schema)
    return cls(**init_options)
334
+
335
@classmethod
def from_json(cls, json_dict: Union[dict, str]):
    """Create an entry from JSON (serialized or unserialized JSON).

    :param json_dict: Either a dictionary or a JSON string describing an
                      object. It must contain the key 'saveframes' (which
                      may be None) and either 'entry_id' or its legacy
                      synonym 'bmrb_id'. The dictionary is not modified.
    :return: A new entry, created through ``cls.from_scratch()`` so that
             subclasses get an instance of their own class.
    :raises ValueError: If the argument cannot be interpreted as JSON or
                        lacks one of the required keys."""

    # If they provided a string, try to load it using JSON
    if not isinstance(json_dict, dict):
        try:
            json_dict = json.loads(json_dict)
        except (TypeError, ValueError) as err:
            raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.") from err

    # Make sure it is a hash with the correct keys. (Valid JSON may also be a list or a scalar.)
    if not isinstance(json_dict, dict) or "saveframes" not in json_dict:
        raise ValueError("The JSON you provide must be a hash and must contain the key 'saveframes' - even if the "
                         "key points to 'None'.")

    # Until the migration is complete, 'bmrb_id' is a synonym for
    # 'entry_id'. Read it without writing back into the caller's dictionary.
    if 'entry_id' in json_dict:
        entry_id = json_dict['entry_id']
    elif 'bmrb_id' in json_dict:
        entry_id = json_dict['bmrb_id']
    else:
        raise ValueError("The JSON you provide must be a hash and must contain the key 'entry_id' - even if the"
                         " key points to 'None'.")

    # Create an entry from scratch and populate it. 'saveframes' may legitimately be None.
    ret = cls.from_scratch(entry_id)
    ret._frame_list = [saveframe_mod.Saveframe.from_json(x) for x in (json_dict['saveframes'] or [])]
    ret.source = "from_json()"

    # Return the new entry
    return ret
365
+
366
@classmethod
def from_string(cls,
                the_string: str,
                convert_data_types: bool = False,
                raise_parse_warnings: bool = False,
                schema: Schema = None):
    """Create an entry by parsing a string of NMR-STAR text.

    With convert_data_types=True the loaded values are converted into
    the corresponding python types, as determined by the schema: floats
    become decimal.Decimal objects, integers become python ints, strings
    and vars stay strings, and dates become datetime.date objects.
    Printing calls str() on all objects, so other than uppercase "E"s in
    scientific notation floats becoming lowercase "e"s, re-printed
    NMR-STAR should look unchanged. Use the schema parameter to specify
    a custom schema object.

    With raise_parse_warnings=True a ParsingError is raised, rather than
    a warning logged, when non-valid (but ignorable) issues are found."""

    init_options = dict(the_string=the_string,
                        convert_data_types=convert_data_types,
                        raise_parse_warnings=raise_parse_warnings,
                        schema=schema)
    return cls(**init_options)
394
+
395
@classmethod
def from_scratch(cls, entry_id: Union[str, int]):
    """Create an empty entry to be filled in programmatically.

    entry_id is required: it is the unique identifier "xxx" that
    appears as "data_xxx" in the file."""

    blank_entry = cls(entry_id=entry_id)
    return blank_entry
402
+
403
@classmethod
def from_template(cls,
                  entry_id,
                  all_tags=False,
                  default_values=False,
                  schema=None) -> 'Entry':
    """ Create an entry containing all of the saveframes and loops
    defined in the schema, with no values assigned. The entry ID must be
    given.

    'all_tags' (optional) includes every tag rather than only the
    mandatory ones.

    'default_values' (optional) fills in the default values from the
    schema.

    'schema' (optional) provides a custom schema."""

    resolved_schema = utils.get_schema(schema)
    template_entry = cls(entry_id=entry_id,
                         all_tags=all_tags,
                         default_values=default_values,
                         schema=resolved_schema)
    # Record which schema version the template was built from
    template_entry.source = f"from_template({resolved_schema.version})"
    return template_entry
425
+
426
def add_saveframe(self, frame) -> None:
    """Append a saveframe to the entry.

    Raises ValueError if the object is not a saveframe, or if the entry
    already has a saveframe of the same name."""

    if not isinstance(frame, saveframe_mod.Saveframe):
        raise ValueError("You can only add instances of saveframes using this method. You attempted to add "
                         f"the object: '{repr(frame)}'.")

    # Saveframe names have to stay unique within an entry
    existing_frames = self.frame_dict
    if frame.name in existing_frames:
        raise ValueError(f"Cannot add a saveframe with name '{frame.name}' since a saveframe with that "
                         f"name already exists in the entry.")

    self._frame_list.append(frame)
440
+
441
def compare(self, other) -> List[str]:
    """Returns the differences between two entries as a list.

    Non-equal entries will always be detected, but the specific
    differences detected depend on the order of the entries.

    :param other: Another entry, or a string which is compared against
                  str(self). Any other type is reported as a difference.
    :return: A list of human readable differences; empty if none found."""

    if self is other:
        return []
    if isinstance(other, str):
        if str(self) == other:
            return []
        return ['String was not exactly equal to entry.']
    if not isinstance(other, Entry):
        return ['Other object is not of class Entry.']

    diffs = []
    try:
        if str(self.entry_id) != str(other.entry_id):
            diffs.append(f"Entry ID does not match between entries: '{self.entry_id}' vs '{other.entry_id}'.")
        if len(self._frame_list) != len(other.frame_list):
            diffs.append(f"The number of saveframes in the entries are not equal: '{len(self._frame_list)}' vs "
                         f"'{len(other.frame_list)}'.")

        # frame_dict rebuilds its dictionary on every access, so look it up a
        # single time - and only if there is at least one saveframe to compare.
        other_frame_dict = other.frame_dict if self._frame_list else {}
        for frame in self._frame_list:
            if frame.name not in other_frame_dict:
                diffs.append(f"No saveframe with name '{frame.name}' in other entry.")
            else:
                comp = frame.compare(other_frame_dict[frame.name])
                if len(comp) > 0:
                    diffs.append(f"Saveframes do not match: '{frame.name}'.")
                    diffs.extend(comp)

    except AttributeError as err:
        diffs.append(f"An exception occurred while comparing: '{err}'.")

    return diffs
476
+
477
def add_missing_tags(self, schema: Schema = None, all_tags: bool = False) -> None:
    """ Ask every saveframe in the entry to add the tags it is missing
    according to the schema, passing the schema and all_tags options
    through unchanged. """

    for each_frame in self._frame_list:
        each_frame.add_missing_tags(schema=schema, all_tags=all_tags)
483
+
484
def delete_empty_saveframes(self) -> None:
    """ Deprecated. Please use :py:meth:`pynmrstar.Entry.remove_empty_saveframes`.

    Emits a DeprecationWarning and then delegates to
    remove_empty_saveframes()."""

    # stacklevel=2 attributes the warning to the code calling this method,
    # which is the code that needs to be updated.
    warnings.warn('Deprecated. Please use remove_empty_saveframes() instead.', DeprecationWarning, stacklevel=2)
    return self.remove_empty_saveframes()
489
+
490
def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
    """ Render the entry like str(Entry) does, but with options to
    customize the output. (Note that skip_empty_loops defaults to True
    here.)

    skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
    skip_empty_tags will omit tags in the saveframes and loops which have no non-null values.
    show_comments will show the standard comments before a saveframe."""

    render_options = dict(skip_empty_loops=skip_empty_loops,
                          skip_empty_tags=skip_empty_tags,
                          show_comments=show_comments)
    return self.__str__(**render_options)
500
+
501
def get_json(self, serialize: bool = True) -> Union[dict, str]:
    """ Return the entry as JSON.

    With serialize=True (the default) a JSON string is returned. With
    serialize=False the dictionary that would have been serialized is
    returned instead."""

    saveframes = [frame.get_json(serialize=False) for frame in self._frame_list]
    entry_dict = {"entry_id": self.entry_id, "saveframes": saveframes}

    if not serialize:
        return entry_dict
    return json.dumps(entry_dict, default=_json_serialize)
517
+
518
def get_loops_by_category(self, value: str) -> List['loop_mod.Loop']:
    """Collect the loops of the given category from all saveframes.

    The category is normalized with utils.format_category() and matched
    case-insensitively."""

    wanted = utils.format_category(value).lower()

    matches = []
    for frame in self._frame_list:
        matches.extend(one_loop for one_loop in frame.loops if one_loop.category.lower() == wanted)
    return matches
529
+
530
def get_saveframe_by_name(self, saveframe_name: str) -> 'saveframe_mod.Saveframe':
    """Return the saveframe with the given name, or raise KeyError."""

    lookup = self.frame_dict
    if saveframe_name not in lookup:
        raise KeyError(f"No saveframe with name '{saveframe_name}'")
    return lookup[saveframe_name]
538
+
539
def get_saveframes_by_category(self, value: str) -> List['saveframe_mod.Saveframe']:
    """Return the saveframes whose "sf_category" tag has the given value."""

    category_tag = "sf_category"
    return self.get_saveframes_by_tag_and_value(category_tag, value)
543
+
544
def get_saveframes_by_tag_and_value(self, tag_name: str, value: Any) -> List['saveframe_mod.Saveframe']:
    """Return the saveframes in which the first value found for
    tag_name equals value."""

    # Pair each saveframe with what it reports for the tag
    tagged = ((frame, frame.get_tag(tag_name)) for frame in self._frame_list)
    return [frame for frame, found in tagged if found != [] and found[0] == value]
555
+
556
def get_tag(self, tag: str, whole_tag: bool = False) -> list:
    """ Collect what every saveframe returns for a fully qualified tag
    (e.g. _Assigned_chem_shift_list.Data_file_name). whole_tag is
    passed through to the saveframes; with whole_tag=True the
    [tag_name, tag_value] pairs are returned.

    Raises ValueError if the tag does not include its category."""

    if "." not in str(tag):
        raise ValueError("You must provide the tag category to call this method at the entry level. For "
                         "example, you must provide 'Entry.Title' rather than 'Title' as the tag if calling"
                         " this at the Entry level. You can call Saveframe.get_tag('Title') without issue.")

    collected = []
    for frame in self._frame_list:
        collected += frame.get_tag(tag, whole_tag=whole_tag)

    return collected
571
+
572
def get_tags(self, tags: list) -> Dict[str, list]:
    """ Look up several tags at once.

    Returns a dictionary mapping each requested tag to the result of
    get_tag() for it. Raises ValueError unless tags is a list."""

    if not isinstance(tags, list):
        raise ValueError("Please provide a list of tags.")

    return {tag: self.get_tag(tag) for tag in tags}
585
+
586
def normalize(self, schema: Optional[Schema] = None) -> None:
    """ Sorts saveframes, loops, and tags according to the schema
    provided (or BMRB default if none provided).

    Also re-assigns ID tag values and updates tag links to ID values.

    The work happens in four passes:

    1. Sort tags, loop rows (by "Ordinal", where possible), loops, and
       saveframes.
    2. Per saveframe category, renumber the saveframe ID tags and record
       an "old value -> new value" mapping for every tag value seen.
    3. Use that mapping to update tags which are foreign keys, and sync
       ID columns with the saveframes named by framecode columns.
    4. Renumber the 'ID' column of every loop except '_Experiment'.

    Problems found along the way are logged as warnings rather than
    raised."""

    # Assign all the ID tags, and update all links to ID tags
    my_schema = utils.get_schema(schema)

    # Sort the saveframes according to ID, if an ID exists. Otherwise, still sort by category
    ordering = my_schema.category_order

    def sf_key(frame: saveframe_mod.Saveframe) -> Tuple[Union[int, float], Union[int, float]]:
        """ Helper function to sort the saveframes.
        Returns (category order, saveframe order) """

        # If not a real category, generate an artificial but stable order > the real saveframes
        try:
            category_order = ordering.index(frame.tag_prefix)
        except (ValueError, KeyError):
            if frame.category is None:
                category_order = float('infinity')
            else:
                category_order = len(ordering) + abs(int(hashlib.sha1(str(frame.category).encode()).hexdigest(), 16))

        # See if there is an ID tag, and it is a number
        saveframe_id = float('infinity')
        try:
            saveframe_id = int(frame.get_tag("ID")[0])
        except (ValueError, KeyError, IndexError, TypeError):
            # Either there is no ID, or it is not a number. By default it will sort at the end of saveframes of its
            # category. Note that the entry_information ID tag has a different meaning, but since there should
            # only ever be one saveframe of that category, the sort order for it can be any value.
            pass

        return category_order, saveframe_id

    def loop_key(one_loop) -> Union[int, float]:
        """ Helper function to sort the loops."""

        try:
            return ordering.index(one_loop.category)
        except ValueError:
            # Generate an arbitrary sort order for loops that aren't in the schema but make sure that they
            # always come after loops in the schema
            return len(ordering) + abs(int(hashlib.sha1(str(one_loop.category).encode()).hexdigest(), 16))

    # Go through all the saveframes
    for each_frame in self._frame_list:
        each_frame.sort_tags(schema=my_schema)
        # Iterate through the loops
        for each_loop in each_frame:
            each_loop.sort_tags(schema=my_schema)

            # See if we can sort the rows (in addition to tags)
            try:
                each_loop.sort_rows("Ordinal")
            except ValueError:
                pass
        each_frame.loops.sort(key=loop_key)
    self._frame_list.sort(key=sf_key)

    # Calculate all the categories present
    categories: set = set()
    for each_frame in self._frame_list:
        categories.add(each_frame.category)

    # "tag_prefix.tag.original value" -> mapped value
    mapping: dict = {}

    # Reassign the ID tags first
    for each_category in categories:

        # First in the saveframe tags
        id_counter: int = 1
        for each_frame in self.get_saveframes_by_category(each_category):
            for tag in each_frame.tags:
                tag_schema = my_schema.schema.get(f"{each_frame.tag_prefix}.{tag[0]}".lower())
                if not tag_schema:
                    continue

                # Make sure the capitalization of the tag is correct
                tag[0] = tag_schema['Tag field']

                if tag_schema['lclSfIdFlg'] == 'Y':
                    # If it's an Entry_ID tag, set it that way
                    if tag_schema['entryIdFlg'] == 'Y':
                        mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = self._entry_id
                        tag[1] = self._entry_id
                    # Must be an integer to avoid renumbering the chem_comp ID, for example
                    elif tag_schema['BMRB data type'] == "int":
                        prev_tag = tag[1]
                        if isinstance(tag[1], str):
                            tag[1] = str(id_counter)
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = str(id_counter)
                        else:
                            tag[1] = id_counter
                            mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{prev_tag}'] = id_counter
                    # We need to still store all the other tag values too
                    else:
                        mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]
                else:
                    mapping[f'{each_frame.tag_prefix[1:]}.{tag[0]}.{tag[1]}'] = tag[1]

            # Then in the loop
            for loop in each_frame:
                for x, tag in enumerate(loop.tags):
                    tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                    if not tag_schema:
                        continue

                    # Make sure the tags have the proper capitalization
                    loop.tags[x] = tag_schema['Tag field']

                    for row in loop.data:
                        # We don't re-map loop IDs, but we should still store them
                        mapping[f'{loop.category[1:]}.{tag}.{row[x]}'] = row[x]

                        if tag_schema['lclSfIdFlg'] == 'Y':
                            # If it's an Entry_ID tag, set it that way
                            if tag_schema['entryIdFlg'] == 'Y':
                                row[x] = self._entry_id
                            # Must be an integer to avoid renumbering the chem_comp ID, for example
                            elif tag_schema['BMRB data type'] == "int":
                                if row[x] in definitions.NULL_VALUES:
                                    if isinstance(row[x], str):
                                        row[x] = str(id_counter)
                                    else:
                                        row[x] = id_counter
                            # Handle chem_comp and it's ilk
                            else:
                                parent_id_tag = f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}"
                                parent_id_value = each_frame.get_tag(parent_id_tag)[0]
                                if isinstance(row[x], str):
                                    row[x] = str(parent_id_value)
                                else:
                                    row[x] = parent_id_value
            id_counter += 1

    # Now fix any other references
    for saveframe in self:
        for tag in saveframe.tags:
            tag_schema = my_schema.schema.get(f"{saveframe.tag_prefix}.{tag[0]}".lower())
            if not tag_schema:
                continue
            if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':

                if tag[1] in definitions.NULL_VALUES:
                    if tag_schema['Nullable']:
                        continue
                    else:
                        # Report the tag *name* (tag[0]); the value is reported separately at the end
                        logger.warning("A foreign key tag that is not nullable was set to "
                                       f"a null value. Tag: {saveframe.tag_prefix}.{tag[0]} Primary key: "
                                       f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                       f"Value: {tag[1]}")

                try:
                    tag[1] = mapping[f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{tag[1]}"]
                except KeyError:
                    logger.warning(f'The tag {saveframe.tag_prefix}.{tag[0]} has value {tag[1]} '
                                   f'but there is no valid primary key.')

        # Now apply the remapping to loops...
        for loop in saveframe:
            for x, tag in enumerate(loop.tags):
                tag_schema = my_schema.schema.get(f"{loop.category}.{tag}".lower())
                if not tag_schema:
                    continue
                if tag_schema['Foreign Table'] and tag_schema['Sf pointer'] != 'Y':
                    for row in loop.data:
                        if row[x] in definitions.NULL_VALUES:
                            if tag_schema['Nullable']:
                                continue
                            else:
                                logger.warning("A foreign key reference tag that is not nullable was set to "
                                               f"a null value. Tag: {loop.category}.{tag} Foreign key: "
                                               f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                               f"Value: {row[x]}")
                        try:
                            row[x] = mapping[
                                f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']}.{row[x]}"]
                        except KeyError:
                            # These columns are deliberately not reported when no primary key is found
                            if (loop.category == '_Atom_chem_shift' or loop.category == '_Entity_comp_index') and \
                                    (tag == 'Atom_ID' or tag == 'Comp_ID'):
                                continue
                            logger.warning(f'The tag {loop.category}.{tag} has value {row[x]} '
                                           f'but there is no valid primary key '
                                           f"{tag_schema['Foreign Table']}.{tag_schema['Foreign Column']} "
                                           f"with the tag value.")

                # If there is both a label tag and an ID tag, do the reassignment

                # We found a framecode reference
                if tag_schema['Foreign Table'] and tag_schema['Foreign Column'] == 'Sf_framecode':

                    # Check if there is a tag pointing to the 'ID' tag
                    for conditional_tag in loop.tags:
                        conditional_tag_schema = my_schema.schema.get(f"{loop.category}.{conditional_tag}".lower())
                        if not conditional_tag_schema:
                            continue
                        if conditional_tag_schema['Foreign Table'] == tag_schema['Foreign Table'] and \
                                conditional_tag_schema['Foreign Column'] == 'ID' and \
                                conditional_tag_schema['entryIdFlg'] != 'Y':
                            # We found the matching tag
                            tag_pos = loop.tag_index(conditional_tag)

                            for row in loop.data:
                                # Check if the tag is null
                                if row[x] in definitions.NULL_VALUES:
                                    if tag_schema['Nullable']:
                                        continue
                                    else:
                                        logger.warning(f"A foreign saveframe reference tag that is not nullable was"
                                                       f" set to a null value. Tag: {loop.category}.{tag} "
                                                       f"Foreign saveframe: {tag_schema['Foreign Table']}"
                                                       f".{tag_schema['Foreign Column']}")
                                        continue
                                try:
                                    # The first character of the framecode value is skipped to get the name
                                    row[tag_pos] = self.get_saveframe_by_name(row[x][1:]).get_tag('ID')[0]
                                except KeyError:
                                    logger.warning(f"Missing frame of type {tag} pointed to by {conditional_tag}")

    # Renumber the 'ID' column in a loop
    for each_frame in self._frame_list:
        for loop in each_frame.loops:
            if loop.tag_index('ID') is not None and loop.category != '_Experiment':
                loop.renumber_rows('ID')
813
+
814
def print_tree(self) -> None:
    """Print a tree-style overview of the saveframes and loops in the entry.

    The first line is the repr of the entry itself. Each saveframe follows on
    its own line, indented by one tab and prefixed with its position in square
    brackets; each loop of that saveframe is listed beneath it, indented by
    two tabs and prefixed with its position within the saveframe."""

    def tree_lines():
        # Lines are produced lazily, so each repr is computed immediately
        # before it is printed.
        yield repr(self)
        for frame_number, saveframe in enumerate(self):
            yield f"\t[{frame_number}] {repr(saveframe)}"
            for loop_number, loop in enumerate(saveframe):
                yield f"\t\t[{loop_number}] {repr(loop)}"

    for line in tree_lines():
        print(line)
824
+
825
def remove_empty_saveframes(self) -> None:
    """ Drop every saveframe whose `empty` attribute is true from the entry.

    For a saveframe to count as empty its loops must be empty as well.
    "Empty" means the tags carry no values, not that no tags are present.
    The order of the surviving saveframes is preserved."""

    kept_frames = []
    for saveframe in self._frame_list:
        if saveframe.empty:
            continue
        kept_frames.append(saveframe)

    # Rebind rather than mutate in place: a fresh list replaces the old one.
    self._frame_list = kept_frames
831
+
832
+ def remove_saveframe(self, item: Union[str, List[str], Tuple[str], 'saveframe_mod.Saveframe',
833
+ List['saveframe_mod.Saveframe'], Tuple['saveframe_mod.Saveframe']]) -> None:
834
+ """ Removes one or more saveframes from the entry. You can remove saveframes either by passing the saveframe
835
+ object itself, the saveframe name (as a string), or a list or tuple of either."""
836
+
837
+ parsed_list: list
838
+ if isinstance(item, tuple):
839
+ parsed_list = list(item)
840
+ elif isinstance(item, list):
841
+ parsed_list = item
842
+ elif isinstance(item, (str, saveframe_mod.Saveframe)):
843
+ parsed_list = [item]
844
+ else:
845
+ raise ValueError('The item you provided was not one or more saveframe objects or saveframe names (strings).'
846
+ f' Item type: {type(item)}')
847
+
848
+ frames_to_remove = []
849
+ for saveframe in parsed_list:
850
+ if isinstance(saveframe, str):
851
+ try:
852
+ frames_to_remove.append(self.frame_dict[saveframe])
853
+ except KeyError:
854
+ raise ValueError('At least one saveframe specified to remove was not found in this saveframe. '
855
+ f'First missing saveframe: {saveframe}')
856
+ elif isinstance(saveframe, saveframe_mod.Saveframe):
857
+ if saveframe not in self._frame_list:
858
+ raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
859
+ f'missing loop: {saveframe}')
860
+ frames_to_remove.append(saveframe)
861
+ else:
862
+ raise ValueError('One of the items you provided was not a saveframe object or saveframe name '
863
+ f'(string). Item: {repr(saveframe)}')
864
+
865
+ self._frame_list = [_ for _ in self._frame_list if _ not in frames_to_remove]
866
+
867
def rename_saveframe(self, original_name: str, new_name: str) -> None:
    """ Give a saveframe a new name and rewrite every pointer to it.

    A leading "$" on either name is ignored. After the saveframe itself has
    been renamed, every saveframe tag value and every loop value in the entry
    that is exactly "$<original_name>" is replaced by "$<new_name>".

    Raises ValueError if a saveframe called new_name already exists in the
    entry. Whatever get_saveframe_by_name raises for an unknown original_name
    is deliberately left to propagate."""

    def without_sigil(name: str) -> str:
        # Names may be supplied in pointer form ("$name"); work with the bare name.
        return name[1:] if name.startswith("$") else name

    original_name = without_sigil(original_name)
    new_name = without_sigil(new_name)

    # Refuse to create two saveframes with the same name
    if new_name in [frame.name for frame in self._frame_list]:
        raise ValueError(f"Cannot rename the saveframe '{original_name}' as '{new_name}' because a "
                         f"saveframe with that name already exists in the entry.")

    # Rename the saveframe itself; a failed lookup is the caller's error and is not caught here
    self.get_saveframe_by_name(original_name).name = new_name

    stale_pointer = "$" + original_name
    fresh_pointer = "$" + new_name

    for saveframe in self:
        # Saveframe-level tags are [name, value] pairs that are updated in place
        for tag_pair in saveframe.tags:
            if tag_pair[1] == stale_pointer:
                tag_pair[1] = fresh_pointer

        # Loop data is updated cell by cell, also in place
        for loop in saveframe:
            for row in loop:
                for column, value in enumerate(row):
                    if value == stale_pointer:
                        row[column] = fresh_pointer
905
+
906
def validate(self, validate_schema: bool = True, schema: Schema = None,
             validate_star: bool = True) -> List[str]:
    """Validate an entry in a variety of ways. Returns a list of
    errors found. 0-length list indicates no errors found. By
    default all validation modes are enabled.

    validate_schema - Determines if the entry is validated against
    the NMR-STAR schema. You can pass your own custom schema if desired,
    otherwise the cached schema will be used.

    validate_star - Determines if the STAR syntax checks are ran. At the
    entry level these checks report saveframes that share a name and
    "$name" values (in saveframe tags or loop data) that do not point at
    any saveframe in the entry.

    Every saveframe is additionally asked to validate itself with the same
    arguments, and its errors are appended to the returned list."""

    errors = []

    # They should validate for something...
    if not validate_star and not validate_schema:
        errors.append("Validate() should be called with at least one validation method enabled.")

    if validate_star:

        # Check for saveframes with same name. Once sorted, duplicates sit next to each other, so
        # each neighbouring pair is compared -- including the last one, which must not be skipped.
        saveframe_names = sorted(x.name for x in self)
        for current_name, following_name in zip(saveframe_names, saveframe_names[1:]):
            if current_name == following_name:
                errors.append(f"Multiple saveframes with same name: '{current_name}'")

        # Check for dangling references
        fdict = self.frame_dict

        for each_frame in self:
            # Iterate through the tags
            for each_tag in each_frame.tags:
                # Values are compared as strings so non-string tag values cannot break startswith()
                tag_copy = str(each_tag[1])
                if (tag_copy.startswith("$")
                        and tag_copy[1:] not in fdict):
                    errors.append(f"Dangling saveframe reference '{each_tag[1]}' in "
                                  f"tag '{each_frame.tag_prefix}.{each_tag[0]}'")

            # Iterate through the loops
            for each_loop in each_frame:
                for each_row in each_loop:
                    for pos, val in enumerate(each_row):
                        val = str(val)
                        if val.startswith("$") and val[1:] not in fdict:
                            errors.append(f"Dangling saveframe reference '{val}' in tag "
                                          f"'{each_loop.category}.{each_loop.tags[pos]}'")

    # Ask the saveframes to check themselves for errors
    for frame in self:
        errors.extend(frame.validate(validate_schema=validate_schema, schema=schema, validate_star=validate_star))

    return errors
958
+
959
def write_to_file(self, file_name: str, format_: str = "nmrstar", show_comments: bool = True,
                  skip_empty_loops: bool = False, skip_empty_tags: bool = False) -> None:
    """ Write the entry to the file named file_name, in NMR-STAR format unless told otherwise.

    Optional arguments:
      format_=json writes the file in JSON format instead.
      show_comments=False disables the comments that are inserted by default. Ignored when writing json.
      skip_empty_loops=False forces printing of loops with no tags at all (loops with null tags are still printed).
      skip_empty_tags=True omits tags in the saveframes and loops which have no non-null values."""

    output_options = {
        "format_": format_,
        "show_comments": show_comments,
        "skip_empty_loops": skip_empty_loops,
        "skip_empty_tags": skip_empty_tags,
    }

    # Inside the method body the bare name refers to the module-level helper imported from
    # pynmrstar._internal, not to this method; all of the work is delegated to it.
    write_to_file(self, file_name=file_name, **output_options)