pynmrstar 3.3.6__pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pynmrstar might be problematic. Click here for more details.

pynmrstar/saveframe.py ADDED
@@ -0,0 +1,1017 @@
1
+ import json
2
+ import warnings
3
+ from csv import reader as csv_reader, writer as csv_writer
4
+ from io import StringIO
5
+ from pathlib import Path
6
+ from typing import TextIO, BinaryIO, Union, List, Optional, Any, Dict, Iterable, Tuple
7
+
8
+ from pynmrstar import definitions, entry as entry_mod, loop as loop_mod, parser as parser_mod, utils
9
+ from pynmrstar._internal import _get_comments, _json_serialize, _interpret_file, get_clean_tag_list, write_to_file
10
+ from pynmrstar.exceptions import InvalidStateError
11
+ from pynmrstar.schema import Schema
12
+
13
+
14
+ class Saveframe(object):
15
+ """A saveframe object. Create using the class methods, see below."""
16
+
17
+ def __contains__(self, item: any) -> bool:
18
+ """ Check if the saveframe contains a tag or a loop name."""
19
+
20
+ # Prepare for processing
21
+ if isinstance(item, (list, tuple)):
22
+ to_process: List[Union[str, loop_mod.Loop]] = list(item)
23
+ elif isinstance(item, (loop_mod.Loop, str)):
24
+ to_process = [item]
25
+ else:
26
+ return False
27
+
28
+ lc_tags = self._lc_tags
29
+ loop_dict = self.loop_dict
30
+
31
+ for item in to_process:
32
+ if isinstance(item, loop_mod.Loop):
33
+ if item not in self.loops:
34
+ return False
35
+ elif isinstance(item, str):
36
+ if item.startswith("_") and "." not in item:
37
+ if item.lower() not in loop_dict:
38
+ return False
39
+ else:
40
+ if utils.format_tag_lc(item) not in lc_tags:
41
+ return False
42
+ else:
43
+ return False
44
+ return True
45
+
46
def __delitem__(self, item: Union[int, str, 'loop_mod.Loop']) -> None:
    """Delete a loop or a tag from the saveframe.

    A Loop object or an integer index removes that loop. A string that
    starts with "_" and has no "." is treated as a loop category; any
    other string is treated as a tag name.

    Raises IndexError if an integer index matches no loop and ValueError
    if `item` is of an unsupported type."""

    # A concrete loop object: delete exactly that loop
    if isinstance(item, loop_mod.Loop):
        self.remove_loop(item)
        return

    if isinstance(item, int):
        try:
            self.remove_loop(self._loops[item])
        except IndexError:
            raise IndexError(f'Index out of range: no loop at index: {item}')
        return

    if not isinstance(item, str):
        raise ValueError(f'Item of invalid type provided: {type(item)}')

    # Assume it is a loop category based on the preceding underscore
    # and lack of the '.' category and tag separator
    is_loop_category = item.startswith("_") and "." not in item
    if is_loop_category:
        self.remove_loop(item)
    else:
        self.remove_tag(item)
66
+
67
+ def __eq__(self, other) -> bool:
68
+ """Returns True if this saveframe is equal to another saveframe,
69
+ False if it is equal."""
70
+
71
+ if not isinstance(other, Saveframe):
72
+ return False
73
+
74
+ return (self.name, self._category, self._tags, self._loops) == \
75
+ (other.name, other._category, other._tags, other._loops)
76
+
77
+ def __getitem__(self, item: Union[int, str]) -> Union[list, 'loop_mod.Loop']:
78
+ """Get the indicated loop or tag."""
79
+
80
+ if isinstance(item, int):
81
+ try:
82
+ return self._loops[item]
83
+ except KeyError:
84
+ raise KeyError(f"No loop with index '{item}'.")
85
+ elif isinstance(item, str):
86
+ # Assume it is a loop category based on the proceeding underscore
87
+ # and lack of the '.' category and tag separator
88
+ if item.startswith("_") and "." not in item:
89
+ try:
90
+ return self.loop_dict[item.lower()]
91
+ except KeyError:
92
+ raise KeyError(f"No loop matching '{item}'.")
93
+ else:
94
+ results = self.get_tag(item)
95
+ if not results:
96
+ raise KeyError(f"No tag matching '{item}'.")
97
+ return results
98
+
99
+ def __iter__(self) -> Iterable["loop_mod.Loop"]:
100
+ """ Yields each of the loops contained within the saveframe. """
101
+
102
+ return iter(self._loops)
103
+
104
+ def __len__(self) -> int:
105
+ """Return the number of loops in this saveframe."""
106
+
107
+ return len(self._loops)
108
+
109
+ def __lt__(self, other) -> bool:
110
+ """Returns True if this saveframe sorts lower than the compared
111
+ saveframe, false otherwise. The alphabetical ordering of the
112
+ saveframe category is used to perform the comparison."""
113
+
114
+ if not isinstance(other, Saveframe):
115
+ return NotImplemented
116
+
117
+ return self.tag_prefix < other.tag_prefix
118
+
119
+ def __init__(self, **kwargs) -> None:
120
+ """Don't use this directly. Use the class methods to construct:
121
+ :py:meth:`Saveframe.from_scratch`, :py:meth:`Saveframe.from_string`,
122
+ :py:meth:`Saveframe.from_template`, :py:meth:`Saveframe.from_file`,
123
+ :py:meth:`Saveframe.from_json`"""
124
+
125
+ # They initialized us wrong
126
+ if len(kwargs) == 0:
127
+ raise ValueError("You should not directly instantiate a Saveframe using this method. Instead use the class"
128
+ " methods: Saveframe.from_scratch(), Saveframe.from_string(), Saveframe.from_template(), "
129
+ "Saveframe.from_file(), and Saveframe.from_json().")
130
+
131
+ # Initialize our local variables
132
+ self._tags: List[Any] = []
133
+ self._loops: List[loop_mod.Loop] = []
134
+ self._name: str = ""
135
+ self.source: str = "unknown"
136
+ self._category: Optional[str] = None
137
+ self.tag_prefix: Optional[str] = None
138
+
139
+ star_buffer: StringIO = StringIO('')
140
+
141
+ # Update our source if it provided
142
+ if 'source' in kwargs:
143
+ self.source = kwargs['source']
144
+
145
+ if 'the_string' in kwargs:
146
+ # Parse from a string by wrapping it in StringIO
147
+ star_buffer = StringIO(kwargs['the_string'])
148
+ self.source = "from_string()"
149
+ elif 'file_name' in kwargs:
150
+ star_buffer = _interpret_file(kwargs['file_name'])
151
+ self.source = f"from_file('{kwargs['file_name']}')"
152
+ # Creating from template (schema)
153
+ elif 'all_tags' in kwargs:
154
+ schema_obj = utils.get_schema(kwargs['schema'])
155
+ schema = schema_obj.schema
156
+ self._category = kwargs['category']
157
+
158
+ self._name = self._category
159
+ if 'saveframe_name' in kwargs and kwargs['saveframe_name']:
160
+ self._name = kwargs['saveframe_name']
161
+
162
+ # Make sure it is a valid category
163
+ if self._category not in [x["SFCategory"] for x in schema.values()]:
164
+ raise ValueError(f"The saveframe category '{self._category}' was not found in the dictionary.")
165
+
166
+ s = sorted(schema.values(), key=lambda _: float(_["Dictionary sequence"]))
167
+
168
+ loops_added = []
169
+
170
+ for item in s:
171
+ if item["SFCategory"] == self._category:
172
+
173
+ # It is a tag in this saveframe
174
+ if item["Loopflag"] == "N":
175
+
176
+ ft = utils.format_tag(item["Tag"])
177
+ # Set the value for sf_category and sf_framecode
178
+ if ft == "Sf_category":
179
+ self.add_tag(item["Tag"], self._category)
180
+ elif ft == "Sf_framecode":
181
+ self.add_tag(item["Tag"], self.name)
182
+ # If the tag is the entry ID tag, set the entry ID
183
+ elif item["entryIdFlg"] == "Y":
184
+ self.add_tag(item["Tag"], kwargs['entry_id'])
185
+ else:
186
+ tag_value = None
187
+ if kwargs['default_values']:
188
+ if item['default value'] not in definitions.NULL_VALUES:
189
+ tag_value = item['default value']
190
+ # Unconditional add
191
+ if kwargs['all_tags']:
192
+ self.add_tag(item["Tag"], tag_value)
193
+ # Conditional add
194
+ else:
195
+ if item["public"] != "I":
196
+ self.add_tag(item["Tag"], tag_value)
197
+
198
+ # It is a contained loop tag
199
+ else:
200
+ cat_formatted = utils.format_category(item["Tag"])
201
+ if cat_formatted not in loops_added:
202
+ loops_added.append(cat_formatted)
203
+ try:
204
+ self.add_loop(loop_mod.Loop.from_template(cat_formatted,
205
+ all_tags=kwargs['all_tags'],
206
+ schema=schema_obj))
207
+ except ValueError:
208
+ pass
209
+ return
210
+
211
+ elif 'saveframe_name' in kwargs:
212
+ # If they are creating from scratch, just get the saveframe name
213
+ self._name = kwargs['saveframe_name']
214
+ if 'tag_prefix' in kwargs:
215
+ self.tag_prefix = utils.format_category(kwargs['tag_prefix'])
216
+ return
217
+
218
+ # If we are reading from a CSV file, go ahead and parse it
219
+ if 'csv' in kwargs and kwargs['csv']:
220
+ csv_reader_object = csv_reader(star_buffer)
221
+ tags = next(csv_reader_object)
222
+ values = next(csv_reader_object)
223
+ if len(tags) != len(values):
224
+ raise ValueError("Your CSV data is invalid. The header length does not match the data length.")
225
+ for ordinal in range(0, len(tags)):
226
+ self.add_tag(tags[ordinal], values[ordinal])
227
+ return
228
+
229
+ tmp_entry = entry_mod.Entry.from_scratch(0)
230
+
231
+ # Load the BMRB entry from the file
232
+ star_buffer = StringIO("data_1 " + star_buffer.read())
233
+ parser = parser_mod.Parser(entry_to_parse_into=tmp_entry)
234
+ parser.parse(star_buffer.read(), source=self.source, convert_data_types=kwargs.get('convert_data_types', False),
235
+ raise_parse_warnings=kwargs.get('raise_parse_warnings', False))
236
+
237
+ # Copy the first parsed saveframe into ourself
238
+ if len(tmp_entry.frame_list) > 1:
239
+ raise ValueError("You attempted to parse one saveframe but the source you provided had more than one "
240
+ "saveframe. Please either parse all saveframes as an entry or only parse one saveframe. "
241
+ "Saveframes detected: " + str(tmp_entry.frame_list))
242
+ self._tags = tmp_entry[0].tags
243
+ self._loops = tmp_entry[0].loops
244
+ self._name = tmp_entry[0].name
245
+ self._category = tmp_entry[0].category
246
+ self.tag_prefix = tmp_entry[0].tag_prefix
247
+
248
+ @property
249
+ def _lc_tags(self) -> Dict[str, int]:
250
+ return {_[1][0].lower(): _[0] for _ in enumerate(self._tags)}
251
+
252
+ @property
253
+ def category(self) -> str:
254
+ return self._category
255
+
256
+ @category.setter
257
+ def category(self, category):
258
+ """ Updates the saveframe category. Sets the Sf_category tag if not present,
259
+ updates it if present. """
260
+
261
+ if category in definitions.NULL_VALUES:
262
+ raise ValueError("Cannot set the saveframe category to a null-equivalent value.")
263
+
264
+ # Update the sf_category tag if present - otherwise add it
265
+ category_tag = self.get_tag('sf_category', whole_tag=True)
266
+ if category_tag:
267
+ category_tag[0][1] = category
268
+ else:
269
+ self.add_tag('Sf_category', category)
270
+
271
+ self._category = category
272
+
273
+ @property
274
+ def empty(self) -> bool:
275
+ """ Check if the saveframe has no data. Ignore the structural tags."""
276
+
277
+ for tag in self._tags:
278
+ tag_lower = tag[0].lower()
279
+ if tag_lower not in ['sf_category', 'sf_framecode', 'id', 'entry_id', 'nmr_star_version',
280
+ 'original_nmr_star_version']:
281
+ if tag[1] not in definitions.NULL_VALUES:
282
+ return False
283
+
284
+ for loop in self._loops:
285
+ if not loop.empty:
286
+ return False
287
+
288
+ return True
289
+
290
+ @property
291
+ def loops(self) -> List['loop_mod.Loop']:
292
+ return self._loops
293
+
294
+ @property
295
+ def loop_dict(self) -> Dict[str, 'loop_mod.Loop']:
296
+ """Returns a hash of loop category -> loop."""
297
+
298
+ res = {}
299
+ for each_loop in self._loops:
300
+ if each_loop.category is not None:
301
+ res[each_loop.category.lower()] = each_loop
302
+ return res
303
+
304
+ @property
305
+ def name(self) -> Any:
306
+ """ Returns the name of the saveframe."""
307
+
308
+ return self._name
309
+
310
+ @name.setter
311
+ def name(self, name):
312
+ """ Updates the saveframe name. """
313
+
314
+ for char in str(name):
315
+ if char in utils.definitions.WHITESPACE:
316
+ raise ValueError("Saveframe names can not contain whitespace characters.")
317
+ if name in definitions.NULL_VALUES:
318
+ raise ValueError("Cannot set the saveframe name to a null-equivalent value.")
319
+
320
+ # Update the sf_framecode tag too
321
+ lc_tags = self._lc_tags
322
+ if 'sf_framecode' in lc_tags:
323
+ self.tags[lc_tags['sf_framecode']][1] = name
324
+ self._name = name
325
+
326
+ @property
327
+ def tags(self) -> List[List[any]]:
328
+ return self._tags
329
+
330
+ @property
331
+ def tag_dict(self) -> Dict[str, str]:
332
+ """Returns a hash of (tag name).lower() -> tag value."""
333
+
334
+ return {x[0].lower(): x[1] for x in self._tags}
335
+
336
@classmethod
def from_scratch(cls,
                 sf_name: str,
                 tag_prefix: str = None,
                 source: str = "from_scratch()"):
    """Create an empty saveframe to be filled in programmatically.

    The tag prefix may be given as the second argument; when it is
    omitted it will be set the first time you add a tag."""

    construction_args = {
        'saveframe_name': sf_name,
        'tag_prefix': tag_prefix,
        'source': source,
    }
    return cls(**construction_args)
347
+
348
@classmethod
def from_file(cls,
              the_file: Union[str, Path, TextIO, BinaryIO],
              csv: bool = False,
              convert_data_types: bool = False,
              raise_parse_warnings: bool = False,
              schema: Schema = None):
    """Create a saveframe by loading in a file. Specify csv=True if
    the file is a CSV file. If the_file starts with http://,
    https://, or ftp:// then we will use those protocols to attempt
    to open the file. the_file can be a string path, pathlib.Path object,
    or an open file handle.

    Setting convert_data_types to True will automatically convert
    the data loaded from the file into the corresponding python type as
    determined by loading the standard BMRB schema. This would mean that
    all floats will be represented as decimal.Decimal objects, all integers
    will be python int objects, strings and vars will remain strings, and
    dates will become datetime.date objects. When printing str() is called
    on all objects. Other than converting uppercase "E"s in scientific
    notation floats to lowercase "e"s this should not cause any change in
    the way re-printed NMR-STAR objects are displayed. Specify a custom
    schema object to use using the schema parameter.

    Setting raise_parse_warnings to True will result in the raising of a
    ParsingError rather than logging a warning when non-valid (but
    ignorable) issues are found."""

    construction_args = {
        'file_name': the_file,
        'csv': csv,
        'convert_data_types': convert_data_types,
        'raise_parse_warnings': raise_parse_warnings,
        'schema': schema,
    }
    return cls(**construction_args)
381
+
382
+ @classmethod
383
+ def from_json(cls, json_dict: Union[dict, str]):
384
+ """Create a saveframe from JSON (serialized or unserialized JSON)."""
385
+
386
+ # If they provided a string, try to load it using JSON
387
+ if not isinstance(json_dict, dict):
388
+ try:
389
+ json_dict = json.loads(json_dict)
390
+ except (TypeError, ValueError):
391
+ raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.")
392
+
393
+ # Make sure it has the correct keys
394
+ for check in ["name", "tag_prefix", "tags", "loops"]:
395
+ if check not in json_dict:
396
+ raise ValueError(f"The JSON you provide must be a hash and must contain the key '{check}' - even if "
397
+ "the key points to None.")
398
+
399
+ # Create a saveframe from scratch and populate it
400
+ ret = Saveframe.from_scratch(json_dict['name'])
401
+ ret.tag_prefix = json_dict['tag_prefix']
402
+ ret._category = json_dict.get('category', None)
403
+ ret._tags = json_dict['tags']
404
+ ret._loops = [loop_mod.Loop.from_json(x) for x in json_dict['loops']]
405
+ ret.source = "from_json()"
406
+
407
+ # Return the new loop
408
+ return ret
409
+
410
+ @classmethod
411
+ def from_string(cls,
412
+ the_string: str,
413
+ csv: bool = False,
414
+ convert_data_types: bool = False,
415
+ raise_parse_warnings: bool = False,
416
+ schema: Schema = None):
417
+ """Create a saveframe by parsing a string. Specify csv=True is
418
+ the string is in CSV format and not NMR-STAR format.
419
+
420
+ Setting convert_data_types to True will automatically convert
421
+ the data loaded from the file into the corresponding python type as
422
+ determined by loading the standard BMRB schema. This would mean that
423
+ all floats will be represented as decimal.Decimal objects, all integers
424
+ will be python int objects, strings and vars will remain strings, and
425
+ dates will become datetime.date objects. When printing str() is called
426
+ on all objects. Other that converting uppercase "E"s in scientific
427
+ notation floats to lowercase "e"s this should not cause any change in
428
+ the way re-printed NMR-STAR objects are displayed. Specify a custom
429
+ schema object to use using the schema parameter.
430
+
431
+ Setting raise_parse_warnings to True will result in the raising of a
432
+ ParsingError rather than logging a warning when non-valid (but
433
+ ignorable) issues are found."""
434
+
435
+ return cls(the_string=the_string,
436
+ csv=csv,
437
+ convert_data_types=convert_data_types,
438
+ raise_parse_warnings=raise_parse_warnings,
439
+ schema=schema)
440
+
441
@classmethod
def from_template(cls,
                  category: str,
                  name: str = None,
                  entry_id: Union[str, int] = None,
                  all_tags: bool = False,
                  default_values: bool = False,
                  schema: Schema = None):
    """ Create a saveframe containing the tags and loops that the schema
    defines for the given category. Specify the category when calling
    this method. Optionally also provide the name of the saveframe as
    the 'name' argument.

    The optional argument 'all_tags' forces all tags to be included
    rather than just the mandatory tags.

    The optional argument 'default_values' will insert the default
    values from the schema; otherwise no values are assigned."""

    # Resolve the schema up front: its version is recorded in the source string
    resolved_schema = utils.get_schema(schema)
    construction_args = {
        'category': category,
        'saveframe_name': name,
        'entry_id': entry_id,
        'all_tags': all_tags,
        'default_values': default_values,
        'schema': resolved_schema,
        'source': f"from_template({resolved_schema.version})",
    }
    return cls(**construction_args)
468
+
469
+ def __repr__(self) -> str:
470
+ """Returns a description of the saveframe."""
471
+
472
+ return f"<pynmrstar.Saveframe '{self.name}'>"
473
+
474
def __setitem__(self, key: Union[str, int], item: Union[str, 'loop_mod.Loop']) -> None:
    """Set the indicated loop or tag.

    When `item` is a Loop, `key` is either a position in the loop list
    or the category of an existing loop to replace. Otherwise `key` is a
    tag name and `item` its new value (the tag is added if missing).

    Raises KeyError if a loop category is given that does not exist."""

    # Anything that is not a loop is a tag value: add it or update it
    if not isinstance(item, loop_mod.Loop):
        self.add_tag(key, item, update=True)
        return

    try:
        position = int(str(key))
    except ValueError:
        # Not an integer, so treat the key as a loop category
        wanted = key.lower()
        if wanted not in self.loop_dict:
            raise KeyError(f"Loop with category '{key}' does not exist and therefore cannot be written to. Use "
                           "add_loop instead.")
        for pos, existing_loop in enumerate(self._loops):
            if existing_loop.category.lower() == wanted:
                self._loops[pos] = item
    else:
        self._loops[position] = item
493
+
494
+ def __str__(self,
495
+ first_in_category: bool = True,
496
+ skip_empty_loops: bool = False,
497
+ skip_empty_tags: bool = False,
498
+ show_comments: bool = True) -> str:
499
+ """Returns the saveframe in STAR format as a string. Please use :py:meth:`Saveframe.format`
500
+ when you want to pass arguments."""
501
+
502
+ if self.tag_prefix is None:
503
+ raise InvalidStateError(f"The tag prefix was never set! Error in saveframe named '{self.name}'.")
504
+
505
+ return_chunks = []
506
+
507
+ # Insert the comment if not disabled
508
+ if show_comments:
509
+ if self._category in _get_comments():
510
+ this_comment = _get_comments()[self._category]
511
+ if first_in_category or this_comment['every_flag']:
512
+ return_chunks.append(_get_comments()[self._category]['comment'])
513
+
514
+ # Print the saveframe
515
+ return_chunks.append(f"save_{self.name}\n")
516
+
517
+ if len(self._tags) > 0:
518
+ width = max([len(self.tag_prefix + "." + x[0]) for x in self._tags])
519
+ pstring = " %%-%ds %%s\n" % width
520
+ mstring = " %%-%ds\n;\n%%s;\n" % width
521
+
522
+ # Print the tags
523
+ for each_tag in self._tags:
524
+ if skip_empty_tags and each_tag[1] in definitions.NULL_VALUES:
525
+ continue
526
+ try:
527
+ clean_tag = utils.quote_value(each_tag[1])
528
+ except ValueError:
529
+ raise InvalidStateError('Cannot generate NMR-STAR for entry, as empty strings are not valid tag'
530
+ ' values in NMR-STAR. Please either replace the empty strings with None '
531
+ 'objects, or set pynmrstar.definitions.STR_CONVERSION_DICT[\'\'] = None. '
532
+ f'Saveframe: {self.name} Tag: {each_tag[0]}')
533
+
534
+ formatted_tag = self.tag_prefix + "." + each_tag[0]
535
+ if "\n" in clean_tag:
536
+ return_chunks.append(mstring % (formatted_tag, clean_tag))
537
+ else:
538
+ return_chunks.append(pstring % (formatted_tag, clean_tag))
539
+
540
+ # Print any loops
541
+ for each_loop in self._loops:
542
+ return_chunks.append(each_loop.format(skip_empty_loops=skip_empty_loops, skip_empty_tags=skip_empty_tags))
543
+
544
+ # Close the saveframe
545
+ return "".join(return_chunks) + "\nsave_\n"
546
+
547
+ def add_loop(self, loop_to_add: 'loop_mod.Loop') -> None:
548
+ """Add a loop to the saveframe loops."""
549
+
550
+ if loop_to_add.category in self.loop_dict or str(loop_to_add.category).lower() in self.loop_dict:
551
+ if loop_to_add.category is None:
552
+ raise ValueError("You cannot have two loops with the same category in one saveframe. You are getting "
553
+ "this error because you haven't yet set your loop categories.")
554
+ else:
555
+ raise ValueError("You cannot have two loops with the same category in one saveframe. Category: "
556
+ f"'{loop_to_add.category}'.")
557
+
558
+ self._loops.append(loop_to_add)
559
+
560
def add_tag(self,
            name: str,
            value: Any,
            update: bool = False,
            convert_data_types: bool = False,
            schema: Schema = None) -> None:
    """Add a tag to the tag list. Does a bit of validation and
    parsing.

    The name may be fully qualified ("_Prefix.Tag"), start with a "."
    (".Tag"), or be bare ("Tag"). A qualified name sets the saveframe's
    tag prefix if it is not yet set, and must match it otherwise.

    Set update to True to update a tag if it exists rather
    than raise an exception. Note that convert_data_types is not applied
    when an existing tag is updated.

    Set convert_data_types to True to convert the tag value from str to
    whatever type the tag is as defined in the schema.

    Optionally specify a schema if you don't want to use the default schema.

    Raises ValueError for an invalid tag name, a prefix that conflicts
    with the saveframe's tag prefix, a duplicate tag when update is
    False, or an Sf_framecode value that is null-equivalent or conflicts
    with the saveframe name.
    """

    if not isinstance(name, str):
        raise ValueError('Tag names must be strings.')

    # Split off (and validate) the category prefix if one was supplied
    if "." in name:
        if name[0] != ".":
            prefix = utils.format_category(name)
            if self.tag_prefix is None:
                self.tag_prefix = prefix
            elif self.tag_prefix != prefix:
                raise ValueError(
                    "One saveframe cannot have tags with different categories (or tags that don't "
                    f"match the set category)! Saveframe tag prefix is '{self.tag_prefix}' but the added tag, "
                    f"'{name}' has prefix '{prefix}'.")
            name = name[name.index(".") + 1:]
        else:
            name = name[1:]

    if name in definitions.NULL_VALUES:
        raise ValueError(f"Cannot use a null-equivalent value as a tag name. Invalid tag name: '{name}'")
    if "." in name:
        raise ValueError(f"There cannot be more than one '.' in a tag name. Invalid tag name: '{name}'")
    for char in name:
        if char in utils.definitions.WHITESPACE:
            raise ValueError(f"Tag names can not contain whitespace characters. Invalid tag name: '{name}'")

    # No duplicate tags
    if self.get_tag(name):
        if not update:
            raise ValueError(f"There is already a tag with the name '{name}' in the saveframe '{self.name}'."
                             f" Set update=True if you want to override its value.")
        else:
            # Keep the cached category / name in step with their tags
            tag_name_lower = name.lower()
            if tag_name_lower == "sf_category":
                self._category = value
            if tag_name_lower == "sf_framecode":
                if value in definitions.NULL_VALUES:
                    raise ValueError("Cannot set the saveframe name tag (Sf_framecode) to a null-equivalent "
                                     f"value. Invalid value: '{value}'")
                self._name = value
            # whole_tag=True requests the [name, value] pairs so the value can be edited in place
            self.get_tag(name, whole_tag=True)[0][1] = value
            return

    # See if we need to convert the data type
    if convert_data_types:
        new_tag = [name, utils.get_schema(schema).convert_tag(self.tag_prefix + "." + name, value)]
    else:
        new_tag = [name, value]

    # Set the category if the tag we are loading is the category
    tag_name_lower = name.lower()
    if tag_name_lower == "sf_category":
        self._category = value
    if tag_name_lower == "sf_framecode":
        if not self._name:
            self._name = value
        elif self._name != value:
            raise ValueError('The Sf_framecode tag cannot be different from the saveframe name. Error '
                             f'occurred in tag {self.tag_prefix}.Sf_framecode with value {value} which '
                             f'conflicts with the saveframe name {self._name}.')
    self._tags.append(new_tag)
638
+
639
+ def add_tags(self, tag_list: list, update: bool = False) -> None:
640
+ """Adds multiple tags to the list. Input should be a list of
641
+ tuples that are either [key, value] or [key]. In the latter case
642
+ the value will be set to ".". Set update to true to update a
643
+ tag if it exists rather than raise an exception."""
644
+
645
+ for tag_pair in tag_list:
646
+ if len(tag_pair) == 2:
647
+ self.add_tag(tag_pair[0], tag_pair[1], update=update)
648
+ elif len(tag_pair) == 1:
649
+ self.add_tag(tag_pair[0], ".", update=update)
650
+ else:
651
+ raise ValueError(f"You provided an invalid tag/value to add: '{tag_pair}'.")
652
+
653
def add_missing_tags(self,
                     schema: Schema = None,
                     all_tags: bool = False,
                     recursive: bool = True) -> None:
    """ Automatically adds any missing tags (according to the schema)
    and sorts the tags.

    With all_tags False, schema tags whose "public" value is "I" are
    not added.

    Set recursive to False to only operate on the tags in this saveframe,
    and not those in child loops.

    Raises InvalidStateError if the tag prefix has not been set."""

    if not self.tag_prefix:
        raise InvalidStateError("You must first specify the tag prefix of this Saveframe before calling this "
                                "method. You can do this by adding a fully qualified tag "
                                "(i.e. _Entry.Sf_framecode), by specifying the tag_prefix when calling "
                                "from_scratch() or by modifying the .tag_prefix attribute.")

    schema = utils.get_schema(schema)
    tag_prefix: str = self.tag_prefix.lower() + '.'

    for item in schema.schema_order:

        # Only schema tags that belong to this saveframe are considered
        if not item.lower().startswith(tag_prefix):
            continue

        try:
            # Add unconditionally when all_tags is set, otherwise only tags not flagged "I"
            if all_tags or schema.schema[item.lower()]["public"] != "I":
                self.add_tag(item, None)
        except ValueError:
            # add_tag raises ValueError for tags that already exist; those are left as they are
            pass

    if recursive:
        for child_loop in self._loops:
            try:
                child_loop.add_missing_tags(schema=schema, all_tags=all_tags)
            except ValueError:
                pass

    self.sort_tags()
696
+
697
+ def compare(self, other) -> List[str]:
698
+ """Returns the differences between two saveframes as a list.
699
+ Non-equal saveframes will always be detected, but specific
700
+ differences detected depends on order of saveframes."""
701
+
702
+ diffs = []
703
+
704
+ # Check if this is literally the same object
705
+ if self is other:
706
+ return []
707
+ # Check if the other object is our string representation
708
+ if isinstance(other, str):
709
+ if str(self) == other:
710
+ return []
711
+ else:
712
+ return ['String was not exactly equal to saveframe.']
713
+ elif not isinstance(other, Saveframe):
714
+ return ['Other object is not of class Saveframe.']
715
+
716
+ # We need to do this in case of an extra "\n" on the end of one tag
717
+ if str(other) == str(self):
718
+ return []
719
+
720
+ # Do STAR comparison
721
+ try:
722
+ if str(self.name) != str(other.name):
723
+ # No point comparing apples to oranges. If the tags are
724
+ # this different just return
725
+ diffs.append(f"\tSaveframe names do not match: '{self.name}' vs '{other.name}'.")
726
+ return diffs
727
+
728
+ if str(self.tag_prefix) != str(other.tag_prefix):
729
+ # No point comparing apples to oranges. If the tags are
730
+ # this different just return
731
+ diffs.append(f"\tTag prefix does not match: '{self.tag_prefix}' vs '{other.tag_prefix}'.")
732
+ return diffs
733
+
734
+ if len(self._tags) < len(other.tags):
735
+ diffs.append(f"\tNumber of tags does not match: '{len(self._tags)}' vs '{len(other.tags)}'. The "
736
+ f"compared entry has at least one tag this entry does not.")
737
+
738
+ for tag in self._tags:
739
+ other_tag = other.get_tag(tag[0])
740
+
741
+ if not other_tag:
742
+ diffs.append(f"\tNo tag with name '{self.tag_prefix}.{tag[0]}' in compared entry.")
743
+ continue
744
+
745
+ # Compare the string version of the tags in case there are
746
+ # non-string types. Use the conversion dict to get to str
747
+ if (str(definitions.STR_CONVERSION_DICT.get(tag[1], tag[1])) !=
748
+ str(definitions.STR_CONVERSION_DICT.get(other_tag[0], other_tag[0]))):
749
+ newline_stripped_tag = str(tag[1]).replace("\n", "\\n")
750
+ newline_stripped_other_tag = str(other_tag[0]).replace("\n", "\\n")
751
+ diffs.append(f"\tMismatched tag values for tag '{self.tag_prefix}.{tag[0]}': '"
752
+ f"{newline_stripped_tag}' vs '{newline_stripped_other_tag}'.")
753
+
754
+ if len(self._loops) != len(other.loops):
755
+ diffs.append(f"\tNumber of children loops does not match: '{len(self._loops)}' vs "
756
+ f"'{len(other.loops)}'.")
757
+
758
+ compare_loop_dict = other.loop_dict
759
+ for each_loop in self._loops:
760
+ if each_loop.category.lower() in compare_loop_dict:
761
+ compare = each_loop.compare(compare_loop_dict[each_loop.category.lower()])
762
+ if len(compare) > 0:
763
+ diffs.append(f"\tLoops do not match: '{each_loop.category}'.")
764
+ diffs.extend(compare)
765
+ else:
766
+ diffs.append(f"\tNo loop with category '{each_loop.category}' in other entry.")
767
+
768
+ except AttributeError as err:
769
+ diffs.append(f"\tAn exception occurred while comparing: '{err}'.")
770
+
771
+ return diffs
772
+
773
def delete_tag(self, tag: str) -> None:
    """ Deprecated alias that issues a DeprecationWarning and delegates to
    :py:meth:`pynmrstar.Saveframe.remove_tag`. """

    deprecation_message = 'This method name has been renamed to remove_tag. Please update your code.'
    warnings.warn(deprecation_message, DeprecationWarning)
    return self.remove_tag(tag)
778
+
779
+ def get_data_as_csv(self, header: bool = True, show_category: bool = True) -> str:
780
+ """Return the data contained in the loops, properly CSVd, as a
781
+ string. Set header to False omit the header. Set show_category
782
+ to False to omit the loop category from the headers."""
783
+
784
+ csv_buffer = StringIO()
785
+ csv_writer_object = csv_writer(csv_buffer)
786
+
787
+ if header:
788
+ if show_category:
789
+ csv_writer_object.writerow([str(self.tag_prefix) + "." + str(x[0]) for x in self._tags])
790
+ else:
791
+ csv_writer_object.writerow([str(x[0]) for x in self._tags])
792
+
793
+ data = []
794
+ for each_tag in self._tags:
795
+ data.append(each_tag[1])
796
+
797
+ csv_writer_object.writerow(data)
798
+
799
+ csv_buffer.seek(0)
800
+ return csv_buffer.read().replace('\r\n', '\n')
801
+
802
+ def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
803
+ """ The same as calling str(Saveframe), except that you can pass options
804
+ to customize how the saveframe is printed.
805
+
806
+ skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
807
+ skip_empty_tags will omit tags in the saveframe and child loops which have no non-null values.
808
+ show_comments will show the standard comments before a saveframe."""
809
+
810
+ return self.__str__(skip_empty_loops=skip_empty_loops, show_comments=show_comments,
811
+ skip_empty_tags=skip_empty_tags)
812
+
813
def get_json(self, serialize: bool = True) -> Union[dict, str]:
    """ Returns the saveframe in JSON format. When serialize is True (the
    default) a JSON string is returned; when it is False the underlying
    serializable dictionary is returned instead."""

    tag_pairs = [[tag[0], tag[1]] for tag in self._tags]
    # Child loops are always requested as dictionaries so that the whole
    # structure is serialized in a single pass below
    loop_dicts = [child.get_json(serialize=False) for child in self._loops]

    saveframe_data = {
        "name": self.name,
        "category": self._category,
        "tag_prefix": self.tag_prefix,
        "tags": tag_pairs,
        "loops": loop_dicts
    }

    if not serialize:
        return saveframe_data
    return json.dumps(saveframe_data, default=_json_serialize)
830
+
831
def get_loop(self, name: str) -> 'loop_mod.Loop':
    """Return the loop in this saveframe whose category matches `name`.

    The comparison is case-insensitive. Raises KeyError when no loop has
    the requested category."""

    wanted = utils.format_category(name).lower()
    # First loop whose category matches wins
    found = next((candidate for candidate in self._loops
                  if str(candidate.category).lower() == wanted), None)
    if found is None:
        raise KeyError(f"No loop with category '{wanted}'.")
    return found
839
+
840
def get_loop_by_category(self, name: str) -> 'loop_mod.Loop':
    """ Deprecated. Please use :py:meth:`pynmrstar.Saveframe.get_loop` instead.

    Emits a DeprecationWarning and then returns the result of
    get_loop(name). """

    # stacklevel=2 attributes the warning to the code that called this
    # method rather than to this module. Python's default filters only
    # display DeprecationWarning when it is attributed to __main__, so
    # without this the warning would be silently hidden from script authors.
    warnings.warn('Deprecated. Please use get_loop() instead.', DeprecationWarning, stacklevel=2)
    return self.get_loop(name)
845
+
846
def get_tag(self, query: str, whole_tag: bool = False) -> list:
    """Allows fetching the value of a tag by tag name. Returns
    a list of all matching tag values, gathered first from any child
    loop whose category matches and then from the saveframe's own tags.

    If the query contains a tag prefix ("_Prefix.Tag"), only loops and
    saveframe tags belonging to that prefix are searched; otherwise the
    saveframe's own tag prefix is used.

    Specify whole_tag=True and the [tag_name, tag_value] pair will be
    returned instead of just the value"""

    results = []

    # Make sure this is the correct saveframe if they specify a tag
    # prefix
    if "." in query:
        tag_prefix = utils.format_category(query)
    else:
        tag_prefix = self.tag_prefix

    # Check the loops
    for each_loop in self._loops:
        if ((each_loop.category is not None and tag_prefix is not None and
             each_loop.category.lower() == tag_prefix.lower())):
            results.extend(each_loop.get_tag(query, whole_tag=whole_tag))

    # Check our tags
    query = utils.format_tag_lc(query)
    # The saveframe's own prefix may still be unset (None) while the query
    # carries an explicit prefix; in that case nothing here can match, so
    # skip the comparison instead of calling .lower() on None.
    own_prefix = self.tag_prefix
    if tag_prefix is not None and own_prefix is not None and tag_prefix.lower() == own_prefix.lower():
        for tag in self._tags:
            if query == tag[0].lower():
                if whole_tag:
                    results.append(tag)
                else:
                    results.append(tag[1])

    return results
879
+
880
def loop_iterator(self) -> Iterable['loop_mod.Loop']:
    """Returns a fresh iterator over the loops stored in this saveframe."""

    child_loops = self._loops
    return iter(child_loops)
884
+
885
def print_tree(self) -> None:
    """Prints a tree-style summary of the saveframe: its own repr on the
    first line, then one tab-indented, numbered line for each item
    obtained by iterating over the saveframe."""

    summary = repr(self)
    print(summary)

    index = 0
    for child in self:
        print(f"\t[{index}] {child!r}")
        index += 1
891
+
892
def remove_loop(self, item: Union[str, List[str], Tuple[str],
                                  'loop_mod.Loop', List['loop_mod.Loop'], Tuple['loop_mod.Loop']]) -> None:
    """ Removes one or more loops from the saveframe. You can remove loops either by passing the loop object itself,
    the loop category (as a string), or a list or tuple of either.

    Category strings are matched case-insensitively and the leading underscore is optional.

    Raises ValueError if the argument (or one of its elements) is neither a loop object nor a string, or if a
    requested loop is not present in this saveframe. Nothing is removed unless every requested loop is found."""

    parsed_list: list
    if isinstance(item, tuple):
        parsed_list = list(item)
    elif isinstance(item, list):
        parsed_list = item
    elif isinstance(item, (str, loop_mod.Loop)):
        parsed_list = [item]
    else:
        raise ValueError('The item you provided was not one or more loop objects or loop categories (strings). '
                         f'Item type: {type(item)}')

    loop_names = self.loop_dict

    # Resolve everything first so that a bad entry aborts before any loop is dropped
    loops_to_remove = []
    for loop in parsed_list:
        if isinstance(loop, str):
            formatted_loop = loop.lower()
            if not formatted_loop.startswith('_'):
                # Prefix the lower-cased name (not the caller's original spelling) so that mixed-case
                # categories given without the underscore still match the lower-case loop_dict keys
                formatted_loop = f"_{formatted_loop}"
            if formatted_loop not in loop_names:
                raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
                                 f'missing loop: {loop}')
            loops_to_remove.append(loop_names[formatted_loop])
        elif isinstance(loop, loop_mod.Loop):
            if loop not in self._loops:
                raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
                                 f'missing loop: {loop}')
            loops_to_remove.append(loop)
        else:
            raise ValueError('One of the items you provided was not a loop object or loop category (string). '
                             f'Item: {repr(loop)}')

    self._loops = [_ for _ in self._loops if _ not in loops_to_remove]
930
+
931
def remove_tag(self, item: Union[str, List[str], Tuple[str]]) -> None:
    """Removes one or more tags from the saveframe based on tag name(s).
    Provide either a tag name or a list or tuple containing tag names.

    Raises KeyError if any requested tag is not present; in that case no
    tags are removed. """

    requested = get_clean_tag_list(item)
    index_by_name = self._lc_tags

    # Validate every name up front so a failure leaves the tags untouched
    for entry in requested:
        if entry["formatted"] not in index_by_name:
            raise KeyError(f"There is no tag with name '{entry['original']}' to remove.")

    # Rebuild the tag list, leaving out the positions slated for removal
    doomed = {index_by_name[entry["formatted"]] for entry in requested}
    self._tags = [tag for position, tag in enumerate(self._tags) if position not in doomed]
945
+
946
def set_tag_prefix(self, tag_prefix: str) -> None:
    """Set the tag prefix for this saveframe. The supplied value is passed
    through utils.format_category() before being stored."""

    normalized_prefix = utils.format_category(tag_prefix)
    self.tag_prefix = normalized_prefix
950
+
951
def sort_tags(self, schema: Schema = None) -> None:
    """ Sort the saveframe's tags in place so that they follow the order
    given by a schema. When no schema is passed, the one returned by
    utils.get_schema() is used."""

    schema = utils.get_schema(schema)

    # Each tag is ranked by the schema's key for its fully qualified name
    # ("<tag_prefix>.<tag_name>")
    self._tags.sort(key=lambda tag: schema.tag_key(self.tag_prefix + "." + tag[0]))
962
+
963
def tag_iterator(self) -> Iterable[Tuple[str, str]]:
    """Returns a fresh iterator over the tags stored in this saveframe."""

    own_tags = self._tags
    return iter(own_tags)
967
+
968
def validate(self, validate_schema: bool = True, schema: Schema = None, validate_star: bool = True):
    """Validate a saveframe in a variety of ways. Returns a list of the
    errors found; an empty list means no errors were found. By default
    all validation modes are enabled.

    validate_schema - Determines if the saveframe's tags are checked
    against a schema. You can pass your own custom schema if desired,
    otherwise the one returned by utils.get_schema() is used.

    validate_star - Passed on to each child loop's validate() call."""

    errors = []

    # A missing/empty category is reported, and downstream checks are then
    # given None as the category
    my_category = self._category or None
    if my_category is None:
        errors.append(f"Cannot properly validate saveframe: '{self.name}'. No saveframe category defined.")

    if validate_schema:
        # Get the default schema if we are not passed a schema
        my_schema = utils.get_schema(schema)

        for tag in self._tags:
            full_name = self.tag_prefix + "." + tag[0]
            errors.extend(my_schema.val_type(full_name, tag[1], category=my_category))

    # Collect the errors reported by each child loop
    for child_loop in self._loops:
        loop_errors = child_loop.validate(validate_schema=validate_schema, schema=schema,
                                          validate_star=validate_star, category=my_category)
        errors.extend(loop_errors)

    return errors
1001
+
1002
def write_to_file(self,
                  file_name: Union[str, Path],
                  format_: str = "nmrstar",
                  show_comments: bool = True,
                  skip_empty_loops: bool = False,
                  skip_empty_tags: bool = False) -> None:
    """ Writes the saveframe to the specified file, in NMR-STAR format by default.

    Optionally specify:
    show_comments=False to disable the comments that are by default inserted. Ignored when writing json.
    skip_empty_loops=False to force printing loops with no tags at all (loops with null tags are still printed)
    skip_empty_tags=True will omit tags in the saveframes and loops which have no non-null values.
    format_=json to write to the file in JSON format."""

    write_options = {
        "file_name": file_name,
        "format_": format_,
        "show_comments": show_comments,
        "skip_empty_loops": skip_empty_loops,
        "skip_empty_tags": skip_empty_tags,
    }
    # Inside the class body this name resolves to the module-level
    # write_to_file helper imported from pynmrstar._internal, not to this method
    write_to_file(self, **write_options)