pynmrstar 3.3.5__cp39-cp39-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pynmrstar might be problematic. Click here for more details.

pynmrstar/saveframe.py ADDED
@@ -0,0 +1,1015 @@
1
+ import json
2
+ import warnings
3
+ from csv import reader as csv_reader, writer as csv_writer
4
+ from io import StringIO
5
+ from typing import TextIO, BinaryIO, Union, List, Optional, Any, Dict, Iterable, Tuple
6
+
7
+ from pynmrstar import definitions, entry as entry_mod, loop as loop_mod, parser as parser_mod, utils
8
+ from pynmrstar._internal import _get_comments, _json_serialize, _interpret_file, get_clean_tag_list, write_to_file
9
+ from pynmrstar.exceptions import InvalidStateError
10
+ from pynmrstar.schema import Schema
11
+
12
+
13
+ class Saveframe(object):
14
+ """A saveframe object. Create using the class methods, see below."""
15
+
16
def __contains__(self, item: Any) -> bool:
    """Check whether the saveframe contains a tag, a loop, or all of several.

    ``item`` may be a tag name, a loop category (a string that starts with
    ``_`` and contains no ``.``), a loop object, or a list/tuple of any of
    those. True is returned only when every candidate is present; a
    candidate of any other type makes the result False. An empty list or
    tuple yields True because there is nothing that could be missing.
    """

    # Normalize the argument into a list of candidates to look up
    if isinstance(item, (list, tuple)):
        candidates: List[Union[str, 'loop_mod.Loop']] = list(item)
    elif isinstance(item, (loop_mod.Loop, str)):
        candidates = [item]
    else:
        return False

    # Fetch the lookup tables once rather than once per candidate
    lc_tags = self._lc_tags
    loop_dict = self.loop_dict

    for candidate in candidates:
        if isinstance(candidate, loop_mod.Loop):
            if candidate not in self.loops:
                return False
        elif isinstance(candidate, str):
            # A leading underscore without a '.' separator denotes a loop category
            if candidate.startswith("_") and "." not in candidate:
                if candidate.lower() not in loop_dict:
                    return False
            else:
                if utils.format_tag_lc(candidate) not in lc_tags:
                    return False
        else:
            return False
    return True
44
+
45
def __delitem__(self, item: Union[int, str, 'loop_mod.Loop']) -> None:
    """Delete a loop or a tag from the saveframe.

    A loop object or an integer index removes that loop. A string removes
    the loop with that category when it looks like a category (leading
    underscore, no '.'), and otherwise removes the tag with that name.
    Raises IndexError for an index with no loop and ValueError for any
    other argument type.
    """

    # The caller handed us the exact loop to drop
    if isinstance(item, loop_mod.Loop):
        self.remove_loop(item)
        return

    # Positional removal
    if isinstance(item, int):
        try:
            self.remove_loop(self._loops[item])
        except IndexError:
            raise IndexError(f'Index out of range: no loop at index: {item}')
        return

    if not isinstance(item, str):
        raise ValueError(f'Item of invalid type provided: {type(item)}')

    # A preceding underscore and no '.' category/tag separator means the
    # string names a loop category rather than a tag
    names_a_loop = item.startswith("_") and "." not in item
    if names_a_loop:
        self.remove_loop(item)
    else:
        self.remove_tag(item)
65
+
66
def __eq__(self, other) -> bool:
    """Return True if this saveframe equals another saveframe, False if it
    does not (or if the other object is not a saveframe at all).

    Two saveframes are equal when their name, category, tags and loops all
    compare equal."""

    if isinstance(other, Saveframe):
        mine = (self.name, self._category, self._tags, self._loops)
        theirs = (other.name, other._category, other._tags, other._loops)
        return mine == theirs

    return False
75
+
76
def __getitem__(self, item: Union[int, str]) -> Union[list, 'loop_mod.Loop']:
    """Get the indicated loop or tag.

    An integer returns the loop at that position. A string that looks like
    a loop category (leading underscore, no '.') returns the matching loop;
    any other string returns the list of matching tag values.

    Raises IndexError when there is no loop at the given position and
    KeyError when no loop or tag matches the given string. Keys of any
    other type fall through and yield None.
    """

    if isinstance(item, int):
        try:
            return self._loops[item]
        # Indexing a list out of range raises IndexError (never KeyError),
        # so this is the exception that has to be translated.
        except IndexError:
            raise IndexError(f"No loop with index '{item}'.") from None
    elif isinstance(item, str):
        # Assume it is a loop category based on the preceding underscore
        # and lack of the '.' category and tag separator
        if item.startswith("_") and "." not in item:
            try:
                return self.loop_dict[item.lower()]
            except KeyError:
                raise KeyError(f"No loop matching '{item}'.") from None
        else:
            results = self.get_tag(item)
            if not results:
                raise KeyError(f"No tag matching '{item}'.")
            return results
97
+
98
def __iter__(self) -> Iterable["loop_mod.Loop"]:
    """Iterate over the loops stored in this saveframe, in order."""

    contained_loops = self._loops
    return iter(contained_loops)
102
+
103
def __len__(self) -> int:
    """The length of a saveframe is the number of loops it holds."""

    loop_count = len(self._loops)
    return loop_count
107
+
108
def __lt__(self, other) -> bool:
    """Order saveframes by their tag prefix.

    Returns True when this saveframe's tag prefix sorts before the other
    saveframe's tag prefix, False otherwise. Comparison against anything
    that is not a saveframe is declined with NotImplemented."""

    if isinstance(other, Saveframe):
        return self.tag_prefix < other.tag_prefix

    return NotImplemented
117
+
118
def __init__(self, **kwargs) -> None:
    """Don't use this directly. Use the class methods to construct:
    :py:meth:`Saveframe.from_scratch`, :py:meth:`Saveframe.from_string`,
    :py:meth:`Saveframe.from_template`, :py:meth:`Saveframe.from_file`,
    :py:meth:`Saveframe.from_json`

    The keyword arguments select how the saveframe is built. They are
    checked in this order:

    * ``the_string`` or ``file_name``: the text is read into a buffer and
      parsed - as CSV when ``csv`` is true, otherwise as NMR-STAR, in which
      case ``convert_data_types`` and ``raise_parse_warnings`` are passed
      on to the parser.
    * ``all_tags`` (together with ``schema``, ``category``, ``entry_id``
      and ``default_values``, optionally ``saveframe_name``): the tags and
      loops are generated from the schema.
    * ``saveframe_name`` (optionally with ``tag_prefix``): an empty
      saveframe with that name is created.

    ``source``, when present, is stored on the instance; the string and
    file branches overwrite it with their own description.

    Raises ValueError when called without keyword arguments, when the
    template category is not in the schema, when the CSV header and data
    rows differ in length, or when the parsed text holds more than one
    saveframe.
    """

    # They initialized us wrong
    if len(kwargs) == 0:
        raise ValueError("You should not directly instantiate a Saveframe using this method. Instead use the class"
                         " methods: Saveframe.from_scratch(), Saveframe.from_string(), Saveframe.from_template(), "
                         "Saveframe.from_file(), and Saveframe.from_json().")

    # Initialize our local variables
    self._tags: List[Any] = []
    self._loops: List[loop_mod.Loop] = []
    self._name: str = ""
    self.source: str = "unknown"
    self._category: Optional[str] = None
    self.tag_prefix: Optional[str] = None

    # Holds the text to parse; stays empty unless a string or file is given
    star_buffer: StringIO = StringIO('')

    # Update our source if it was provided
    if 'source' in kwargs:
        self.source = kwargs['source']

    if 'the_string' in kwargs:
        # Parse from a string by wrapping it in StringIO
        star_buffer = StringIO(kwargs['the_string'])
        self.source = "from_string()"
    elif 'file_name' in kwargs:
        star_buffer = _interpret_file(kwargs['file_name'])
        self.source = f"from_file('{kwargs['file_name']}')"
    # Creating from template (schema)
    elif 'all_tags' in kwargs:
        schema_obj = utils.get_schema(kwargs['schema'])
        schema = schema_obj.schema
        self._category = kwargs['category']

        # The name defaults to the category unless a non-empty name was given
        self._name = self._category
        if 'saveframe_name' in kwargs and kwargs['saveframe_name']:
            self._name = kwargs['saveframe_name']

        # Make sure it is a valid category
        if self._category not in [x["SFCategory"] for x in schema.values()]:
            raise ValueError(f"The saveframe category '{self._category}' was not found in the dictionary.")

        # Walk the schema entries in their "Dictionary sequence" order
        s = sorted(schema.values(), key=lambda _: float(_["Dictionary sequence"]))

        # Loop categories already created, so each loop is added only once
        loops_added = []

        for item in s:
            if item["SFCategory"] == self._category:

                # It is a tag in this saveframe
                if item["Loopflag"] == "N":

                    ft = utils.format_tag(item["Tag"])
                    # Set the value for sf_category and sf_framecode
                    if ft == "Sf_category":
                        self.add_tag(item["Tag"], self._category)
                    elif ft == "Sf_framecode":
                        self.add_tag(item["Tag"], self.name)
                    # If the tag is the entry ID tag, set the entry ID
                    elif item["entryIdFlg"] == "Y":
                        self.add_tag(item["Tag"], kwargs['entry_id'])
                    else:
                        # Use the schema default only when requested and non-null
                        tag_value = None
                        if kwargs['default_values']:
                            if item['default value'] not in definitions.NULL_VALUES:
                                tag_value = item['default value']
                        # Unconditional add
                        if kwargs['all_tags']:
                            self.add_tag(item["Tag"], tag_value)
                        # Conditional add: skip tags whose "public" flag is "I"
                        else:
                            if item["public"] != "I":
                                self.add_tag(item["Tag"], tag_value)

                # It is a contained loop tag
                else:
                    cat_formatted = utils.format_category(item["Tag"])
                    if cat_formatted not in loops_added:
                        loops_added.append(cat_formatted)
                        # A ValueError while building or adding the loop is
                        # ignored, so that loop is simply left out
                        try:
                            self.add_loop(loop_mod.Loop.from_template(cat_formatted,
                                                                      all_tags=kwargs['all_tags'],
                                                                      schema=schema_obj))
                        except ValueError:
                            pass
        return

    elif 'saveframe_name' in kwargs:
        # If they are creating from scratch, just get the saveframe name
        self._name = kwargs['saveframe_name']
        if 'tag_prefix' in kwargs:
            self.tag_prefix = utils.format_category(kwargs['tag_prefix'])
        return

    # If we are reading from a CSV file, go ahead and parse it
    if 'csv' in kwargs and kwargs['csv']:
        csv_reader_object = csv_reader(star_buffer)
        # Only the first two rows are read: the tag names, then the values
        tags = next(csv_reader_object)
        values = next(csv_reader_object)
        if len(tags) != len(values):
            raise ValueError("Your CSV data is invalid. The header length does not match the data length.")
        for ordinal in range(0, len(tags)):
            self.add_tag(tags[ordinal], values[ordinal])
        return

    # Temporary entry that receives whatever the parser finds
    tmp_entry = entry_mod.Entry.from_scratch(0)

    # Load the BMRB entry from the file. A "data_1 " header is prepended to
    # the text before it is handed to the parser.
    star_buffer = StringIO("data_1 " + star_buffer.read())
    parser = parser_mod.Parser(entry_to_parse_into=tmp_entry)
    parser.parse(star_buffer.read(), source=self.source, convert_data_types=kwargs.get('convert_data_types', False),
                 raise_parse_warnings=kwargs.get('raise_parse_warnings', False))

    # Copy the first parsed saveframe into ourself
    if len(tmp_entry.frame_list) > 1:
        raise ValueError("You attempted to parse one saveframe but the source you provided had more than one "
                         "saveframe. Please either parse all saveframes as an entry or only parse one saveframe. "
                         "Saveframes detected: " + str(tmp_entry.frame_list))
    self._tags = tmp_entry[0].tags
    self._loops = tmp_entry[0].loops
    self._name = tmp_entry[0].name
    self._category = tmp_entry[0].category
    self.tag_prefix = tmp_entry[0].tag_prefix
246
+
247
@property
def _lc_tags(self) -> Dict[str, int]:
    """Map each lower-cased tag name to its position in the tag list."""

    return {tag[0].lower(): position for position, tag in enumerate(self._tags)}
250
+
251
@property
def category(self) -> str:
    """The category of this saveframe."""

    return self._category

@category.setter
def category(self, category):
    """Change the saveframe category and keep the Sf_category tag in sync:
    the tag is updated when it already exists and added when it does not.

    Raises ValueError for a null-equivalent category."""

    if category in definitions.NULL_VALUES:
        raise ValueError("Cannot set the saveframe category to a null-equivalent value.")

    # Look for an existing sf_category tag; create one only if there is none
    existing = self.get_tag('sf_category', whole_tag=True)
    if not existing:
        self.add_tag('Sf_category', category)
    else:
        existing[0][1] = category

    self._category = category
271
+
272
@property
def empty(self) -> bool:
    """True when the saveframe carries no data.

    Structural tags (category, framecode, IDs and version tags) are ignored;
    any other tag with a non-null value, or any non-empty loop, makes the
    saveframe non-empty."""

    structural_tags = ('sf_category', 'sf_framecode', 'id', 'entry_id', 'nmr_star_version',
                       'original_nmr_star_version')

    for tag in self._tags:
        if tag[0].lower() in structural_tags:
            continue
        if tag[1] not in definitions.NULL_VALUES:
            return False

    return all(loop.empty for loop in self._loops)
288
+
289
@property
def loops(self) -> List['loop_mod.Loop']:
    """The list of loops held by this saveframe (the live list, not a copy)."""

    contained_loops = self._loops
    return contained_loops
292
+
293
@property
def loop_dict(self) -> Dict[str, 'loop_mod.Loop']:
    """Build a mapping of lower-cased loop category -> loop.

    Loops whose category has not been set are left out."""

    return {loop.category.lower(): loop
            for loop in self._loops
            if loop.category is not None}
302
+
303
@property
def name(self) -> Any:
    """The name of this saveframe."""

    return self._name

@name.setter
def name(self, name):
    """Rename the saveframe, updating the Sf_framecode tag when it exists.

    Raises ValueError when the name contains whitespace or is a
    null-equivalent value."""

    if any(char in utils.definitions.WHITESPACE for char in str(name)):
        raise ValueError("Saveframe names can not contain whitespace characters.")
    if name in definitions.NULL_VALUES:
        raise ValueError("Cannot set the saveframe name to a null-equivalent value.")

    # Keep the sf_framecode tag (if there is one) in step with the name
    framecode_position = self._lc_tags.get('sf_framecode')
    if framecode_position is not None:
        self.tags[framecode_position][1] = name
    self._name = name
324
+
325
@property
def tags(self) -> List[List[Any]]:
    """The saveframe's tags as a list of ``[tag_name, tag_value]`` pairs.

    This is the live internal list, not a copy."""

    return self._tags
328
+
329
@property
def tag_dict(self) -> Dict[str, str]:
    """Build a mapping of lower-cased tag name -> tag value."""

    lookup = {}
    for tag in self._tags:
        lookup[tag[0].lower()] = tag[1]
    return lookup
334
+
335
@classmethod
def from_scratch(cls,
                 sf_name: str,
                 tag_prefix: str = None,
                 source: str = "from_scratch()"):
    """Build an empty saveframe to be filled in programmatically.

    The tag prefix is optional; when it is not given it is taken from the
    first fully qualified tag that is added."""

    init_arguments = {
        "saveframe_name": sf_name,
        "tag_prefix": tag_prefix,
        "source": source,
    }
    return cls(**init_arguments)
346
+
347
@classmethod
def from_file(cls,
              the_file: Union[str, TextIO, BinaryIO],
              csv: bool = False,
              convert_data_types: bool = False,
              raise_parse_warnings: bool = False,
              schema: Schema = None):
    """Create a saveframe by loading in a file. Specify csv=True if
    the file is a CSV file. If the_file starts with http://,
    https://, or ftp:// then we will use those protocols to attempt
    to open the file.

    Setting convert_data_types to True will automatically convert
    the data loaded from the file into the corresponding python type as
    determined by loading the standard BMRB schema. This would mean that
    all floats will be represented as decimal.Decimal objects, all integers
    will be python int objects, strings and vars will remain strings, and
    dates will become datetime.date objects. When printing str() is called
    on all objects. Other than converting uppercase "E"s in scientific
    notation floats to lowercase "e"s this should not cause any change in
    the way re-printed NMR-STAR objects are displayed. Specify a custom
    schema object to use using the schema parameter.

    Setting raise_parse_warnings to True will result in the raising of a
    ParsingError rather than logging a warning when non-valid (but
    ignorable) issues are found."""

    init_arguments = {
        "file_name": the_file,
        "csv": csv,
        "convert_data_types": convert_data_types,
        "raise_parse_warnings": raise_parse_warnings,
        "schema": schema,
    }
    return cls(**init_arguments)
379
+
380
@classmethod
def from_json(cls, json_dict: Union[dict, str]):
    """Create a saveframe from JSON (serialized or unserialized JSON).

    ``json_dict`` is either a dictionary or a JSON string that decodes to
    one. It must contain the keys "name", "tag_prefix", "tags" and "loops";
    "category" is optional.

    Raises ValueError when the argument is neither a dictionary nor a JSON
    string that decodes to one, or when a required key is missing."""

    # If they provided a string, try to load it using JSON
    if not isinstance(json_dict, dict):
        try:
            json_dict = json.loads(json_dict)
        except (TypeError, ValueError):
            raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.") from None

        # Valid JSON may still decode to a list, number, string or null; the
        # key checks below would fail on those with an unrelated TypeError.
        if not isinstance(json_dict, dict):
            raise ValueError("The JSON you provide must be a hash, but the JSON string you provided decoded to "
                             f"an object of type '{type(json_dict).__name__}'.")

    # Make sure it has the correct keys
    for check in ["name", "tag_prefix", "tags", "loops"]:
        if check not in json_dict:
            raise ValueError(f"The JSON you provide must be a hash and must contain the key '{check}' - even if "
                             "the key points to None.")

    # Create a saveframe from scratch and populate it. Going through cls
    # (like the other constructors do) lets subclasses get their own type back.
    ret = cls.from_scratch(json_dict['name'])
    ret.tag_prefix = json_dict['tag_prefix']
    ret._category = json_dict.get('category', None)
    ret._tags = json_dict['tags']
    ret._loops = [loop_mod.Loop.from_json(x) for x in json_dict['loops']]
    ret.source = "from_json()"

    # Return the new saveframe
    return ret
407
+
408
@classmethod
def from_string(cls,
                the_string: str,
                csv: bool = False,
                convert_data_types: bool = False,
                raise_parse_warnings: bool = False,
                schema: Schema = None):
    """Create a saveframe by parsing a string. Specify csv=True if
    the string is in CSV format and not NMR-STAR format.

    Setting convert_data_types to True will automatically convert
    the data loaded from the string into the corresponding python type as
    determined by loading the standard BMRB schema. This would mean that
    all floats will be represented as decimal.Decimal objects, all integers
    will be python int objects, strings and vars will remain strings, and
    dates will become datetime.date objects. When printing str() is called
    on all objects. Other than converting uppercase "E"s in scientific
    notation floats to lowercase "e"s this should not cause any change in
    the way re-printed NMR-STAR objects are displayed. Specify a custom
    schema object to use using the schema parameter.

    Setting raise_parse_warnings to True will result in the raising of a
    ParsingError rather than logging a warning when non-valid (but
    ignorable) issues are found."""

    init_arguments = {
        "the_string": the_string,
        "csv": csv,
        "convert_data_types": convert_data_types,
        "raise_parse_warnings": raise_parse_warnings,
        "schema": schema,
    }
    return cls(**init_arguments)
438
+
439
@classmethod
def from_template(cls,
                  category: str,
                  name: str = None,
                  entry_id: Union[str, int] = None,
                  all_tags: bool = False,
                  default_values: bool = False,
                  schema: Schema = None):
    """Create a saveframe holding the tags and loops that the schema
    defines for the given category. Unless 'default_values' is set, no
    values are assigned. Optionally provide the saveframe name as the
    'name' argument.

    The optional argument 'all_tags' forces all tags to be included
    rather than just the mandatory tags.

    The optional argument 'default_values' will insert the default
    values from the schema."""

    # Resolve the schema first; its version is recorded in the source string
    schema = utils.get_schema(schema)
    source_description = f"from_template({schema.version})"

    return cls(category=category,
               saveframe_name=name,
               entry_id=entry_id,
               all_tags=all_tags,
               default_values=default_values,
               schema=schema,
               source=source_description)
466
+
467
def __repr__(self) -> str:
    """Short description of the saveframe that includes its name."""

    frame_name = self.name
    return f"<pynmrstar.Saveframe '{frame_name}'>"
471
+
472
def __setitem__(self, key: Union[str, int], item: Union[str, 'loop_mod.Loop']) -> None:
    """Set the indicated loop or tag.

    When ``item`` is a loop, ``key`` is either the position of the loop to
    replace (an int, or a string holding an int) or the category of an
    existing loop. Any other ``item`` is stored as the value of the tag
    named ``key``, updating the tag if it already exists.

    Raises KeyError when a loop category is given but no loop with that
    category exists, and IndexError when the position is out of range."""

    # Anything that is not a loop is a tag value
    if not isinstance(item, loop_mod.Loop):
        # If the tag already exists, set its value
        self.add_tag(key, item, update=True)
        return

    # Only the integer conversion can legitimately raise ValueError, so keep
    # the try block limited to it.
    try:
        position = int(str(key))
    except ValueError:
        position = None

    if position is not None:
        self._loops[position] = item
        return

    wanted_category = key.lower()
    replaced = False
    for pos, existing_loop in enumerate(self._loops):
        # Loops whose category was never set cannot match (and have no
        # .lower() to call), so skip them instead of failing.
        if existing_loop.category is None:
            continue
        if existing_loop.category.lower() == wanted_category:
            self._loops[pos] = item
            replaced = True

    if not replaced:
        raise KeyError(f"Loop with category '{key}' does not exist and therefore cannot be written to. Use "
                       "add_loop instead.")
491
+
492
def __str__(self,
            first_in_category: bool = True,
            skip_empty_loops: bool = False,
            skip_empty_tags: bool = False,
            show_comments: bool = True) -> str:
    """Render the saveframe as an NMR-STAR formatted string. Please use
    :py:meth:`Saveframe.format` when you want to pass arguments.

    Raises InvalidStateError when the tag prefix has not been set or when a
    tag value cannot be quoted."""

    if self.tag_prefix is None:
        raise InvalidStateError(f"The tag prefix was never set! Error in saveframe named '{self.name}'.")

    chunks = []

    # Insert the comment if not disabled
    if show_comments:
        comments = _get_comments()
        if self._category in comments:
            this_comment = comments[self._category]
            if first_in_category or this_comment['every_flag']:
                chunks.append(this_comment['comment'])

    # Open the saveframe
    chunks.append(f"save_{self.name}\n")

    if self._tags:
        # Pad every tag name to the width of the longest one
        width = max(len(self.tag_prefix + "." + tag[0]) for tag in self._tags)
        single_line = " %%-%ds %%s\n" % width
        multi_line = " %%-%ds\n;\n%%s;\n" % width

        # Write out the tags
        for each_tag in self._tags:
            if skip_empty_tags and each_tag[1] in definitions.NULL_VALUES:
                continue
            try:
                clean_tag = utils.quote_value(each_tag[1])
            except ValueError:
                raise InvalidStateError('Cannot generate NMR-STAR for entry, as empty strings are not valid tag'
                                        ' values in NMR-STAR. Please either replace the empty strings with None '
                                        'objects, or set pynmrstar.definitions.STR_CONVERSION_DICT[\'\'] = None. '
                                        f'Saveframe: {self.name} Tag: {each_tag[0]}')

            formatted_tag = self.tag_prefix + "." + each_tag[0]
            # Values containing a newline use the multi-line layout
            template = multi_line if "\n" in clean_tag else single_line
            chunks.append(template % (formatted_tag, clean_tag))

    # Write out any loops
    chunks.extend(each_loop.format(skip_empty_loops=skip_empty_loops, skip_empty_tags=skip_empty_tags)
                  for each_loop in self._loops)

    # Close the saveframe
    return "".join(chunks) + "\nsave_\n"
544
+
545
def add_loop(self, loop_to_add: 'loop_mod.Loop') -> None:
    """Append a loop to this saveframe.

    Raises ValueError when the saveframe already has a loop with the same
    category (compared case-insensitively)."""

    category = loop_to_add.category
    existing = self.loop_dict

    if category in existing or str(category).lower() in existing:
        if category is None:
            raise ValueError("You cannot have two loops with the same category in one saveframe. You are getting "
                             "this error because you haven't yet set your loop categories.")
        raise ValueError("You cannot have two loops with the same category in one saveframe. Category: "
                         f"'{category}'.")

    self._loops.append(loop_to_add)
557
+
558
def add_tag(self,
            name: str,
            value: Any,
            update: bool = False,
            convert_data_types: bool = False,
            schema: Schema = None) -> None:
    """Add a tag to the tag list. Does a bit of validation and
    parsing.

    The name may be fully qualified (``_Category.Tag``); the category part
    sets the saveframe tag prefix if it is not set yet and must match it
    otherwise. A name with a leading '.' has the '.' stripped.

    Set update to True to update a tag if it exists rather
    than raise an exception.

    Set convert_data_types to True to convert the tag value from str to
    whatever type the tag is as defined in the schema.

    Optionally specify a schema if you don't want to use the default schema.

    Raises ValueError when the name is not a string, is null-equivalent,
    contains whitespace or a second '.', has a different category than the
    saveframe, already exists (and update is False), or when the
    Sf_framecode value conflicts with the saveframe name.
    """

    if not isinstance(name, str):
        raise ValueError('Tag names must be strings.')

    # Split off (and check) the category when the name contains a '.'
    if "." in name:
        if name[0] != ".":
            prefix = utils.format_category(name)
            if self.tag_prefix is None:
                self.tag_prefix = prefix
            elif self.tag_prefix != prefix:
                raise ValueError(
                    "One saveframe cannot have tags with different categories (or tags that don't "
                    f"match the set category)! Saveframe tag prefix is '{self.tag_prefix}' but the added tag, "
                    f"'{name}' has prefix '{prefix}'.")
            name = name[name.index(".") + 1:]
        else:
            name = name[1:]

    if name in definitions.NULL_VALUES:
        raise ValueError(f"Cannot use a null-equivalent value as a tag name. Invalid tag name: '{name}'")
    if "." in name:
        raise ValueError(f"There cannot be more than one '.' in a tag name. Invalid tag name: '{name}'")
    for char in name:
        if char in utils.definitions.WHITESPACE:
            raise ValueError(f"Tag names can not contain whitespace characters. Invalid tag name: '{name}'")

    # No duplicate tags
    if self.get_tag(name):
        if not update:
            raise ValueError(f"There is already a tag with the name '{name}' in the saveframe '{self.name}'."
                             f" Set update=True if you want to override its value.")
        else:
            # Keep the cached category and name in step with their tags
            tag_name_lower = name.lower()
            if tag_name_lower == "sf_category":
                self._category = value
            if tag_name_lower == "sf_framecode":
                if value in definitions.NULL_VALUES:
                    # Report the offending value (not the tag name)
                    raise ValueError("Cannot set the saveframe name tag (Sf_framecode) to a null-equivalent "
                                     f"value. Invalid value: '{value}'")
                self._name = value
            self.get_tag(name, whole_tag=True)[0][1] = value
            return

    # See if we need to convert the data type
    if convert_data_types:
        new_tag = [name, utils.get_schema(schema).convert_tag(self.tag_prefix + "." + name, value)]
    else:
        new_tag = [name, value]

    # Set the category if the tag we are loading is the category
    tag_name_lower = name.lower()
    if tag_name_lower == "sf_category":
        self._category = value
    # The framecode tag names the saveframe, so it may not contradict a
    # name that has already been set
    if tag_name_lower == "sf_framecode":
        if not self._name:
            self._name = value
        elif self._name != value:
            raise ValueError('The Sf_framecode tag cannot be different from the saveframe name. Error '
                             f'occurred in tag {self.tag_prefix}.Sf_framecode with value {value} which '
                             f'conflicts with the saveframe name {self._name}.')
    self._tags.append(new_tag)
636
+
637
def add_tags(self, tag_list: list, update: bool = False) -> None:
    """Add several tags at once. Each element of the input is either
    [key, value] or [key]; a missing value is stored as ".". Set update
    to true to update a tag if it exists rather than raise an exception.

    Raises ValueError for an element that has neither one nor two items."""

    for tag_pair in tag_list:
        pair_size = len(tag_pair)
        if pair_size not in (1, 2):
            raise ValueError(f"You provided an invalid tag/value to add: '{tag_pair}'.")

        tag_value = tag_pair[1] if pair_size == 2 else "."
        self.add_tag(tag_pair[0], tag_value, update=update)
650
+
651
def add_missing_tags(self,
                     schema: Schema = None,
                     all_tags: bool = False,
                     recursive: bool = True) -> None:
    """ Add every tag the schema lists for this saveframe that is not yet
    present, then sort the tags.

    Unless all_tags is set, tags whose schema "public" flag is "I" are
    left out.

    Set recursive to False to only operate on the tags in this saveframe,
    and not those in child loops.

    Raises InvalidStateError when the tag prefix has not been set."""

    if not self.tag_prefix:
        raise InvalidStateError("You must first specify the tag prefix of this Saveframe before calling this "
                                "method. You can do this by adding a fully qualified tag "
                                "(i.e. _Entry.Sf_framecode), by specifying the tag_prefix when calling "
                                "from_scratch() or by modifying the .tag_prefix attribute.")

    schema = utils.get_schema(schema)
    tag_prefix: str = self.tag_prefix.lower() + '.'

    for item in schema.schema_order:

        # Only tags that belong to this saveframe are of interest
        if not item.lower().startswith(tag_prefix):
            continue

        # add_tag raises ValueError (e.g. for a tag that already exists);
        # such tags are simply skipped
        try:
            if all_tags or schema.schema[item.lower()]["public"] != "I":
                self.add_tag(item, None)
        except ValueError:
            pass

    if recursive:
        for child_loop in self._loops:
            try:
                child_loop.add_missing_tags(schema=schema, all_tags=all_tags)
            except ValueError:
                pass

    self.sort_tags()
694
+
695
def compare(self, other) -> List[str]:
    """List the differences between this saveframe and another one.

    An empty list means no difference was found. Non-equal saveframes
    will always be detected, but which specific differences are reported
    depends on the order of the saveframes. ``other`` may also be a string,
    which is compared against the NMR-STAR rendering of this saveframe."""

    # Literally the same object
    if self is other:
        return []

    # A string is checked against our string representation
    if isinstance(other, str):
        return [] if str(self) == other else ['String was not exactly equal to saveframe.']

    if not isinstance(other, Saveframe):
        return ['Other object is not of class Saveframe.']

    # We need to do this in case of an extra "\n" on the end of one tag
    if str(other) == str(self):
        return []

    differences = []

    # Do STAR comparison
    try:
        # No point comparing apples to oranges: when the names or the tag
        # prefixes differ, report just that and stop
        if str(self.name) != str(other.name):
            differences.append(f"\tSaveframe names do not match: '{self.name}' vs '{other.name}'.")
            return differences

        if str(self.tag_prefix) != str(other.tag_prefix):
            differences.append(f"\tTag prefix does not match: '{self.tag_prefix}' vs '{other.tag_prefix}'.")
            return differences

        if len(self._tags) < len(other.tags):
            differences.append(f"\tNumber of tags does not match: '{len(self._tags)}' vs '{len(other.tags)}'. The "
                               f"compared entry has at least one tag this entry does not.")

        conversions = definitions.STR_CONVERSION_DICT if self._tags else None
        for tag in self._tags:
            other_tag = other.get_tag(tag[0])

            if not other_tag:
                differences.append(f"\tNo tag with name '{self.tag_prefix}.{tag[0]}' in compared entry.")
                continue

            # Compare the string version of the tags in case there are
            # non-string types. Use the conversion dict to get to str
            own_text = str(conversions.get(tag[1], tag[1]))
            other_text = str(conversions.get(other_tag[0], other_tag[0]))
            if own_text != other_text:
                newline_stripped_tag = str(tag[1]).replace("\n", "\\n")
                newline_stripped_other_tag = str(other_tag[0]).replace("\n", "\\n")
                differences.append(f"\tMismatched tag values for tag '{self.tag_prefix}.{tag[0]}': '"
                                   f"{newline_stripped_tag}' vs '{newline_stripped_other_tag}'.")

        if len(self._loops) != len(other.loops):
            differences.append(f"\tNumber of children loops does not match: '{len(self._loops)}' vs "
                               f"'{len(other.loops)}'.")

        other_loops = other.loop_dict
        for each_loop in self._loops:
            category_key = each_loop.category.lower()
            if category_key not in other_loops:
                differences.append(f"\tNo loop with category '{each_loop.category}' in other entry.")
                continue

            loop_differences = each_loop.compare(other_loops[category_key])
            if len(loop_differences) > 0:
                differences.append(f"\tLoops do not match: '{each_loop.category}'.")
                differences.extend(loop_differences)

    except AttributeError as err:
        differences.append(f"\tAn exception occurred while comparing: '{err}'.")

    return differences
770
+
771
def delete_tag(self, tag: str) -> None:
    """ Deprecated alias, please see :py:meth:`pynmrstar.Saveframe.remove_tag`.

    Emits a DeprecationWarning and forwards the call to remove_tag. """

    deprecation_notice = 'This method name has been renamed to remove_tag. Please update your code.'
    warnings.warn(deprecation_notice, DeprecationWarning)

    outcome = self.remove_tag(tag)
    return outcome
776
+
777
def get_data_as_csv(self, header: bool = True, show_category: bool = True) -> str:
    """Return the tags of the saveframe, properly CSVd, as a string: an
    optional header row with the tag names followed by one row with the
    tag values. Set header to False to omit the header. Set show_category
    to False to omit the tag prefix from the headers."""

    buffer = StringIO()
    writer = csv_writer(buffer)

    if header:
        if show_category:
            column_names = [str(self.tag_prefix) + "." + str(tag[0]) for tag in self._tags]
        else:
            column_names = [str(tag[0]) for tag in self._tags]
        writer.writerow(column_names)

    writer.writerow([tag[1] for tag in self._tags])

    # The csv module terminates rows with "\r\n"; hand back plain newlines
    return buffer.getvalue().replace('\r\n', '\n')
799
+
800
def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False, show_comments: bool = True) -> str:
    """ Render the saveframe like str(Saveframe) does, but with options
    to customize how the saveframe is printed.

    skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
    skip_empty_tags will omit tags in the saveframe and child loops which have no non-null values.
    show_comments will show the standard comments before a saveframe."""

    rendering_options = {
        "skip_empty_loops": skip_empty_loops,
        "show_comments": show_comments,
        "skip_empty_tags": skip_empty_tags,
    }
    return self.__str__(**rendering_options)
810
+
811
def get_json(self, serialize: bool = True) -> Union[dict, str]:
    """ Return the saveframe as JSON. With serialize set to False the
    serializable dictionary representation is returned instead of the
    JSON string."""

    tag_pairs = [[tag[0], tag[1]] for tag in self._tags]
    loop_data = [each_loop.get_json(serialize=False) for each_loop in self._loops]

    saveframe_data = {
        "name": self.name,
        "category": self._category,
        "tag_prefix": self.tag_prefix,
        "tags": tag_pairs,
        "loops": loop_data
    }

    if not serialize:
        return saveframe_data

    return json.dumps(saveframe_data, default=_json_serialize)
828
+
829
def get_loop(self, name: str) -> 'loop_mod.Loop':
    """Return the loop whose category matches the given loop name.

    The name is normalized with utils.format_category and the comparison
    is case-insensitive. The first matching loop is returned.

    Raises KeyError if no loop in the saveframe has that category."""

    wanted = utils.format_category(name).lower()
    match = next((candidate for candidate in self._loops
                  if str(candidate.category).lower() == wanted), None)
    if match is None:
        raise KeyError(f"No loop with category '{wanted}'.")
    return match
837
+
838
def get_loop_by_category(self, name: str) -> 'loop_mod.Loop':
    """ Deprecated. Please use :py:meth:`pynmrstar.Saveframe.get_loop` instead.

    Emits a DeprecationWarning and then returns ``self.get_loop(name)``."""

    # stacklevel=2 attributes the warning to the caller of this method
    # rather than to this line, so the user sees which of their own calls
    # needs to be changed.
    warnings.warn('Deprecated. Please use get_loop() instead.', DeprecationWarning, stacklevel=2)
    return self.get_loop(name)
843
+
844
def get_tag(self, query: str, whole_tag: bool = False) -> list:
    """Allows fetching the value of a tag by tag name. Returns
    a list of all matching tag values.

    Matches are collected first from every loop whose category equals the
    tag prefix being searched, and then from the saveframe's own tags.
    If the query contains a "." the prefix is taken from the query,
    otherwise the saveframe's own tag prefix is used. Prefix and tag name
    comparisons are case-insensitive.

    Specify whole_tag=True and the [tag_name, tag_value] pair will be
    returned instead of just the value.

    If no prefix is available to search with, or the saveframe itself has
    no tag prefix, the corresponding section is simply skipped rather than
    raising."""

    results = []

    # Make sure this is the correct saveframe if they specify a tag
    # prefix
    if "." in query:
        tag_prefix = utils.format_category(query)
    else:
        tag_prefix = self.tag_prefix

    # Lowercase the prefix once instead of once per comparison
    lc_prefix = tag_prefix.lower() if tag_prefix is not None else None

    # Check the loops
    if lc_prefix is not None:
        for each_loop in self._loops:
            if each_loop.category is not None and each_loop.category.lower() == lc_prefix:
                results.extend(each_loop.get_tag(query, whole_tag=whole_tag))

    # Check our tags
    query = utils.format_tag_lc(query)
    own_prefix = self.tag_prefix
    # A saveframe without a tag prefix cannot match a prefixed query; guard
    # against calling .lower() on None in that case.
    if lc_prefix is not None and own_prefix is not None and own_prefix.lower() == lc_prefix:
        for tag in self._tags:
            if query == tag[0].lower():
                results.append(tag if whole_tag else tag[1])

    return results
877
+
878
def loop_iterator(self) -> Iterable['loop_mod.Loop']:
    """Returns an iterator over the loops stored in this saveframe, in
    the order they are held."""

    saveframe_loops = self._loops
    return iter(saveframe_loops)
882
+
883
def print_tree(self) -> None:
    """Prints a tree-style summary of the saveframe: its own repr on the
    first line, then one tab-indented line per item yielded by iterating
    over the saveframe, each labelled with its position."""

    print(repr(self))

    position = 0
    for child in self:
        print(f"\t[{position}] {child!r}")
        position += 1
889
+
890
def remove_loop(self, item: Union[str, List[str], Tuple[str],
                                  'loop_mod.Loop', List['loop_mod.Loop'], Tuple['loop_mod.Loop']]) -> None:
    """ Removes one or more loops from the saveframe. You can remove loops either by passing the loop object itself,
    the loop category (as a string), or a list or tuple of either.

    Category strings are matched case-insensitively and the leading
    underscore is optional.

    Raises ValueError if the argument (or one of its elements) is neither
    a string nor a loop, or if any specified loop is not present in this
    saveframe. Nothing is removed unless every specified loop is found."""

    parsed_list: list
    if isinstance(item, tuple):
        parsed_list = list(item)
    elif isinstance(item, list):
        parsed_list = item
    elif isinstance(item, (str, loop_mod.Loop)):
        parsed_list = [item]
    else:
        raise ValueError('The item you provided was not one or more loop objects or loop categories (strings). '
                         f'Item type: {type(item)}')

    loop_names = self.loop_dict

    loops_to_remove = []
    for loop in parsed_list:
        if isinstance(loop, str):
            formatted_loop = loop.lower()
            if not formatted_loop.startswith('_'):
                # Prefix the lowercased name (not the original string), so
                # that the lookup key stays lowercase.
                formatted_loop = f"_{formatted_loop}"
            if formatted_loop not in loop_names:
                raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
                                 f'missing loop: {loop}')
            loops_to_remove.append(loop_names[formatted_loop])
        elif isinstance(loop, loop_mod.Loop):
            if loop not in self._loops:
                raise ValueError('At least one loop specified to remove was not found in this saveframe. First '
                                 f'missing loop: {loop}')
            loops_to_remove.append(loop)
        else:
            raise ValueError('One of the items you provided was not a loop object or loop category (string). '
                             f'Item: {repr(loop)}')

    # Rebuild the loop list only after every requested loop was validated
    self._loops = [_ for _ in self._loops if _ not in loops_to_remove]
928
+
929
def remove_tag(self, item: Union[str, List[str], Tuple[str]]) -> None:
    """Removes one or more tags from the saveframe based on tag name(s).
    Provide either a tag name or a list or tuple containing tag names.

    Raises KeyError, without removing anything, if any of the specified
    tags is not present in the saveframe."""

    requested = get_clean_tag_list(item)
    known_positions = self._lc_tags

    # Resolve every requested tag to its position, failing on the first
    # one that is unknown
    doomed_positions = []
    for each_request in requested:
        formatted_name = each_request["formatted"]
        if formatted_name not in known_positions:
            raise KeyError(f"There is no tag with name '{each_request['original']}' to remove.")
        doomed_positions.append(known_positions[formatted_name])

    # Keep only the tags whose position was not selected for removal
    surviving_tags = []
    for position, each_tag in enumerate(self._tags):
        if position not in doomed_positions:
            surviving_tags.append(each_tag)
    self._tags = surviving_tags
943
+
944
def set_tag_prefix(self, tag_prefix: str) -> None:
    """Set the tag prefix for this saveframe. The supplied value is
    passed through utils.format_category before being stored."""

    normalized_prefix = utils.format_category(tag_prefix)
    self.tag_prefix = normalized_prefix
948
+
949
def sort_tags(self, schema: Schema = None) -> None:
    """ Sort the saveframe's tags in place so that they follow the order
    given by a schema. If no schema is provided, the one returned by
    utils.get_schema is used.

    Each tag is ranked by the schema's tag_key for its full name
    (tag prefix + "." + tag name)."""

    schema = utils.get_schema(schema)
    self._tags.sort(key=lambda each_tag: schema.tag_key(self.tag_prefix + "." + each_tag[0]))
960
+
961
def tag_iterator(self) -> Iterable[Tuple[str, str]]:
    """Returns an iterator over the tags stored in this saveframe, in
    the order they are held."""

    # :py:attr:`pynmrstar.Saveframe.tags`
    saveframe_tags = self._tags
    return iter(saveframe_tags)
965
+
966
def validate(self, validate_schema: bool = True, schema: Schema = None, validate_star: bool = True):
    """Validate a saveframe in a variety of ways. Returns a list of
    errors found. 0-length list indicates no errors found. By
    default all validation modes are enabled.

    validate_schema - Determines if the saveframe's tags are checked
    against a schema. You can pass your own custom schema if desired,
    otherwise the schema returned by utils.get_schema is used.

    validate_star - Forwarded, along with validate_schema and schema, to
    the validate() method of every loop in the saveframe.

    A saveframe without a category always contributes one error, and is
    then validated with a category of None."""

    errors = []

    category = self._category
    if not category:
        category = None
        errors.append(f"Cannot properly validate saveframe: '{self.name}'. No saveframe category defined.")

    if validate_schema:
        # Fall back to the default schema when none was supplied
        active_schema = utils.get_schema(schema)

        for each_tag in self._tags:
            full_name = self.tag_prefix + "." + each_tag[0]
            errors.extend(active_schema.val_type(full_name, each_tag[1], category=category))

    # Collect the errors reported by each loop
    for each_loop in self._loops:
        loop_errors = each_loop.validate(validate_schema=validate_schema, schema=schema,
                                         validate_star=validate_star, category=category)
        errors.extend(loop_errors)

    return errors
999
+
1000
def write_to_file(self,
                  file_name: str,
                  format_: str = "nmrstar",
                  show_comments: bool = True,
                  skip_empty_loops: bool = False,
                  skip_empty_tags: bool = False) -> None:
    """ Writes the saveframe to the specified file, in NMR-STAR format
    unless another format is requested. All arguments are handed on
    unchanged to the module-level write_to_file helper.

    Optionally specify:
    show_comments=False to disable the comments that are by default inserted. Ignored when writing json.
    skip_empty_loops=False to force printing loops with no tags at all (loops with null tags are still printed)
    skip_empty_tags=True will omit tags in the saveframes and loops which have no non-null values.
    format_=json to write to the file in JSON format."""

    write_options = {
        "file_name": file_name,
        "format_": format_,
        "show_comments": show_comments,
        "skip_empty_loops": skip_empty_loops,
        "skip_empty_tags": skip_empty_tags,
    }
    write_to_file(self, **write_options)