pynmrstar 3.3.5__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pynmrstar might be problematic. Click here for more details.

pynmrstar/loop.py ADDED
@@ -0,0 +1,1197 @@
1
+ import json
2
+ import warnings
3
+ from copy import deepcopy
4
+ from csv import reader as csv_reader, writer as csv_writer
5
+ from io import StringIO
6
+ from itertools import chain
7
+ from typing import TextIO, BinaryIO, Union, List, Optional, Any, Dict, Callable, Tuple
8
+
9
+ from pynmrstar import definitions, utils, entry as entry_mod
10
+ from pynmrstar._internal import _json_serialize, _interpret_file
11
+ from pynmrstar.exceptions import InvalidStateError
12
+ from pynmrstar.parser import Parser
13
+ from pynmrstar.schema import Schema
14
+
15
+
16
+ class Loop(object):
17
+ """A BMRB loop object. Create using the class methods, see below."""
18
+
19
+ def __contains__(self, item: Any) -> bool:
20
+ """ Check if the loop contains one or more tags. """
21
+
22
+ # Prepare for processing
23
+ if isinstance(item, (list, tuple)):
24
+ to_process: List[str] = list(item)
25
+ elif isinstance(item, str):
26
+ to_process = [item]
27
+ else:
28
+ return False
29
+
30
+ lc_tags = self._lc_tags
31
+ for tag in to_process:
32
+ if utils.format_tag_lc(tag) not in lc_tags:
33
+ return False
34
+ return True
35
+
36
+ def __eq__(self, other) -> bool:
37
+ """Returns True if this loop is equal to another loop, False if
38
+ it is different."""
39
+
40
+ if not isinstance(other, Loop):
41
+ return False
42
+
43
+ return (self.category, self._tags, self.data) == \
44
+ (other.category, other._tags, other.data)
45
+
46
+ def __getitem__(self, item: Union[int, str, List[str], Tuple[str]]) -> list:
47
+ """Get the indicated row from the data array."""
48
+
49
+ try:
50
+ return self.data[item]
51
+ except TypeError:
52
+ if isinstance(item, tuple):
53
+ item = list(item)
54
+ return self.get_tag(tags=item)
55
+
56
def __init__(self, **kwargs) -> None:
    """ You should not directly instantiate a Loop using this method.
    Instead use the class methods:

    :py:meth:`Loop.from_scratch`, :py:meth:`Loop.from_string`,
    :py:meth:`Loop.from_template`, :py:meth:`Loop.from_file`,
    :py:meth:`Loop.from_json`

    Keyword arguments read by this method: ``source``, ``category``,
    ``the_string``, ``file_name``, ``tag_prefix`` (together with
    ``all_tags`` and ``schema``), ``csv``, ``convert_data_types``,
    ``raise_parse_warnings`` and ``schema``. Raises ValueError when no
    keyword argument is given, or when the parsed source contains more
    than one loop."""

    # Initialize our local variables
    self._tags: List[str] = []
    self.data: List[List[Any]] = []
    self.category: Optional[str] = None
    self.source: str = "unknown"

    star_buffer: StringIO = StringIO("")

    # Update our source if it is provided
    if 'source' in kwargs:
        self.source = kwargs['source']

    # Update our category if provided. Note that this returns immediately,
    # so no other keyword argument is examined when 'category' is present
    # (even if its value is None).
    if 'category' in kwargs:
        self.category = utils.format_category(kwargs['category'])
        return

    # They initialized us wrong
    if len(kwargs) == 0:
        raise ValueError("You should not directly instantiate a Loop using this method. Instead use the "
                         "class methods: Loop.from_scratch(), Loop.from_string(), Loop.from_template(), "
                         "Loop.from_file(), and Loop.from_json().")

    # Parsing from a string
    if 'the_string' in kwargs:
        # Parse from a string by wrapping it in StringIO
        star_buffer = StringIO(kwargs['the_string'])
        self.source = "from_string()"
    # Parsing from a file
    elif 'file_name' in kwargs:
        star_buffer = _interpret_file(kwargs['file_name'])
        self.source = f"from_file('{kwargs['file_name']}')"
    # Creating from template (schema)
    elif 'tag_prefix' in kwargs:

        # 'all_tags' and 'schema' are read with plain indexing, so both keys
        # must be present whenever 'tag_prefix' is supplied.
        tags = Loop._get_tags_from_schema(kwargs['tag_prefix'], all_tags=kwargs['all_tags'],
                                          schema=kwargs['schema'])
        for tag in tags:
            self.add_tag(tag)

        return

    # If we are reading from a CSV file, go ahead and parse it
    if 'csv' in kwargs and kwargs['csv']:
        csv_file = csv_reader(star_buffer)
        # The first CSV row holds the tag names.
        # NOTE(review): an empty buffer makes next() raise StopIteration here - confirm intended.
        self.add_tag(next(csv_file))
        for row in csv_file:
            self.add_data(row,
                          convert_data_types=kwargs.get('convert_data_types', False),
                          schema=kwargs.get('schema', None))
        # NOTE(review): this interpolates the truthy 'csv' flag itself (e.g. "from_csv('True')")
        self.source = f"from_csv('{kwargs['csv']}')"
        return

    tmp_entry = entry_mod.Entry.from_scratch(0)

    # Wrap the loop text in a throwaway data block and saveframe so that the
    # entry parser can be used to parse it
    star_buffer = StringIO(f"data_0 save_internaluseyoushouldntseethis_frame _internal.use internal "
                           f"{star_buffer.read()} save_")
    parser = Parser(entry_to_parse_into=tmp_entry)
    parser.parse(star_buffer.read(),
                 source=self.source,
                 convert_data_types=kwargs.get('convert_data_types', False),
                 raise_parse_warnings=kwargs.get('raise_parse_warnings', False),
                 schema=kwargs.get('schema', None))

    # Check that there was only one loop here
    if len(tmp_entry[0].loops) > 1:
        raise ValueError("You attempted to parse one loop but the source you provided had more than one loop. "
                         "Please either parse all loops as a saveframe or only parse one loop. Loops detected: " +
                         str(tmp_entry[0].loops))

    # Copy the tags, data and category of the parsed loop into ourself
    # (presumably tmp_entry[0][0] is the first loop of the temporary saveframe - confirm).
    # NOTE(review): only the "more than one loop" case is checked above; a source
    # containing no loop at all is not handled explicitly here.
    self._tags = tmp_entry[0][0].tags
    self.data = tmp_entry[0][0].data
    self.category = tmp_entry[0][0].category
139
+
140
+ def __iter__(self) -> list:
141
+ """ Yields each of the rows contained within the loop. """
142
+
143
+ for row in self.data:
144
+ yield row
145
+
146
+ def __len__(self) -> int:
147
+ """Return the number of rows of data."""
148
+
149
+ return len(self.data)
150
+
151
+ def __lt__(self, other) -> bool:
152
+ """Returns True if this loop sorts lower than the compared
153
+ loop, false otherwise."""
154
+
155
+ if not isinstance(other, Loop):
156
+ return NotImplemented
157
+
158
+ return self.category < other.category
159
+
160
+ def __repr__(self) -> str:
161
+ """Returns a description of the loop."""
162
+
163
+ return f"<pynmrstar.Loop '{self.category}'>"
164
+
165
def __setitem__(self, key: str, item: Any) -> None:
    """Overwrite every value of one tag. ``item`` must hold exactly as
    many values as the tag currently has (one per row); value number
    ``n`` is written to row number ``n``.

    Raises ValueError if the tag is not in the loop or the lengths
    differ."""

    normalized_key = utils.format_tag_lc(key)
    tag_positions = self._lc_tags

    # The tag has to exist already
    if normalized_key not in tag_positions:
        raise ValueError(f"Cannot assign to tag '{key}' as it does not exist in this loop.")

    # Column to write into
    column = tag_positions[normalized_key]

    # The replacement values must line up with the existing values
    existing_count = len(self[key])
    supplied_count = len(item)
    if existing_count != supplied_count:
        raise ValueError("To assign to a tag you must provide a list (or iterable) of a length equal to the "
                         f"number of values that currently exist for that tag. The tag '{key}' currently has"
                         f" {existing_count} values and you supplied {supplied_count} values.")

    # Write the new values row by row
    for row_number, row in enumerate(self.data):
        row[column] = item[row_number]
188
+
189
+ def __str__(self, skip_empty_loops: bool = False, skip_empty_tags: bool = False) -> str:
190
+ """Returns the loop in STAR format as a string."""
191
+
192
+ # Check if there is any data in this loop
193
+ if len(self.data) == 0:
194
+ # They do not want us to print empty loops
195
+ if skip_empty_loops:
196
+ return ""
197
+ else:
198
+ # If we have no tags than return the empty loop
199
+ if len(self._tags) == 0:
200
+ return "\n loop_\n\n stop_\n"
201
+
202
+ if len(self._tags) == 0:
203
+ raise InvalidStateError("Impossible to print data if there are no associated tags. Error in loop "
204
+ f"'{self.category}' which contains data but hasn't had any tags added.")
205
+
206
+ # Make sure the tags and data match
207
+ self._check_tags_match_data()
208
+
209
+ # If skipping null tags, it's easier to filter out a loop with only real tags and then print
210
+ if skip_empty_tags:
211
+ has_data = [not all([_ in definitions.NULL_VALUES for _ in column]) for column in zip(*self.data)]
212
+ return self.filter([tag for x, tag in enumerate(self._tags) if has_data[x]]).format()
213
+
214
+ # Start the loop
215
+ return_chunks = ["\n loop_\n"]
216
+ # Print the tags
217
+ format_string = " %-s\n"
218
+
219
+ # Check to make sure our category is set
220
+ if self.category is None:
221
+ raise InvalidStateError("The category was never set for this loop. Either add a tag with the category "
222
+ "intact, specify it when generating the loop, or set it using Loop.set_category().")
223
+
224
+ # Print the categories
225
+ if self.category is None:
226
+ for tag in self._tags:
227
+ return_chunks.append(format_string % tag)
228
+ else:
229
+ for tag in self._tags:
230
+ return_chunks.append(format_string % (self.category + "." + tag))
231
+
232
+ return_chunks.append("\n")
233
+
234
+ if len(self.data) != 0:
235
+
236
+ # Make a copy of the data
237
+ working_data = []
238
+ title_widths = [4]*len(self.data[0])
239
+
240
+ # Put quotes as needed on the data
241
+ for row_pos, row in enumerate(self.data):
242
+ clean_row = []
243
+ for col_pos, x in enumerate(row):
244
+ try:
245
+ clean_val = utils.quote_value(x)
246
+ clean_row.append(clean_val)
247
+ length = len(clean_val) + 3
248
+ if length > title_widths[col_pos] and "\n" not in clean_val:
249
+ title_widths[col_pos] = length
250
+
251
+ except ValueError:
252
+ raise InvalidStateError('Cannot generate NMR-STAR for entry, as empty strings are not valid '
253
+ 'tag values in NMR-STAR. Please either replace the empty strings with'
254
+ ' None objects, or set pynmrstar.definitions.STR_CONVERSION_DICT['
255
+ '\'\'] = None.\n'
256
+ f'Loop: {self.category} Row: {row_pos} Column: {col_pos}')
257
+
258
+ working_data.append(clean_row)
259
+
260
+ # Generate the format string
261
+ format_string = " " + "%-*s" * len(self._tags) + " \n"
262
+
263
+ # Print the data, with the tags sized appropriately
264
+ for datum in working_data:
265
+ for pos, item in enumerate(datum):
266
+ if "\n" in item:
267
+ datum[pos] = "\n;\n%s;\n" % item
268
+
269
+ # Print the data (combine the tags' widths with their data)
270
+ tag_width_list = [d for d in zip(title_widths, datum)]
271
+ return_chunks.append(format_string % tuple(chain.from_iterable(tag_width_list)))
272
+
273
+ # Close the loop
274
+ return "".join(return_chunks) + "\n stop_\n"
275
+
276
+ @property
277
+ def _lc_tags(self) -> Dict[str, int]:
278
+ return {_[1].lower(): _[0] for _ in enumerate(self._tags)}
279
+
280
+ @property
281
+ def empty(self) -> bool:
282
+ """ Check if the loop has no data. """
283
+
284
+ for row in self.data:
285
+ for col in row:
286
+ if col not in definitions.NULL_VALUES:
287
+ return False
288
+
289
+ return True
290
+
291
+ @property
292
+ def tags(self) -> List[str]:
293
+ return self._tags
294
+
295
@classmethod
def from_file(cls,
              the_file: Union[str, TextIO, BinaryIO],
              csv: bool = False,
              convert_data_types: bool = False,
              raise_parse_warnings: bool = False,
              schema: Schema = None):
    """Build a loop from the contents of a file. Pass csv=True when
    the file holds CSV rather than NMR-STAR. If the_file starts with
    http://, https://, or ftp:// then those protocols are used to
    attempt to open the file.

    With convert_data_types=True the values read from the file are
    converted into the matching python type, as determined by the
    standard BMRB schema (or by the schema object given through the
    schema parameter): floats become decimal.Decimal objects, integers
    become python int objects, strings and vars stay strings, and dates
    become datetime.date objects. str() is called on all objects when
    printing, so apart from uppercase "E"s in scientific notation floats
    turning into lowercase "e"s, re-printed NMR-STAR objects should look
    unchanged.

    With raise_parse_warnings=True a ParsingError is raised, instead of
    a warning being logged, when non-valid (but ignorable) issues are
    found."""

    init_arguments = {
        'file_name': the_file,
        'csv': csv,
        'convert_data_types': convert_data_types,
        'raise_parse_warnings': raise_parse_warnings,
        'schema': schema,
    }
    return cls(**init_arguments)
327
+
328
+ @classmethod
329
+ def from_json(cls, json_dict: Union[dict, str]):
330
+ """Create a loop from JSON (serialized or unserialized JSON)."""
331
+
332
+ # If they provided a string, try to load it using JSON
333
+ if not isinstance(json_dict, dict):
334
+ try:
335
+ json_dict = json.loads(json_dict)
336
+ except (TypeError, ValueError):
337
+ raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.")
338
+
339
+ # Make sure it has the correct keys
340
+ for check in ['tags', 'category', 'data']:
341
+ if check not in json_dict:
342
+ raise ValueError(f"The JSON you provide must be a dictionary and must contain the key '{check}' - even"
343
+ f" if the key points to None.")
344
+
345
+ # Create a loop from scratch and populate it
346
+ ret = Loop.from_scratch()
347
+ ret._tags = json_dict['tags']
348
+ ret.category = json_dict['category']
349
+ ret.data = json_dict['data']
350
+ ret.source = "from_json()"
351
+
352
+ # Return the new loop
353
+ return ret
354
+
355
@classmethod
def from_scratch(cls,
                 category: str = None,
                 source: str = "from_scratch()"):
    """Create an empty loop that you can programmatically add to.
    The loop category (tag prefix) may be given up front; the source
    label may be given as the second argument. If no category is
    passed it is taken from the first tag added which carries a
    category."""

    init_arguments = {'category': category, 'source': source}
    return cls(**init_arguments)
365
+
366
@classmethod
def from_string(cls,
                the_string: str,
                csv: bool = False,
                convert_data_types: bool = False,
                raise_parse_warnings: bool = False,
                schema: Schema = None):
    """Build a loop by parsing a string. Pass csv=True when the
    string is in CSV format rather than NMR-STAR format.

    With convert_data_types=True the parsed values are converted into
    the matching python type, as determined by the standard BMRB schema
    (or by the schema object given through the schema parameter): floats
    become decimal.Decimal objects, integers become python int objects,
    strings and vars stay strings, and dates become datetime.date
    objects. str() is called on all objects when printing, so apart from
    uppercase "E"s in scientific notation floats turning into lowercase
    "e"s, re-printed NMR-STAR objects should look unchanged.

    With raise_parse_warnings=True a ParsingError is raised, instead of
    a warning being logged, when non-valid (but ignorable) issues are
    found."""

    init_arguments = {
        'the_string': the_string,
        'csv': csv,
        'convert_data_types': convert_data_types,
        'raise_parse_warnings': raise_parse_warnings,
        'schema': schema,
    }
    return cls(**init_arguments)
396
+
397
@classmethod
def from_template(cls, tag_prefix: str,
                  all_tags: bool = False,
                  schema: Schema = None):
    """ Build a loop pre-populated with the tags the schema defines
    for the given tag prefix. No values are assigned.

    Pass all_tags=True to include every tag of the category instead
    of the default selection. The schema is resolved through
    ``utils.get_schema`` and its version is recorded in the loop's
    source label."""

    resolved_schema = utils.get_schema(schema)
    source_label = f"from_template({resolved_schema.version})"
    return cls(tag_prefix=tag_prefix,
               all_tags=all_tags,
               schema=resolved_schema,
               source=source_label)
412
+
413
@staticmethod
def _get_tags_from_schema(category: str, schema: Schema = None, all_tags: bool = False) -> List[str]:
    """ Collect, in schema order, the schema tags belonging to the given
    category. With all_tags=False, tags whose schema "public" field is
    "I" are left out. Raises InvalidStateError when nothing matches. """

    schema = utils.get_schema(schema)

    # Normalise the prefix to the "_Category." form
    if not category.startswith("_"):
        category = "_" + category
    if not category.endswith("."):
        category = category + "."

    lc_prefix = category.lower()

    tags = [
        item for item in schema.schema_order
        if item.lower().startswith(lc_prefix)
        and (all_tags or schema.schema[item.lower()]["public"] != "I")
    ]

    if len(tags) == 0:
        raise InvalidStateError(f"The tag prefix '{category}' has no corresponding tags in the dictionary.")

    return tags
443
+
444
+ def _check_tags_match_data(self) -> bool:
445
+ """ Ensures that each row of the data has the same number of
446
+ elements as there are tags for the loop. This is necessary to
447
+ print or do some other operations on loops that count on the values
448
+ matching. """
449
+
450
+ # Make sure that if there is data, it is the same width as the
451
+ # tag names
452
+ if len(self.data) > 0:
453
+ for x, row in enumerate(self.data):
454
+ if len(self._tags) != len(row):
455
+ raise InvalidStateError(f"The number of tags must match the width of the data. Error in loop "
456
+ f"'{self.category}'. In this case, there are {len(self._tags)} tags, and "
457
+ f"row number {x} has {len(row)} tags.")
458
+
459
+ return True
460
+
461
def add_data(self,
             data: Union[List[dict], Dict[str, List], List[Union[str, float, int]], List[List[Any]]],
             rearrange: bool = False,
             convert_data_types: bool = False,
             schema: Schema = None):
    """Add data to a loop. You can provide the data to add organized in four different ways, though the first
    two are recommended for new code. The other two (#3 and #4) are preserved for sake of existing code (written
    prior to version 3.3) and for niche use cases:

    1: You can provide a list of dictionaries of tags to add. For example,
    ``[{'name': 'Jeff', 'location': 'Connecticut'}, {'name': 'Chad', 'location': 'Madison'}]`` will add two new
    rows, and set the values of the tags ``name`` and ``location`` to the values provided. If there are other
    tags in the loop, they will be assigned null values for the rows corresponding to the tags added.

    2: You can provide a dictionary of lists, as such (corresponds to adding the same ultimate data as in the
    example #1): ``{'name': ['Jeff', 'Chad'], 'location': ['Connecticut', 'Madison']}``. This will also create
    two new rows in the loop and assign the values provided.

    3: You can provide a list of lists of tag values to add. In this case, each list must have the same tag
    values (and order of tags) as the known tags present in the loop. To correspond to the above examples, the data
    would look like: ``[['Jeff', 'Connecticut'], ['Chad', 'Madison']]``. Adding data this way requires both that
    you provide values for all tags present in the loop, and that you provide the values in the same order that the
    tags are already defined in the loop.

    4: You can provide a single list of tag values to add. In the most simple case, that would correspond to just
    adding one row of data in the same way as in #3 above, as such: ``['Jeff', 'Connecticut']``. In a more
    complicated example, you could also add data (corresponding to example #1 and #2) as such:
    ``['Jeff', 'Connecticut', 'Chad', 'Madison']`` - but if you provide data this way, you must set
    ``rearrange=True``. This usage is strongly discouraged, but exists for legacy reasons.

    :param data: See the docstring for the method.
    :type data: Union[List[dict], Dict[str, List], List[Union[str, float, int]], List[List[Any]]]
    :param convert_data_types: If true, converts data you provide into the data type defined in the dictionary.
                               For example, if you provided the string '5' for the tag ``_Atom_chem_shift.Val``, it would automatically
                               be converted to a float while being added. This is mainly useful for parsers, as your data is probably
                               already in a format that is usable for you.
    :type convert_data_types: bool
    :param rearrange: If true, rearrange data provided in method #4 as necessary to fit in the loop. This only
                      exists for parsers, and its use is strongly discouraged.
    :type rearrange: bool
    :param schema: A pynmrstar Schema object, which will be used to determine data types if convert_data_types
                   is True.
    :type schema: pynmrstar.Schema
    :raises ValueError: If the data is empty, names a tag which is not in the loop, has the wrong number of
                        values, or is not in one of the supported formats.
    """

    if not data:
        raise ValueError('No valid data provided.')

    # Rows are collected here first, and only appended to self.data at the very end
    pending_data: List = []
    lc_tag_index: Dict[str, int] = self._lc_tags

    def format_two_to_one(format_two: Dict[str, List]):
        # Turn a dictionary of lists (format #2) into a stream of per-row dictionaries (format #1).
        # Lists shorter than the longest one simply contribute no key for the later rows.
        max_length = max([len(_) for _ in format_two.values()])
        keys = format_two.keys()
        for row_id in range(0, max_length):
            row_dict = {}
            for key in keys:
                try:
                    row_dict[key] = format_two[key][row_id]
                except IndexError:
                    pass
            yield row_dict

    # Data format #1 and #2
    # (only the first element of a list is inspected to decide that it is a list of dictionaries)
    if (isinstance(data, list) and isinstance(data[0], dict)) or \
            isinstance(data, dict) and all([isinstance(_, list) for _ in data.values()]):

        # Handle format #2 by converting it to #1
        if isinstance(data, dict):
            data = format_two_to_one(data)

        for pos, row in enumerate(data):
            # Tags which the row does not mention keep the value None
            current_row = [None]*len(self._tags)
            for tag, value in row.items():
                try:
                    tag_index = lc_tag_index[utils.format_tag_lc(tag)]
                except KeyError:
                    raise ValueError(f'In row {pos} of your provided data, a tag was supplied which was not'
                                     f" already present in the loop. Invalid tag: '{tag}'")
                current_row[tag_index] = value
            pending_data.append(current_row)
    # Data format #3 - a list of lists
    elif isinstance(data, list) and isinstance(data[0], list):
        for pos, row in enumerate(data):
            if len(row) != len(self.tags):
                raise ValueError('One of the lists you provided is not the correct length to match the number '
                                 f'of tags present in the loop. Error on row {pos} with values: {row}')
        # The caller's row lists are used as-is (not copied)
        pending_data = data
    # Data format #4 - a single list of values
    elif isinstance(data, list):
        if rearrange:
            # Break their data into chunks based on the number of tags
            pending_data = [data[x:x + len(self._tags)] for x in range(0, len(data), len(self._tags))]
            # Only the last chunk can be short, so checking it is sufficient
            if len(pending_data[-1]) != len(self._tags):
                raise ValueError(f"The number of data elements in the list you provided is not an even multiple of "
                                 f"the number of tags which are set in the loop. Please either add missing tags "
                                 f"using Loop.add_tag() or modify the list of tag values you are adding to be an "
                                 f"even multiple of the number of tags. Error in loop '{self.category}'.")
        else:
            # Add one row of data
            if len(data) != len(self._tags):
                raise ValueError("The list must have the same number of elements as the number of tags when adding "
                                 "a single row of values! Insert tag names first by calling Loop.add_tag().")
            # Add the user data (the caller's list itself becomes the row)
            pending_data.append(data)
    else:
        raise ValueError("Your data did not match one of the supported types.")

    # Auto convert data types if option set
    if convert_data_types:
        schema = utils.get_schema(schema)
        for row in pending_data:
            # Values are replaced in place within each pending row
            for tag_id, datum in enumerate(row):
                row[tag_id] = schema.convert_tag(f"{self.category}.{self._tags[tag_id]}", datum)

    # Add the data at the very end to ensure that errors are caught before we mutate the data
    self.data.extend(pending_data)
578
+
579
def add_data_by_tag(self, tag_name: str, value) -> None:
    """Deprecated: It is recommended to use add_data() instead for most use
    cases.

    Append one value to the loop, identified by its tag. Values have to
    arrive in tag order: the value is appended to the last row (a new
    row is started when the last one is full) and must belong to the
    next unfilled tag, otherwise ValueError is raised."""

    warnings.warn("Deprecated: It is recommended to use Loop.add_data() instead for most use cases.",
                  DeprecationWarning)

    # If the tag carries a category, it has to be this loop's category
    if "." in tag_name:
        supplied_category = utils.format_category(str(tag_name))
        if self.category.lower() != supplied_category.lower():
            raise ValueError(f"Category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    tag_position = self.tag_index(tag_name)
    if tag_position is None:
        raise ValueError(f"The tag '{tag_name}' to which you are attempting to add data does not yet exist. Create "
                         f"the tags using Loop.add_tag() before adding data.")

    # Make sure there is a row to append to
    if len(self.data) == 0:
        self.data.append([])
    # Start a new row when the current one is complete
    if len(self.data[-1]) == len(self._tags):
        self.data.append([])

    current_row = self.data[-1]
    if len(current_row) != tag_position:
        raise ValueError("You cannot add data out of tag order.")
    current_row.append(value)
607
+
608
def add_missing_tags(self, schema: Schema = None, all_tags: bool = False) -> None:
    """ Add whichever schema tags of this loop's category are not yet
    present (filling existing rows with None), sort the tags, and then
    try to sort the rows by the "Ordinal" tag. If that sort raises
    TypeError the "Ordinal" column is renumbered 1..n in the current row
    order; if it raises ValueError the rows are left as they are. """

    schema_tags = Loop._get_tags_from_schema(self.category, schema=schema, all_tags=all_tags)
    self.add_tag(schema_tags, ignore_duplicates=True, update_data=True)
    self.sort_tags()

    # See if we can sort the rows (in addition to tags)
    try:
        self.sort_rows("Ordinal")
    except ValueError:
        pass
    except TypeError:
        ordinal_column = self.tag_index("Ordinal")

        # Number the rows consecutively, starting from one
        for ordinal, row in enumerate(self.data, start=1):
            row[ordinal_column] = ordinal
627
+
628
def add_tag(self, name: Union[str, List[str]], ignore_duplicates: bool = False, update_data: bool = False) -> None:
    """Add a tag to the tag name list. Does a bit of validation
    and parsing. Set ignore_duplicates to true to ignore attempts
    to add the same tag more than once rather than raise an
    exception.

    You can also pass a list of tag names to add more than one
    tag at a time.

    Adding a tag will update the data array to match by adding
    None values to the rows if you specify update_data=True.

    :raises ValueError: If the tag's category conflicts with the loop category, the tag already exists
                        (and ignore_duplicates is false), or the tag name is null-equivalent, contains
                        a second '.', or contains whitespace.
    """

    # If they have passed multiple tags to add, call ourself
    # on each of them in succession
    if isinstance(name, (list, tuple)):
        for item in name:
            self.add_tag(item, ignore_duplicates=ignore_duplicates, update_data=update_data)
        return

    name = name.strip()

    if "." in name:
        if name[0] != ".":
            # Split "Category.Tag" - the category gets a leading underscore if it lacks one
            category = name[0:name.index(".")]
            if category[:1] != "_":
                category = "_" + category

            # The first categorised tag sets the loop category; later ones must agree with it
            if self.category is None:
                self.category = category
            elif self.category.lower() != category.lower():
                raise ValueError("One loop cannot have tags with different categories (or tags that don't "
                                 f"match the loop category)! The loop category is '{self.category}' while "
                                 f"the category in the tag was '{category}'.")
            name = name[name.index(".") + 1:]
        else:
            # A bare leading "." carries no category - just drop it
            name = name[1:]

    # Ignore duplicate tags
    if self.tag_index(name) is not None:
        if ignore_duplicates:
            return
        raise ValueError(f"There is already a tag with the name '{name}' in the loop '{self.category}'.")
    if name in definitions.NULL_VALUES:
        raise ValueError(f"Cannot use a null-equivalent value as a tag name. Invalid tag name: '{name}'")
    if "." in name:
        raise ValueError(f"There cannot be more than one '.' in a tag name. Invalid tag name: '{name}'")
    if any(char in definitions.WHITESPACE for char in str(name)):
        raise ValueError(f"Tag names can not contain whitespace characters. Invalid tag name: '{name}'")

    # Add the tag
    self._tags.append(name)

    # Add None's to the rows of data
    if update_data:
        for row in self.data:
            row.append(None)
687
+
688
+ def clear_data(self) -> None:
689
+ """Erases all data in this loop. Does not erase the tag names
690
+ or loop category."""
691
+
692
+ self.data = []
693
+
694
+ def compare(self, other) -> List[str]:
695
+ """Returns the differences between two loops as a list. Order of
696
+ loops being compared does not make a difference on the specific
697
+ errors detected."""
698
+
699
+ diffs = []
700
+
701
+ # Check if this is literally the same object
702
+ if self is other:
703
+ return []
704
+ # Check if the other object is our string representation
705
+ if isinstance(other, str):
706
+ if str(self) == other:
707
+ return []
708
+ else:
709
+ return ['String was not exactly equal to loop.']
710
+ elif not isinstance(other, Loop):
711
+ return ['Other object is not of class Loop.']
712
+
713
+ # We need to do this in case of an extra "\n" on the end of one tag
714
+ if str(other) == str(self):
715
+ return []
716
+
717
+ # Do STAR comparison
718
+ try:
719
+ # Check category of loops
720
+ if str(self.category).lower() != str(other.category).lower():
721
+ diffs.append(f"\t\tCategory of loops does not match: '{self.category}' vs '{other.category}'.")
722
+
723
+ # Check tags of loops
724
+ if ([x.lower() for x in self._tags] !=
725
+ [x.lower() for x in other.tags]):
726
+ diffs.append(f"\t\tLoop tag names do not match for loop with category '{self.category}'.")
727
+
728
+ # No point checking if data is the same if the tag names aren't
729
+ else:
730
+ # Only sort the data if it is not already equal
731
+ if self.data != other.data:
732
+
733
+ # Check data of loops
734
+ self_data = sorted(deepcopy(self.data))
735
+ other_data = sorted(deepcopy(other.data))
736
+
737
+ if self_data != other_data:
738
+ diffs.append(f"\t\tLoop data does not match for loop with category '{self.category}'.")
739
+
740
+ except AttributeError as err:
741
+ diffs.append(f"\t\tAn exception occurred while comparing: '{err}'.")
742
+
743
+ return diffs
744
+
745
def delete_tag(self, tag: Union[str, List[str]]) -> None:
    """ Deprecated alias which emits a DeprecationWarning and forwards to
    `py:meth:pynmrstar.Loop.remove_tag`. Please call that method instead. """

    deprecation_message = 'Please use remove_tag() instead.'
    warnings.warn(deprecation_message, DeprecationWarning)
    outcome = self.remove_tag(tag)
    return outcome
750
+
751
def delete_data_by_tag_value(self, tag: str, value: Any, index_tag: str = None) -> List[List[Any]]:
    """ Deprecated alias which emits a DeprecationWarning and forwards to
    `py:meth:pynmrstar.Loop.remove_data_by_tag_value`. Please call that method instead. """

    deprecation_message = 'Please use remove_data_by_tag_value() instead.'
    warnings.warn(deprecation_message, DeprecationWarning)
    outcome = self.remove_data_by_tag_value(tag, value, index_tag)
    return outcome
756
+
757
def filter(self, tag_list: Union[str, List[str], Tuple[str]], ignore_missing_tags: bool = False):
    """ Build and return a new loop holding only the requested tags
    (and their data), in the order requested. A tag which is not in
    this loop raises KeyError, unless ignore_missing_tags=True, in which
    case it is skipped."""

    # A single tag is handled as a one-element list
    requested_tags = tag_list if isinstance(tag_list, (list, tuple)) else [tag_list]

    filtered_loop = Loop.from_scratch()
    found_tags = []

    # Work out which of the requested tags exist, adding those to the new loop
    for requested in requested_tags:
        position = self.tag_index(requested)

        if position is not None:
            found_tags.append(requested)
            filtered_loop.add_tag(self._tags[position])
        elif not ignore_missing_tags:
            raise KeyError(f"Cannot filter tag '{requested}' as it isn't present in this loop.")

    # Fetch the data for the surviving tags
    tag_values = self.get_tag(found_tags)

    if len(found_tags) == 1:
        # For a single tag the values come back flat, so wrap each one into a row
        for single_value in tag_values:
            filtered_loop.add_data([single_value])
    else:
        for row in tag_values:
            # We know it's a row because we didn't specify dict_result=True to get_tag()
            assert isinstance(row, list)
            filtered_loop.add_data(row)

    # The new loop inherits our category unless adding the tags already set one
    if filtered_loop.category is None:
        filtered_loop.category = self.category

    return filtered_loop
800
+
801
def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False) -> str:
    """ Render the loop as a string, like str(Loop), but with options that
    control what is printed. Both options are passed straight to __str__().

    skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
    skip_empty_tags will omit tags in the loop which have no non-null values."""

    options = {"skip_empty_loops": skip_empty_loops, "skip_empty_tags": skip_empty_tags}
    return self.__str__(**options)
809
+
810
def get_data_as_csv(self, header: bool = True, show_category: bool = True) -> str:
    """Return the loop's rows as a CSV-formatted string.

    When header is true the first line lists the tags; each tag is prefixed
    with the loop category and a period unless show_category is false.
    Lines in the returned text end with a newline character."""

    buffer = StringIO()
    writer = csv_writer(buffer)

    if header:
        if show_category:
            heading = [str(self.category) + "." + str(tag) for tag in self._tags]
        else:
            heading = [str(tag) for tag in self._tags]
        writer.writerow(heading)

    # One CSV record per data row
    writer.writerows(self.data)

    # The csv module terminates records with '\r\n'; normalize to '\n'
    return buffer.getvalue().replace('\r\n', '\n')
836
+
837
def get_json(self, serialize: bool = True) -> Union[dict, str]:
    """ Return the loop in JSON form.

    With serialize=True (the default) a JSON string is returned. With
    serialize=False the underlying dictionary is returned instead; it holds
    the keys "category", "tags" and "data", and refers to the loop's own tag
    and data lists rather than to copies."""

    loop_dict = dict(category=self.category, tags=self._tags, data=self.data)

    if not serialize:
        return loop_dict

    # _json_serialize is handed to json.dumps as the fallback for values json cannot encode
    return json.dumps(loop_dict, default=_json_serialize)
852
+
853
def get_tag_names(self) -> List[str]:
    """ Return the tag names of this loop with the category included,
    i.e. "<category>.<tag>" for every tag, in loop order.

    Raises InvalidStateError if the category was never set.

    To fetch tag values use get_tag()."""

    if self.category:
        return [self.category + "." + tag for tag in self._tags]

    raise InvalidStateError("You never set the category of this loop. You must set the category before calling "
                            "this method, either by setting the loop category directly when creating the loop "
                            "using the Loop.from_scratch() class method, by calling loop.set_category(), or by "
                            "adding a fully qualified tag which includes the loop category (for example, "
                            "adding '_Citation_author.Family_name' rather than just 'Family_name').")
870
+
871
def get_tag(self,
            tags: Optional[Union[str, List[str]]] = None,
            whole_tag: bool = False,
            dict_result: bool = False) -> Union[List[Any], List[Dict[str, Any]]]:
    """Return the values of the selected tag(s), row by row.

    tags may be a tag name, a zero-based integer column ID, or a list/tuple
    of those. Leave tags unset to fetch all tags. When tags is unset and
    dict_result is False the loop's own data list is returned, not a copy.

    Result shape when dict_result is False:
      * several tags - a list of rows, each row a list of values
      * a single tag - a flat list of values (the extra list is stripped)
    If whole_tag=True each value is replaced by a [full tag name, value] pair.

    If dict_result=True a list of dictionaries is returned, one per row,
    mapping each tag to its value. The keys use the capitalization the caller
    specified unless whole_tag is True, in which case the full tag name with
    the capitalization found in the loop is used.

    Raises ValueError if a tag carries a category that differs from this
    loop's category, and KeyError if a tag name or column ID is not present."""

    # All tags
    if tags is None:
        if not dict_result:
            return self.data
        tags = self._tags

    # Turn single elements into lists. Tuples are accepted too, as in filter().
    if isinstance(tags, tuple):
        tags = list(tags)
    elif not isinstance(tags, list):
        tags = [tags]

    # Build the normalized queries in a new list - don't modify the list that was passed
    queries = []
    for item in tags:
        # Integer column IDs are kept as integers so they can be resolved
        # positionally below. bool is excluded even though it subclasses int.
        if isinstance(item, int) and not isinstance(item, bool):
            queries.append(item)
            continue

        # Strip the category if they provide it (also validate it during the process)
        name = str(item)
        if "." in name and utils.format_category(name).lower() != self.category.lower():
            raise ValueError(f"Cannot fetch data with tag '{name}' because the category does not match the "
                             f"category of this loop '{self.category}'.")
        queries.append(utils.format_tag_lc(name))

    # Map lower-cased tag name to tag position; the first occurrence of a name wins
    tag_mapping = {}
    for position, name in enumerate(x.lower() for x in self._tags):
        tag_mapping.setdefault(name, position)

    # Make sure the requested fields are actually present in the loop
    column_count = len(self._tags)
    tag_ids = []
    for pos, query in enumerate(queries):
        # A name match takes priority, so a tag literally named "0" still resolves by name
        if str(query) in tag_mapping:
            tag_ids.append(tag_mapping[str(query)])
        elif isinstance(query, int) and 0 <= query < column_count:
            tag_ids.append(query)
        else:
            raise KeyError(f"Could not locate the tag with name or ID: '{tags[pos]}' in loop '{self.category}'.")

    # Build the tags as a list
    if not dict_result:
        if whole_tag:
            result = [[[self.category + "." + self._tags[col_id], row[col_id]]
                       for col_id in tag_ids] for row in self.data]
        else:
            result = [[row[col_id] for col_id in tag_ids] for row in self.data]

        # Strip the extra list if only one tag
        if len(tag_ids) == 1:
            return [x[0] for x in result]
        return result

    # Build a dictionary per row
    if whole_tag:
        return [{self.category + "." + self._tags[col_id]: row[col_id] for col_id in tag_ids}
                for row in self.data]
    return [{tags[pos]: row[col_id] for pos, col_id in enumerate(tag_ids)} for row in self.data]
948
+
949
def print_tree(self) -> None:
    """Print a summary of the loop (its repr()) to standard output."""

    summary = repr(self)
    print(summary)
953
+
954
def remove_data_by_tag_value(self, tag: str, value: Any, index_tag: str = None) -> List[List[Any]]:
    """Remove every row whose value for `tag` equals `value`.

    If index_tag is provided, renumber_rows(index_tag) is called afterwards,
    which renumbers that tag starting with 1. Returns the deleted rows, in
    their original order.

    Raises ValueError if the tag carries a category that does not match this
    loop's category, or if the tag is not present in this loop."""

    # Make sure the category matches - if provided. The tag is stringified
    # first (as renumber_rows() does) so a non-string tag gives the ValueError
    # below rather than a TypeError from the "in" test.
    tag_text = str(tag)
    if "." in tag_text:
        supplied_category = utils.format_category(tag_text)
        if supplied_category.lower() != self.category.lower():
            raise ValueError(f"The category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    search_tag = self.tag_index(tag)
    if search_tag is None:
        raise ValueError(f"The tag you provided '{tag}' isn't in this loop!")

    # Partition the rows in a single pass instead of popping from the middle
    # of the list, which is quadratic in the number of rows.
    kept = []
    deleted = []
    for row in self.data:
        if row[search_tag] == value:
            deleted.append(row)
        else:
            kept.append(row)

    # Slice assignment mutates the existing list, so self.data stays the same object
    if deleted:
        self.data[:] = kept

    # Re-number if they so desire
    if index_tag is not None:
        self.renumber_rows(index_tag)

    return deleted
985
+
986
def remove_tag(self, tag: Union[str, List[str]]) -> None:
    """Remove one or more tags from the loop based on tag name. Also removes any data for the given tag(s).

    Provide either a single tag or a list/tuple of tags. Listing the same
    tag more than once removes it once. Raises KeyError, before anything is
    modified, if any of the tags is not present in the loop."""

    if not isinstance(tag, (list, tuple)):
        tag = [tag]

    # Resolve every tag to its column position first. Collecting positions in
    # a set collapses repeated mentions of one tag, which would otherwise
    # fail halfway through the removal.
    positions = set()
    for each_tag in tag:
        position = self.tag_index(each_tag)
        if position is None:
            raise KeyError(f"There is no tag with name '{each_tag}' to remove in loop '{self.category}'.")
        positions.add(position)

    # Delete from the highest position down so the remaining positions stay valid
    for position in sorted(positions, reverse=True):
        del self._tags[position]
        for row in self.data:
            del row[position]
1004
+
1005
def renumber_rows(self, index_tag: str, start_value: int = 1, maintain_ordering: bool = False):
    """Renumber a given tag incrementally, in place.

    index_tag is a tag name or, if no tag of that name exists, anything
    int() can convert into a column position. Set start_value to the initial
    value if 1 is not acceptable. Values that are strings are replaced by
    strings; other values are replaced by integers.

    Set maintain_ordering to preserve the existing sequence with an offset,
    so that the first row becomes start_value. E.g. 2,3,3,5 would become
    1,2,2,4.

    Raises ValueError if the category of index_tag does not match this loop,
    if the tag cannot be found, or (with maintain_ordering) if any existing
    value cannot be converted to an integer - in which case no row is changed."""

    # Make sure the category matches
    if "." in str(index_tag):
        supplied_category = utils.format_category(str(index_tag))
        if supplied_category.lower() != self.category.lower():
            raise ValueError(f"Category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    # Determine which tag ID to renumber
    renumber_tag = self.tag_index(index_tag)

    if renumber_tag is None:
        # Or, perhaps they specified an integer to represent the tag?
        try:
            renumber_tag = int(index_tag)
        except ValueError:
            raise ValueError(f"The renumbering tag you provided '{index_tag}' isn't in this loop!")

    # Do nothing if we have no data
    if len(self.data) == 0:
        return

    # Make sure the tags and data match
    self._check_tags_match_data()

    if maintain_ordering:
        # Work out every replacement before touching the data. A value that
        # cannot be converted then leaves the loop unmodified, with no need
        # to copy the data beforehand and restore it afterwards.
        replacements = []
        offset = 0
        for pos, row in enumerate(self.data):
            current = row[renumber_tag]
            try:
                number = int(current)
            except (ValueError, TypeError) as err:
                raise ValueError("You can't renumber a row containing anything that can't be coerced into an "
                                 "integer using maintain_ordering. I.e. what am I suppose to renumber "
                                 f"'{current}' to?") from err
            if pos == 0:
                # The first row defines the shift applied to every row
                offset = start_value - number
            shifted = number + offset
            replacements.append(str(shifted) if isinstance(current, str) else shifted)

        for row, replacement in zip(self.data, replacements):
            row[renumber_tag] = replacement

    # Simple renumbering algorithm if we don't need to maintain the ordering
    else:
        for pos, row in enumerate(self.data):
            replacement = pos + start_value
            row[renumber_tag] = str(replacement) if isinstance(row[renumber_tag], str) else replacement
1065
+
1066
def set_category(self, category: str) -> None:
    """ Set the category of the loop, storing the value produced by
    utils.format_category(). Useful if you didn't know the category at
    loop creation time."""

    formatted = utils.format_category(category)
    self.category = formatted
1071
+
1072
def sort_tags(self, schema: Schema = None) -> None:
    """ Reorder the tag names, and the data columns with them, to follow the
    order given by the schema's tag_key(). The schema to use is obtained from
    utils.get_schema(schema) (the original documentation states that the BMRB
    schema is used unless one is provided).

    Nothing is modified when the tags are already in schema order."""

    active_schema = utils.get_schema(schema)
    current_order = self.get_tag_names()

    # Rank the fully qualified tag names by the schema's ordering key
    sorted_order = sorted(current_order, key=active_schema.tag_key)

    # Only rebuild the data when the order actually changes
    if sorted_order != current_order:
        self.data = self.get_tag(sorted_order)
        self._tags = [utils.format_tag(x) for x in sorted_order]
1091
+
1092
def sort_rows(self, tags: Union[str, List[str]], key: Callable = None) -> None:
    """ Sort the rows by their values for a given tag or tags.

    Specify the tags using their names or ordinals (column positions), as a
    single value or as a list/tuple. If you provide multiple tags to sort
    by, they are interpreted as increasing order of sort priority.

    By default the values are sorted numerically; if a value cannot be
    converted to a float, a plain comparison sort of the raw values is done
    for that tag instead. Supply a function as key and the rows are ordered
    by the keys it returns for each row instead (see the help for sorted());
    the tags are then only validated, not used for ordering.

    Raises ValueError if a tag's category does not match this loop's
    category or if a tag cannot be found."""

    # Do nothing if we have no data
    if len(self.data) == 0:
        return

    processing_list = list(tags) if isinstance(tags, (list, tuple)) else [tags]

    # Process their input to determine which columns to operate on
    sort_ordinals = []
    for cur_tag in (str(x) for x in processing_list):

        # Make sure the category matches
        if "." in cur_tag:
            supplied_category = utils.format_category(cur_tag)
            if supplied_category.lower() != self.category.lower():
                raise ValueError(f"The category provided in your tag '{supplied_category}' does not match this "
                                 f"loop's category '{self.category}'.")

        ordinal = self.tag_index(cur_tag)

        # They didn't specify a valid tag
        if ordinal is None:
            # Perhaps they specified an integer to represent the tag?
            try:
                ordinal = int(cur_tag)
            except ValueError:
                raise ValueError(f"The sorting tag you provided '{cur_tag}' isn't in this loop!")

        sort_ordinals.append(ordinal)

    # A caller-supplied key does not depend on the tag, and sorted() is
    # stable, so sorting once gives the same order as sorting once per tag.
    if key is not None:
        if sort_ordinals:
            self.data = sorted(self.data, key=key)
        return

    # Do the sort(s): numeric first, falling back to comparing the raw values
    for ordinal in sort_ordinals:
        try:
            self.data = sorted(self.data, key=lambda row, pos=ordinal: float(row[pos]))
        except ValueError:
            self.data = sorted(self.data, key=lambda row, pos=ordinal: row[pos])
1151
+
1152
def tag_index(self, tag_name: str) -> Optional[int]:
    """ Helper method to do a case-insensitive lookup of a tag in this
    loop. The name is normalized with utils.format_tag_lc() and looked up in
    the loop's lower-cased tag mapping. Returns the index of the tag if
    found and None if not found.

    This is useful if you need to get the index of a certain tag to
    iterate through the data and modify it."""

    try:
        normalized = utils.format_tag_lc(str(tag_name))
        position = self._lc_tags[normalized]
    except KeyError:
        position = None
    return position
1164
+
1165
def validate(self, validate_schema: bool = True, schema: 'Schema' = None,
             validate_star: bool = True, category: str = None) -> List[str]:
    """Validate a loop in a variety of ways and return the list of error
    messages found. An empty list indicates no errors were found. By
    default all validation modes are enabled.

    validate_schema - When true, every value is checked with the schema's
    val_type() for its tag; `category` is passed through to that call. The
    schema is obtained from utils.get_schema(schema), so you can pass your
    own custom schema if desired.

    validate_star - When true, every row is checked to have exactly as many
    values as the loop has tags."""

    problems = []

    if validate_schema:
        # Resolve the schema to validate against
        active_schema = utils.get_schema(schema)

        # Check each value against the schema definition of its tag
        for row in self.data:
            for column, datum in enumerate(row):
                full_tag = f"{self.category}.{self._tags[column]}"
                problems.extend(active_schema.val_type(full_tag, datum, category=category))

    if validate_star:
        # Report every row whose width differs from the number of tags
        expected_width = len(self._tags)
        problems.extend(
            f"Loop '{self.category}' data width does not match it's tag width on row '{row_num}'."
            for row_num, row in enumerate(self.data) if len(row) != expected_width)

    return problems