pynmrstar 3.3.5__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pynmrstar might be problematic. Click here for more details.
- cnmrstar.cpython-312-darwin.so +0 -0
- pynmrstar/__init__.py +55 -0
- pynmrstar/_internal.py +292 -0
- pynmrstar/definitions.py +32 -0
- pynmrstar/entry.py +970 -0
- pynmrstar/exceptions.py +43 -0
- pynmrstar/loop.py +1197 -0
- pynmrstar/parser.py +287 -0
- pynmrstar/reference_files/comments.str +538 -0
- pynmrstar/reference_files/data_types.csv +24 -0
- pynmrstar/reference_files/schema.csv +6726 -0
- pynmrstar/saveframe.py +1015 -0
- pynmrstar/schema.py +367 -0
- pynmrstar/utils.py +134 -0
- pynmrstar-3.3.5.dist-info/LICENSE +21 -0
- pynmrstar-3.3.5.dist-info/METADATA +59 -0
- pynmrstar-3.3.5.dist-info/RECORD +19 -0
- pynmrstar-3.3.5.dist-info/WHEEL +5 -0
- pynmrstar-3.3.5.dist-info/top_level.txt +2 -0
pynmrstar/loop.py
ADDED
|
@@ -0,0 +1,1197 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import warnings
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
from csv import reader as csv_reader, writer as csv_writer
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from itertools import chain
|
|
7
|
+
from typing import TextIO, BinaryIO, Union, List, Optional, Any, Dict, Callable, Tuple
|
|
8
|
+
|
|
9
|
+
from pynmrstar import definitions, utils, entry as entry_mod
|
|
10
|
+
from pynmrstar._internal import _json_serialize, _interpret_file
|
|
11
|
+
from pynmrstar.exceptions import InvalidStateError
|
|
12
|
+
from pynmrstar.parser import Parser
|
|
13
|
+
from pynmrstar.schema import Schema
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Loop(object):
|
|
17
|
+
"""A BMRB loop object. Create using the class methods, see below."""
|
|
18
|
+
|
|
19
|
+
def __contains__(self, item: Any) -> bool:
|
|
20
|
+
""" Check if the loop contains one or more tags. """
|
|
21
|
+
|
|
22
|
+
# Prepare for processing
|
|
23
|
+
if isinstance(item, (list, tuple)):
|
|
24
|
+
to_process: List[str] = list(item)
|
|
25
|
+
elif isinstance(item, str):
|
|
26
|
+
to_process = [item]
|
|
27
|
+
else:
|
|
28
|
+
return False
|
|
29
|
+
|
|
30
|
+
lc_tags = self._lc_tags
|
|
31
|
+
for tag in to_process:
|
|
32
|
+
if utils.format_tag_lc(tag) not in lc_tags:
|
|
33
|
+
return False
|
|
34
|
+
return True
|
|
35
|
+
|
|
36
|
+
def __eq__(self, other) -> bool:
|
|
37
|
+
"""Returns True if this loop is equal to another loop, False if
|
|
38
|
+
it is different."""
|
|
39
|
+
|
|
40
|
+
if not isinstance(other, Loop):
|
|
41
|
+
return False
|
|
42
|
+
|
|
43
|
+
return (self.category, self._tags, self.data) == \
|
|
44
|
+
(other.category, other._tags, other.data)
|
|
45
|
+
|
|
46
|
+
def __getitem__(self, item: Union[int, str, List[str], Tuple[str]]) -> list:
|
|
47
|
+
"""Get the indicated row from the data array."""
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
return self.data[item]
|
|
51
|
+
except TypeError:
|
|
52
|
+
if isinstance(item, tuple):
|
|
53
|
+
item = list(item)
|
|
54
|
+
return self.get_tag(tags=item)
|
|
55
|
+
|
|
56
|
+
def __init__(self, **kwargs) -> None:
    """ You should not directly instantiate a Loop using this method.
    Instead use the class methods:

    :py:meth:`Loop.from_scratch`, :py:meth:`Loop.from_string`,
    :py:meth:`Loop.from_template`, :py:meth:`Loop.from_file`,
    :py:meth:`Loop.from_json`

    The keyword arguments decide how the loop is populated:

    * ``category`` (optionally with ``source``): create an empty loop.
    * ``the_string`` or ``file_name``: parse the text, either as CSV
      (when ``csv`` is truthy) or as NMR-STAR.
    * ``tag_prefix``, ``all_tags`` and ``schema``: add the tags returned
      by ``Loop._get_tags_from_schema``.

    ``convert_data_types``, ``raise_parse_warnings`` and ``schema`` are
    forwarded to the parsing calls when present.

    Raises ValueError when called with no keyword arguments, or when the
    parsed NMR-STAR text contains more than one loop."""

    # Initialize our local variables
    self._tags: List[str] = []
    self.data: List[List[Any]] = []
    self.category: Optional[str] = None
    self.source: str = "unknown"

    # Empty buffer by default; replaced below when a string or file is supplied
    star_buffer: StringIO = StringIO("")

    # Update our source if it provided
    if 'source' in kwargs:
        self.source = kwargs['source']

    # Update our category if provided. This is the path used by from_scratch(),
    # so nothing else is read from kwargs once the category is set.
    if 'category' in kwargs:
        self.category = utils.format_category(kwargs['category'])
        return

    # They initialized us wrong
    if len(kwargs) == 0:
        raise ValueError("You should not directly instantiate a Loop using this method. Instead use the "
                         "class methods: Loop.from_scratch(), Loop.from_string(), Loop.from_template(), "
                         "Loop.from_file(), and Loop.from_json().")

    # Parsing from a string
    if 'the_string' in kwargs:
        # Parse from a string by wrapping it in StringIO
        star_buffer = StringIO(kwargs['the_string'])
        self.source = "from_string()"
    # Parsing from a file
    elif 'file_name' in kwargs:
        # NOTE(review): _interpret_file presumably returns a readable text buffer - confirm
        star_buffer = _interpret_file(kwargs['file_name'])
        self.source = f"from_file('{kwargs['file_name']}')"
    # Creating from template (schema)
    elif 'tag_prefix' in kwargs:

        # 'all_tags' and 'schema' are read unconditionally here; from_template() always supplies them
        tags = Loop._get_tags_from_schema(kwargs['tag_prefix'], all_tags=kwargs['all_tags'],
                                          schema=kwargs['schema'])
        for tag in tags:
            self.add_tag(tag)

        return

    # If we are reading from a CSV file, go ahead and parse it
    if 'csv' in kwargs and kwargs['csv']:
        csv_file = csv_reader(star_buffer)
        # The first CSV row holds the tag names, every later row is one row of data
        self.add_tag(next(csv_file))
        for row in csv_file:
            self.add_data(row,
                          convert_data_types=kwargs.get('convert_data_types', False),
                          schema=kwargs.get('schema', None))
        # Note that this formats the value of the 'csv' argument into the source, not a file name
        self.source = f"from_csv('{kwargs['csv']}')"
        return

    tmp_entry = entry_mod.Entry.from_scratch(0)

    # Wrap the loop text in a throwaway data block and saveframe, then parse it into tmp_entry
    star_buffer = StringIO(f"data_0 save_internaluseyoushouldntseethis_frame _internal.use internal "
                           f"{star_buffer.read()} save_")
    parser = Parser(entry_to_parse_into=tmp_entry)
    parser.parse(star_buffer.read(),
                 source=self.source,
                 convert_data_types=kwargs.get('convert_data_types', False),
                 raise_parse_warnings=kwargs.get('raise_parse_warnings', False),
                 schema=kwargs.get('schema', None))

    # Check that there was only one loop here
    if len(tmp_entry[0].loops) > 1:
        raise ValueError("You attempted to parse one loop but the source you provided had more than one loop. "
                         "Please either parse all loops as a saveframe or only parse one loop. Loops detected: " +
                         str(tmp_entry[0].loops))

    # Copy the tags, data and category of tmp_entry[0][0] into ourself
    # NOTE(review): presumably tmp_entry[0][0] is the first loop of the temporary saveframe, and
    # this lookup fails if the source contained no loop at all - confirm
    self._tags = tmp_entry[0][0].tags
    self.data = tmp_entry[0][0].data
    self.category = tmp_entry[0][0].category
|
|
139
|
+
|
|
140
|
+
def __iter__(self) -> list:
|
|
141
|
+
""" Yields each of the rows contained within the loop. """
|
|
142
|
+
|
|
143
|
+
for row in self.data:
|
|
144
|
+
yield row
|
|
145
|
+
|
|
146
|
+
def __len__(self) -> int:
|
|
147
|
+
"""Return the number of rows of data."""
|
|
148
|
+
|
|
149
|
+
return len(self.data)
|
|
150
|
+
|
|
151
|
+
def __lt__(self, other) -> bool:
|
|
152
|
+
"""Returns True if this loop sorts lower than the compared
|
|
153
|
+
loop, false otherwise."""
|
|
154
|
+
|
|
155
|
+
if not isinstance(other, Loop):
|
|
156
|
+
return NotImplemented
|
|
157
|
+
|
|
158
|
+
return self.category < other.category
|
|
159
|
+
|
|
160
|
+
def __repr__(self) -> str:
|
|
161
|
+
"""Returns a description of the loop."""
|
|
162
|
+
|
|
163
|
+
return f"<pynmrstar.Loop '{self.category}'>"
|
|
164
|
+
|
|
165
|
+
def __setitem__(self, key: str, item: Any) -> None:
    """Replace every value of one tag (column) in the loop.

    ``item`` must supply exactly one value for each row that is already
    in the loop; with 5 rows of data you need to assign 5 values. A
    ValueError is raised if the tag is not in the loop or if the number
    of values does not match."""

    normalized = utils.format_tag_lc(key)
    tag_positions = self._lc_tags

    # Refuse to assign to a tag the loop does not have
    if normalized not in tag_positions:
        raise ValueError(f"Cannot assign to tag '{key}' as it does not exist in this loop.")

    # Column position of the tag within each row
    column = tag_positions[normalized]

    # The new values must line up one-to-one with the existing values
    existing_count = len(self[key])
    supplied_count = len(item)
    if existing_count != supplied_count:
        raise ValueError("To assign to a tag you must provide a list (or iterable) of a length equal to the "
                         f"number of values that currently exist for that tag. The tag '{key}' currently has"
                         f" {existing_count} values and you supplied {supplied_count} values.")

    # Overwrite the column, row by row
    for row_number, row in enumerate(self.data):
        row[column] = item[row_number]
|
|
188
|
+
|
|
189
|
+
def __str__(self, skip_empty_loops: bool = False, skip_empty_tags: bool = False) -> str:
|
|
190
|
+
"""Returns the loop in STAR format as a string."""
|
|
191
|
+
|
|
192
|
+
# Check if there is any data in this loop
|
|
193
|
+
if len(self.data) == 0:
|
|
194
|
+
# They do not want us to print empty loops
|
|
195
|
+
if skip_empty_loops:
|
|
196
|
+
return ""
|
|
197
|
+
else:
|
|
198
|
+
# If we have no tags than return the empty loop
|
|
199
|
+
if len(self._tags) == 0:
|
|
200
|
+
return "\n loop_\n\n stop_\n"
|
|
201
|
+
|
|
202
|
+
if len(self._tags) == 0:
|
|
203
|
+
raise InvalidStateError("Impossible to print data if there are no associated tags. Error in loop "
|
|
204
|
+
f"'{self.category}' which contains data but hasn't had any tags added.")
|
|
205
|
+
|
|
206
|
+
# Make sure the tags and data match
|
|
207
|
+
self._check_tags_match_data()
|
|
208
|
+
|
|
209
|
+
# If skipping null tags, it's easier to filter out a loop with only real tags and then print
|
|
210
|
+
if skip_empty_tags:
|
|
211
|
+
has_data = [not all([_ in definitions.NULL_VALUES for _ in column]) for column in zip(*self.data)]
|
|
212
|
+
return self.filter([tag for x, tag in enumerate(self._tags) if has_data[x]]).format()
|
|
213
|
+
|
|
214
|
+
# Start the loop
|
|
215
|
+
return_chunks = ["\n loop_\n"]
|
|
216
|
+
# Print the tags
|
|
217
|
+
format_string = " %-s\n"
|
|
218
|
+
|
|
219
|
+
# Check to make sure our category is set
|
|
220
|
+
if self.category is None:
|
|
221
|
+
raise InvalidStateError("The category was never set for this loop. Either add a tag with the category "
|
|
222
|
+
"intact, specify it when generating the loop, or set it using Loop.set_category().")
|
|
223
|
+
|
|
224
|
+
# Print the categories
|
|
225
|
+
if self.category is None:
|
|
226
|
+
for tag in self._tags:
|
|
227
|
+
return_chunks.append(format_string % tag)
|
|
228
|
+
else:
|
|
229
|
+
for tag in self._tags:
|
|
230
|
+
return_chunks.append(format_string % (self.category + "." + tag))
|
|
231
|
+
|
|
232
|
+
return_chunks.append("\n")
|
|
233
|
+
|
|
234
|
+
if len(self.data) != 0:
|
|
235
|
+
|
|
236
|
+
# Make a copy of the data
|
|
237
|
+
working_data = []
|
|
238
|
+
title_widths = [4]*len(self.data[0])
|
|
239
|
+
|
|
240
|
+
# Put quotes as needed on the data
|
|
241
|
+
for row_pos, row in enumerate(self.data):
|
|
242
|
+
clean_row = []
|
|
243
|
+
for col_pos, x in enumerate(row):
|
|
244
|
+
try:
|
|
245
|
+
clean_val = utils.quote_value(x)
|
|
246
|
+
clean_row.append(clean_val)
|
|
247
|
+
length = len(clean_val) + 3
|
|
248
|
+
if length > title_widths[col_pos] and "\n" not in clean_val:
|
|
249
|
+
title_widths[col_pos] = length
|
|
250
|
+
|
|
251
|
+
except ValueError:
|
|
252
|
+
raise InvalidStateError('Cannot generate NMR-STAR for entry, as empty strings are not valid '
|
|
253
|
+
'tag values in NMR-STAR. Please either replace the empty strings with'
|
|
254
|
+
' None objects, or set pynmrstar.definitions.STR_CONVERSION_DICT['
|
|
255
|
+
'\'\'] = None.\n'
|
|
256
|
+
f'Loop: {self.category} Row: {row_pos} Column: {col_pos}')
|
|
257
|
+
|
|
258
|
+
working_data.append(clean_row)
|
|
259
|
+
|
|
260
|
+
# Generate the format string
|
|
261
|
+
format_string = " " + "%-*s" * len(self._tags) + " \n"
|
|
262
|
+
|
|
263
|
+
# Print the data, with the tags sized appropriately
|
|
264
|
+
for datum in working_data:
|
|
265
|
+
for pos, item in enumerate(datum):
|
|
266
|
+
if "\n" in item:
|
|
267
|
+
datum[pos] = "\n;\n%s;\n" % item
|
|
268
|
+
|
|
269
|
+
# Print the data (combine the tags' widths with their data)
|
|
270
|
+
tag_width_list = [d for d in zip(title_widths, datum)]
|
|
271
|
+
return_chunks.append(format_string % tuple(chain.from_iterable(tag_width_list)))
|
|
272
|
+
|
|
273
|
+
# Close the loop
|
|
274
|
+
return "".join(return_chunks) + "\n stop_\n"
|
|
275
|
+
|
|
276
|
+
@property
|
|
277
|
+
def _lc_tags(self) -> Dict[str, int]:
|
|
278
|
+
return {_[1].lower(): _[0] for _ in enumerate(self._tags)}
|
|
279
|
+
|
|
280
|
+
@property
|
|
281
|
+
def empty(self) -> bool:
|
|
282
|
+
""" Check if the loop has no data. """
|
|
283
|
+
|
|
284
|
+
for row in self.data:
|
|
285
|
+
for col in row:
|
|
286
|
+
if col not in definitions.NULL_VALUES:
|
|
287
|
+
return False
|
|
288
|
+
|
|
289
|
+
return True
|
|
290
|
+
|
|
291
|
+
@property
|
|
292
|
+
def tags(self) -> List[str]:
|
|
293
|
+
return self._tags
|
|
294
|
+
|
|
295
|
+
@classmethod
|
|
296
|
+
def from_file(cls,
|
|
297
|
+
the_file: Union[str, TextIO, BinaryIO],
|
|
298
|
+
csv: bool = False,
|
|
299
|
+
convert_data_types: bool = False,
|
|
300
|
+
raise_parse_warnings: bool = False,
|
|
301
|
+
schema: Schema = None):
|
|
302
|
+
"""Create a loop by loading in a file. Specify csv=True if
|
|
303
|
+
the file is a CSV file. If the_file starts with http://,
|
|
304
|
+
https://, or ftp:// then we will use those protocols to attempt
|
|
305
|
+
to open the file.
|
|
306
|
+
|
|
307
|
+
Setting convert_data_types to True will automatically convert
|
|
308
|
+
the data loaded from the file into the corresponding python type as
|
|
309
|
+
determined by loading the standard BMRB schema. This would mean that
|
|
310
|
+
all floats will be represented as decimal.Decimal objects, all integers
|
|
311
|
+
will be python int objects, strings and vars will remain strings, and
|
|
312
|
+
dates will become datetime.date objects. When printing str() is called
|
|
313
|
+
on all objects. Other that converting uppercase "E"s in scientific
|
|
314
|
+
notation floats to lowercase "e"s this should not cause any change in
|
|
315
|
+
the way re-printed NMR-STAR objects are displayed. Specify a custom
|
|
316
|
+
schema object to use using the schema parameter.
|
|
317
|
+
|
|
318
|
+
Setting raise_parse_warnings to True will result in the raising of a
|
|
319
|
+
ParsingError rather than logging a warning when non-valid (but
|
|
320
|
+
ignorable) issues are found."""
|
|
321
|
+
|
|
322
|
+
return cls(file_name=the_file,
|
|
323
|
+
csv=csv,
|
|
324
|
+
convert_data_types=convert_data_types,
|
|
325
|
+
raise_parse_warnings=raise_parse_warnings,
|
|
326
|
+
schema=schema)
|
|
327
|
+
|
|
328
|
+
@classmethod
|
|
329
|
+
def from_json(cls, json_dict: Union[dict, str]):
|
|
330
|
+
"""Create a loop from JSON (serialized or unserialized JSON)."""
|
|
331
|
+
|
|
332
|
+
# If they provided a string, try to load it using JSON
|
|
333
|
+
if not isinstance(json_dict, dict):
|
|
334
|
+
try:
|
|
335
|
+
json_dict = json.loads(json_dict)
|
|
336
|
+
except (TypeError, ValueError):
|
|
337
|
+
raise ValueError("The JSON you provided was neither a Python dictionary nor a JSON string.")
|
|
338
|
+
|
|
339
|
+
# Make sure it has the correct keys
|
|
340
|
+
for check in ['tags', 'category', 'data']:
|
|
341
|
+
if check not in json_dict:
|
|
342
|
+
raise ValueError(f"The JSON you provide must be a dictionary and must contain the key '{check}' - even"
|
|
343
|
+
f" if the key points to None.")
|
|
344
|
+
|
|
345
|
+
# Create a loop from scratch and populate it
|
|
346
|
+
ret = Loop.from_scratch()
|
|
347
|
+
ret._tags = json_dict['tags']
|
|
348
|
+
ret.category = json_dict['category']
|
|
349
|
+
ret.data = json_dict['data']
|
|
350
|
+
ret.source = "from_json()"
|
|
351
|
+
|
|
352
|
+
# Return the new loop
|
|
353
|
+
return ret
|
|
354
|
+
|
|
355
|
+
@classmethod
|
|
356
|
+
def from_scratch(cls,
|
|
357
|
+
category: str = None,
|
|
358
|
+
source: str = "from_scratch()"):
|
|
359
|
+
"""Create an empty saveframe that you can programmatically add
|
|
360
|
+
to. You may also pass the tag prefix as the second argument. If
|
|
361
|
+
you do not pass the tag prefix it will be set the first time you
|
|
362
|
+
add a tag."""
|
|
363
|
+
|
|
364
|
+
return cls(category=category, source=source)
|
|
365
|
+
|
|
366
|
+
@classmethod
|
|
367
|
+
def from_string(cls,
|
|
368
|
+
the_string: str,
|
|
369
|
+
csv: bool = False,
|
|
370
|
+
convert_data_types: bool = False,
|
|
371
|
+
raise_parse_warnings: bool = False,
|
|
372
|
+
schema: Schema = None):
|
|
373
|
+
"""Create a loop by parsing a string. Specify csv=True if
|
|
374
|
+
the string is in CSV format and not NMR-STAR format.
|
|
375
|
+
|
|
376
|
+
Setting convert_data_types to True will automatically convert
|
|
377
|
+
the data loaded from the file into the corresponding python type as
|
|
378
|
+
determined by loading the standard BMRB schema. This would mean that
|
|
379
|
+
all floats will be represented as decimal.Decimal objects, all integers
|
|
380
|
+
will be python int objects, strings and vars will remain strings, and
|
|
381
|
+
dates will become datetime.date objects. When printing str() is called
|
|
382
|
+
on all objects. Other that converting uppercase "E"s in scientific
|
|
383
|
+
notation floats to lowercase "e"s this should not cause any change in
|
|
384
|
+
the way re-printed NMR-STAR objects are displayed. Specify a custom
|
|
385
|
+
schema object to use using the schema parameter.
|
|
386
|
+
|
|
387
|
+
Setting raise_parse_warnings to True will result in the raising of a
|
|
388
|
+
ParsingError rather than logging a warning when non-valid (but
|
|
389
|
+
ignorable) issues are found."""
|
|
390
|
+
|
|
391
|
+
return cls(the_string=the_string,
|
|
392
|
+
csv=csv,
|
|
393
|
+
convert_data_types=convert_data_types,
|
|
394
|
+
raise_parse_warnings=raise_parse_warnings,
|
|
395
|
+
schema=schema)
|
|
396
|
+
|
|
397
|
+
@classmethod
def from_template(cls, tag_prefix: str,
                  all_tags: bool = False,
                  schema: Schema = None):
    """ Create a loop which already has the tags the schema defines for
    the given tag prefix. No values will be assigned.

    By default the tags which the schema marks with a "public" value of
    "I" are left out; set all_tags to True to include every tag of the
    category. The schema is resolved with ``utils.get_schema``, and its
    version is recorded in the source of the new loop."""

    resolved_schema = utils.get_schema(schema)
    template_source = f"from_template({resolved_schema.version})"

    return cls(tag_prefix=tag_prefix,
               all_tags=all_tags,
               schema=resolved_schema,
               source=template_source)
|
|
412
|
+
|
|
413
|
+
@staticmethod
def _get_tags_from_schema(category: str, schema: Schema = None, all_tags: bool = False) -> List[str]:
    """ Look up the tags which the schema defines for a category.

    The category is matched case-insensitively, and a leading "_" and a
    trailing "." are added to it when missing. Unless all_tags is True,
    tags whose "public" value in the schema is "I" are left out. The
    tags are returned in schema order; an InvalidStateError is raised
    when none are found. """

    resolved_schema = utils.get_schema(schema)

    # Normalise the category into the "_Category." form used as a prefix in the schema
    prefix = category if category.startswith("_") else "_" + category
    if not prefix.endswith("."):
        prefix += "."
    lowered_prefix = prefix.lower()

    matching_tags = [
        candidate for candidate in resolved_schema.schema_order
        if candidate.lower().startswith(lowered_prefix)
        and (all_tags or resolved_schema.schema[candidate.lower()]["public"] != "I")
    ]

    if not matching_tags:
        raise InvalidStateError(f"The tag prefix '{prefix}' has no corresponding tags in the dictionary.")

    return matching_tags
|
|
443
|
+
|
|
444
|
+
def _check_tags_match_data(self) -> bool:
|
|
445
|
+
""" Ensures that each row of the data has the same number of
|
|
446
|
+
elements as there are tags for the loop. This is necessary to
|
|
447
|
+
print or do some other operations on loops that count on the values
|
|
448
|
+
matching. """
|
|
449
|
+
|
|
450
|
+
# Make sure that if there is data, it is the same width as the
|
|
451
|
+
# tag names
|
|
452
|
+
if len(self.data) > 0:
|
|
453
|
+
for x, row in enumerate(self.data):
|
|
454
|
+
if len(self._tags) != len(row):
|
|
455
|
+
raise InvalidStateError(f"The number of tags must match the width of the data. Error in loop "
|
|
456
|
+
f"'{self.category}'. In this case, there are {len(self._tags)} tags, and "
|
|
457
|
+
f"row number {x} has {len(row)} tags.")
|
|
458
|
+
|
|
459
|
+
return True
|
|
460
|
+
|
|
461
|
+
def add_data(self,
|
|
462
|
+
data: Union[List[dict], Dict[str, List], List[Union[str, float, int]], List[List[Any]]],
|
|
463
|
+
rearrange: bool = False,
|
|
464
|
+
convert_data_types: bool = False,
|
|
465
|
+
schema: Schema = None):
|
|
466
|
+
"""Add data to a loop. You can provide the data to add organized in four different ways, though the first
|
|
467
|
+
two are recommended for new code. The other two (#3 and #4) are preserved for sake of existing code (written
|
|
468
|
+
prior to version 3.3) and for niche use cases:
|
|
469
|
+
|
|
470
|
+
1: You can provide a list of dictionaries of tags to add. For example,
|
|
471
|
+
``[{'name': 'Jeff', 'location': 'Connecticut'}, {'name': 'Chad', 'location': 'Madison'}]`` will add two new
|
|
472
|
+
rows, and set the values of the tags ``name`` and ``location`` to the values provided. If there are other
|
|
473
|
+
tags in the loop, they will be assigned null values for the rows corresponding to the tags added.
|
|
474
|
+
|
|
475
|
+
2: You can provide a dictionary of lists, as such (corresponds to adding the same ultimate data as in the
|
|
476
|
+
example #1): ``{'name': ['Jeff', 'Chad'], 'location': ['Connecticut', 'Madison']}``. This will also create
|
|
477
|
+
two new rows in the loop and assign the values provided.
|
|
478
|
+
|
|
479
|
+
3: You can provide a list of lists of tag values to add. In this case, each list must have the same tag
|
|
480
|
+
values (and order of tags) as the known tags present in the loop. To correspond to the above examples, the data
|
|
481
|
+
would look like: ``[['Jeff', 'Connecticut'], ['Chad', 'Madison']]``. Adding data this way requires both that
|
|
482
|
+
you provide values for all tags present in the loop, and that you provide the values in the same order that the
|
|
483
|
+
tags already are already defined in the loop.
|
|
484
|
+
|
|
485
|
+
4. You can provide a single list of tag values to add. In the most simple case, that would correspond to just
|
|
486
|
+
adding one row of data in the same was as in #3 above, as such: ``['Jeff', 'Connecticut']``. In a more
|
|
487
|
+
complicated example, you could also add data (corresponding to example #1 and #2) as such:
|
|
488
|
+
``['Jeff', 'Connecticut', 'Chad', 'Madison']`` - but if you provide data this way, you must set
|
|
489
|
+
``rearrange=True``. This usage is strongly discouraged, but exists for legacy reasons.
|
|
490
|
+
|
|
491
|
+
:param data: See the docstring for the method.
|
|
492
|
+
:type data: Union[List[dict], Dict[str, List], List[Union[str, float, int]], List[List[Any]]]
|
|
493
|
+
:param convert_data_types: If true, converts data you provide into the data type defined in the dictionary.
|
|
494
|
+
For example, if you provided the string '5' for the tag ``_Atom_chem_shift.Val``, it would automatically
|
|
495
|
+
be converted to a float while being added. This is mainly useful for parsers, as your data is probably
|
|
496
|
+
already in a format that is usable for you.
|
|
497
|
+
:type convert_data_types: bool
|
|
498
|
+
:param rearrange: If true, rearrange data provided in method #4 as necessary to fit in the loop. This only
|
|
499
|
+
exists for parsers, and it's use is strongly discouraged.
|
|
500
|
+
:type rearrange: bool
|
|
501
|
+
:param schema: A pynmrstar Schema object, which will be used to determine data types if convert_data_types
|
|
502
|
+
is True.
|
|
503
|
+
:type schema: pynmrstar.Schema
|
|
504
|
+
"""
|
|
505
|
+
|
|
506
|
+
if not data:
|
|
507
|
+
raise ValueError('No valid data provided.')
|
|
508
|
+
|
|
509
|
+
pending_data: List = []
|
|
510
|
+
lc_tag_index: Dict[str, int] = self._lc_tags
|
|
511
|
+
|
|
512
|
+
def format_two_to_one(format_two: Dict[str, List]):
|
|
513
|
+
max_length = max([len(_) for _ in format_two.values()])
|
|
514
|
+
keys = format_two.keys()
|
|
515
|
+
for row_id in range(0, max_length):
|
|
516
|
+
row_dict = {}
|
|
517
|
+
for key in keys:
|
|
518
|
+
try:
|
|
519
|
+
row_dict[key] = format_two[key][row_id]
|
|
520
|
+
except IndexError:
|
|
521
|
+
pass
|
|
522
|
+
yield row_dict
|
|
523
|
+
|
|
524
|
+
# Data format #1 and #2
|
|
525
|
+
if (isinstance(data, list) and isinstance(data[0], dict)) or \
|
|
526
|
+
isinstance(data, dict) and all([isinstance(_, list) for _ in data.values()]):
|
|
527
|
+
|
|
528
|
+
# Handle format #2 by converting it to #1
|
|
529
|
+
if isinstance(data, dict):
|
|
530
|
+
data = format_two_to_one(data)
|
|
531
|
+
|
|
532
|
+
for pos, row in enumerate(data):
|
|
533
|
+
current_row = [None]*len(self._tags)
|
|
534
|
+
for tag, value in row.items():
|
|
535
|
+
try:
|
|
536
|
+
tag_index = lc_tag_index[utils.format_tag_lc(tag)]
|
|
537
|
+
except KeyError:
|
|
538
|
+
raise ValueError(f'In row {pos} of your provided data, a tag was supplied which was not'
|
|
539
|
+
f" already present in the loop. Invalid tag: '{tag}'")
|
|
540
|
+
current_row[tag_index] = value
|
|
541
|
+
pending_data.append(current_row)
|
|
542
|
+
# Type 4 - a list of lists
|
|
543
|
+
elif isinstance(data, list) and isinstance(data[0], list):
|
|
544
|
+
for pos, row in enumerate(data):
|
|
545
|
+
if len(row) != len(self.tags):
|
|
546
|
+
raise ValueError('One of the lists you provided is not the correct length to match the number '
|
|
547
|
+
f'of tags present in the loop. Error on row {pos} with values: {row}')
|
|
548
|
+
pending_data = data
|
|
549
|
+
# Type 3 - a list of values
|
|
550
|
+
elif isinstance(data, list):
|
|
551
|
+
if rearrange:
|
|
552
|
+
# Break their data into chunks based on the number of tags
|
|
553
|
+
pending_data = [data[x:x + len(self._tags)] for x in range(0, len(data), len(self._tags))]
|
|
554
|
+
if len(pending_data[-1]) != len(self._tags):
|
|
555
|
+
raise ValueError(f"The number of data elements in the list you provided is not an even multiple of "
|
|
556
|
+
f"the number of tags which are set in the loop. Please either add missing tags "
|
|
557
|
+
f"using Loop.add_tag() or modify the list of tag values you are adding to be an "
|
|
558
|
+
f"even multiple of the number of tags. Error in loop '{self.category}'.")
|
|
559
|
+
else:
|
|
560
|
+
# Add one row of data
|
|
561
|
+
if len(data) != len(self._tags):
|
|
562
|
+
raise ValueError("The list must have the same number of elements as the number of tags when adding "
|
|
563
|
+
"a single row of values! Insert tag names first by calling Loop.add_tag().")
|
|
564
|
+
# Add the user data
|
|
565
|
+
pending_data.append(data)
|
|
566
|
+
else:
|
|
567
|
+
raise ValueError("Your data did not match one of the supported types.")
|
|
568
|
+
|
|
569
|
+
# Auto convert data types if option set
|
|
570
|
+
if convert_data_types:
|
|
571
|
+
schema = utils.get_schema(schema)
|
|
572
|
+
for row in pending_data:
|
|
573
|
+
for tag_id, datum in enumerate(row):
|
|
574
|
+
row[tag_id] = schema.convert_tag(f"{self.category}.{self._tags[tag_id]}", datum)
|
|
575
|
+
|
|
576
|
+
# Add the data at the very end to ensure that errors are caught before we mutate the data
|
|
577
|
+
self.data.extend(pending_data)
|
|
578
|
+
|
|
579
|
+
def add_data_by_tag(self, tag_name: str, value) -> None:
    """Deprecated: It is recommended to use add_data() instead for most use
    cases.

    Appends a single value to the loop, identified by its tag. Values
    have to arrive in tag order: the value is appended to the last row,
    and a new row is started whenever the last row is already full.
    Useful when adding data from SANS parsers."""

    warnings.warn("Deprecated: It is recommended to use Loop.add_data() instead for most use cases.",
                  DeprecationWarning)

    # If the tag carries a category, it has to be the category of this loop
    if "." in tag_name:
        supplied_category = utils.format_category(str(tag_name))
        if supplied_category.lower() != self.category.lower():
            raise ValueError(f"Category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    column = self.tag_index(tag_name)
    if column is None:
        raise ValueError(f"The tag '{tag_name}' to which you are attempting to add data does not yet exist. Create "
                         f"the tags using Loop.add_tag() before adding data.")

    rows = self.data
    # Start the first row if there is none yet
    if len(rows) == 0:
        rows.append([])
    # Start a new row once the last one has a value for every tag
    if len(rows[-1]) == len(self._tags):
        rows.append([])

    # The next free position in the row has to be the position of the tag
    current_row = rows[-1]
    if len(current_row) != column:
        raise ValueError("You cannot add data out of tag order.")
    current_row.append(value)
|
|
607
|
+
|
|
608
|
+
def add_missing_tags(self, schema: Schema = None, all_tags: bool = False) -> None:
    """ Add the tags which the schema defines for this loop's category
    but which the loop does not have yet, then sort the tags. Existing
    rows get a None value for every tag that is added.

    Afterwards the rows are sorted by the "Ordinal" tag. A ValueError
    from that sort is ignored, and a TypeError leads to the "Ordinal"
    values being renumbered starting from 1. """

    schema_tags = Loop._get_tags_from_schema(self.category, schema=schema, all_tags=all_tags)
    self.add_tag(schema_tags, ignore_duplicates=True, update_data=True)
    self.sort_tags()

    # See if we can sort the rows (in addition to tags)
    try:
        self.sort_rows("Ordinal")
    except ValueError:
        # NOTE(review): presumably raised when the loop has no "Ordinal" tag - confirm against sort_rows()
        pass
    except TypeError:
        # NOTE(review): presumably the ordinals could not be compared (e.g. None values) - confirm.
        # Number the rows 1, 2, 3, ... in their current order instead.
        ordinal_column = self.tag_index("Ordinal")
        for ordinal, row in enumerate(self.data, start=1):
            row[ordinal_column] = ordinal
|
|
627
|
+
|
|
628
|
+
def add_tag(self, name: Union[str, List[str]], ignore_duplicates: bool = False, update_data: bool = False) -> None:
    """Add a tag to the tag name list. Does a bit of validation
    and parsing. Set ignore_duplicates to true to ignore attempts
    to add the same tag more than once rather than raise an
    exception.

    You can also pass a list of tag names to add more than one
    tag at a time.

    Adding a tag will update the data array to match by adding
    None values to the rows if you specify update_data=True.

    If the tag name includes a category (``_Category.Tag``), the category
    becomes the category of the loop when none is set yet, and otherwise
    has to match the category of the loop.

    :raises ValueError: If the category of the tag does not match the loop, if the tag already exists (and
        ignore_duplicates is not set), or if the tag name is a null value, contains a second '.', or contains
        whitespace.
    """

    # If they have passed multiple tags to add, call ourself
    # on each of them in succession
    if isinstance(name, (list, tuple)):
        for item in name:
            self.add_tag(item, ignore_duplicates=ignore_duplicates, update_data=update_data)
        return

    name = name.strip()

    if "." in name:
        if name[0] != ".":
            # Split "_Category.Tag" at the first period
            category, _, name = name.partition(".")
            if category[:1] != "_":
                category = "_" + category

            if self.category is None:
                self.category = category
            elif self.category.lower() != category.lower():
                raise ValueError("One loop cannot have tags with different categories (or tags that don't "
                                 f"match the loop category)! The loop category is '{self.category}' while "
                                 f"the category in the tag was '{category}'.")
        else:
            # A leading period without a category: just drop the period
            name = name[1:]

    # Ignore duplicate tags
    if self.tag_index(name) is not None:
        if ignore_duplicates:
            return
        raise ValueError(f"There is already a tag with the name '{name}' in the loop '{self.category}'.")
    if name in definitions.NULL_VALUES:
        raise ValueError(f"Cannot use a null-equivalent value as a tag name. Invalid tag name: '{name}'")
    if "." in name:
        raise ValueError(f"There cannot be more than one '.' in a tag name. Invalid tag name: '{name}'")
    if any(char in utils.definitions.WHITESPACE for char in name):
        # The closing quote after the tag name was missing from this message
        raise ValueError(f"Tag names can not contain whitespace characters. Invalid tag name: '{name}'")

    # Add the tag
    self._tags.append(name)

    # Add None's to the rows of data
    if update_data:
        for row in self.data:
            row.append(None)
|
|
687
|
+
|
|
688
|
+
def clear_data(self) -> None:
    """Discard every row of data stored in this loop.

    Only the data is replaced with a new empty list; the tag names and
    the loop category are not touched."""

    self.data = list()
|
|
693
|
+
|
|
694
|
+
def compare(self, other) -> List[str]:
    """Return the differences between this loop and `other` as a list
    of human readable strings. An empty list means no differences
    were found.

    `other` may be a Loop or a string. A string is only considered
    equal if it matches str(self) exactly. Row order is ignored when
    comparing loop data: the rows of both loops are sorted before
    being compared.

    If the comparison itself fails (a missing attribute on `other`, or
    rows that cannot be ordered because they mix incomparable values
    such as None and str), the failure is reported as a difference
    rather than raised."""

    # Check if this is literally the same object
    if self is other:
        return []

    # Check if the other object is our string representation
    if isinstance(other, str):
        if str(self) == other:
            return []
        return ['String was not exactly equal to loop.']
    if not isinstance(other, Loop):
        return ['Other object is not of class Loop.']

    # We need to do this in case of an extra "\n" on the end of one tag
    if str(other) == str(self):
        return []

    diffs = []

    # Do STAR comparison
    try:
        # Check category of loops
        if str(self.category).lower() != str(other.category).lower():
            diffs.append(f"\t\tCategory of loops does not match: '{self.category}' vs '{other.category}'.")

        # Check tags of loops
        if [x.lower() for x in self._tags] != [x.lower() for x in other.tags]:
            diffs.append(f"\t\tLoop tag names do not match for loop with category '{self.category}'.")

        # No point checking if data is the same if the tag names aren't.
        # Only sort the data if it is not already equal.
        elif self.data != other.data:
            # sorted() builds a new outer list and never modifies the
            # rows, so the stored data is safe without a deep copy.
            if sorted(self.data) != sorted(other.data):
                diffs.append(f"\t\tLoop data does not match for loop with category '{self.category}'.")

    # TypeError is raised by sorted() when rows hold values that cannot
    # be ordered against each other (for example None and a string).
    except (AttributeError, TypeError) as err:
        diffs.append(f"\t\tAn exception occurred while comparing: '{err}'.")

    return diffs
|
|
744
|
+
|
|
745
|
+
def delete_tag(self, tag: Union[str, List[str]]) -> None:
    """ Deprecated. Please use :py:meth:`pynmrstar.Loop.remove_tag` instead.

    Emits a DeprecationWarning and then forwards `tag` unchanged to
    remove_tag(), returning whatever that call returns. """

    # stacklevel=2 makes the warning point at the code that called
    # delete_tag() rather than at this line inside the library.
    warnings.warn('Please use remove_tag() instead.', DeprecationWarning, stacklevel=2)
    return self.remove_tag(tag)
|
|
750
|
+
|
|
751
|
+
def delete_data_by_tag_value(self, tag: str, value: Any, index_tag: str = None) -> List[List[Any]]:
    """ Deprecated. Please use :py:meth:`pynmrstar.Loop.remove_data_by_tag_value` instead.

    Emits a DeprecationWarning and then forwards all arguments
    unchanged to remove_data_by_tag_value(), returning its result. """

    # stacklevel=2 makes the warning point at the code that called this
    # method rather than at this line inside the library.
    warnings.warn('Please use remove_data_by_tag_value() instead.', DeprecationWarning, stacklevel=2)
    return self.remove_data_by_tag_value(tag, value, index_tag)
|
|
756
|
+
|
|
757
|
+
def filter(self, tag_list: Union[str, List[str], Tuple[str]], ignore_missing_tags: bool = False):
    """ Build and return a new loop that holds only the requested tags
    and their data.

    tag_list may be a single tag or a list/tuple of tags. A tag that
    is not present in this loop raises KeyError, unless
    ignore_missing_tags=True, in which case it is silently skipped."""

    filtered = Loop.from_scratch()

    # A lone tag is handled as a one element list
    if isinstance(tag_list, (list, tuple)):
        requested = tag_list
    else:
        requested = [tag_list]

    # Keep the tags which exist, registering each one in the new loop
    # using the capitalization stored in this loop
    present = []
    for candidate in requested:
        position = self.tag_index(candidate)
        if position is not None:
            present.append(candidate)
            filtered.add_tag(self._tags[position])
        elif not ignore_missing_tags:
            raise KeyError(f"Cannot filter tag '{candidate}' as it isn't present in this loop.")

    # Copy the matching data over to the new loop
    values = self.get_tag(present)

    if len(present) == 1:
        # With a single tag get_tag() hands back bare values, not rows
        for value in values:
            filtered.add_data([value])
    else:
        for record in values:
            # We know it's a row because we didn't specify dict_result=True to get_tag()
            assert isinstance(record, list)
            filtered.add_data(record)

    # The new loop takes on this loop's category if it has none yet
    if filtered.category is None:
        filtered.category = self.category

    return filtered
|
|
800
|
+
|
|
801
|
+
def format(self, skip_empty_loops: bool = True, skip_empty_tags: bool = False) -> str:
    """ Render the loop as a string, like str(Loop), but with options
    controlling the output. Both options are passed straight through
    to __str__().

    skip_empty_loops will omit printing loops with no tags at all. (A loop with null tags is not "empty".)
    skip_empty_tags will omit tags in the loop which have no non-null values."""

    options = {
        "skip_empty_loops": skip_empty_loops,
        "skip_empty_tags": skip_empty_tags,
    }
    return self.__str__(**options)
|
|
809
|
+
|
|
810
|
+
def get_data_as_csv(self, header: bool = True, show_category: bool = True) -> str:
    """Return the loop data rendered as CSV text.

    When header is True the first line lists the tag names; they are
    prefixed with the loop category unless show_category is False.
    Line endings in the result are always "\\n"."""

    buffer = StringIO()
    writer = csv_writer(buffer)

    if header:
        if show_category:
            prefix = str(self.category) + "."
            column_names = [prefix + str(tag) for tag in self._tags]
        else:
            column_names = [str(tag) for tag in self._tags]
        writer.writerow(column_names)

    for record in self.data:
        writer.writerow(list(record))

    # The csv module terminates lines with "\r\n" by default
    return buffer.getvalue().replace('\r\n', '\n')
|
|
836
|
+
|
|
837
|
+
def get_json(self, serialize: bool = True) -> Union[dict, str]:
    """ Return the loop in JSON form.

    With serialize=True (the default) a JSON string is returned. With
    serialize=False the dictionary that would have been serialized is
    returned instead; it has the keys "category", "tags" and "data"
    and refers to the loop's own tag and data lists."""

    loop_dict = dict(category=self.category, tags=self._tags, data=self.data)

    if not serialize:
        return loop_dict

    return json.dumps(loop_dict, default=_json_serialize)
|
|
852
|
+
|
|
853
|
+
def get_tag_names(self) -> List[str]:
    """ Return the tag names of this loop, each prefixed with the loop
    category and a period. Raises InvalidStateError if the category
    has not been set.

    To get the tags without the category, just access them directly
    using the "tags" attribute.

    To fetch tag values use get_tag()."""

    if self.category:
        prefix = self.category + "."
        return [prefix + tag for tag in self._tags]

    raise InvalidStateError("You never set the category of this loop. You must set the category before calling "
                            "this method, either by setting the loop category directly when creating the loop "
                            "using the Loop.from_scratch() class method, by calling loop.set_category(), or by "
                            "adding a fully qualified tag which includes the loop category (for example, "
                            "adding '_Citation_author.Family_name' rather than just 'Family_name').")
|
|
870
|
+
|
|
871
|
+
def get_tag(self,
            tags: Optional[Union[str, List[str]]] = None,
            whole_tag: bool = False,
            dict_result: bool = False) -> Union[List[Any], List[Dict[str, Any]]]:
    """Provided a tag name (or a list or tuple of tag names) return the
    selected tags by row as a list of lists. Leave tags unset to fetch
    all tags; in that case, unless dict_result=True, the loop's own
    data list is returned rather than a copy.

    A tag may also be given as an integer column position. A tag whose
    name matches takes priority over a position. When exactly one tag
    is requested, a flat list of values is returned rather than a list
    of one-element rows.

    If whole_tag=True return the full tag name along with the tag
    value, or if dict_result=True, as the tag key.

    If dict_result=True, return the tags as a list of dictionaries
    in which the tag name points to the tag value. Uses the specified
    capitalization of the tag unless whole_tag is True, in which case
    it will use the capitalization found in the loop.

    Raises ValueError if a tag carries a category that differs from
    the loop category, and KeyError if a tag cannot be located."""

    # All tags
    if tags is None:
        if not dict_result:
            return self.data
        tags = self._tags

    # Turn single elements into lists, and work on a copy so that the
    # sequence the caller passed in is never modified
    if isinstance(tags, (list, tuple)):
        tags = list(tags)
    else:
        tags = [tags]

    # Strip the category if they provide it (also validate it during
    # the process). All tags are validated before any lookup happens.
    lower_tags = []
    for item in [str(x) for x in tags]:
        if "." in item and utils.format_category(item).lower() != self.category.lower():
            raise ValueError(f"Cannot fetch data with tag '{item}' because the category does not match the "
                             f"category of this loop '{self.category}'.")
        lower_tags.append(utils.format_tag_lc(item))

    # Map lower case tag name to tag position. setdefault() keeps the
    # first position if the same name somehow occurs more than once.
    tag_mapping = {}
    for position, existing in enumerate(self._tags):
        tag_mapping.setdefault(existing.lower(), position)

    # Make sure their fields are actually present in the loop
    tag_ids = []
    for pos, query in enumerate(lower_tags):
        requested = tags[pos]
        if query in tag_mapping:
            tag_ids.append(tag_mapping[query])
        # The integer test must look at the value the caller supplied:
        # the query itself has already been converted to a string.
        elif (isinstance(requested, int) and not isinstance(requested, bool)
              and 0 <= requested < len(self._tags)):
            tag_ids.append(requested)
        else:
            raise KeyError(f"Could not locate the tag with name or ID: '{tags[pos]}' in loop '{self.category}'.")

    # First build the tags as a list
    if not dict_result:
        if whole_tag:
            result = [[[self.category + "." + self._tags[col_id], row[col_id]]
                       for col_id in tag_ids] for row in self.data]
        else:
            result = [[row[col_id] for col_id in tag_ids] for row in self.data]

        # Strip the extra list if only one tag
        if len(lower_tags) == 1:
            return [x[0] for x in result]
        return result

    # Make a dictionary
    if whole_tag:
        return [{self.category + "." + self._tags[col_id]: row[col_id] for col_id in tag_ids}
                for row in self.data]
    return [{tags[pos]: row[col_id] for pos, col_id in enumerate(tag_ids)} for row in self.data]
|
|
948
|
+
|
|
949
|
+
def print_tree(self) -> None:
    """Print a tree style summary of the loop, which for a loop is
    simply its repr()."""

    summary = repr(self)
    print(summary)
|
|
953
|
+
|
|
954
|
+
def remove_data_by_tag_value(self, tag: str, value: Any, index_tag: str = None) -> List[List[Any]]:
    """Removes all rows which contain the provided value in the
    provided tag name. If index_tag is provided, that tag is
    renumbered starting with 1. Returns the deleted rows.

    The rows are removed from the existing data list in place, in a
    single pass. Raises ValueError if the tag carries a category that
    differs from the loop category, or if the tag is not in the
    loop."""

    # Make sure the category matches - if provided
    if "." in tag:
        supplied_category = utils.format_category(str(tag))
        if supplied_category.lower() != self.category.lower():
            raise ValueError(f"The category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    search_tag = self.tag_index(tag)
    if search_tag is None:
        raise ValueError(f"The tag you provided '{tag}' isn't in this loop!")

    # Split the rows into those to delete and those to keep. Nothing
    # is modified until every row has been examined, so an error on
    # some row cannot leave the loop half-filtered.
    deleted = []
    kept = []
    for row in self.data:
        if row[search_tag] == value:
            deleted.append(row)
        else:
            kept.append(row)

    # Slice assignment keeps the same list object, so other references
    # to the loop's data list observe the removal as well.
    self.data[:] = kept

    # Re-number if they so desire
    if index_tag is not None:
        self.renumber_rows(index_tag)

    return deleted
|
|
985
|
+
|
|
986
|
+
def remove_tag(self, tag: Union[str, List[str]]) -> None:
    """Remove one or more tags, and the data column belonging to each,
    from the loop. Provide either a tag or list of tags.

    Every tag is looked up before anything is removed; a tag that is
    not in the loop raises KeyError."""

    doomed = tag if isinstance(tag, list) else [tag]

    # Refuse to start if any of the tags is unknown
    for candidate in doomed:
        if self.tag_index(candidate) is None:
            raise KeyError(f"There is no tag with name '{candidate}' to remove in loop '{self.category}'.")

    # Look the position up again for every tag, since removing an
    # earlier tag shifts the positions of the ones after it
    for candidate in doomed:
        column: int = self.tag_index(candidate)
        self._tags.pop(column)
        for record in self.data:
            record.pop(column)
|
|
1004
|
+
|
|
1005
|
+
def renumber_rows(self, index_tag: str, start_value: int = 1, maintain_ordering: bool = False):
    """Renumber a given tag incrementally. Set start_value to
    initial value if 1 is not acceptable. Set maintain_ordering to
    preserve sequence with offset.

    E.g. 2,3,3,5 would become 1,2,2,4.

    index_tag may be a tag name or the integer position of the tag.
    Values stored as strings are replaced by strings; all other
    values are replaced by integers.

    Raises ValueError if the tag carries a category that differs from
    the loop category, if the tag cannot be found, or (with
    maintain_ordering) if any existing value cannot be converted to
    an integer. In that last case the data is left unmodified."""

    # Make sure the category matches
    if "." in str(index_tag):
        supplied_category = utils.format_category(str(index_tag))
        if supplied_category.lower() != self.category.lower():
            raise ValueError(f"Category provided in your tag '{supplied_category}' does not match this loop's "
                             f"category '{self.category}'.")

    # Determine which tag ID to renumber
    renumber_tag = self.tag_index(index_tag)

    # The tag to replace in is the tag they specify
    if renumber_tag is None:
        # Or, perhaps they specified an integer to represent the tag?
        try:
            renumber_tag = int(index_tag)
        except ValueError:
            raise ValueError(f"The renumbering tag you provided '{index_tag}' isn't in this loop!")

    # Do nothing if we have no data
    if len(self.data) == 0:
        return

    # Make sure the tags and data match
    self._check_tags_match_data()

    if maintain_ordering:
        # Work out every new value before writing any of them, so a
        # value that can't be converted leaves the loop untouched.
        new_values = []
        offset = 0
        for pos, row in enumerate(self.data):
            current = row[renumber_tag]
            try:
                if pos == 0:
                    offset = start_value - int(current)
                new_values.append(int(current) + offset)
            # int() raises TypeError rather than ValueError for values
            # such as None, so both need to be handled here.
            except (ValueError, TypeError) as err:
                raise ValueError("You can't renumber a row containing anything that can't be coerced into an "
                                 "integer using maintain_ordering. I.e. what am I suppose to renumber "
                                 f"'{current}' to?") from err

        for row, new_value in zip(self.data, new_values):
            if isinstance(row[renumber_tag], str):
                row[renumber_tag] = str(new_value)
            else:
                row[renumber_tag] = new_value

    # Simple renumbering algorithm if we don't need to maintain the ordering
    else:
        for pos, row in enumerate(self.data):
            if isinstance(row[renumber_tag], str):
                row[renumber_tag] = str(pos + start_value)
            else:
                row[renumber_tag] = pos + start_value
|
|
1065
|
+
|
|
1066
|
+
def set_category(self, category: str) -> None:
    """ Assign the loop's category, for cases where it was not known
    when the loop was created. The supplied value is passed through
    utils.format_category() before being stored."""

    formatted = utils.format_category(category)
    self.category = formatted
|
|
1071
|
+
|
|
1072
|
+
def sort_tags(self, schema: Schema = None) -> None:
    """ Reorder the tags of the loop, along with their data columns,
    so that they follow the order given by the schema. The schema is
    obtained from utils.get_schema(), which is passed whatever was
    supplied as `schema`."""

    schema = utils.get_schema(schema)
    current_order = self.get_tag_names()

    # Order the fully qualified tag names by their schema position
    sorted_order = sorted(current_order, key=schema.tag_key)

    # Only rewrite the data when the order actually changes
    if sorted_order != current_order:
        self.data = self.get_tag(sorted_order)
        self._tags = [utils.format_tag(x) for x in sorted_order]
|
|
1091
|
+
|
|
1092
|
+
def sort_rows(self, tags: Union[str, List[str]], key: Callable = None) -> None:
    """ Sort the rows of the loop by their values for a given tag or
    list of tags. Tags may be given by name or by integer position.

    Without a key, the values are first compared as floats; if a
    value cannot be parsed as a float, the rows are sorted on the raw
    values instead. If key is supplied it is handed to sorted() and
    receives whole rows. See the help for sorted() for more details.

    When several tags are provided the sorts are applied one after
    another, so they are interpreted as increasing order of sort
    priority.

    Raises ValueError if a tag carries a category that differs from
    the loop category, or if a tag cannot be found."""

    # Do nothing if we have no data
    if len(self.data) == 0:
        return

    processing_list = tags if isinstance(tags, list) else [tags]

    # Translate what they gave us into column positions
    sort_ordinals = []
    for cur_tag in [str(x) for x in processing_list]:

        # Make sure the category matches
        if "." in cur_tag:
            supplied_category = utils.format_category(cur_tag)
            if supplied_category.lower() != self.category.lower():
                raise ValueError(f"The category provided in your tag '{supplied_category}' does not match this "
                                 f"loop's category '{self.category}'.")

        column = self.tag_index(cur_tag)

        # Not a known tag name - perhaps it is a column position?
        if column is None:
            try:
                column = int(cur_tag)
            except ValueError:
                raise ValueError(f"The sorting tag you provided '{cur_tag}' isn't in this loop!")

        sort_ordinals.append(column)

    # Do the sort(s)
    for column in sort_ordinals:
        if key is None:
            # Numeric comparison first, raw values as the fallback
            def first_choice(record, pos=column):
                return float(record[pos])

            def fallback(record, pos=column):
                return record[pos]
        else:
            first_choice = fallback = key

        try:
            ordered = sorted(self.data, key=first_choice)
        except ValueError:
            ordered = sorted(self.data, key=fallback)
        self.data = ordered
|
|
1151
|
+
|
|
1152
|
+
def tag_index(self, tag_name: str) -> Optional[int]:
    """ Look up the position of a tag in this loop.

    The name is converted to a string and passed through
    utils.format_tag_lc() before being looked up in the loop's
    _lc_tags mapping. Returns the index of the tag if found and None
    if not found.

    This is useful if you need to get the index of a certain tag to
    iterate through the data and modify it."""

    try:
        position = self._lc_tags[utils.format_tag_lc(str(tag_name))]
    except KeyError:
        position = None
    return position
|
|
1164
|
+
|
|
1165
|
+
def validate(self, validate_schema: bool = True, schema: 'Schema' = None,
             validate_star: bool = True, category: str = None) -> List[str]:
    """Run the enabled checks against this loop and return the list
    of error messages they produced. An empty list means that no
    problems were found. Both checks are enabled by default.

    validate_schema - Check every value against the schema using the
    schema's val_type() method. The schema is obtained from
    utils.get_schema(), which is passed whatever was supplied as
    `schema`. `category` is forwarded to val_type().

    validate_star - Check that every row of data has exactly as many
    values as the loop has tags."""

    errors = []

    if validate_schema:
        # Get the default schema if we are not passed a schema
        my_schema = utils.get_schema(schema)

        # Check each value against the type of its tag
        for record in self.data:
            for pos, datum in enumerate(record):
                full_tag = f"{self.category}.{self._tags[pos]}"
                errors.extend(my_schema.val_type(full_tag, datum, category=category))

    if validate_star:
        # Report every row whose width differs from the tag count
        num_cols = len(self._tags)
        errors.extend(
            f"Loop '{self.category}' data width does not match it's tag width on "
            f"row '{row_num}'."
            for row_num, record in enumerate(self.data)
            if len(record) != num_cols
        )

    return errors
|