biotite 0.39.0__cp310-cp310-win_amd64.whl → 0.40.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/rcsb/download.py +19 -14
- biotite/sequence/align/banded.c +258 -237
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.c +243 -222
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +215 -196
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +233 -205
- biotite/sequence/align/localgapped.c +258 -237
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +235 -214
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +255 -234
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +274 -253
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +215 -196
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +217 -197
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +215 -195
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/codec.c +235 -214
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.c +215 -196
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +227 -202
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +215 -196
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1175 -1226
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +217 -197
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/charges.c +1052 -1101
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +25 -67
- biotite/structure/info/bonds.py +46 -100
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1646 -0
- biotite/structure/info/ccd/carbohydrates.txt +1133 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +797 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +11 -22
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +1 -1
- biotite/structure/io/general.py +37 -43
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +219 -198
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +217 -197
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +225 -204
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +215 -196
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +5 -3
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +13 -6
- biotite/structure/io/pdbx/bcif.py +649 -0
- biotite/structure/io/pdbx/cif.py +1028 -0
- biotite/structure/io/pdbx/component.py +243 -0
- biotite/structure/io/pdbx/convert.py +707 -359
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/error.py +14 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/sasa.c +215 -196
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/superimpose.py +158 -115
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/RECORD +92 -90
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
@@ -1,652 +0,0 @@
|
|
|
1
|
-
# This source code is part of the Biotite package and is distributed
|
|
2
|
-
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
-
# information.
|
|
4
|
-
|
|
5
|
-
__name__ = "biotite.structure.io.pdbx"
|
|
6
|
-
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["PDBxFile"]
|
|
8
|
-
|
|
9
|
-
import copy
|
|
10
|
-
import shlex
|
|
11
|
-
from collections.abc import MutableMapping
|
|
12
|
-
import numpy as np
|
|
13
|
-
from ....file import TextFile, InvalidFileError
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class PDBxFile(TextFile, MutableMapping):
|
|
17
|
-
"""
|
|
18
|
-
This class represents a PDBx/mmCIF file.
|
|
19
|
-
|
|
20
|
-
The categories of the file can be accessed using the
|
|
21
|
-
:meth:`get_category()`/:meth:`set_category()` methods.
|
|
22
|
-
The content of each category is represented by a dictionary.
|
|
23
|
-
The dictionary contains the entry
|
|
24
|
-
(e.g. *label_entity_id* in *atom_site*) as key.
|
|
25
|
-
The corresponding values are either strings in *non-looped*
|
|
26
|
-
categories, or 1-D numpy arrays of string objects in case of
|
|
27
|
-
*looped* categories.
|
|
28
|
-
|
|
29
|
-
A category can be changed or added using :meth:`set_category()`:
|
|
30
|
-
If a string-valued dictionary is provided, a *non-looped* category
|
|
31
|
-
will be created; if an array-valued dictionary is given, a
|
|
32
|
-
*looped* category will be created. In case of arrays, it is
|
|
33
|
-
important that all arrays have the same size.
|
|
34
|
-
|
|
35
|
-
Alternatively, The content of this file can also be read/write
|
|
36
|
-
accessed using dictionary-like indexing:
|
|
37
|
-
You can either provide a data block and a category or only a
|
|
38
|
-
category, in which case the first data block is taken.
|
|
39
|
-
|
|
40
|
-
Notes
|
|
41
|
-
-----
|
|
42
|
-
This class is also able to detect and parse multiline entries in the
|
|
43
|
-
file. However, when writing a category no multiline values are used.
|
|
44
|
-
This could lead to long lines.
|
|
45
|
-
|
|
46
|
-
This class uses a lazy category dictionary creation: When reading
|
|
47
|
-
the file only the line positions of all categories are checked. The
|
|
48
|
-
time consuming task of dictionary creation is done when
|
|
49
|
-
:meth:`get_category()` is called.
|
|
50
|
-
|
|
51
|
-
Examples
|
|
52
|
-
--------
|
|
53
|
-
Read the file and get author names:
|
|
54
|
-
|
|
55
|
-
>>> import os.path
|
|
56
|
-
>>> file = PDBxFile.read(os.path.join(path_to_structures, "1l2y.cif"))
|
|
57
|
-
>>> author_dict = file.get_category("citation_author", block="1L2Y")
|
|
58
|
-
>>> print(author_dict["name"])
|
|
59
|
-
['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
|
|
60
|
-
|
|
61
|
-
Dictionary style indexing, no specification of data block:
|
|
62
|
-
|
|
63
|
-
>>> print(file["citation_author"]["name"])
|
|
64
|
-
['Neidigh, J.W.' 'Fesinmeyer, R.M.' 'Andersen, N.H.']
|
|
65
|
-
|
|
66
|
-
Get the structure from the file:
|
|
67
|
-
|
|
68
|
-
>>> arr = get_structure(file)
|
|
69
|
-
>>> print(type(arr).__name__)
|
|
70
|
-
AtomArrayStack
|
|
71
|
-
>>> arr = get_structure(file, model=1)
|
|
72
|
-
>>> print(type(arr).__name__)
|
|
73
|
-
AtomArray
|
|
74
|
-
|
|
75
|
-
Modify atom array and write it back into the file:
|
|
76
|
-
|
|
77
|
-
>>> arr_mod = rotate(arr, [1,2,3])
|
|
78
|
-
>>> set_structure(file, arr_mod)
|
|
79
|
-
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.cif"))
|
|
80
|
-
"""
|
|
81
|
-
|
|
82
|
-
def __init__(self):
|
|
83
|
-
super().__init__()
|
|
84
|
-
# This dictionary saves the PDBx category names,
|
|
85
|
-
# together with its line position in the file
|
|
86
|
-
# and the data_block it is in
|
|
87
|
-
self._categories = {}
|
|
88
|
-
|
|
89
|
-
@classmethod
|
|
90
|
-
def read(cls, file):
|
|
91
|
-
"""
|
|
92
|
-
Read a PDBx/mmCIF file.
|
|
93
|
-
|
|
94
|
-
Parameters
|
|
95
|
-
----------
|
|
96
|
-
file : file-like object or str
|
|
97
|
-
The file to be read.
|
|
98
|
-
Alternatively a file path can be supplied.
|
|
99
|
-
|
|
100
|
-
Returns
|
|
101
|
-
-------
|
|
102
|
-
file_object : PDBxFile
|
|
103
|
-
The parsed file.
|
|
104
|
-
"""
|
|
105
|
-
file = super().read(file)
|
|
106
|
-
# Remove emptyline at then end of file, if present
|
|
107
|
-
if file.lines[-1] == "":
|
|
108
|
-
del file.lines[-1]
|
|
109
|
-
|
|
110
|
-
current_category = None
|
|
111
|
-
start = -1
|
|
112
|
-
stop = -1
|
|
113
|
-
is_loop = False
|
|
114
|
-
has_multiline_values = False
|
|
115
|
-
for i, line in enumerate(file.lines):
|
|
116
|
-
# Ignore empty and comment lines
|
|
117
|
-
if not _is_empty(line):
|
|
118
|
-
data_block_name = _data_block_name(line)
|
|
119
|
-
if data_block_name is not None:
|
|
120
|
-
data_block = data_block_name
|
|
121
|
-
# If new data block begins, reset category data
|
|
122
|
-
current_category = None
|
|
123
|
-
start = -1
|
|
124
|
-
stop = -1
|
|
125
|
-
is_loop = False
|
|
126
|
-
has_multiline_values = False
|
|
127
|
-
|
|
128
|
-
is_loop_in_line = _is_loop_start(line)
|
|
129
|
-
category_in_line = _get_category_name(line)
|
|
130
|
-
if is_loop_in_line or (
|
|
131
|
-
category_in_line != current_category
|
|
132
|
-
and category_in_line is not None
|
|
133
|
-
):
|
|
134
|
-
# Start of a new category
|
|
135
|
-
# Add an entry into the dictionary with the old category
|
|
136
|
-
stop = i
|
|
137
|
-
file._add_category(
|
|
138
|
-
data_block,
|
|
139
|
-
current_category,
|
|
140
|
-
start,
|
|
141
|
-
stop,
|
|
142
|
-
is_loop,
|
|
143
|
-
has_multiline_values,
|
|
144
|
-
)
|
|
145
|
-
# Track the new category
|
|
146
|
-
if is_loop_in_line:
|
|
147
|
-
# In case of lines with "loop_" the category is in the
|
|
148
|
-
# next line
|
|
149
|
-
category_in_line = _get_category_name(
|
|
150
|
-
file.lines[i + 1]
|
|
151
|
-
)
|
|
152
|
-
is_loop = is_loop_in_line
|
|
153
|
-
current_category = category_in_line
|
|
154
|
-
start = i
|
|
155
|
-
has_multiline_values = False
|
|
156
|
-
|
|
157
|
-
multiline = _is_multi(line, is_loop)
|
|
158
|
-
if multiline:
|
|
159
|
-
has_multiline_values = True
|
|
160
|
-
# Add the entry for the final category
|
|
161
|
-
# Since at the end of the file the end of the category
|
|
162
|
-
# is not determined by the start of a new one,
|
|
163
|
-
# this needs to be handled separately
|
|
164
|
-
stop = len(file.lines)
|
|
165
|
-
file._add_category(
|
|
166
|
-
data_block,
|
|
167
|
-
current_category,
|
|
168
|
-
start,
|
|
169
|
-
stop,
|
|
170
|
-
is_loop,
|
|
171
|
-
has_multiline_values,
|
|
172
|
-
)
|
|
173
|
-
return file
|
|
174
|
-
|
|
175
|
-
def get_block_names(self):
|
|
176
|
-
"""
|
|
177
|
-
Get the names of all data blocks in the file.
|
|
178
|
-
|
|
179
|
-
Returns
|
|
180
|
-
-------
|
|
181
|
-
blocks : list
|
|
182
|
-
List of data block names.
|
|
183
|
-
"""
|
|
184
|
-
blocks = set()
|
|
185
|
-
for category_tuple in self._categories.keys():
|
|
186
|
-
block, _ = category_tuple
|
|
187
|
-
blocks.add(block)
|
|
188
|
-
return sorted(blocks)
|
|
189
|
-
|
|
190
|
-
def get_category(self, category, block=None, expect_looped=False):
|
|
191
|
-
"""
|
|
192
|
-
Get the dictionary for a given category.
|
|
193
|
-
|
|
194
|
-
Parameters
|
|
195
|
-
----------
|
|
196
|
-
category : string
|
|
197
|
-
The name of the category. The leading underscore is omitted.
|
|
198
|
-
block : string, optional
|
|
199
|
-
The name of the data block. Default is the first
|
|
200
|
-
(and most times only) data block of the file.
|
|
201
|
-
expect_looped : bool, optional
|
|
202
|
-
If set to true, the returned dictionary will always contain
|
|
203
|
-
arrays (only if the category exists):
|
|
204
|
-
If the category is *non-looped*, each array will contain
|
|
205
|
-
only one element.
|
|
206
|
-
|
|
207
|
-
Returns
|
|
208
|
-
-------
|
|
209
|
-
category_dict : dict of (str or ndarray, dtype=str) or None
|
|
210
|
-
A entry keyed dictionary. The corresponding values are
|
|
211
|
-
strings or array of strings for *non-looped* and
|
|
212
|
-
*looped* categories, respectively.
|
|
213
|
-
Returns None, if the data block does not contain the given
|
|
214
|
-
category.
|
|
215
|
-
"""
|
|
216
|
-
if block is None:
|
|
217
|
-
try:
|
|
218
|
-
block = self.get_block_names()[0]
|
|
219
|
-
except IndexError:
|
|
220
|
-
raise InvalidFileError("File is empty")
|
|
221
|
-
category_info = self._categories.get((block, category))
|
|
222
|
-
if category_info is None:
|
|
223
|
-
return None
|
|
224
|
-
start = category_info["start"]
|
|
225
|
-
stop = category_info["stop"]
|
|
226
|
-
is_loop = category_info["loop"]
|
|
227
|
-
is_multilined = category_info["multiline"]
|
|
228
|
-
|
|
229
|
-
if is_multilined:
|
|
230
|
-
# Convert multiline values into singleline values
|
|
231
|
-
prelines = [
|
|
232
|
-
line.strip()
|
|
233
|
-
for line in self.lines[start:stop]
|
|
234
|
-
if not _is_empty(line) and not _is_loop_start(line)
|
|
235
|
-
]
|
|
236
|
-
lines = (len(prelines)) * [None]
|
|
237
|
-
# lines index
|
|
238
|
-
k = 0
|
|
239
|
-
# prelines index
|
|
240
|
-
i = 0
|
|
241
|
-
while i < len(prelines):
|
|
242
|
-
if prelines[i][0] == ";":
|
|
243
|
-
# multiline values
|
|
244
|
-
multi_line_str = prelines[i][1:]
|
|
245
|
-
j = i + 1
|
|
246
|
-
while prelines[j] != ";":
|
|
247
|
-
multi_line_str += prelines[j]
|
|
248
|
-
j += 1
|
|
249
|
-
lines[k - 1] += " " + shlex.quote(multi_line_str)
|
|
250
|
-
i = j + 1
|
|
251
|
-
elif not is_loop and prelines[i][0] in ["'", '"']:
|
|
252
|
-
# Singleline values where value is in the line
|
|
253
|
-
# after the corresponding key
|
|
254
|
-
lines[k - 1] += " " + prelines[i]
|
|
255
|
-
i += 1
|
|
256
|
-
else:
|
|
257
|
-
# Normal singleline value in the same row as the key
|
|
258
|
-
lines[k] = prelines[i]
|
|
259
|
-
i += 1
|
|
260
|
-
k += 1
|
|
261
|
-
lines = [line for line in lines if line is not None]
|
|
262
|
-
|
|
263
|
-
else:
|
|
264
|
-
lines = [
|
|
265
|
-
line.strip()
|
|
266
|
-
for line in self.lines[start:stop]
|
|
267
|
-
if not _is_empty(line) and not _is_loop_start(line)
|
|
268
|
-
]
|
|
269
|
-
|
|
270
|
-
if is_loop:
|
|
271
|
-
# Special optimization for "atom_site":
|
|
272
|
-
# Even if the values are quote protected,
|
|
273
|
-
# no whitespace is expected in escaped values
|
|
274
|
-
# Therefore slow shlex.split() call is not necessary
|
|
275
|
-
if category == "atom_site":
|
|
276
|
-
whitespace_values = False
|
|
277
|
-
else:
|
|
278
|
-
whitespace_values = True
|
|
279
|
-
category_dict = _process_looped(lines, whitespace_values)
|
|
280
|
-
else:
|
|
281
|
-
category_dict = _process_singlevalued(lines)
|
|
282
|
-
|
|
283
|
-
if expect_looped and not is_loop:
|
|
284
|
-
category_dict = {
|
|
285
|
-
key: np.array([val], dtype=object)
|
|
286
|
-
for key, val in category_dict.items()
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
return category_dict
|
|
290
|
-
|
|
291
|
-
def set_category(self, category, category_dict, block=None):
|
|
292
|
-
"""
|
|
293
|
-
Set the content of a category.
|
|
294
|
-
|
|
295
|
-
If the category is already exisiting, all lines corresponding
|
|
296
|
-
to the category are replaced. Otherwise a new category is
|
|
297
|
-
created and the lines are appended at the end of the data block.
|
|
298
|
-
|
|
299
|
-
Parameters
|
|
300
|
-
----------
|
|
301
|
-
category : string
|
|
302
|
-
The name of the category. The leading underscore is omitted.
|
|
303
|
-
category_dict : dict
|
|
304
|
-
The category content. The dictionary must have strings
|
|
305
|
-
(subcategories) as keys and strings or :class:`ndarray`
|
|
306
|
-
objects as values.
|
|
307
|
-
block : string, optional
|
|
308
|
-
The name of the data block. Default is the first
|
|
309
|
-
(and most times only) data block of the file. If the
|
|
310
|
-
block is not contained in the file yet, a new block is
|
|
311
|
-
appended at the end of the file.
|
|
312
|
-
"""
|
|
313
|
-
if block is None:
|
|
314
|
-
try:
|
|
315
|
-
block = self.get_block_names()[0]
|
|
316
|
-
except IndexError:
|
|
317
|
-
raise InvalidFileError(
|
|
318
|
-
"File is empty, give an explicit data block"
|
|
319
|
-
)
|
|
320
|
-
|
|
321
|
-
# Determine whether the category is a looped category
|
|
322
|
-
sample_category_value = list(category_dict.values())[0]
|
|
323
|
-
if isinstance(sample_category_value, (np.ndarray, list)):
|
|
324
|
-
is_looped = True
|
|
325
|
-
# Check whether all arrays have the same length
|
|
326
|
-
arr_len = len(list(category_dict.values())[0])
|
|
327
|
-
for subcat, array in category_dict.items():
|
|
328
|
-
if len(array) != arr_len:
|
|
329
|
-
raise ValueError(
|
|
330
|
-
f"Length of Subcategory '{subcat}' is {len(array)}, "
|
|
331
|
-
f" but {arr_len} was expected"
|
|
332
|
-
)
|
|
333
|
-
else:
|
|
334
|
-
is_looped = False
|
|
335
|
-
|
|
336
|
-
# Sanitize dictionary
|
|
337
|
-
# -> convert to string
|
|
338
|
-
# -> replace empty values with '.'
|
|
339
|
-
category_dict = copy.deepcopy(category_dict)
|
|
340
|
-
if is_looped:
|
|
341
|
-
for subcat, value in category_dict.items():
|
|
342
|
-
array = np.asarray(value)
|
|
343
|
-
# Cast array if its data type is not a Unicode string
|
|
344
|
-
if array.dtype.kind != "U":
|
|
345
|
-
array = array.astype(str)
|
|
346
|
-
array = np.char.strip(array)
|
|
347
|
-
array[array == ""] = "."
|
|
348
|
-
category_dict[subcat] = array
|
|
349
|
-
else:
|
|
350
|
-
for subcat, value in category_dict.items():
|
|
351
|
-
value = str(value)
|
|
352
|
-
value = value if value != "" else "."
|
|
353
|
-
category_dict[subcat] = str(value)
|
|
354
|
-
|
|
355
|
-
# Value arrays (looped categories) can be modified (e.g. quoted)
|
|
356
|
-
# Hence make a copy to avoid unwanted side effects
|
|
357
|
-
# due to modification of input values
|
|
358
|
-
if is_looped:
|
|
359
|
-
category_dict = {
|
|
360
|
-
key: val.copy() for key, val in category_dict.items()
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
# Enclose values with quotes if required
|
|
364
|
-
for key, value in category_dict.items():
|
|
365
|
-
if is_looped:
|
|
366
|
-
# Since value is a numpy string array with fixed size,
|
|
367
|
-
# we need to convert it as a list before using _quote
|
|
368
|
-
category_dict[key] = np.asarray(
|
|
369
|
-
[_quote(item) for item in value.tolist()]
|
|
370
|
-
)
|
|
371
|
-
else:
|
|
372
|
-
category_dict[key] = _quote(value)
|
|
373
|
-
|
|
374
|
-
if is_looped:
|
|
375
|
-
keylines = [
|
|
376
|
-
"_" + category + "." + key + " "
|
|
377
|
-
for key in category_dict.keys()
|
|
378
|
-
]
|
|
379
|
-
value_arr = list(category_dict.values())
|
|
380
|
-
# Array containing the number of characters + whitespace
|
|
381
|
-
# of each column
|
|
382
|
-
col_lens = np.zeros(len(value_arr), dtype=int)
|
|
383
|
-
for i, column in enumerate(value_arr):
|
|
384
|
-
col_len = 0
|
|
385
|
-
for value in column:
|
|
386
|
-
if len(value) > col_len:
|
|
387
|
-
col_len = len(value)
|
|
388
|
-
# Length of column is max value length
|
|
389
|
-
# +1 whitespace character as separator
|
|
390
|
-
col_lens[i] = col_len + 1
|
|
391
|
-
valuelines = [""] * arr_len
|
|
392
|
-
for i in range(arr_len):
|
|
393
|
-
for j, arr in enumerate(value_arr):
|
|
394
|
-
valuelines[i] += arr[i] + " " * (col_lens[j] - len(arr[i]))
|
|
395
|
-
newlines = ["loop_"] + keylines + valuelines
|
|
396
|
-
|
|
397
|
-
else:
|
|
398
|
-
# For better readability, not only one space is inserted
|
|
399
|
-
# after each key, but as much spaces that every value starts
|
|
400
|
-
# at the same position in the line
|
|
401
|
-
max_len = 0
|
|
402
|
-
for key in category_dict.keys():
|
|
403
|
-
if len(key) > max_len:
|
|
404
|
-
max_len = len(key)
|
|
405
|
-
# "+3" Because of three whitespace chars after longest key
|
|
406
|
-
req_len = max_len + 3
|
|
407
|
-
newlines = [
|
|
408
|
-
"_" + category + "." + key + " " * (req_len - len(key)) + value
|
|
409
|
-
for key, value in category_dict.items()
|
|
410
|
-
]
|
|
411
|
-
|
|
412
|
-
# A comment line is set after every category
|
|
413
|
-
newlines += ["#"]
|
|
414
|
-
|
|
415
|
-
if (block, category) in self._categories:
|
|
416
|
-
# Category already exists in data block
|
|
417
|
-
category_info = self._categories[(block, category)]
|
|
418
|
-
# Insertion point of new lines
|
|
419
|
-
old_category_start = category_info["start"]
|
|
420
|
-
old_category_stop = category_info["stop"]
|
|
421
|
-
category_start = old_category_start
|
|
422
|
-
# Difference between number of lines of the old and new category
|
|
423
|
-
len_diff = len(newlines) - (old_category_stop - old_category_start)
|
|
424
|
-
# Remove old category content
|
|
425
|
-
del self.lines[old_category_start:old_category_stop]
|
|
426
|
-
# Insert new lines at category start
|
|
427
|
-
self.lines[category_start:category_start] = newlines
|
|
428
|
-
# Update category info
|
|
429
|
-
category_info["start"] = category_start
|
|
430
|
-
category_info["stop"] = category_start + len(newlines)
|
|
431
|
-
# When writing a category no multiline values are used
|
|
432
|
-
category_info["multiline"] = False
|
|
433
|
-
category_info["loop"] = is_looped
|
|
434
|
-
elif block in self.get_block_names():
|
|
435
|
-
# Data block exists but not the category
|
|
436
|
-
# Find last category in the block
|
|
437
|
-
# and set start of new category to stop of last category
|
|
438
|
-
last_stop = 0
|
|
439
|
-
for category_tuple, category_info in self._categories.items():
|
|
440
|
-
if block == category_tuple[0]:
|
|
441
|
-
if last_stop < category_info["stop"]:
|
|
442
|
-
last_stop = category_info["stop"]
|
|
443
|
-
category_start = last_stop
|
|
444
|
-
category_stop = category_start + len(newlines)
|
|
445
|
-
len_diff = len(newlines)
|
|
446
|
-
self.lines[category_start:category_start] = newlines
|
|
447
|
-
self._add_category(
|
|
448
|
-
block,
|
|
449
|
-
category,
|
|
450
|
-
category_start,
|
|
451
|
-
category_stop,
|
|
452
|
-
is_looped,
|
|
453
|
-
is_multilined=False,
|
|
454
|
-
)
|
|
455
|
-
else:
|
|
456
|
-
# The data block does not exist
|
|
457
|
-
# Put the begin of data block in front of newlines
|
|
458
|
-
newlines = ["data_" + block, "#"] + newlines
|
|
459
|
-
# Find last category in the file
|
|
460
|
-
# and set start of new data_block with new category
|
|
461
|
-
# to stop of last category
|
|
462
|
-
last_stop = 0
|
|
463
|
-
for category_info in self._categories.values():
|
|
464
|
-
if last_stop < category_info["stop"]:
|
|
465
|
-
last_stop = category_info["stop"]
|
|
466
|
-
category_start = last_stop + 2
|
|
467
|
-
category_stop = last_stop + len(newlines)
|
|
468
|
-
len_diff = len(newlines) - 2
|
|
469
|
-
self.lines[last_stop:last_stop] = newlines
|
|
470
|
-
self._add_category(
|
|
471
|
-
block,
|
|
472
|
-
category,
|
|
473
|
-
category_start,
|
|
474
|
-
category_stop,
|
|
475
|
-
is_looped,
|
|
476
|
-
is_multilined=False,
|
|
477
|
-
)
|
|
478
|
-
# Update start and stop of all categories appearing after the
|
|
479
|
-
# changed/added category
|
|
480
|
-
for category_info in self._categories.values():
|
|
481
|
-
if category_info["start"] > category_start:
|
|
482
|
-
category_info["start"] += len_diff
|
|
483
|
-
category_info["stop"] += len_diff
|
|
484
|
-
|
|
485
|
-
def __copy_fill__(self, clone):
|
|
486
|
-
super().__copy_fill__(clone)
|
|
487
|
-
clone._categories = copy.deepcopy(self._categories)
|
|
488
|
-
|
|
489
|
-
def __setitem__(self, index, item):
|
|
490
|
-
block, category_name = self._full_index(index)
|
|
491
|
-
self.set_category(category_name, item, block=block)
|
|
492
|
-
|
|
493
|
-
def __getitem__(self, index):
|
|
494
|
-
block, category_name = self._full_index(index)
|
|
495
|
-
return self.get_category(category_name, block=block)
|
|
496
|
-
|
|
497
|
-
def __delitem__(self, index):
|
|
498
|
-
block, category_name = self._full_index(index)
|
|
499
|
-
category_info = self._categories[(block, category_name)]
|
|
500
|
-
# Insertion point of new lines
|
|
501
|
-
category_start = category_info["start"]
|
|
502
|
-
category_stop = category_info["stop"]
|
|
503
|
-
del self.lines[category_start:category_stop]
|
|
504
|
-
# Update start and stop of all categories appearing after the
|
|
505
|
-
# deleted category
|
|
506
|
-
len_diff = category_stop - category_start
|
|
507
|
-
for category_info in self._categories.values():
|
|
508
|
-
if category_info["start"] > category_start:
|
|
509
|
-
category_info["start"] -= len_diff
|
|
510
|
-
category_info["stop"] -= len_diff
|
|
511
|
-
|
|
512
|
-
def __contains__(self, index):
|
|
513
|
-
block, category_name = self._full_index(index)
|
|
514
|
-
return (block, category_name) in self._categories
|
|
515
|
-
|
|
516
|
-
def __iter__(self):
|
|
517
|
-
return self._categories.__iter__()
|
|
518
|
-
|
|
519
|
-
def __len__(self):
|
|
520
|
-
return len(self._categories)
|
|
521
|
-
|
|
522
|
-
def _full_index(self, index):
|
|
523
|
-
"""
|
|
524
|
-
Converts a an integer or tuple index into a block and a category
|
|
525
|
-
name.
|
|
526
|
-
"""
|
|
527
|
-
if isinstance(index, tuple):
|
|
528
|
-
return index[0], index[1]
|
|
529
|
-
elif isinstance(index, str):
|
|
530
|
-
return self.get_block_names()[0], index
|
|
531
|
-
else:
|
|
532
|
-
raise TypeError(
|
|
533
|
-
f"'{type(index).__name__}' is an invalid index type"
|
|
534
|
-
)
|
|
535
|
-
|
|
536
|
-
def _add_category(
|
|
537
|
-
self, block, category_name, start, stop, is_loop, is_multilined
|
|
538
|
-
):
|
|
539
|
-
# Before the first category starts,
|
|
540
|
-
# the current_category is None
|
|
541
|
-
# This is checked before adding an entry
|
|
542
|
-
if category_name is not None:
|
|
543
|
-
self._categories[(block, category_name)] = {
|
|
544
|
-
"start": start,
|
|
545
|
-
"stop": stop,
|
|
546
|
-
"loop": is_loop,
|
|
547
|
-
"multiline": is_multilined,
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
def _process_singlevalued(lines):
|
|
552
|
-
category_dict = {}
|
|
553
|
-
i = 0
|
|
554
|
-
while i < len(lines):
|
|
555
|
-
parts = shlex.split(lines[i])
|
|
556
|
-
key = parts[0].split(".")[1]
|
|
557
|
-
if len(parts) > 1:
|
|
558
|
-
value = parts[1]
|
|
559
|
-
else:
|
|
560
|
-
# The value is not in the same line,
|
|
561
|
-
# but in the following one
|
|
562
|
-
i += 1
|
|
563
|
-
value = shlex.split(lines[i])[0]
|
|
564
|
-
category_dict[key] = value
|
|
565
|
-
i += 1
|
|
566
|
-
return category_dict
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
def _process_looped(lines, whitepace_values):
|
|
570
|
-
category_dict = {}
|
|
571
|
-
keys = []
|
|
572
|
-
# Array index
|
|
573
|
-
i = 0
|
|
574
|
-
# Dictionary key index
|
|
575
|
-
j = 0
|
|
576
|
-
for line in lines:
|
|
577
|
-
if line[0] == "_":
|
|
578
|
-
# Key line
|
|
579
|
-
key = line.split(".")[1]
|
|
580
|
-
keys.append(key)
|
|
581
|
-
# Pessimistic array allocation
|
|
582
|
-
# numpy array filled with strings
|
|
583
|
-
category_dict[key] = np.zeros(len(lines), dtype=object)
|
|
584
|
-
keys_length = len(keys)
|
|
585
|
-
else:
|
|
586
|
-
# If whitespace is expected in quote protected values,
|
|
587
|
-
# use standard shlex split
|
|
588
|
-
# Otherwise use much more faster whitespace split
|
|
589
|
-
# and quote removal if applicable,
|
|
590
|
-
# bypassing the slow shlex module
|
|
591
|
-
if whitepace_values:
|
|
592
|
-
values = shlex.split(line)
|
|
593
|
-
else:
|
|
594
|
-
values = line.split()
|
|
595
|
-
for k in range(len(values)):
|
|
596
|
-
# Remove quotes
|
|
597
|
-
if (values[k][0] == '"' and values[k][-1] == '"') or (
|
|
598
|
-
values[k][0] == "'" and values[k][-1] == "'"
|
|
599
|
-
):
|
|
600
|
-
values[k] = values[k][1:-1]
|
|
601
|
-
for value in values:
|
|
602
|
-
category_dict[keys[j]][i] = value
|
|
603
|
-
j += 1
|
|
604
|
-
if j == keys_length:
|
|
605
|
-
# If all keys have been filled with a value,
|
|
606
|
-
# restart with first key with incremented index
|
|
607
|
-
j = 0
|
|
608
|
-
i += 1
|
|
609
|
-
for key in category_dict.keys():
|
|
610
|
-
# Trim to correct size
|
|
611
|
-
category_dict[key] = category_dict[key][:i]
|
|
612
|
-
return category_dict
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
def _is_empty(line):
|
|
616
|
-
return len(line) == 0 or line[0] == "#"
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
def _data_block_name(line):
|
|
620
|
-
if line.startswith("data_"):
|
|
621
|
-
return line[5:]
|
|
622
|
-
else:
|
|
623
|
-
return None
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
def _is_loop_start(line):
|
|
627
|
-
return line.startswith("loop_")
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
def _is_multi(line, is_loop):
|
|
631
|
-
if is_loop:
|
|
632
|
-
return line[0] == ";"
|
|
633
|
-
else:
|
|
634
|
-
return line[0] in [";", "'", '"']
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
def _get_category_name(line):
|
|
638
|
-
if line[0] != "_":
|
|
639
|
-
return None
|
|
640
|
-
else:
|
|
641
|
-
return line[1 : line.find(".")]
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
def _quote(value):
|
|
645
|
-
if "'" in value:
|
|
646
|
-
return '"' + value + '"'
|
|
647
|
-
elif '"' in value:
|
|
648
|
-
return "'" + value + "'"
|
|
649
|
-
elif " " in value:
|
|
650
|
-
return "'" + value + "'"
|
|
651
|
-
else:
|
|
652
|
-
return value
|
|
File without changes
|
|
File without changes
|