biotite 0.41.1__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +36 -10
- biotite/application/application.py +22 -11
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +16 -5
- biotite/sequence/align/__init__.py +160 -6
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +35 -35
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +112 -126
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +64 -64
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +226 -240
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +88 -100
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +21 -7
- biotite/structure/info/groups.py +10 -15
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -52
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
- biotite-1.0.0.dist-info/RECORD +322 -0
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.1.dist-info/RECORD +0 -340
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/mol/ctab.py
CHANGED
|
@@ -12,13 +12,13 @@ __author__ = "Patrick Kunzmann"
|
|
|
12
12
|
__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
|
|
13
13
|
|
|
14
14
|
import itertools
|
|
15
|
-
import warnings
|
|
16
15
|
import shlex
|
|
16
|
+
import warnings
|
|
17
17
|
import numpy as np
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
18
|
+
from biotite.file import InvalidFileError
|
|
19
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack
|
|
20
|
+
from biotite.structure.bonds import BondList, BondType
|
|
21
|
+
from biotite.structure.error import BadStructureError
|
|
22
22
|
|
|
23
23
|
BOND_TYPE_MAPPING = {
|
|
24
24
|
1: BondType.SINGLE,
|
|
@@ -84,8 +84,7 @@ def read_structure_from_ctab(ctab_lines):
|
|
|
84
84
|
raise InvalidFileError(f"Unknown CTAB version '{unkown_version}'")
|
|
85
85
|
|
|
86
86
|
|
|
87
|
-
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY,
|
|
88
|
-
version=None):
|
|
87
|
+
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY, version=None):
|
|
89
88
|
"""
|
|
90
89
|
Convert an :class:`AtomArray` into a
|
|
91
90
|
*MDL* connection table (Ctab).
|
|
@@ -124,8 +123,7 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY,
|
|
|
124
123
|
"""
|
|
125
124
|
if isinstance(atoms, AtomArrayStack):
|
|
126
125
|
raise TypeError(
|
|
127
|
-
"An 'AtomArrayStack' was given, "
|
|
128
|
-
"but only a single model can be written"
|
|
126
|
+
"An 'AtomArrayStack' was given, " "but only a single model can be written"
|
|
129
127
|
)
|
|
130
128
|
if atoms.bonds is None:
|
|
131
129
|
raise BadStructureError("Input AtomArray has no associated BondList")
|
|
@@ -134,9 +132,7 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY,
|
|
|
134
132
|
|
|
135
133
|
match version:
|
|
136
134
|
case None:
|
|
137
|
-
if _is_v2000_compatible(
|
|
138
|
-
atoms.array_length(), atoms.bonds.get_bond_count()
|
|
139
|
-
):
|
|
135
|
+
if _is_v2000_compatible(atoms.array_length(), atoms.bonds.get_bond_count()):
|
|
140
136
|
return _write_structure_to_ctab_v2000(atoms, default_bond_type)
|
|
141
137
|
else:
|
|
142
138
|
return _write_structure_to_ctab_v3000(atoms, default_bond_type)
|
|
@@ -160,7 +156,8 @@ def _read_structure_from_ctab_v2000(ctab_lines):
|
|
|
160
156
|
atom_lines = ctab_lines[1 : 1 + n_atoms]
|
|
161
157
|
bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]
|
|
162
158
|
charge_lines = [
|
|
163
|
-
line
|
|
159
|
+
line
|
|
160
|
+
for line in ctab_lines[1 + n_atoms + n_bonds :]
|
|
164
161
|
if line.startswith("M CHG")
|
|
165
162
|
]
|
|
166
163
|
|
|
@@ -208,10 +205,9 @@ def _read_structure_from_ctab_v2000(ctab_lines):
|
|
|
208
205
|
|
|
209
206
|
return atoms
|
|
210
207
|
|
|
208
|
+
|
|
211
209
|
def _read_structure_from_ctab_v3000(ctab_lines):
|
|
212
|
-
v30_lines = [
|
|
213
|
-
line[6:].strip() for line in ctab_lines if line.startswith("M V30")
|
|
214
|
-
]
|
|
210
|
+
v30_lines = [line[6:].strip() for line in ctab_lines if line.startswith("M V30")]
|
|
215
211
|
|
|
216
212
|
atom_lines = _get_block_v3000(v30_lines, "ATOM")
|
|
217
213
|
if len(atom_lines) == 0:
|
|
@@ -262,16 +258,20 @@ def _read_structure_from_ctab_v3000(ctab_lines):
|
|
|
262
258
|
|
|
263
259
|
return atoms
|
|
264
260
|
|
|
261
|
+
|
|
265
262
|
def _get_version(counts_line):
|
|
266
263
|
return counts_line[33:39].strip()
|
|
267
264
|
|
|
265
|
+
|
|
268
266
|
def _is_v2000_compatible(n_atoms, n_bonds):
|
|
269
267
|
# The format uses a maximum of 3 digits for the atom and bond count
|
|
270
268
|
return n_atoms < 1000 and n_bonds < 1000
|
|
271
269
|
|
|
270
|
+
|
|
272
271
|
def _get_counts_v2000(counts_line):
|
|
273
272
|
return int(counts_line[0:3]), int(counts_line[3:6])
|
|
274
273
|
|
|
274
|
+
|
|
275
275
|
def _get_block_v3000(v30_lines, block_name):
|
|
276
276
|
block_lines = []
|
|
277
277
|
in_block = False
|
|
@@ -282,13 +282,12 @@ def _get_block_v3000(v30_lines, block_name):
|
|
|
282
282
|
if in_block:
|
|
283
283
|
return block_lines
|
|
284
284
|
else:
|
|
285
|
-
raise InvalidFileError(
|
|
286
|
-
f"Block '{block_name}' ended before it began"
|
|
287
|
-
)
|
|
285
|
+
raise InvalidFileError(f"Block '{block_name}' ended before it began")
|
|
288
286
|
elif in_block:
|
|
289
287
|
block_lines.append(line)
|
|
290
288
|
return block_lines
|
|
291
289
|
|
|
290
|
+
|
|
292
291
|
def create_property_dict_v3000(property_strings):
|
|
293
292
|
properties = {}
|
|
294
293
|
for prop in property_strings:
|
|
@@ -315,7 +314,8 @@ def _write_structure_to_ctab_v2000(atoms, default_bond_type):
|
|
|
315
314
|
f" {atoms.element[i].capitalize():3}"
|
|
316
315
|
f"{0:>2}" # Mass difference -> unused
|
|
317
316
|
f"{CHARGE_MAPPING_REV.get(charge[i], 0):>3d}"
|
|
318
|
-
+ f"{0:>3d}"
|
|
317
|
+
+ f"{0:>3d}"
|
|
318
|
+
* 10 # More unused fields
|
|
319
319
|
for i in range(atoms.array_length())
|
|
320
320
|
]
|
|
321
321
|
|
|
@@ -323,7 +323,8 @@ def _write_structure_to_ctab_v2000(atoms, default_bond_type):
|
|
|
323
323
|
bond_lines = [
|
|
324
324
|
f"{i+1:>3d}{j+1:>3d}"
|
|
325
325
|
f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
|
|
326
|
-
+ f"{0:>3d}"
|
|
326
|
+
+ f"{0:>3d}"
|
|
327
|
+
* 4
|
|
327
328
|
for i, j, bond_type in atoms.bonds.as_array()
|
|
328
329
|
]
|
|
329
330
|
|
|
@@ -332,8 +333,7 @@ def _write_structure_to_ctab_v2000(atoms, default_bond_type):
|
|
|
332
333
|
charge_lines = []
|
|
333
334
|
# Each `M CHG` line can contain up to 8 charges
|
|
334
335
|
for batch in _batched(
|
|
335
|
-
[(atom_i, c) for atom_i, c in enumerate(charge) if c != 0],
|
|
336
|
-
N_CHARGES_PER_LINE
|
|
336
|
+
[(atom_i, c) for atom_i, c in enumerate(charge) if c != 0], N_CHARGES_PER_LINE
|
|
337
337
|
):
|
|
338
338
|
charge_lines.append(
|
|
339
339
|
f"M CHG{len(batch):>3d}"
|
|
@@ -349,9 +349,7 @@ def _write_structure_to_ctab_v3000(atoms, default_bond_type):
|
|
|
349
349
|
except AttributeError:
|
|
350
350
|
charges = np.zeros(atoms.array_length(), dtype=int)
|
|
351
351
|
|
|
352
|
-
counts_line = (
|
|
353
|
-
f"COUNTS {atoms.array_length()} {atoms.bonds.get_bond_count()} 0 0 0"
|
|
354
|
-
)
|
|
352
|
+
counts_line = f"COUNTS {atoms.array_length()} {atoms.bonds.get_bond_count()} 0 0 0"
|
|
355
353
|
|
|
356
354
|
atom_lines = [
|
|
357
355
|
f"{i + 1}"
|
|
@@ -375,32 +373,35 @@ def _write_structure_to_ctab_v3000(atoms, default_bond_type):
|
|
|
375
373
|
]
|
|
376
374
|
|
|
377
375
|
lines = (
|
|
378
|
-
["BEGIN CTAB"]
|
|
379
|
-
[counts_line]
|
|
380
|
-
["BEGIN ATOM"]
|
|
381
|
-
atom_lines
|
|
382
|
-
["END ATOM"]
|
|
383
|
-
["BEGIN BOND"]
|
|
384
|
-
bond_lines
|
|
385
|
-
["END BOND"]
|
|
386
|
-
["END CTAB"]
|
|
376
|
+
["BEGIN CTAB"]
|
|
377
|
+
+ [counts_line]
|
|
378
|
+
+ ["BEGIN ATOM"]
|
|
379
|
+
+ atom_lines
|
|
380
|
+
+ ["END ATOM"]
|
|
381
|
+
+ ["BEGIN BOND"]
|
|
382
|
+
+ bond_lines
|
|
383
|
+
+ ["END BOND"]
|
|
384
|
+
+ ["END CTAB"]
|
|
387
385
|
)
|
|
388
386
|
# Mark lines as V3000 CTAB
|
|
389
387
|
lines = ["M V30 " + line for line in lines]
|
|
390
388
|
return [V2000_COMPATIBILITY_LINE] + lines + ["M END"]
|
|
391
389
|
|
|
390
|
+
|
|
392
391
|
def _to_property(charge):
|
|
393
392
|
if charge == 0:
|
|
394
393
|
return ""
|
|
395
394
|
else:
|
|
396
395
|
return f"CHG={charge}"
|
|
397
396
|
|
|
397
|
+
|
|
398
398
|
def _quote(string):
|
|
399
399
|
if " " in string or len(string) == 0:
|
|
400
400
|
return f'"{string}"'
|
|
401
401
|
else:
|
|
402
402
|
return string
|
|
403
403
|
|
|
404
|
+
|
|
404
405
|
def _batched(iterable, n):
|
|
405
406
|
"""
|
|
406
407
|
Equivalent to :func:`itertools.batched()`.
|
|
@@ -411,4 +412,4 @@ def _batched(iterable, n):
|
|
|
411
412
|
"""
|
|
412
413
|
iterator = iter(iterable)
|
|
413
414
|
while batch := tuple(itertools.islice(iterator, n)):
|
|
414
|
-
yield batch
|
|
415
|
+
yield batch
|
|
@@ -6,16 +6,15 @@ __name__ = "biotite.structure.io.mol"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["Header"]
|
|
8
8
|
|
|
9
|
-
import warnings
|
|
10
9
|
import datetime
|
|
10
|
+
import warnings
|
|
11
11
|
from dataclasses import dataclass
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
_DATE_FORMAT = "%m%d%y%H%M"
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
@dataclass
|
|
18
|
-
class Header
|
|
17
|
+
class Header:
|
|
19
18
|
"""
|
|
20
19
|
The header for connection tables.
|
|
21
20
|
|
|
@@ -70,20 +69,25 @@ class Header():
|
|
|
70
69
|
try:
|
|
71
70
|
time = datetime.datetime.strptime(time_string, _DATE_FORMAT)
|
|
72
71
|
except ValueError:
|
|
73
|
-
warnings.warn(
|
|
74
|
-
f"Invalid time format '{time_string}' in file header"
|
|
75
|
-
)
|
|
72
|
+
warnings.warn(f"Invalid time format '{time_string}' in file header")
|
|
76
73
|
time = None
|
|
77
74
|
dimensions = lines[1][20:22].strip()
|
|
78
75
|
scaling_factors = lines[1][22:34].strip()
|
|
79
|
-
energy
|
|
76
|
+
energy = lines[1][34:46].strip()
|
|
80
77
|
registry_number = lines[1][46:52].strip()
|
|
81
78
|
|
|
82
79
|
comments = lines[2].strip()
|
|
83
80
|
|
|
84
81
|
return Header(
|
|
85
|
-
mol_name,
|
|
86
|
-
|
|
82
|
+
mol_name,
|
|
83
|
+
initials,
|
|
84
|
+
program,
|
|
85
|
+
time,
|
|
86
|
+
dimensions,
|
|
87
|
+
scaling_factors,
|
|
88
|
+
energy,
|
|
89
|
+
registry_number,
|
|
90
|
+
comments,
|
|
87
91
|
)
|
|
88
92
|
|
|
89
93
|
def serialize(self):
|
|
@@ -113,4 +117,4 @@ class Header():
|
|
|
113
117
|
return text
|
|
114
118
|
|
|
115
119
|
def __str__(self):
|
|
116
|
-
return self.serialize()
|
|
120
|
+
return self.serialize()
|
biotite/structure/io/mol/mol.py
CHANGED
|
@@ -6,11 +6,13 @@ __name__ = "biotite.structure.io.mol"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["MOLFile"]
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
from .
|
|
11
|
-
from .
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
from biotite.file import InvalidFileError, TextFile
|
|
10
|
+
from biotite.structure.bonds import BondType
|
|
11
|
+
from biotite.structure.io.mol.ctab import (
|
|
12
|
+
read_structure_from_ctab,
|
|
13
|
+
write_structure_to_ctab,
|
|
14
|
+
)
|
|
15
|
+
from biotite.structure.io.mol.header import Header
|
|
14
16
|
|
|
15
17
|
# Number of header lines
|
|
16
18
|
N_HEADER = 3
|
|
@@ -80,66 +82,23 @@ class MOLFile(TextFile):
|
|
|
80
82
|
self.lines = [""] * N_HEADER
|
|
81
83
|
self._header = None
|
|
82
84
|
|
|
83
|
-
|
|
84
85
|
@classmethod
|
|
85
86
|
def read(cls, file):
|
|
86
87
|
mol_file = super().read(file)
|
|
87
88
|
mol_file._header = None
|
|
88
89
|
return mol_file
|
|
89
90
|
|
|
90
|
-
|
|
91
91
|
@property
|
|
92
92
|
def header(self):
|
|
93
93
|
if self._header is None:
|
|
94
94
|
self._header = Header.deserialize("\n".join(self.lines[0:3]) + "\n")
|
|
95
95
|
return self._header
|
|
96
96
|
|
|
97
|
-
|
|
98
97
|
@header.setter
|
|
99
98
|
def header(self, header):
|
|
100
99
|
self._header = header
|
|
101
100
|
self.lines[0:3] = self._header.serialize().splitlines()
|
|
102
101
|
|
|
103
|
-
|
|
104
|
-
def get_header(self):
|
|
105
|
-
"""
|
|
106
|
-
Get the header from the MOL file.
|
|
107
|
-
|
|
108
|
-
DEPRECATED: Use the :attr:`header` property instead.
|
|
109
|
-
|
|
110
|
-
Returns
|
|
111
|
-
-------
|
|
112
|
-
header_attributes
|
|
113
|
-
See :class:`Header`.
|
|
114
|
-
"""
|
|
115
|
-
header = self.header
|
|
116
|
-
return (
|
|
117
|
-
header.mol_name,
|
|
118
|
-
header.initials,
|
|
119
|
-
header.program,
|
|
120
|
-
header.time,
|
|
121
|
-
header.dimensions,
|
|
122
|
-
header.scaling_factors,
|
|
123
|
-
header.energy,
|
|
124
|
-
header.registry_number,
|
|
125
|
-
header.comments
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def set_header(self, *args, **kwargs):
|
|
130
|
-
"""
|
|
131
|
-
Set the header for the MOL file.
|
|
132
|
-
|
|
133
|
-
DEPRECATED: Use the :attr:`header` property instead.
|
|
134
|
-
|
|
135
|
-
Parameters
|
|
136
|
-
----------
|
|
137
|
-
**args, **kwars
|
|
138
|
-
See :class:`Header`.
|
|
139
|
-
"""
|
|
140
|
-
self.header = Header(*args, **kwargs)
|
|
141
|
-
|
|
142
|
-
|
|
143
102
|
def get_structure(self):
|
|
144
103
|
"""
|
|
145
104
|
Get an :class:`AtomArray` from the MOL file.
|
|
@@ -157,9 +116,7 @@ class MOLFile(TextFile):
|
|
|
157
116
|
raise InvalidFileError("File does not contain structure data")
|
|
158
117
|
return read_structure_from_ctab(ctab_lines)
|
|
159
118
|
|
|
160
|
-
|
|
161
|
-
def set_structure(self, atoms, default_bond_type=BondType.ANY,
|
|
162
|
-
version=None):
|
|
119
|
+
def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
|
|
163
120
|
"""
|
|
164
121
|
Set the :class:`AtomArray` for the file.
|
|
165
122
|
|
|
@@ -185,9 +142,8 @@ class MOLFile(TextFile):
|
|
|
185
142
|
)
|
|
186
143
|
|
|
187
144
|
|
|
188
|
-
|
|
189
145
|
def _get_ctab_lines(lines):
|
|
190
146
|
for i, line in enumerate(lines):
|
|
191
147
|
if line.startswith("M END"):
|
|
192
|
-
return lines[N_HEADER:i+1]
|
|
148
|
+
return lines[N_HEADER : i + 1]
|
|
193
149
|
return lines[N_HEADER:]
|
biotite/structure/io/mol/sdf.py
CHANGED
|
@@ -8,16 +8,24 @@ __all__ = ["SDFile", "SDRecord", "Metadata"]
|
|
|
8
8
|
|
|
9
9
|
import re
|
|
10
10
|
import warnings
|
|
11
|
+
from collections.abc import Mapping, MutableMapping
|
|
11
12
|
from dataclasses import dataclass
|
|
12
|
-
from collections.abc import MutableMapping, Mapping
|
|
13
13
|
import numpy as np
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
from biotite.file import (
|
|
15
|
+
DeserializationError,
|
|
16
|
+
File,
|
|
17
|
+
InvalidFileError,
|
|
18
|
+
SerializationError,
|
|
19
|
+
is_open_compatible,
|
|
20
|
+
is_text,
|
|
21
|
+
)
|
|
22
|
+
from biotite.structure.atoms import AtomArray
|
|
23
|
+
from biotite.structure.bonds import BondList, BondType
|
|
24
|
+
from biotite.structure.io.mol.ctab import (
|
|
25
|
+
read_structure_from_ctab,
|
|
26
|
+
write_structure_to_ctab,
|
|
27
|
+
)
|
|
28
|
+
from biotite.structure.io.mol.header import Header
|
|
21
29
|
|
|
22
30
|
_N_HEADER = 3
|
|
23
31
|
# Number of header lines
|
|
@@ -96,6 +104,7 @@ class Metadata(MutableMapping):
|
|
|
96
104
|
number, name, registry_internal, registry_external
|
|
97
105
|
The same as the parameters.
|
|
98
106
|
"""
|
|
107
|
+
|
|
99
108
|
# The characters that can be given as input to `name`
|
|
100
109
|
# First character must be alphanumeric,
|
|
101
110
|
# following characters may include underscores and periods
|
|
@@ -103,7 +112,7 @@ class Metadata(MutableMapping):
|
|
|
103
112
|
# they are still used in practice and therefore allowed here
|
|
104
113
|
_NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
|
|
105
114
|
# These regexes are used to parse the key from a line
|
|
106
|
-
_COMPONENT_REGEX =
|
|
115
|
+
_COMPONENT_REGEX = {
|
|
107
116
|
"number": re.compile(r"^DT(\d+)$"),
|
|
108
117
|
"name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
|
|
109
118
|
"registry_internal": re.compile(r"^(\d+)$"),
|
|
@@ -162,9 +171,7 @@ class Metadata(MutableMapping):
|
|
|
162
171
|
break
|
|
163
172
|
else:
|
|
164
173
|
# There is no matching pattern
|
|
165
|
-
raise DeserializationError(
|
|
166
|
-
f"Invalid key component '{component}'"
|
|
167
|
-
)
|
|
174
|
+
raise DeserializationError(f"Invalid key component '{component}'")
|
|
168
175
|
return Metadata.Key(**parsed_component_dict)
|
|
169
176
|
|
|
170
177
|
def serialize(self):
|
|
@@ -190,7 +197,6 @@ class Metadata(MutableMapping):
|
|
|
190
197
|
def __str__(self):
|
|
191
198
|
return self.serialize()
|
|
192
199
|
|
|
193
|
-
|
|
194
200
|
def __init__(self, metadata=None):
|
|
195
201
|
if metadata is None:
|
|
196
202
|
metadata = {}
|
|
@@ -222,9 +228,7 @@ class Metadata(MutableMapping):
|
|
|
222
228
|
current_value = None
|
|
223
229
|
else:
|
|
224
230
|
if current_key is None:
|
|
225
|
-
raise DeserializationError(
|
|
226
|
-
"Value found before metadata key"
|
|
227
|
-
)
|
|
231
|
+
raise DeserializationError("Value found before metadata key")
|
|
228
232
|
if current_value is None:
|
|
229
233
|
current_value = line
|
|
230
234
|
else:
|
|
@@ -388,7 +392,7 @@ class SDRecord:
|
|
|
388
392
|
if isinstance(self._header, str):
|
|
389
393
|
try:
|
|
390
394
|
self._header = Header.deserialize(self._header)
|
|
391
|
-
except:
|
|
395
|
+
except Exception:
|
|
392
396
|
raise DeserializationError("Failed to deserialize header")
|
|
393
397
|
return self._header
|
|
394
398
|
|
|
@@ -406,7 +410,7 @@ class SDRecord:
|
|
|
406
410
|
if isinstance(self._metadata, str):
|
|
407
411
|
try:
|
|
408
412
|
self._metadata = Metadata.deserialize(self._metadata)
|
|
409
|
-
except:
|
|
413
|
+
except Exception:
|
|
410
414
|
raise DeserializationError("Failed to deserialize metadata")
|
|
411
415
|
return self._metadata
|
|
412
416
|
|
|
@@ -483,8 +487,7 @@ class SDRecord:
|
|
|
483
487
|
raise InvalidFileError("File does not contain structure data")
|
|
484
488
|
return read_structure_from_ctab(ctab_lines)
|
|
485
489
|
|
|
486
|
-
def set_structure(self, atoms, default_bond_type=BondType.ANY,
|
|
487
|
-
version=None):
|
|
490
|
+
def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
|
|
488
491
|
"""
|
|
489
492
|
Set the structural data in the SD record.
|
|
490
493
|
|
|
@@ -505,9 +508,9 @@ class SDRecord:
|
|
|
505
508
|
By default, ``"V2000"`` is used, unless the number of atoms
|
|
506
509
|
or bonds exceeds 999, in which case ``"V3000"`` is used.
|
|
507
510
|
"""
|
|
508
|
-
self._ctab = _join_with_terminal_newline(
|
|
509
|
-
atoms, default_bond_type, version
|
|
510
|
-
)
|
|
511
|
+
self._ctab = _join_with_terminal_newline(
|
|
512
|
+
write_structure_to_ctab(atoms, default_bond_type, version)
|
|
513
|
+
)
|
|
511
514
|
|
|
512
515
|
def __eq__(self, other):
|
|
513
516
|
if not isinstance(other, type(self)):
|
|
@@ -736,28 +739,29 @@ class SDFile(File, MutableMapping):
|
|
|
736
739
|
The content to be deserialized.
|
|
737
740
|
"""
|
|
738
741
|
lines = text.splitlines()
|
|
739
|
-
record_ends = np.array(
|
|
740
|
-
i for i, line in enumerate(lines)
|
|
741
|
-
|
|
742
|
-
|
|
742
|
+
record_ends = np.array(
|
|
743
|
+
[i for i, line in enumerate(lines) if line.startswith(_RECORD_DELIMITER)],
|
|
744
|
+
dtype=int,
|
|
745
|
+
)
|
|
743
746
|
if len(record_ends) == 0:
|
|
744
747
|
warnings.warn(
|
|
745
748
|
"Final record delimiter missing, "
|
|
746
749
|
"maybe this is a MOL file instead of a SD file"
|
|
747
750
|
)
|
|
748
|
-
record_ends = np.array([len(lines)-1], dtype=int)
|
|
751
|
+
record_ends = np.array([len(lines) - 1], dtype=int)
|
|
749
752
|
# The first record starts at the first line and the last
|
|
750
753
|
# delimiter is at the end of the file
|
|
751
754
|
# Records in the middle start directly after the delimiter
|
|
752
755
|
record_starts = np.concatenate(([0], record_ends[:-1] + 1), dtype=int)
|
|
753
756
|
record_names = [lines[start].strip() for start in record_starts]
|
|
754
|
-
return SDFile(
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
757
|
+
return SDFile(
|
|
758
|
+
{
|
|
759
|
+
# Do not include the delimiter
|
|
760
|
+
# -> stop at end (instead of end + 1)
|
|
761
|
+
name: _join_with_terminal_newline(lines[start:end])
|
|
762
|
+
for name, start, end in zip(record_names, record_starts, record_ends)
|
|
763
|
+
}
|
|
764
|
+
)
|
|
761
765
|
|
|
762
766
|
def serialize(self):
|
|
763
767
|
"""
|
|
@@ -776,7 +780,7 @@ class SDFile(File, MutableMapping):
|
|
|
776
780
|
else:
|
|
777
781
|
try:
|
|
778
782
|
text_blocks.append(record.serialize())
|
|
779
|
-
except:
|
|
783
|
+
except Exception:
|
|
780
784
|
raise SerializationError(
|
|
781
785
|
f"Failed to serialize record '{record_name}'"
|
|
782
786
|
)
|
|
@@ -835,19 +839,15 @@ class SDFile(File, MutableMapping):
|
|
|
835
839
|
# -> must be deserialized first
|
|
836
840
|
try:
|
|
837
841
|
record = SDRecord.deserialize(record)
|
|
838
|
-
except:
|
|
839
|
-
raise DeserializationError(
|
|
840
|
-
f"Failed to deserialize record '{key}'"
|
|
841
|
-
)
|
|
842
|
+
except Exception:
|
|
843
|
+
raise DeserializationError(f"Failed to deserialize record '{key}'")
|
|
842
844
|
# Update with deserialized object
|
|
843
845
|
self._records[key] = record
|
|
844
846
|
return record
|
|
845
847
|
|
|
846
848
|
def __setitem__(self, key, record):
|
|
847
849
|
if not isinstance(record, SDRecord):
|
|
848
|
-
raise TypeError(
|
|
849
|
-
f"Expected 'SDRecord', but got '{type(record).__name__}'"
|
|
850
|
-
)
|
|
850
|
+
raise TypeError(f"Expected 'SDRecord', but got '{type(record).__name__}'")
|
|
851
851
|
# The molecule name in the header is unique across the file
|
|
852
852
|
record.header.mol_name = key
|
|
853
853
|
self._records[key] = record
|
|
@@ -895,22 +895,19 @@ def _to_metadata_key(key):
|
|
|
895
895
|
return Metadata.Key(name=key)
|
|
896
896
|
else:
|
|
897
897
|
raise TypeError(
|
|
898
|
-
"Expected 'Metadata.Key' or str, "
|
|
899
|
-
f"but got '{type(key).__name__}'"
|
|
898
|
+
"Expected 'Metadata.Key' or str, " f"but got '{type(key).__name__}'"
|
|
900
899
|
)
|
|
901
900
|
|
|
902
901
|
|
|
903
902
|
def _add_key_value_pair(metadata, key, value):
|
|
904
903
|
if key is not None:
|
|
905
904
|
if value is None:
|
|
906
|
-
raise DeserializationError(
|
|
907
|
-
f"No value found for metadata key {key}"
|
|
908
|
-
)
|
|
905
|
+
raise DeserializationError(f"No value found for metadata key {key}")
|
|
909
906
|
metadata[key] = value
|
|
910
907
|
|
|
911
908
|
|
|
912
909
|
def _get_ctab_stop(lines):
|
|
913
910
|
for i in range(_N_HEADER, len(lines)):
|
|
914
911
|
if lines[i].startswith("M END"):
|
|
915
|
-
return i+1
|
|
916
|
-
return len(lines)
|
|
912
|
+
return i + 1
|
|
913
|
+
return len(lines)
|
|
@@ -6,21 +6,21 @@ __name__ = "biotite.structure.io.netcdf"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["NetCDFFile"]
|
|
8
8
|
|
|
9
|
+
import biotraj
|
|
9
10
|
import numpy as np
|
|
10
|
-
from
|
|
11
|
-
from
|
|
11
|
+
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
12
|
+
from biotite.structure.io.trajfile import TrajectoryFile
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class NetCDFFile(TrajectoryFile):
|
|
15
16
|
"""
|
|
16
17
|
This file class represents a NetCDF trajectory file.
|
|
17
18
|
"""
|
|
18
|
-
|
|
19
|
+
|
|
19
20
|
@classmethod
|
|
20
21
|
def traj_type(cls):
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
return biotraj.NetCDFTrajectoryFile
|
|
23
|
+
|
|
24
24
|
@classmethod
|
|
25
25
|
def process_read_values(cls, read_values):
|
|
26
26
|
# .dcd files use Angstrom
|
|
@@ -29,35 +29,36 @@ class NetCDFFile(TrajectoryFile):
|
|
|
29
29
|
cell_lengths = read_values[2]
|
|
30
30
|
cell_angles = read_values[3]
|
|
31
31
|
if cell_lengths is None or cell_angles is None:
|
|
32
|
-
|
|
32
|
+
box = None
|
|
33
33
|
else:
|
|
34
34
|
box = np.stack(
|
|
35
|
-
[
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
[
|
|
36
|
+
vectors_from_unitcell(a, b, c, alpha, beta, gamma)
|
|
37
|
+
for (a, b, c), (alpha, beta, gamma) in zip(
|
|
38
|
+
cell_lengths, np.deg2rad(cell_angles)
|
|
39
|
+
)
|
|
40
|
+
],
|
|
41
|
+
axis=0,
|
|
39
42
|
)
|
|
40
43
|
return coord, box, time
|
|
41
|
-
|
|
44
|
+
|
|
42
45
|
@classmethod
|
|
43
46
|
def prepare_write_values(cls, coord, box, time):
|
|
44
|
-
coord = coord.astype(np.float32, copy=False)
|
|
45
|
-
|
|
46
|
-
time = time.astype(np.float32, copy=False) \
|
|
47
|
-
if time is not None else None
|
|
47
|
+
coord = coord.astype(np.float32, copy=False) if coord is not None else None
|
|
48
|
+
time = time.astype(np.float32, copy=False) if time is not None else None
|
|
48
49
|
if box is None:
|
|
49
50
|
cell_lengths = None
|
|
50
|
-
cell_angles
|
|
51
|
+
cell_angles = None
|
|
51
52
|
else:
|
|
52
53
|
cell_lengths = np.zeros((len(box), 3), dtype=np.float32)
|
|
53
|
-
cell_angles
|
|
54
|
+
cell_angles = np.zeros((len(box), 3), dtype=np.float32)
|
|
54
55
|
for i, model_box in enumerate(box):
|
|
55
56
|
a, b, c, alpha, beta, gamma = unitcell_from_vectors(model_box)
|
|
56
57
|
cell_lengths[i] = np.array((a, b, c))
|
|
57
58
|
cell_angles[i] = np.rad2deg((alpha, beta, gamma))
|
|
58
59
|
return {
|
|
59
|
-
"coordinates"
|
|
60
|
-
"time"
|
|
61
|
-
"cell_lengths"
|
|
62
|
-
"cell_angles"
|
|
63
|
-
}
|
|
60
|
+
"coordinates": coord,
|
|
61
|
+
"time": time,
|
|
62
|
+
"cell_lengths": cell_lengths,
|
|
63
|
+
"cell_angles": cell_angles,
|
|
64
|
+
}
|