modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelcraft/__init__.py +16 -31
- modelcraft/__main__.py +0 -1
- modelcraft/arguments.py +35 -7
- modelcraft/combine.py +22 -41
- modelcraft/contents.py +188 -164
- modelcraft/environ.py +0 -7
- modelcraft/geometry.py +39 -27
- modelcraft/job.py +6 -5
- modelcraft/jobs/acedrg.py +2 -0
- modelcraft/jobs/buccaneer.py +22 -4
- modelcraft/jobs/comit.py +2 -0
- modelcraft/jobs/ctruncate.py +3 -1
- modelcraft/jobs/emda.py +2 -0
- modelcraft/jobs/findwaters.py +2 -0
- modelcraft/jobs/freerflag.py +2 -0
- modelcraft/jobs/libg.py +2 -0
- modelcraft/jobs/molrep.py +2 -0
- modelcraft/jobs/nautilus.py +28 -14
- modelcraft/jobs/nucleofind.py +88 -0
- modelcraft/jobs/parrot.py +13 -2
- modelcraft/jobs/phasematch.py +2 -1
- modelcraft/jobs/refmac.py +3 -1
- modelcraft/jobs/servalcat.py +38 -4
- modelcraft/jobs/sheetbend.py +2 -0
- modelcraft/modelcraftem.py +49 -6
- modelcraft/modelcraftxray.py +90 -42
- modelcraft/monlib.py +55 -52
- modelcraft/pdbe.py +54 -0
- modelcraft/pipeline.py +1 -1
- modelcraft/prune.py +69 -0
- modelcraft/reflections.py +11 -1
- modelcraft/scripts/contents.py +5 -215
- modelcraft/scripts/copies.py +26 -17
- modelcraft/scripts/modelcraft.py +1 -0
- modelcraft/scripts/sidechains.py +141 -0
- modelcraft/scripts/validate.py +81 -0
- modelcraft/sequence.py +106 -0
- modelcraft/solvent.py +42 -113
- modelcraft/structure.py +64 -41
- modelcraft/tests/ccp4/__init__.py +7 -11
- modelcraft/tests/ccp4/test_acedrg.py +2 -0
- modelcraft/tests/ccp4/test_arguments.py +3 -0
- modelcraft/tests/ccp4/test_buccaneer.py +3 -2
- modelcraft/tests/ccp4/test_cell.py +4 -1
- modelcraft/tests/ccp4/test_comit.py +2 -0
- modelcraft/tests/ccp4/test_contents.py +99 -17
- modelcraft/tests/ccp4/test_copies.py +1 -0
- modelcraft/tests/ccp4/test_ctruncate.py +2 -0
- modelcraft/tests/ccp4/test_findwaters.py +2 -0
- modelcraft/tests/ccp4/test_freerflag.py +2 -0
- modelcraft/tests/ccp4/test_libg.py +1 -0
- modelcraft/tests/ccp4/test_molrep.py +3 -0
- modelcraft/tests/ccp4/test_monlib.py +75 -45
- modelcraft/tests/ccp4/test_nautilus.py +5 -3
- modelcraft/tests/ccp4/test_nucleofind.py +62 -0
- modelcraft/tests/ccp4/test_parrot.py +3 -1
- modelcraft/tests/ccp4/test_phasematch.py +2 -0
- modelcraft/tests/ccp4/test_prune.py +17 -0
- modelcraft/tests/ccp4/test_reflections.py +110 -1
- modelcraft/tests/ccp4/test_refmac.py +3 -0
- modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
- modelcraft/tests/ccp4/test_servalcat.py +52 -0
- modelcraft/tests/ccp4/test_sheetbend.py +4 -3
- modelcraft/tests/ccp4/test_sidechains.py +25 -0
- modelcraft/tests/ccp4/test_solvent.py +12 -26
- modelcraft/tests/ccp4/test_structure.py +1 -0
- modelcraft/tests/ccp4/test_validation.py +19 -0
- modelcraft/tests/ccp4/test_xray.py +12 -6
- modelcraft/tests/ccpem/test_em.py +3 -0
- modelcraft/tests/ccpem/test_emda.py +2 -0
- modelcraft/tests/ccpem/test_refmac.py +1 -0
- modelcraft/tests/ccpem/test_servalcat.py +4 -3
- modelcraft/utils.py +16 -4
- modelcraft/validation.py +101 -0
- modelcraft-6.0.0.dist-info/METADATA +76 -0
- modelcraft-6.0.0.dist-info/RECORD +85 -0
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
- modelcraft/coot/prune.py +0 -1085
- modelcraft/coot/sidechains.py +0 -68
- modelcraft/jobs/acorn.py +0 -114
- modelcraft/jobs/coot.py +0 -104
- modelcraft/tests/ccp4/test_coot.py +0 -29
- modelcraft/tests/ccp4/test_geometry.py +0 -20
- modelcraft/tests/unittests/__init__.py +0 -0
- modelcraft/tests/unittests/test_reflections.py +0 -101
- modelcraft-5.0.2.dist-info/LICENSE +0 -504
- modelcraft-5.0.2.dist-info/METADATA +0 -48
- modelcraft-5.0.2.dist-info/RECORD +0 -82
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/solvent.py
CHANGED
|
@@ -1,140 +1,69 @@
|
|
|
1
1
|
import collections
|
|
2
2
|
import dataclasses
|
|
3
|
-
import functools
|
|
4
3
|
import math
|
|
5
|
-
import re
|
|
6
|
-
import gemmi
|
|
7
|
-
from .contents import AsuContents, Polymer, PolymerType
|
|
8
|
-
from .monlib import chemcomp
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def solvent_fraction(contents: AsuContents, mtz: gemmi.Mtz) -> float:
|
|
12
|
-
volume = _contents_volume(contents)
|
|
13
|
-
asu_volume = mtz.cell.volume / len(mtz.spacegroup.operations())
|
|
14
|
-
copies = contents.copies or _guess_copies(contents, mtz)
|
|
15
|
-
return 1 - copies * volume / asu_volume
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@functools.lru_cache(maxsize=None)
|
|
19
|
-
def _library_weight(code: str) -> float:
|
|
20
|
-
return sum(atom.el.weight for atom in chemcomp(code).atoms)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@functools.lru_cache(maxsize=None)
|
|
24
|
-
def _library_volume(code: str) -> float:
|
|
25
|
-
return sum(18 for atom in chemcomp(code).atoms if not atom.is_hydrogen())
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _polymer_weight(polymer: Polymer) -> float:
|
|
29
|
-
codes = polymer.residue_codes(modified=False)
|
|
30
|
-
total = sum(_library_weight(code) for code in codes)
|
|
31
|
-
total -= _library_weight("HOH") * (len(codes) - 1)
|
|
32
|
-
return total
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def _polymer_volume(polymer: Polymer) -> float:
|
|
36
|
-
density = 1.35 if polymer.type == PolymerType.PROTEIN else 2.0
|
|
37
|
-
return _polymer_weight(polymer) / (density * 0.602214)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def _smiles_volume(smiles: str) -> float:
|
|
41
|
-
atoms = re.findall(pattern="[A-Z][a-z]?", string=smiles)
|
|
42
|
-
return 18 * len(atoms)
|
|
43
4
|
|
|
5
|
+
import gemmi
|
|
44
6
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
item.volume * item.stoichiometry for item in _volume_components(contents)
|
|
48
|
-
)
|
|
7
|
+
from .contents import AsuContents
|
|
8
|
+
from .monlib import MonLib
|
|
49
9
|
|
|
50
10
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
(
|
|
63
|
-
|
|
64
|
-
):
|
|
65
|
-
for polymer in polymers:
|
|
66
|
-
sequence = polymer.sequence
|
|
67
|
-
description = f"{kind} with {len(sequence)} residues: "
|
|
68
|
-
if len(sequence) > 9:
|
|
69
|
-
description += f"{sequence[:3]}...{sequence[-3:]}"
|
|
70
|
-
else:
|
|
71
|
-
description += f"{sequence:9}"
|
|
72
|
-
stoichiometry = polymer.stoichiometry or 1
|
|
73
|
-
stoichiometry_assumed = polymer.stoichiometry is None
|
|
74
|
-
volume = _polymer_volume(polymer)
|
|
75
|
-
yield _VolumeComponent(
|
|
76
|
-
description, stoichiometry, stoichiometry_assumed, volume
|
|
77
|
-
)
|
|
78
|
-
for carb in contents.carbs:
|
|
79
|
-
description = "Carb:"
|
|
80
|
-
stoichiometry = carb.stoichiometry or 1
|
|
81
|
-
stoichiometry_assumed = carb.stoichiometry is None
|
|
82
|
-
volume = 0
|
|
83
|
-
length = 0
|
|
84
|
-
for code, count in carb.codes.items():
|
|
85
|
-
description += f" {count}x{code}"
|
|
86
|
-
length += count
|
|
87
|
-
if code in contents.smiles:
|
|
88
|
-
volume += _smiles_volume(contents.smiles[code]) * count
|
|
89
|
-
else:
|
|
90
|
-
volume += _library_volume(code) * count
|
|
91
|
-
volume -= _library_volume("HOH") * length
|
|
92
|
-
yield _VolumeComponent(
|
|
93
|
-
description, stoichiometry, stoichiometry_assumed, volume
|
|
94
|
-
)
|
|
95
|
-
for ligand in contents.ligands:
|
|
96
|
-
description = "Ligand: " + ligand.code
|
|
97
|
-
stoichiometry = ligand.stoichiometry or 1
|
|
98
|
-
stoichiometry_assumed = ligand.stoichiometry is None
|
|
99
|
-
if ligand.code in contents.smiles:
|
|
100
|
-
volume = _smiles_volume(contents.smiles[ligand.code])
|
|
101
|
-
else:
|
|
102
|
-
volume = _library_volume(ligand.code)
|
|
103
|
-
yield _VolumeComponent(
|
|
104
|
-
description, stoichiometry, stoichiometry_assumed, volume
|
|
105
|
-
)
|
|
11
|
+
def solvent_fraction(
|
|
12
|
+
contents: AsuContents,
|
|
13
|
+
cell: gemmi.UnitCell,
|
|
14
|
+
spacegroup: gemmi.SpaceGroup,
|
|
15
|
+
resolution: float,
|
|
16
|
+
monlib: MonLib = None,
|
|
17
|
+
) -> float:
|
|
18
|
+
monlib = monlib or MonLib(contents.monomer_codes(), include_standard=True)
|
|
19
|
+
asu_volume = cell.volume / len(spacegroup.operations())
|
|
20
|
+
copies = contents.copies
|
|
21
|
+
if copies is None:
|
|
22
|
+
copies = _guess_copies(contents, cell, spacegroup, resolution, monlib)
|
|
23
|
+
return 1 - copies * contents.volume(monlib) / asu_volume
|
|
106
24
|
|
|
107
25
|
|
|
108
26
|
@dataclasses.dataclass
|
|
109
|
-
class
|
|
27
|
+
class CopiesOption:
|
|
110
28
|
copies: int
|
|
111
29
|
solvent: float
|
|
112
30
|
probability: float
|
|
113
31
|
|
|
114
32
|
|
|
115
|
-
def
|
|
33
|
+
def copies_options(
|
|
34
|
+
contents: AsuContents,
|
|
35
|
+
cell: gemmi.UnitCell,
|
|
36
|
+
spacegroup: gemmi.SpaceGroup,
|
|
37
|
+
resolution: float,
|
|
38
|
+
monlib: MonLib,
|
|
39
|
+
) -> list:
|
|
116
40
|
options = []
|
|
117
41
|
nucleic_acids = contents.rnas + contents.dnas
|
|
118
|
-
mwp = sum(
|
|
119
|
-
mwn = sum(
|
|
120
|
-
asu_volume =
|
|
121
|
-
contents_volume =
|
|
122
|
-
resolution = mtz.resolution_high()
|
|
42
|
+
mwp = sum(p.weight(monlib) * (p.stoichiometry or 1) for p in contents.proteins)
|
|
43
|
+
mwn = sum(n.weight(monlib) * (n.stoichiometry or 1) for n in nucleic_acids)
|
|
44
|
+
asu_volume = cell.volume / len(spacegroup.operations())
|
|
45
|
+
contents_volume = contents.volume(monlib)
|
|
123
46
|
total_probability = 0
|
|
124
47
|
for copies in range(1, 60):
|
|
125
48
|
solvent = 1 - copies * contents_volume / asu_volume
|
|
126
|
-
probability =
|
|
49
|
+
probability = _matthews_probability(mwp, mwn, copies, asu_volume, resolution)
|
|
127
50
|
if solvent < 0:
|
|
128
51
|
break
|
|
129
|
-
options.append(
|
|
52
|
+
options.append(CopiesOption(copies, solvent, probability))
|
|
130
53
|
total_probability += probability
|
|
131
54
|
for option in options:
|
|
132
55
|
option.probability /= total_probability
|
|
133
56
|
return options
|
|
134
57
|
|
|
135
58
|
|
|
136
|
-
def _guess_copies(
|
|
137
|
-
|
|
59
|
+
def _guess_copies(
|
|
60
|
+
contents: AsuContents,
|
|
61
|
+
cell: gemmi.UnitCell,
|
|
62
|
+
spacegroup: gemmi.SpaceGroup,
|
|
63
|
+
resolution: float,
|
|
64
|
+
monlib: MonLib,
|
|
65
|
+
) -> int:
|
|
66
|
+
options = copies_options(contents, cell, spacegroup, resolution, monlib)
|
|
138
67
|
if len(options) == 0:
|
|
139
68
|
raise ValueError("Contents are too big to fit into the asymmetric unit")
|
|
140
69
|
chosen = max(options, key=lambda option: option.probability)
|
|
@@ -165,15 +94,15 @@ _MATTHEWS_PROBABILITY_SETTINGS = [
|
|
|
165
94
|
]
|
|
166
95
|
|
|
167
96
|
|
|
168
|
-
def
|
|
97
|
+
def _matthews_probability(
|
|
169
98
|
protein_mw: float,
|
|
170
99
|
nucleic_mw: float,
|
|
171
100
|
copies: int,
|
|
172
101
|
asu_volume: float,
|
|
173
102
|
resolution: float,
|
|
174
103
|
) -> float:
|
|
175
|
-
total_mw = protein_mw + nucleic_mw
|
|
176
|
-
|
|
104
|
+
total_mw = (protein_mw + nucleic_mw) * copies
|
|
105
|
+
matthews = asu_volume / total_mw
|
|
177
106
|
if protein_mw > 0.9 * total_mw:
|
|
178
107
|
for index in range(12):
|
|
179
108
|
if resolution < _MATTHEWS_PROBABILITY_SETTINGS[index].rbin:
|
|
@@ -183,5 +112,5 @@ def _probability(
|
|
|
183
112
|
else:
|
|
184
113
|
index = 14
|
|
185
114
|
_, p0, vmbar, w, a, s = _MATTHEWS_PROBABILITY_SETTINGS[index]
|
|
186
|
-
z = (
|
|
115
|
+
z = (matthews - vmbar) / w
|
|
187
116
|
return p0 + a * (math.exp(-math.exp(-z) - z * s + 1))
|
modelcraft/structure.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from typing import Iterator
|
|
2
|
+
|
|
2
3
|
import gemmi
|
|
3
|
-
|
|
4
|
+
|
|
5
|
+
from .monlib import MonLib
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
def read_structure(path: str) -> gemmi.Structure:
|
|
@@ -10,25 +12,11 @@ def read_structure(path: str) -> gemmi.Structure:
|
|
|
10
12
|
# TODO: Currently altconfs appear in CIF auth_atom_id after sheetbend
|
|
11
13
|
# TODO: Keep alternative conformations after problem is fixed
|
|
12
14
|
structure.remove_alternative_conformations()
|
|
15
|
+
_remove_point_mutations(structure)
|
|
13
16
|
_patch_names(structure)
|
|
14
17
|
return structure
|
|
15
18
|
|
|
16
19
|
|
|
17
|
-
def consecutive_residues(chain: gemmi.Chain):
|
|
18
|
-
"Iterate through lists of residues with consecutive seqnums (first conformer only)"
|
|
19
|
-
consecutive = []
|
|
20
|
-
last_seqnum = None
|
|
21
|
-
for residue in chain.first_conformer():
|
|
22
|
-
if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
|
|
23
|
-
consecutive.append(residue)
|
|
24
|
-
else:
|
|
25
|
-
yield consecutive
|
|
26
|
-
consecutive = [residue]
|
|
27
|
-
last_seqnum = residue.seqid.num
|
|
28
|
-
if len(consecutive) > 0:
|
|
29
|
-
yield consecutive
|
|
30
|
-
|
|
31
|
-
|
|
32
20
|
def contains_residue(structure: gemmi.Structure, name: str) -> bool:
|
|
33
21
|
return any(residue.name == name for residue in _residues(structure))
|
|
34
22
|
|
|
@@ -42,15 +30,6 @@ def remove_residues(structure: gemmi.Structure, names) -> None:
|
|
|
42
30
|
structure.remove_empty_chains()
|
|
43
31
|
|
|
44
32
|
|
|
45
|
-
def remove_non_library_atoms(structure: gemmi.Structure) -> None:
|
|
46
|
-
for residue in _residues(structure):
|
|
47
|
-
if in_library(residue.name):
|
|
48
|
-
for i, atom in reversed(list(enumerate(residue))):
|
|
49
|
-
if atom.name not in atom_ids(residue.name):
|
|
50
|
-
del residue[i]
|
|
51
|
-
structure.remove_empty_chains()
|
|
52
|
-
|
|
53
|
-
|
|
54
33
|
def remove_non_protein(structure: gemmi.Structure) -> None:
|
|
55
34
|
for model in structure:
|
|
56
35
|
for chain in model:
|
|
@@ -68,13 +47,15 @@ def write_mmcif(path: str, structure: gemmi.Structure) -> None:
|
|
|
68
47
|
|
|
69
48
|
|
|
70
49
|
class ModelStats:
|
|
71
|
-
def __init__(self, structure: gemmi.Structure):
|
|
50
|
+
def __init__(self, structure: gemmi.Structure, monlib: MonLib = None):
|
|
72
51
|
self.residues: int = 0
|
|
73
52
|
self.protein: int = 0
|
|
74
53
|
self.nucleic: int = 0
|
|
75
54
|
self.waters: int = 0
|
|
76
55
|
self.dummy_atoms: int = 0
|
|
77
56
|
|
|
57
|
+
monlib = monlib or MonLib(structure[0].get_all_residue_names())
|
|
58
|
+
|
|
78
59
|
for residue in _residues(structure):
|
|
79
60
|
if residue.name == "HOH":
|
|
80
61
|
self.waters += 1
|
|
@@ -82,24 +63,11 @@ class ModelStats:
|
|
|
82
63
|
self.dummy_atoms += 1
|
|
83
64
|
else:
|
|
84
65
|
self.residues += 1
|
|
85
|
-
if is_protein(residue.name):
|
|
66
|
+
if monlib.is_protein(residue.name):
|
|
86
67
|
self.protein += 1
|
|
87
|
-
if is_nucleic(residue.name):
|
|
68
|
+
if monlib.is_nucleic(residue.name):
|
|
88
69
|
self.nucleic += 1
|
|
89
70
|
|
|
90
|
-
def __eq__(self, other):
|
|
91
|
-
if isinstance(other, ModelStats):
|
|
92
|
-
return (
|
|
93
|
-
self.residues == other.residues
|
|
94
|
-
and self.waters == other.waters
|
|
95
|
-
and self.dummy_atoms == other.dummy_atoms
|
|
96
|
-
)
|
|
97
|
-
return NotImplemented
|
|
98
|
-
|
|
99
|
-
def __ne__(self, other):
|
|
100
|
-
equal = self.__eq__(other)
|
|
101
|
-
return NotImplemented if equal is not NotImplemented else not equal
|
|
102
|
-
|
|
103
71
|
|
|
104
72
|
def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
|
|
105
73
|
for model in structure:
|
|
@@ -108,6 +76,19 @@ def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
|
|
|
108
76
|
yield residue
|
|
109
77
|
|
|
110
78
|
|
|
79
|
+
def _remove_point_mutations(structure: gemmi.Structure) -> None:
|
|
80
|
+
for model in structure:
|
|
81
|
+
to_remove = []
|
|
82
|
+
for chain in model:
|
|
83
|
+
for group in chain.whole().residue_groups():
|
|
84
|
+
for i in range(1, len(group)):
|
|
85
|
+
residue = group[i]
|
|
86
|
+
key = (chain.name, str(residue.seqid), residue.name)
|
|
87
|
+
to_remove.append(key)
|
|
88
|
+
for chain_name, residue_seqid, residue_name in to_remove:
|
|
89
|
+
del model[chain_name][residue_seqid][residue_name]
|
|
90
|
+
|
|
91
|
+
|
|
111
92
|
def _patch_names(structure: gemmi.Structure) -> None:
|
|
112
93
|
residue_patches = {"SUL": "SO4"}
|
|
113
94
|
atom_patches = {("HOH", "O1"): "O"}
|
|
@@ -117,3 +98,45 @@ def _patch_names(structure: gemmi.Structure) -> None:
|
|
|
117
98
|
for atom in residue:
|
|
118
99
|
atom.name = atom.name.strip()
|
|
119
100
|
atom.name = atom_patches.get((residue.name, atom.name), atom.name)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _are_connected(
|
|
104
|
+
residue1: gemmi.Residue, residue2: gemmi.Residue, monlib: MonLib
|
|
105
|
+
) -> bool:
|
|
106
|
+
if (
|
|
107
|
+
monlib.is_protein(residue1.name)
|
|
108
|
+
and monlib.is_protein(residue2.name)
|
|
109
|
+
and "C" in residue1
|
|
110
|
+
and "N" in residue2
|
|
111
|
+
):
|
|
112
|
+
for atom1 in residue1["C"]:
|
|
113
|
+
for atom2 in residue2["N"]:
|
|
114
|
+
if atom1.pos.dist(atom2.pos) < 2.5:
|
|
115
|
+
return True
|
|
116
|
+
if (
|
|
117
|
+
monlib.is_nucleic(residue1.name)
|
|
118
|
+
and monlib.is_nucleic(residue2.name)
|
|
119
|
+
and "O3'" in residue1
|
|
120
|
+
and "P" in residue2
|
|
121
|
+
):
|
|
122
|
+
for atom1 in residue1["O3'"]:
|
|
123
|
+
for atom2 in residue2["P"]:
|
|
124
|
+
if atom1.pos.dist(atom2.pos) < 2.5:
|
|
125
|
+
return True
|
|
126
|
+
return False
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def remove_isolated_fragments(chain: gemmi.Chain, monlib: MonLib, max_length: int):
|
|
130
|
+
to_remove = []
|
|
131
|
+
fragment = []
|
|
132
|
+
for i, residue in enumerate(chain):
|
|
133
|
+
if i > 0 and _are_connected(chain[i - 1], residue, monlib):
|
|
134
|
+
fragment.append(i)
|
|
135
|
+
else:
|
|
136
|
+
if len(fragment) <= max_length:
|
|
137
|
+
to_remove.extend(fragment)
|
|
138
|
+
fragment = [i]
|
|
139
|
+
if len(fragment) <= max_length:
|
|
140
|
+
to_remove.extend(fragment)
|
|
141
|
+
for i in reversed(to_remove):
|
|
142
|
+
del chain[i]
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import os
|
|
3
3
|
import shutil
|
|
4
|
-
import uuid
|
|
5
4
|
import urllib.request
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
6
7
|
import gemmi
|
|
8
|
+
|
|
7
9
|
from modelcraft.contents import AsuContents, Ligand, Polymer, PolymerType
|
|
8
10
|
from modelcraft.jobs.refmac import Refmac
|
|
9
11
|
from modelcraft.reflections import DataItem
|
|
@@ -16,7 +18,7 @@ def ccp4_path(*paths: str) -> str:
|
|
|
16
18
|
|
|
17
19
|
def in_temp_directory(func):
|
|
18
20
|
def wrapper():
|
|
19
|
-
tmp_dir = "tmp
|
|
21
|
+
tmp_dir = f"tmp{uuid.uuid4()}"
|
|
20
22
|
os.mkdir(tmp_dir)
|
|
21
23
|
os.chdir(tmp_dir)
|
|
22
24
|
try:
|
|
@@ -66,7 +68,6 @@ def insulin_refmac():
|
|
|
66
68
|
|
|
67
69
|
@functools.lru_cache(maxsize=None)
|
|
68
70
|
def insulin_contents():
|
|
69
|
-
contents = AsuContents()
|
|
70
71
|
chain_a = Polymer(
|
|
71
72
|
sequence="GIVEQCCASVCSLYQLENYCN",
|
|
72
73
|
polymer_type=PolymerType.PROTEIN,
|
|
@@ -75,9 +76,8 @@ def insulin_contents():
|
|
|
75
76
|
sequence="FVNQHLCGSHLVEALYLVCGERGFFYTPKA",
|
|
76
77
|
polymer_type=PolymerType.PROTEIN,
|
|
77
78
|
)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
return contents
|
|
79
|
+
ligand = Ligand("GOL")
|
|
80
|
+
return AsuContents(proteins=[chain_a, chain_b], ligands=[ligand])
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
@functools.lru_cache(maxsize=None)
|
|
@@ -92,8 +92,4 @@ def pdb1rxf_contents():
|
|
|
92
92
|
)
|
|
93
93
|
protein = Polymer(sequence=sequence, polymer_type=PolymerType.PROTEIN)
|
|
94
94
|
ligand = Ligand(code="FE")
|
|
95
|
-
|
|
96
|
-
contents.proteins.append(protein)
|
|
97
|
-
contents.ligands.append(ligand)
|
|
98
|
-
contents.copies = 1
|
|
99
|
-
return contents
|
|
95
|
+
return AsuContents(copies=1, proteins=[protein], ligands=[ligand])
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from modelcraft.jobs.buccaneer import Buccaneer, _known_structure_ids
|
|
2
2
|
from modelcraft.structure import ModelStats, read_structure
|
|
3
|
+
|
|
3
4
|
from . import (
|
|
4
5
|
in_temp_directory,
|
|
5
|
-
|
|
6
|
+
insulin_contents,
|
|
6
7
|
insulin_freer,
|
|
8
|
+
insulin_fsigf,
|
|
7
9
|
insulin_refmac,
|
|
8
|
-
insulin_contents,
|
|
9
10
|
pdbe_download,
|
|
10
11
|
)
|
|
11
12
|
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import urllib.request
|
|
2
|
+
|
|
2
3
|
import gemmi
|
|
4
|
+
|
|
3
5
|
from modelcraft.cell import max_distortion, remove_scale, update_cell
|
|
4
6
|
from modelcraft.structure import read_structure
|
|
7
|
+
|
|
5
8
|
from . import in_temp_directory
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
@in_temp_directory
|
|
9
12
|
def test_1ana():
|
|
10
|
-
url = "https://
|
|
13
|
+
url = "https://ftp.ebi.ac.uk/pub/databases/pdb_versioned/data/entries/"
|
|
11
14
|
url += "an/pdb_00001ana/pdb_00001ana_xyz_v1-2.cif.gz"
|
|
12
15
|
urllib.request.urlretrieve(url, "1ana.cif.gz")
|
|
13
16
|
structure = read_structure("1ana.cif.gz")
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from
|
|
1
|
+
from pytest import approx
|
|
2
|
+
|
|
3
|
+
from modelcraft.contents import AsuContents, Polymer, PolymerType
|
|
4
|
+
from modelcraft.monlib import MonLib
|
|
2
5
|
|
|
3
6
|
|
|
4
7
|
def _test_contents(entry: str, expected_json: list, selenomet: bool):
|
|
5
|
-
contents =
|
|
8
|
+
contents = AsuContents.from_pdbe(entry)
|
|
6
9
|
assert contents.to_json() == expected_json
|
|
7
10
|
assert contents.is_selenomet() == selenomet
|
|
8
11
|
return contents
|
|
@@ -13,7 +16,10 @@ def test_1o6a():
|
|
|
13
16
|
"copies": 2,
|
|
14
17
|
"proteins": [
|
|
15
18
|
{
|
|
16
|
-
"sequence":
|
|
19
|
+
"sequence": (
|
|
20
|
+
"SETRKTEVPSDKLELLLDIPLKVTVELGRTRMTLKRVLEMIHGSIIELDKLTGEPVDILV"
|
|
21
|
+
"NGKLIARGEVVVIDENFGVRITEIVSPKERLELLNE"
|
|
22
|
+
),
|
|
17
23
|
"stoichiometry": 1,
|
|
18
24
|
"modifications": ["M->MSE"],
|
|
19
25
|
}
|
|
@@ -23,7 +29,6 @@ def test_1o6a():
|
|
|
23
29
|
"carbs": [],
|
|
24
30
|
"ligands": [],
|
|
25
31
|
"buffers": [],
|
|
26
|
-
"smiles": {},
|
|
27
32
|
}
|
|
28
33
|
_test_contents("1o6a", expected, selenomet=True)
|
|
29
34
|
|
|
@@ -34,7 +39,11 @@ def test_4gxy():
|
|
|
34
39
|
"proteins": [],
|
|
35
40
|
"rnas": [
|
|
36
41
|
{
|
|
37
|
-
"sequence":
|
|
42
|
+
"sequence": (
|
|
43
|
+
"GGCGGCAGGUGCUCCCGACCCUGCGGUCGGGAGUUAAAAGGGAAGCCGGUGCAAGUCCGG"
|
|
44
|
+
"CACGGUCCCGCCACUGUGACGGGGAGUCGCCCCUCGGGAUGUGCCACUGGCCCGAAGGCC"
|
|
45
|
+
"GGGAAGGCGGAGGGGCGGCGAGGAUCCGGAGUCAGGAAACCUGCCUGCCGUC"
|
|
46
|
+
),
|
|
38
47
|
"stoichiometry": 1,
|
|
39
48
|
"modifications": ["1->GTP", "172->CCC"],
|
|
40
49
|
}
|
|
@@ -46,7 +55,6 @@ def test_4gxy():
|
|
|
46
55
|
{"code": "IRI", "stoichiometry": 7},
|
|
47
56
|
],
|
|
48
57
|
"buffers": ["MG"],
|
|
49
|
-
"smiles": {},
|
|
50
58
|
}
|
|
51
59
|
_test_contents("4gxy", expected, selenomet=False)
|
|
52
60
|
|
|
@@ -56,7 +64,24 @@ def test_6as7():
|
|
|
56
64
|
"copies": 1,
|
|
57
65
|
"proteins": [
|
|
58
66
|
{
|
|
59
|
-
"sequence":
|
|
67
|
+
"sequence": (
|
|
68
|
+
"DEEQVFHFYWLDAYEDQYNQPGVVFLFGKVWIESAETHVSCCVMVKNIERTLYFLPREMK"
|
|
69
|
+
"IDLNTGKETGTPISMKDVYEEFDEKIATKYKIMKFKSKPVEKNYAFEIPDVPEKSEYLEV"
|
|
70
|
+
"KYSAEMPQLPQDLKGETFSHVFGTNTSSLELFLMNRKIKGPCWLEVKSPQLLNQPVSWCK"
|
|
71
|
+
"AEAMALKPDLVNVIKDVSPPPLVVMAFSMKTMQNAKNHQNEIIAMAALVHHSFALDKAAPK"
|
|
72
|
+
"PPFQSHFCVVSKPKDCIFPYAFKEVIEKKNVKVEVAATERTLLGFFLAKVHKIDPDIIVGH"
|
|
73
|
+
"NIYGFELEVLLQRINVCKAPHWSKIGRLKRSNMPKLGGRSGFGERNATCGRMICDVEISAK"
|
|
74
|
+
"ELIRCKSYHLSELVQQILKTERVVIPMENIQNMYSESSQLLYLLEHTWKDAKFILQIMCEL"
|
|
75
|
+
"NVLPLALQITNIAGNIMSRTLMGGRSERNEFLLLHAFYENNYIVPDKQIFRKPQQKLGDED"
|
|
76
|
+
"EEIDGDTNKYKKGRKKAAYAGGLVLDPKVGFYDKFILLLDFNSLYPSIIQEFNICFTTVQR"
|
|
77
|
+
"VASEAQKVTEDGEQEQIPELPDPSLEMGILPREIRKLVERRKQVKQLMKQQDLNPDLILQY"
|
|
78
|
+
"DIRQKALKLTANSMYGCLGFSYSRFYAKPLAALVTYKGREILMHTKEMVQKMNLEVIYGDT"
|
|
79
|
+
"DSIMINTNSTNLEEVFKLGNKVKSEVNKLYKLLEIDIDGVFKSLLLLKKKKYAALVVEPTS"
|
|
80
|
+
"DGNYVTKQELKGLDIVRRDWCDLAKDTGNFVIGQILSDQSRDTIVENIQKRLIEIGENVLN"
|
|
81
|
+
"GSVPVSQFEINKALTKDPQDYPDKKSLPHVHVALWINSQGGRKVKAGDTVSYVICQDGSNL"
|
|
82
|
+
"TASQRAYAPEQLQKQDNLTIDTQYYLAQQIHPVVARICEPIDGIDAVLIATWLGLDPTQFR"
|
|
83
|
+
"VHHYHKDEEN"
|
|
84
|
+
),
|
|
60
85
|
"stoichiometry": 1,
|
|
61
86
|
"modifications": [],
|
|
62
87
|
}
|
|
@@ -77,7 +102,6 @@ def test_6as7():
|
|
|
77
102
|
"carbs": [],
|
|
78
103
|
"ligands": [{"code": "DCP", "stoichiometry": 1}],
|
|
79
104
|
"buffers": ["MG", "CO"],
|
|
80
|
-
"smiles": {},
|
|
81
105
|
}
|
|
82
106
|
_test_contents("6as7", expected, selenomet=False)
|
|
83
107
|
|
|
@@ -87,7 +111,17 @@ def test_4aqd():
|
|
|
87
111
|
"copies": 1,
|
|
88
112
|
"proteins": [
|
|
89
113
|
{
|
|
90
|
-
"sequence":
|
|
114
|
+
"sequence": (
|
|
115
|
+
"RSEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNA"
|
|
116
|
+
"TKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGF"
|
|
117
|
+
"QTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWV"
|
|
118
|
+
"QKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYE"
|
|
119
|
+
"ARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDF"
|
|
120
|
+
"LTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFF"
|
|
121
|
+
"PGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFF"
|
|
122
|
+
"YYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYG"
|
|
123
|
+
"NPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKV"
|
|
124
|
+
),
|
|
91
125
|
"stoichiometry": 2,
|
|
92
126
|
"modifications": [],
|
|
93
127
|
}
|
|
@@ -105,8 +139,7 @@ def test_4aqd():
|
|
|
105
139
|
{"code": "PG4", "stoichiometry": 2},
|
|
106
140
|
{"code": "PEG", "stoichiometry": 2},
|
|
107
141
|
],
|
|
108
|
-
"buffers": ["EDO", "CL", "GLY"],
|
|
109
|
-
"smiles": {},
|
|
142
|
+
"buffers": ["EDO", "UNX", "CL", "GLY"],
|
|
110
143
|
}
|
|
111
144
|
_test_contents("4aqd", expected, selenomet=False)
|
|
112
145
|
|
|
@@ -116,7 +149,13 @@ def test_1vjr():
|
|
|
116
149
|
"copies": 1,
|
|
117
150
|
"proteins": [
|
|
118
151
|
{
|
|
119
|
-
"sequence":
|
|
152
|
+
"sequence": (
|
|
153
|
+
"MGSDKIHHHHHHVLDKIELFILDMDGTFYLDDSLLPGSLEFLETLKEKNKRFVFFTNNSS"
|
|
154
|
+
"LGAQDYVRKLRNMGVDVPDDAVVTSGEITAEHMLKRFGRCRIFLLGTPQLKKVFEAYGHV"
|
|
155
|
+
"IDEENPDFVVLGFDKTLTYERLKKACILLRKGKFYIATHPDINCPSKEGPVPDAGSIMAA"
|
|
156
|
+
"IEASTGRKPDLIAGKPNPLVVDVISEKFGVPKERMAMVGDRLYTDVKLGKNAGIVSILVL"
|
|
157
|
+
"TGETTPEDLERAETKPDFVFKNLGELAKAVQ"
|
|
158
|
+
),
|
|
120
159
|
"stoichiometry": 1,
|
|
121
160
|
"modifications": ["M->MSE"],
|
|
122
161
|
}
|
|
@@ -126,7 +165,6 @@ def test_1vjr():
|
|
|
126
165
|
"carbs": [],
|
|
127
166
|
"ligands": [],
|
|
128
167
|
"buffers": ["NI", "CL"],
|
|
129
|
-
"smiles": {},
|
|
130
168
|
}
|
|
131
169
|
_test_contents("1vjr", expected, selenomet=True)
|
|
132
170
|
|
|
@@ -157,7 +195,6 @@ def test_1cag():
|
|
|
157
195
|
"carbs": [],
|
|
158
196
|
"ligands": [],
|
|
159
197
|
"buffers": ["ACY"],
|
|
160
|
-
"smiles": {},
|
|
161
198
|
}
|
|
162
199
|
contents = _test_contents("1cag", expected, selenomet=False)
|
|
163
200
|
polymer = contents.proteins[0]
|
|
@@ -182,10 +219,55 @@ def test_1iha():
|
|
|
182
219
|
"carbs": [],
|
|
183
220
|
"ligands": [{"code": "RHD", "stoichiometry": 1}],
|
|
184
221
|
"buffers": ["CL"],
|
|
185
|
-
"smiles": {},
|
|
186
222
|
}
|
|
187
223
|
_test_contents("1iha", expected, selenomet=False)
|
|
188
224
|
|
|
189
225
|
|
|
190
|
-
def
|
|
191
|
-
|
|
226
|
+
def test_3ue7():
|
|
227
|
+
expected = {
|
|
228
|
+
"copies": 1,
|
|
229
|
+
"proteins": [
|
|
230
|
+
{
|
|
231
|
+
"sequence": "TTCCPSIVARSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
|
|
232
|
+
"stoichiometry": 1,
|
|
233
|
+
"modifications": [
|
|
234
|
+
"T->DTH",
|
|
235
|
+
"C->DCY",
|
|
236
|
+
"P->DPR",
|
|
237
|
+
"S->DSN",
|
|
238
|
+
"I->DIL",
|
|
239
|
+
"V->DVA",
|
|
240
|
+
"A->DAL",
|
|
241
|
+
"R->DAR",
|
|
242
|
+
"N->DSG",
|
|
243
|
+
"F->DPN",
|
|
244
|
+
"L->DLE",
|
|
245
|
+
"E->DGL",
|
|
246
|
+
"Y->DTY",
|
|
247
|
+
"D->DAS",
|
|
248
|
+
],
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
"sequence": "TTCCPSIVAKSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
|
|
252
|
+
"stoichiometry": 1,
|
|
253
|
+
"modifications": [],
|
|
254
|
+
},
|
|
255
|
+
],
|
|
256
|
+
"rnas": [],
|
|
257
|
+
"dnas": [],
|
|
258
|
+
"carbs": [],
|
|
259
|
+
"ligands": [],
|
|
260
|
+
"buffers": [],
|
|
261
|
+
}
|
|
262
|
+
_test_contents("3ue7", expected, selenomet=False)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def test_5vz8():
|
|
266
|
+
contents = AsuContents.from_pdbe("5vz8")
|
|
267
|
+
contents.monomer_codes()
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def test_polymer_weight():
|
|
271
|
+
polymer = Polymer("GG", polymer_type=PolymerType.PROTEIN)
|
|
272
|
+
monlib = MonLib(["GLY"], include_standard=True)
|
|
273
|
+
assert polymer.weight(monlib) == approx(132.12, abs=0.01)
|