modelcraft 5.0.3__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +36 -2
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.3.dist-info/METADATA +0 -49
  88. modelcraft-5.0.3.dist-info/RECORD +0 -82
  89. modelcraft-5.0.3.dist-info/licenses/LICENSE +0 -504
  90. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/solvent.py CHANGED
@@ -1,140 +1,69 @@
1
1
  import collections
2
2
  import dataclasses
3
- import functools
4
3
  import math
5
- import re
6
- import gemmi
7
- from .contents import AsuContents, Polymer, PolymerType
8
- from .monlib import chemcomp
9
-
10
-
11
- def solvent_fraction(contents: AsuContents, mtz: gemmi.Mtz) -> float:
12
- volume = _contents_volume(contents)
13
- asu_volume = mtz.cell.volume / len(mtz.spacegroup.operations())
14
- copies = contents.copies or _guess_copies(contents, mtz)
15
- return 1 - copies * volume / asu_volume
16
-
17
-
18
- @functools.lru_cache(maxsize=None)
19
- def _library_weight(code: str) -> float:
20
- return sum(atom.el.weight for atom in chemcomp(code).atoms)
21
-
22
-
23
- @functools.lru_cache(maxsize=None)
24
- def _library_volume(code: str) -> float:
25
- return sum(18 for atom in chemcomp(code).atoms if not atom.is_hydrogen())
26
-
27
-
28
- def _polymer_weight(polymer: Polymer) -> float:
29
- codes = polymer.residue_codes(modified=False)
30
- total = sum(_library_weight(code) for code in codes)
31
- total -= _library_weight("HOH") * (len(codes) - 1)
32
- return total
33
-
34
-
35
- def _polymer_volume(polymer: Polymer) -> float:
36
- density = 1.35 if polymer.type == PolymerType.PROTEIN else 2.0
37
- return _polymer_weight(polymer) / (density * 0.602214)
38
-
39
-
40
- def _smiles_volume(smiles: str) -> float:
41
- atoms = re.findall(pattern="[A-Z][a-z]?", string=smiles)
42
- return 18 * len(atoms)
43
4
 
5
+ import gemmi
44
6
 
45
- def _contents_volume(contents: AsuContents) -> float:
46
- return sum(
47
- item.volume * item.stoichiometry for item in _volume_components(contents)
48
- )
7
+ from .contents import AsuContents
8
+ from .monlib import MonLib
49
9
 
50
10
 
51
- @dataclasses.dataclass
52
- class _VolumeComponent:
53
- description: str
54
- stoichiometry: int
55
- stoichiometry_assumed: bool
56
- volume: float
57
-
58
-
59
- def _volume_components(contents: AsuContents):
60
- for kind, polymers in (
61
- ("Protein", contents.proteins),
62
- ("RNA", contents.rnas),
63
- ("DNA", contents.dnas),
64
- ):
65
- for polymer in polymers:
66
- sequence = polymer.sequence
67
- description = f"{kind} with {len(sequence)} residues: "
68
- if len(sequence) > 9:
69
- description += f"{sequence[:3]}...{sequence[-3:]}"
70
- else:
71
- description += f"{sequence:9}"
72
- stoichiometry = polymer.stoichiometry or 1
73
- stoichiometry_assumed = polymer.stoichiometry is None
74
- volume = _polymer_volume(polymer)
75
- yield _VolumeComponent(
76
- description, stoichiometry, stoichiometry_assumed, volume
77
- )
78
- for carb in contents.carbs:
79
- description = "Carb:"
80
- stoichiometry = carb.stoichiometry or 1
81
- stoichiometry_assumed = carb.stoichiometry is None
82
- volume = 0
83
- length = 0
84
- for code, count in carb.codes.items():
85
- description += f" {count}x{code}"
86
- length += count
87
- if code in contents.smiles:
88
- volume += _smiles_volume(contents.smiles[code]) * count
89
- else:
90
- volume += _library_volume(code) * count
91
- volume -= _library_volume("HOH") * length
92
- yield _VolumeComponent(
93
- description, stoichiometry, stoichiometry_assumed, volume
94
- )
95
- for ligand in contents.ligands:
96
- description = "Ligand: " + ligand.code
97
- stoichiometry = ligand.stoichiometry or 1
98
- stoichiometry_assumed = ligand.stoichiometry is None
99
- if ligand.code in contents.smiles:
100
- volume = _smiles_volume(contents.smiles[ligand.code])
101
- else:
102
- volume = _library_volume(ligand.code)
103
- yield _VolumeComponent(
104
- description, stoichiometry, stoichiometry_assumed, volume
105
- )
11
+ def solvent_fraction(
12
+ contents: AsuContents,
13
+ cell: gemmi.UnitCell,
14
+ spacegroup: gemmi.SpaceGroup,
15
+ resolution: float,
16
+ monlib: MonLib = None,
17
+ ) -> float:
18
+ monlib = monlib or MonLib(contents.monomer_codes(), include_standard=True)
19
+ asu_volume = cell.volume / len(spacegroup.operations())
20
+ copies = contents.copies
21
+ if copies is None:
22
+ copies = _guess_copies(contents, cell, spacegroup, resolution, monlib)
23
+ return 1 - copies * contents.volume(monlib) / asu_volume
106
24
 
107
25
 
108
26
  @dataclasses.dataclass
109
- class _CopiesOption:
27
+ class CopiesOption:
110
28
  copies: int
111
29
  solvent: float
112
30
  probability: float
113
31
 
114
32
 
115
- def _copies_options(contents: AsuContents, mtz: gemmi.Mtz) -> list:
33
+ def copies_options(
34
+ contents: AsuContents,
35
+ cell: gemmi.UnitCell,
36
+ spacegroup: gemmi.SpaceGroup,
37
+ resolution: float,
38
+ monlib: MonLib,
39
+ ) -> list:
116
40
  options = []
117
41
  nucleic_acids = contents.rnas + contents.dnas
118
- mwp = sum(_polymer_weight(p) * (p.stoichiometry or 1) for p in contents.proteins)
119
- mwn = sum(_polymer_weight(n) * (n.stoichiometry or 1) for n in nucleic_acids)
120
- asu_volume = mtz.cell.volume / len(mtz.spacegroup.operations())
121
- contents_volume = _contents_volume(contents)
122
- resolution = mtz.resolution_high()
42
+ mwp = sum(p.weight(monlib) * (p.stoichiometry or 1) for p in contents.proteins)
43
+ mwn = sum(n.weight(monlib) * (n.stoichiometry or 1) for n in nucleic_acids)
44
+ asu_volume = cell.volume / len(spacegroup.operations())
45
+ contents_volume = contents.volume(monlib)
123
46
  total_probability = 0
124
47
  for copies in range(1, 60):
125
48
  solvent = 1 - copies * contents_volume / asu_volume
126
- probability = _probability(mwp, mwn, copies, asu_volume, resolution)
49
+ probability = _matthews_probability(mwp, mwn, copies, asu_volume, resolution)
127
50
  if solvent < 0:
128
51
  break
129
- options.append(_CopiesOption(copies, solvent, probability))
52
+ options.append(CopiesOption(copies, solvent, probability))
130
53
  total_probability += probability
131
54
  for option in options:
132
55
  option.probability /= total_probability
133
56
  return options
134
57
 
135
58
 
136
- def _guess_copies(contents: AsuContents, mtz: gemmi.Mtz) -> int:
137
- options = _copies_options(contents, mtz)
59
+ def _guess_copies(
60
+ contents: AsuContents,
61
+ cell: gemmi.UnitCell,
62
+ spacegroup: gemmi.SpaceGroup,
63
+ resolution: float,
64
+ monlib: MonLib,
65
+ ) -> int:
66
+ options = copies_options(contents, cell, spacegroup, resolution, monlib)
138
67
  if len(options) == 0:
139
68
  raise ValueError("Contents are too big to fit into the asymmetric unit")
140
69
  chosen = max(options, key=lambda option: option.probability)
@@ -165,15 +94,15 @@ _MATTHEWS_PROBABILITY_SETTINGS = [
165
94
  ]
166
95
 
167
96
 
168
- def _probability(
97
+ def _matthews_probability(
169
98
  protein_mw: float,
170
99
  nucleic_mw: float,
171
100
  copies: int,
172
101
  asu_volume: float,
173
102
  resolution: float,
174
103
  ) -> float:
175
- total_mw = protein_mw + nucleic_mw
176
- matt = asu_volume / (total_mw * copies)
104
+ total_mw = (protein_mw + nucleic_mw) * copies
105
+ matthews = asu_volume / total_mw
177
106
  if protein_mw > 0.9 * total_mw:
178
107
  for index in range(12):
179
108
  if resolution < _MATTHEWS_PROBABILITY_SETTINGS[index].rbin:
@@ -183,5 +112,5 @@ def _probability(
183
112
  else:
184
113
  index = 14
185
114
  _, p0, vmbar, w, a, s = _MATTHEWS_PROBABILITY_SETTINGS[index]
186
- z = (matt - vmbar) / w
115
+ z = (matthews - vmbar) / w
187
116
  return p0 + a * (math.exp(-math.exp(-z) - z * s + 1))
modelcraft/structure.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from typing import Iterator
2
+
2
3
  import gemmi
3
- from .monlib import atom_ids, in_library, is_protein, is_nucleic
4
+
5
+ from .monlib import MonLib
4
6
 
5
7
 
6
8
  def read_structure(path: str) -> gemmi.Structure:
@@ -10,25 +12,11 @@ def read_structure(path: str) -> gemmi.Structure:
10
12
  # TODO: Currently altconfs appear in CIF auth_atom_id after sheetbend
11
13
  # TODO: Keep alternative conformations after problem is fixed
12
14
  structure.remove_alternative_conformations()
15
+ _remove_point_mutations(structure)
13
16
  _patch_names(structure)
14
17
  return structure
15
18
 
16
19
 
17
- def consecutive_residues(chain: gemmi.Chain):
18
- "Iterate through lists of residues with consecutive seqnums (first conformer only)"
19
- consecutive = []
20
- last_seqnum = None
21
- for residue in chain.first_conformer():
22
- if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
23
- consecutive.append(residue)
24
- else:
25
- yield consecutive
26
- consecutive = [residue]
27
- last_seqnum = residue.seqid.num
28
- if len(consecutive) > 0:
29
- yield consecutive
30
-
31
-
32
20
  def contains_residue(structure: gemmi.Structure, name: str) -> bool:
33
21
  return any(residue.name == name for residue in _residues(structure))
34
22
 
@@ -42,15 +30,6 @@ def remove_residues(structure: gemmi.Structure, names) -> None:
42
30
  structure.remove_empty_chains()
43
31
 
44
32
 
45
- def remove_non_library_atoms(structure: gemmi.Structure) -> None:
46
- for residue in _residues(structure):
47
- if in_library(residue.name):
48
- for i, atom in reversed(list(enumerate(residue))):
49
- if atom.name not in atom_ids(residue.name):
50
- del residue[i]
51
- structure.remove_empty_chains()
52
-
53
-
54
33
  def remove_non_protein(structure: gemmi.Structure) -> None:
55
34
  for model in structure:
56
35
  for chain in model:
@@ -68,13 +47,15 @@ def write_mmcif(path: str, structure: gemmi.Structure) -> None:
68
47
 
69
48
 
70
49
  class ModelStats:
71
- def __init__(self, structure: gemmi.Structure):
50
+ def __init__(self, structure: gemmi.Structure, monlib: MonLib = None):
72
51
  self.residues: int = 0
73
52
  self.protein: int = 0
74
53
  self.nucleic: int = 0
75
54
  self.waters: int = 0
76
55
  self.dummy_atoms: int = 0
77
56
 
57
+ monlib = monlib or MonLib(structure[0].get_all_residue_names())
58
+
78
59
  for residue in _residues(structure):
79
60
  if residue.name == "HOH":
80
61
  self.waters += 1
@@ -82,24 +63,11 @@ class ModelStats:
82
63
  self.dummy_atoms += 1
83
64
  else:
84
65
  self.residues += 1
85
- if is_protein(residue.name):
66
+ if monlib.is_protein(residue.name):
86
67
  self.protein += 1
87
- if is_nucleic(residue.name):
68
+ if monlib.is_nucleic(residue.name):
88
69
  self.nucleic += 1
89
70
 
90
- def __eq__(self, other):
91
- if isinstance(other, ModelStats):
92
- return (
93
- self.residues == other.residues
94
- and self.waters == other.waters
95
- and self.dummy_atoms == other.dummy_atoms
96
- )
97
- return NotImplemented
98
-
99
- def __ne__(self, other):
100
- equal = self.__eq__(other)
101
- return NotImplemented if equal is not NotImplemented else not equal
102
-
103
71
 
104
72
  def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
105
73
  for model in structure:
@@ -108,6 +76,19 @@ def _residues(structure: gemmi.Structure) -> Iterator[gemmi.Residue]:
108
76
  yield residue
109
77
 
110
78
 
79
+ def _remove_point_mutations(structure: gemmi.Structure) -> None:
80
+ for model in structure:
81
+ to_remove = []
82
+ for chain in model:
83
+ for group in chain.whole().residue_groups():
84
+ for i in range(1, len(group)):
85
+ residue = group[i]
86
+ key = (chain.name, str(residue.seqid), residue.name)
87
+ to_remove.append(key)
88
+ for chain_name, residue_seqid, residue_name in to_remove:
89
+ del model[chain_name][residue_seqid][residue_name]
90
+
91
+
111
92
  def _patch_names(structure: gemmi.Structure) -> None:
112
93
  residue_patches = {"SUL": "SO4"}
113
94
  atom_patches = {("HOH", "O1"): "O"}
@@ -117,3 +98,45 @@ def _patch_names(structure: gemmi.Structure) -> None:
117
98
  for atom in residue:
118
99
  atom.name = atom.name.strip()
119
100
  atom.name = atom_patches.get((residue.name, atom.name), atom.name)
101
+
102
+
103
+ def _are_connected(
104
+ residue1: gemmi.Residue, residue2: gemmi.Residue, monlib: MonLib
105
+ ) -> bool:
106
+ if (
107
+ monlib.is_protein(residue1.name)
108
+ and monlib.is_protein(residue2.name)
109
+ and "C" in residue1
110
+ and "N" in residue2
111
+ ):
112
+ for atom1 in residue1["C"]:
113
+ for atom2 in residue2["N"]:
114
+ if atom1.pos.dist(atom2.pos) < 2.5:
115
+ return True
116
+ if (
117
+ monlib.is_nucleic(residue1.name)
118
+ and monlib.is_nucleic(residue2.name)
119
+ and "O3'" in residue1
120
+ and "P" in residue2
121
+ ):
122
+ for atom1 in residue1["O3'"]:
123
+ for atom2 in residue2["P"]:
124
+ if atom1.pos.dist(atom2.pos) < 2.5:
125
+ return True
126
+ return False
127
+
128
+
129
+ def remove_isolated_fragments(chain: gemmi.Chain, monlib: MonLib, max_length: int):
130
+ to_remove = []
131
+ fragment = []
132
+ for i, residue in enumerate(chain):
133
+ if i > 0 and _are_connected(chain[i - 1], residue, monlib):
134
+ fragment.append(i)
135
+ else:
136
+ if len(fragment) <= max_length:
137
+ to_remove.extend(fragment)
138
+ fragment = [i]
139
+ if len(fragment) <= max_length:
140
+ to_remove.extend(fragment)
141
+ for i in reversed(to_remove):
142
+ del chain[i]
@@ -1,9 +1,11 @@
1
1
  import functools
2
2
  import os
3
3
  import shutil
4
- import uuid
5
4
  import urllib.request
5
+ import uuid
6
+
6
7
  import gemmi
8
+
7
9
  from modelcraft.contents import AsuContents, Ligand, Polymer, PolymerType
8
10
  from modelcraft.jobs.refmac import Refmac
9
11
  from modelcraft.reflections import DataItem
@@ -16,7 +18,7 @@ def ccp4_path(*paths: str) -> str:
16
18
 
17
19
  def in_temp_directory(func):
18
20
  def wrapper():
19
- tmp_dir = "tmp%s" % uuid.uuid4()
21
+ tmp_dir = f"tmp{uuid.uuid4()}"
20
22
  os.mkdir(tmp_dir)
21
23
  os.chdir(tmp_dir)
22
24
  try:
@@ -66,7 +68,6 @@ def insulin_refmac():
66
68
 
67
69
  @functools.lru_cache(maxsize=None)
68
70
  def insulin_contents():
69
- contents = AsuContents()
70
71
  chain_a = Polymer(
71
72
  sequence="GIVEQCCASVCSLYQLENYCN",
72
73
  polymer_type=PolymerType.PROTEIN,
@@ -75,9 +76,8 @@ def insulin_contents():
75
76
  sequence="FVNQHLCGSHLVEALYLVCGERGFFYTPKA",
76
77
  polymer_type=PolymerType.PROTEIN,
77
78
  )
78
- contents.add_polymer(chain_a)
79
- contents.add_polymer(chain_b)
80
- return contents
79
+ ligand = Ligand("GOL")
80
+ return AsuContents(proteins=[chain_a, chain_b], ligands=[ligand])
81
81
 
82
82
 
83
83
  @functools.lru_cache(maxsize=None)
@@ -92,8 +92,4 @@ def pdb1rxf_contents():
92
92
  )
93
93
  protein = Polymer(sequence=sequence, polymer_type=PolymerType.PROTEIN)
94
94
  ligand = Ligand(code="FE")
95
- contents = AsuContents()
96
- contents.proteins.append(protein)
97
- contents.ligands.append(ligand)
98
- contents.copies = 1
99
- return contents
95
+ return AsuContents(copies=1, proteins=[protein], ligands=[ligand])
@@ -1,6 +1,8 @@
1
1
  import math
2
2
  import os
3
+
3
4
  import gemmi
5
+
4
6
  from modelcraft.jobs.acedrg import Acedrg
5
7
 
6
8
 
@@ -1,6 +1,9 @@
1
1
  import subprocess
2
+
2
3
  import pytest
4
+
3
5
  from modelcraft.arguments import parse
6
+
4
7
  from . import ccp4_path, in_temp_directory, pdbe_download
5
8
 
6
9
 
@@ -1,11 +1,12 @@
1
1
  from modelcraft.jobs.buccaneer import Buccaneer, _known_structure_ids
2
2
  from modelcraft.structure import ModelStats, read_structure
3
+
3
4
  from . import (
4
5
  in_temp_directory,
5
- insulin_fsigf,
6
+ insulin_contents,
6
7
  insulin_freer,
8
+ insulin_fsigf,
7
9
  insulin_refmac,
8
- insulin_contents,
9
10
  pdbe_download,
10
11
  )
11
12
 
@@ -1,13 +1,16 @@
1
1
  import urllib.request
2
+
2
3
  import gemmi
4
+
3
5
  from modelcraft.cell import max_distortion, remove_scale, update_cell
4
6
  from modelcraft.structure import read_structure
7
+
5
8
  from . import in_temp_directory
6
9
 
7
10
 
8
11
  @in_temp_directory
9
12
  def test_1ana():
10
- url = "https://files-versioned.wwpdb.org/pdb_versioned/data/entries/"
13
+ url = "https://ftp.ebi.ac.uk/pub/databases/pdb_versioned/data/entries/"
11
14
  url += "an/pdb_00001ana/pdb_00001ana_xyz_v1-2.cif.gz"
12
15
  urllib.request.urlretrieve(url, "1ana.cif.gz")
13
16
  structure = read_structure("1ana.cif.gz")
@@ -1,6 +1,8 @@
1
1
  import gemmi
2
+
2
3
  from modelcraft.jobs.comit import Comit
3
4
  from modelcraft.reflections import DataItem
5
+
4
6
  from . import ccp4_path
5
7
 
6
8
 
@@ -1,8 +1,11 @@
1
- from modelcraft.scripts.contents import _entry_contents, _smiles
1
+ from pytest import approx
2
+
3
+ from modelcraft.contents import AsuContents, Polymer, PolymerType
4
+ from modelcraft.monlib import MonLib
2
5
 
3
6
 
4
7
  def _test_contents(entry: str, expected_json: list, selenomet: bool):
5
- contents = _entry_contents(entry)
8
+ contents = AsuContents.from_pdbe(entry)
6
9
  assert contents.to_json() == expected_json
7
10
  assert contents.is_selenomet() == selenomet
8
11
  return contents
@@ -13,7 +16,10 @@ def test_1o6a():
13
16
  "copies": 2,
14
17
  "proteins": [
15
18
  {
16
- "sequence": "SETRKTEVPSDKLELLLDIPLKVTVELGRTRMTLKRVLEMIHGSIIELDKLTGEPVDILVNGKLIARGEVVVIDENFGVRITEIVSPKERLELLNE",
19
+ "sequence": (
20
+ "SETRKTEVPSDKLELLLDIPLKVTVELGRTRMTLKRVLEMIHGSIIELDKLTGEPVDILV"
21
+ "NGKLIARGEVVVIDENFGVRITEIVSPKERLELLNE"
22
+ ),
17
23
  "stoichiometry": 1,
18
24
  "modifications": ["M->MSE"],
19
25
  }
@@ -23,7 +29,6 @@ def test_1o6a():
23
29
  "carbs": [],
24
30
  "ligands": [],
25
31
  "buffers": [],
26
- "smiles": {},
27
32
  }
28
33
  _test_contents("1o6a", expected, selenomet=True)
29
34
 
@@ -34,7 +39,11 @@ def test_4gxy():
34
39
  "proteins": [],
35
40
  "rnas": [
36
41
  {
37
- "sequence": "GGCGGCAGGUGCUCCCGACCCUGCGGUCGGGAGUUAAAAGGGAAGCCGGUGCAAGUCCGGCACGGUCCCGCCACUGUGACGGGGAGUCGCCCCUCGGGAUGUGCCACUGGCCCGAAGGCCGGGAAGGCGGAGGGGCGGCGAGGAUCCGGAGUCAGGAAACCUGCCUGCCGUC",
42
+ "sequence": (
43
+ "GGCGGCAGGUGCUCCCGACCCUGCGGUCGGGAGUUAAAAGGGAAGCCGGUGCAAGUCCGG"
44
+ "CACGGUCCCGCCACUGUGACGGGGAGUCGCCCCUCGGGAUGUGCCACUGGCCCGAAGGCC"
45
+ "GGGAAGGCGGAGGGGCGGCGAGGAUCCGGAGUCAGGAAACCUGCCUGCCGUC"
46
+ ),
38
47
  "stoichiometry": 1,
39
48
  "modifications": ["1->GTP", "172->CCC"],
40
49
  }
@@ -46,7 +55,6 @@ def test_4gxy():
46
55
  {"code": "IRI", "stoichiometry": 7},
47
56
  ],
48
57
  "buffers": ["MG"],
49
- "smiles": {},
50
58
  }
51
59
  _test_contents("4gxy", expected, selenomet=False)
52
60
 
@@ -56,7 +64,24 @@ def test_6as7():
56
64
  "copies": 1,
57
65
  "proteins": [
58
66
  {
59
- "sequence": "DEEQVFHFYWLDAYEDQYNQPGVVFLFGKVWIESAETHVSCCVMVKNIERTLYFLPREMKIDLNTGKETGTPISMKDVYEEFDEKIATKYKIMKFKSKPVEKNYAFEIPDVPEKSEYLEVKYSAEMPQLPQDLKGETFSHVFGTNTSSLELFLMNRKIKGPCWLEVKSPQLLNQPVSWCKAEAMALKPDLVNVIKDVSPPPLVVMAFSMKTMQNAKNHQNEIIAMAALVHHSFALDKAAPKPPFQSHFCVVSKPKDCIFPYAFKEVIEKKNVKVEVAATERTLLGFFLAKVHKIDPDIIVGHNIYGFELEVLLQRINVCKAPHWSKIGRLKRSNMPKLGGRSGFGERNATCGRMICDVEISAKELIRCKSYHLSELVQQILKTERVVIPMENIQNMYSESSQLLYLLEHTWKDAKFILQIMCELNVLPLALQITNIAGNIMSRTLMGGRSERNEFLLLHAFYENNYIVPDKQIFRKPQQKLGDEDEEIDGDTNKYKKGRKKAAYAGGLVLDPKVGFYDKFILLLDFNSLYPSIIQEFNICFTTVQRVASEAQKVTEDGEQEQIPELPDPSLEMGILPREIRKLVERRKQVKQLMKQQDLNPDLILQYDIRQKALKLTANSMYGCLGFSYSRFYAKPLAALVTYKGREILMHTKEMVQKMNLEVIYGDTDSIMINTNSTNLEEVFKLGNKVKSEVNKLYKLLEIDIDGVFKSLLLLKKKKYAALVVEPTSDGNYVTKQELKGLDIVRRDWCDLAKDTGNFVIGQILSDQSRDTIVENIQKRLIEIGENVLNGSVPVSQFEINKALTKDPQDYPDKKSLPHVHVALWINSQGGRKVKAGDTVSYVICQDGSNLTASQRAYAPEQLQKQDNLTIDTQYYLAQQIHPVVARICEPIDGIDAVLIATWLGLDPTQFRVHHYHKDEEN",
67
+ "sequence": (
68
+ "DEEQVFHFYWLDAYEDQYNQPGVVFLFGKVWIESAETHVSCCVMVKNIERTLYFLPREMK"
69
+ "IDLNTGKETGTPISMKDVYEEFDEKIATKYKIMKFKSKPVEKNYAFEIPDVPEKSEYLEV"
70
+ "KYSAEMPQLPQDLKGETFSHVFGTNTSSLELFLMNRKIKGPCWLEVKSPQLLNQPVSWCK"
71
+ "AEAMALKPDLVNVIKDVSPPPLVVMAFSMKTMQNAKNHQNEIIAMAALVHHSFALDKAAPK"
72
+ "PPFQSHFCVVSKPKDCIFPYAFKEVIEKKNVKVEVAATERTLLGFFLAKVHKIDPDIIVGH"
73
+ "NIYGFELEVLLQRINVCKAPHWSKIGRLKRSNMPKLGGRSGFGERNATCGRMICDVEISAK"
74
+ "ELIRCKSYHLSELVQQILKTERVVIPMENIQNMYSESSQLLYLLEHTWKDAKFILQIMCEL"
75
+ "NVLPLALQITNIAGNIMSRTLMGGRSERNEFLLLHAFYENNYIVPDKQIFRKPQQKLGDED"
76
+ "EEIDGDTNKYKKGRKKAAYAGGLVLDPKVGFYDKFILLLDFNSLYPSIIQEFNICFTTVQR"
77
+ "VASEAQKVTEDGEQEQIPELPDPSLEMGILPREIRKLVERRKQVKQLMKQQDLNPDLILQY"
78
+ "DIRQKALKLTANSMYGCLGFSYSRFYAKPLAALVTYKGREILMHTKEMVQKMNLEVIYGDT"
79
+ "DSIMINTNSTNLEEVFKLGNKVKSEVNKLYKLLEIDIDGVFKSLLLLKKKKYAALVVEPTS"
80
+ "DGNYVTKQELKGLDIVRRDWCDLAKDTGNFVIGQILSDQSRDTIVENIQKRLIEIGENVLN"
81
+ "GSVPVSQFEINKALTKDPQDYPDKKSLPHVHVALWINSQGGRKVKAGDTVSYVICQDGSNL"
82
+ "TASQRAYAPEQLQKQDNLTIDTQYYLAQQIHPVVARICEPIDGIDAVLIATWLGLDPTQFR"
83
+ "VHHYHKDEEN"
84
+ ),
60
85
  "stoichiometry": 1,
61
86
  "modifications": [],
62
87
  }
@@ -77,7 +102,6 @@ def test_6as7():
77
102
  "carbs": [],
78
103
  "ligands": [{"code": "DCP", "stoichiometry": 1}],
79
104
  "buffers": ["MG", "CO"],
80
- "smiles": {},
81
105
  }
82
106
  _test_contents("6as7", expected, selenomet=False)
83
107
 
@@ -87,7 +111,17 @@ def test_4aqd():
87
111
  "copies": 1,
88
112
  "proteins": [
89
113
  {
90
- "sequence": "RSEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNATKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGFQTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWVQKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYEARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDFLTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFFPGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFFYYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYGNPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKV",
114
+ "sequence": (
115
+ "RSEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNA"
116
+ "TKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGF"
117
+ "QTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWV"
118
+ "QKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYE"
119
+ "ARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDF"
120
+ "LTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFF"
121
+ "PGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFF"
122
+ "YYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYG"
123
+ "NPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKV"
124
+ ),
91
125
  "stoichiometry": 2,
92
126
  "modifications": [],
93
127
  }
@@ -105,8 +139,7 @@ def test_4aqd():
105
139
  {"code": "PG4", "stoichiometry": 2},
106
140
  {"code": "PEG", "stoichiometry": 2},
107
141
  ],
108
- "buffers": ["EDO", "CL", "GLY"],
109
- "smiles": {},
142
+ "buffers": ["EDO", "UNX", "CL", "GLY"],
110
143
  }
111
144
  _test_contents("4aqd", expected, selenomet=False)
112
145
 
@@ -116,7 +149,13 @@ def test_1vjr():
116
149
  "copies": 1,
117
150
  "proteins": [
118
151
  {
119
- "sequence": "MGSDKIHHHHHHVLDKIELFILDMDGTFYLDDSLLPGSLEFLETLKEKNKRFVFFTNNSSLGAQDYVRKLRNMGVDVPDDAVVTSGEITAEHMLKRFGRCRIFLLGTPQLKKVFEAYGHVIDEENPDFVVLGFDKTLTYERLKKACILLRKGKFYIATHPDINCPSKEGPVPDAGSIMAAIEASTGRKPDLIAGKPNPLVVDVISEKFGVPKERMAMVGDRLYTDVKLGKNAGIVSILVLTGETTPEDLERAETKPDFVFKNLGELAKAVQ",
152
+ "sequence": (
153
+ "MGSDKIHHHHHHVLDKIELFILDMDGTFYLDDSLLPGSLEFLETLKEKNKRFVFFTNNSS"
154
+ "LGAQDYVRKLRNMGVDVPDDAVVTSGEITAEHMLKRFGRCRIFLLGTPQLKKVFEAYGHV"
155
+ "IDEENPDFVVLGFDKTLTYERLKKACILLRKGKFYIATHPDINCPSKEGPVPDAGSIMAA"
156
+ "IEASTGRKPDLIAGKPNPLVVDVISEKFGVPKERMAMVGDRLYTDVKLGKNAGIVSILVL"
157
+ "TGETTPEDLERAETKPDFVFKNLGELAKAVQ"
158
+ ),
120
159
  "stoichiometry": 1,
121
160
  "modifications": ["M->MSE"],
122
161
  }
@@ -126,7 +165,6 @@ def test_1vjr():
126
165
  "carbs": [],
127
166
  "ligands": [],
128
167
  "buffers": ["NI", "CL"],
129
- "smiles": {},
130
168
  }
131
169
  _test_contents("1vjr", expected, selenomet=True)
132
170
 
@@ -157,7 +195,6 @@ def test_1cag():
157
195
  "carbs": [],
158
196
  "ligands": [],
159
197
  "buffers": ["ACY"],
160
- "smiles": {},
161
198
  }
162
199
  contents = _test_contents("1cag", expected, selenomet=False)
163
200
  polymer = contents.proteins[0]
@@ -182,10 +219,55 @@ def test_1iha():
182
219
  "carbs": [],
183
220
  "ligands": [{"code": "RHD", "stoichiometry": 1}],
184
221
  "buffers": ["CL"],
185
- "smiles": {},
186
222
  }
187
223
  _test_contents("1iha", expected, selenomet=False)
188
224
 
189
225
 
190
- def test_0pr_smiles():
191
- assert _smiles("0PR") == "Cc1c(c(c(cn1)COP(=O)(O)O)CN[C@@H](Cc2ccc(cc2)O)C(=O)O)O"
226
+ def test_3ue7():
227
+ expected = {
228
+ "copies": 1,
229
+ "proteins": [
230
+ {
231
+ "sequence": "TTCCPSIVARSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
232
+ "stoichiometry": 1,
233
+ "modifications": [
234
+ "T->DTH",
235
+ "C->DCY",
236
+ "P->DPR",
237
+ "S->DSN",
238
+ "I->DIL",
239
+ "V->DVA",
240
+ "A->DAL",
241
+ "R->DAR",
242
+ "N->DSG",
243
+ "F->DPN",
244
+ "L->DLE",
245
+ "E->DGL",
246
+ "Y->DTY",
247
+ "D->DAS",
248
+ ],
249
+ },
250
+ {
251
+ "sequence": "TTCCPSIVAKSNFNACRLPGTPEALCATYTGCIIIPGATCPGDYAN",
252
+ "stoichiometry": 1,
253
+ "modifications": [],
254
+ },
255
+ ],
256
+ "rnas": [],
257
+ "dnas": [],
258
+ "carbs": [],
259
+ "ligands": [],
260
+ "buffers": [],
261
+ }
262
+ _test_contents("3ue7", expected, selenomet=False)
263
+
264
+
265
+ def test_5vz8():
266
+ contents = AsuContents.from_pdbe("5vz8")
267
+ contents.monomer_codes()
268
+
269
+
270
+ def test_polymer_weight():
271
+ polymer = Polymer("GG", polymer_type=PolymerType.PROTEIN)
272
+ monlib = MonLib(["GLY"], include_standard=True)
273
+ assert polymer.weight(monlib) == approx(132.12, abs=0.01)