modelcraft 5.0.3__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +36 -2
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.3.dist-info/METADATA +0 -49
  88. modelcraft-5.0.3.dist-info/RECORD +0 -82
  89. modelcraft-5.0.3.dist-info/licenses/LICENSE +0 -504
  90. {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,28 @@
1
1
  import os
2
+ import shutil
2
3
  import time
4
+ from pathlib import Path
5
+ from tempfile import TemporaryDirectory
6
+
3
7
  import gemmi
8
+
4
9
  from . import __version__
10
+ from .cell import max_distortion, remove_scale, update_cell
11
+ from .combine import combine_results
5
12
  from .jobs.buccaneer import Buccaneer
6
- from .jobs.coot import FixSideChains, Prune
7
13
  from .jobs.ctruncate import CTruncate
8
14
  from .jobs.findwaters import FindWaters
9
15
  from .jobs.nautilus import Nautilus
16
+ from .jobs.nucleofind import NucleoFindBuild, NucleoFindPredict
10
17
  from .jobs.parrot import Parrot
11
18
  from .jobs.refmac import Refmac
12
19
  from .jobs.sheetbend import Sheetbend
13
- from .cell import max_distortion, remove_scale, update_cell
14
- from .combine import combine_results
20
+ from .monlib import MonLib
15
21
  from .pipeline import Pipeline
22
+ from .prune import prune
16
23
  from .reflections import DataItem, write_mtz
24
+ from .scripts.sidechains import any_missing_side_chains
25
+ from .scripts.sidechains import main as fix_side_chains
17
26
  from .structure import ModelStats, remove_residues, write_mmcif
18
27
 
19
28
 
@@ -38,6 +47,10 @@ class ModelCraftXray(Pipeline):
38
47
  self.last_refmac = None
39
48
  self.output_refmac = None
40
49
  self.cycles_without_improvement = 0
50
+ resnames = self.args.contents.monomer_codes()
51
+ if self.args.model:
52
+ resnames |= set(self.args.model[0].get_all_residue_names())
53
+ self.monlib = MonLib(resnames, self.args.restraints, include_standard=True)
41
54
 
42
55
  @property
43
56
  def resolution(self):
@@ -52,23 +65,23 @@ class ModelCraftXray(Pipeline):
52
65
  if self.args.model is not None:
53
66
  self._refine_input_model()
54
67
  for self.cycle in range(1, self.args.cycles + 1):
55
- print("\n## Cycle %d\n" % self.cycle, flush=True)
68
+ print(f"\n## Cycle {self.cycle}\n", flush=True)
56
69
  self.run_cycle()
57
70
  self.process_cycle_output(self.last_refmac)
58
71
  if self.cycles_without_improvement == self.args.auto_stop_cycles > 0:
59
72
  break
60
73
  if (
61
74
  not self.args.basic
62
- and self.output_refmac.rwork < 0.3
63
- and self.resolution < 2.5
75
+ and not self.args.disable_side_chain_fixing
76
+ and any_missing_side_chains(self.output_refmac.structure)
64
77
  ):
65
- print("\n## Finalisations\n", flush=True)
78
+ print("\n## Adding missing side chains\n", flush=True)
66
79
  self.cycle += 1
67
80
  self.update_current_from_refmac_result(self.output_refmac)
68
81
  self.fixsidechains()
69
82
  self.process_cycle_output(self.last_refmac)
70
83
  print("\n## Best Model:", flush=True)
71
- _print_refmac_result(self.output_refmac)
84
+ self._print_refmac_result(self.output_refmac)
72
85
  self._remove_current_files()
73
86
  self.terminate(reason="Normal")
74
87
 
@@ -101,13 +114,13 @@ class ModelCraftXray(Pipeline):
101
114
  self.args.model = self.current_structure
102
115
  if self.args.phases is not None:
103
116
  self.current_phases = self.args.phases
104
- _print_refmac_result(self.last_refmac)
117
+ self._print_refmac_result(self.last_refmac)
105
118
 
106
119
  def run_cycle(self):
107
120
  if self.args.basic:
108
121
  if self.cycle == 1:
109
122
  self.parrot()
110
- self.run_buccaneer_and_nautilus()
123
+ self.run_model_building()
111
124
  else:
112
125
  if self.cycle > 1 and self.resolution < 2.3:
113
126
  self.prune()
@@ -116,19 +129,25 @@ class ModelCraftXray(Pipeline):
116
129
  if self.cycle > 1 or self.args.phases is None:
117
130
  self.findwaters(dummy=True)
118
131
  remove_residues(structure=self.current_structure, names={"HOH", "DUM"})
119
- self.run_buccaneer_and_nautilus()
132
+ self.run_model_building()
120
133
  self.prune(chains_only=True)
121
134
  self.findwaters()
122
135
 
123
- def run_buccaneer_and_nautilus(self):
136
+ def run_model_building(self):
124
137
  buccaneer = self.buccaneer()
138
+ if shutil.which("nucleofind"):
139
+ try:
140
+ return self.nucleofind(buccaneer)
141
+ except FileNotFoundError:
142
+ pass
125
143
  nautilus = self.nautilus()
126
144
  if buccaneer is None and nautilus is None:
127
145
  self.terminate(reason="No residues built")
128
146
  if buccaneer is None or nautilus is None:
129
147
  self.update_current_from_refmac_result(buccaneer or nautilus)
130
148
  else:
131
- combined = self.run_refmac(combine_results(buccaneer, nautilus), cycles=5)
149
+ combined_structure = combine_results(buccaneer, nautilus, self.monlib)
150
+ combined = self.run_refmac(combined_structure, cycles=5)
132
151
  best = min((buccaneer, nautilus, combined), key=lambda result: result.rfree)
133
152
  self.update_current_from_refmac_result(best)
134
153
 
@@ -149,11 +168,32 @@ class ModelCraftXray(Pipeline):
149
168
  cycles=3 if self.cycle == 1 else 2,
150
169
  threads=self.args.threads,
151
170
  ).run(self)
152
- if result.structure is None or ModelStats(result.structure).residues == 0:
171
+ if (
172
+ result.structure is None
173
+ or ModelStats(result.structure, self.monlib).residues == 0
174
+ ):
153
175
  return None
154
176
  write_mmcif(self.path("current.cif"), result.structure)
155
177
  return self.run_refmac(result.structure, cycles=10)
156
178
 
179
def nucleofind(self, refmac):
    """Build nucleic acid with NucleoFind on top of a refined model.

    ``refmac`` is a refinement result that provides ``fphi_best`` (the
    map coefficients handed to both NucleoFind jobs) and ``structure``
    (the model handed to the build job).

    Returns ``None`` in every case: early when the contents list neither
    RNA nor DNA, early when the build job yields no structure or a
    structure with zero residues, and implicitly after the built model
    has been written to ``current.cif`` and passed to ``self.refmac``.
    """
    contents = self.args.contents
    if not (contents.rnas or contents.dnas):
        return None

    map_coefficients = refmac.fphi_best
    prediction = NucleoFindPredict(fphi=map_coefficients).run(self)
    build_job = NucleoFindBuild(
        contents=contents,
        fphi=map_coefficients,
        structure=refmac.structure,
        prediction=prediction,
    )
    result = build_job.run(self)

    built = result.structure
    if built is None:
        return None
    if ModelStats(built, self.monlib).residues == 0:
        return None

    write_mmcif(self.path("current.cif"), built)
    self.refmac(built, cycles=10, auto_accept=True)
196
+
157
197
  def nautilus(self):
158
198
  if not (self.args.contents.rnas or self.args.contents.dnas):
159
199
  return None
@@ -165,7 +205,10 @@ class ModelCraftXray(Pipeline):
165
205
  freer=self.args.freer,
166
206
  structure=self.current_structure,
167
207
  ).run(self)
168
- if result.structure is None or ModelStats(result.structure).residues == 0:
208
+ if (
209
+ result.structure is None
210
+ or ModelStats(result.structure, self.monlib).residues == 0
211
+ ):
169
212
  return None
170
213
  write_mmcif(self.path("current.cif"), result.structure)
171
214
  return self.run_refmac(result.structure, cycles=10)
@@ -181,7 +224,7 @@ class ModelCraftXray(Pipeline):
181
224
  write_mmcif(self.path("current.cif"), self.current_structure)
182
225
 
183
226
  def run_refmac(self, structure: gemmi.Structure, cycles: int):
184
- if ModelStats(structure).residues == 0:
227
+ if ModelStats(structure, self.monlib).residues == 0:
185
228
  self.terminate(reason="No residues to refine")
186
229
  use_phases = self.args.unbiased and (
187
230
  self.output_refmac is None or self.output_refmac.rwork > 0.35
@@ -216,6 +259,7 @@ class ModelCraftXray(Pipeline):
216
259
  phases=self.current_phases,
217
260
  fphi=self.current_fphi_best,
218
261
  structure=self.current_structure,
262
+ monlib=self.monlib,
219
263
  ).run(self)
220
264
  self.current_phases = result.abcd
221
265
  self.current_fphi_best = result.fphi
@@ -224,25 +268,30 @@ class ModelCraftXray(Pipeline):
224
268
  def prune(self, chains_only=False):
225
269
  if self.args.disable_pruning or not self.args.contents.proteins:
226
270
  return
227
- result = Prune(
271
+ pruned = prune(
228
272
  structure=self.current_structure,
229
273
  fphi_best=self.current_fphi_best,
230
274
  fphi_diff=self.current_fphi_diff,
231
- chains_only=chains_only,
232
- ).run(self)
233
- write_mmcif(self.path("current.cif"), result.structure)
234
- self.refmac(result.structure, cycles=5, auto_accept=True)
275
+ fphi_calc=self.current_fphi_calc,
276
+ residues=not chains_only,
277
+ monlib=self.monlib,
278
+ )
279
+ if pruned:
280
+ write_mmcif(self.path("current.cif"), pruned)
281
+ self.refmac(pruned, cycles=5, auto_accept=True)
235
282
 
236
283
  def fixsidechains(self):
237
- if self.args.disable_side_chain_fixing or not self.args.contents.proteins:
238
- return
239
- result = FixSideChains(
240
- structure=self.current_structure,
241
- fphi_best=self.current_fphi_best,
242
- fphi_diff=self.current_fphi_diff,
243
- ).run(self)
244
- write_mmcif(self.path("current.cif"), result.structure)
245
- self.refmac(result.structure, cycles=5, auto_accept=False)
284
+ with TemporaryDirectory() as tempdir:
285
+ xyzin = str(Path(tempdir, "input.cif"))
286
+ hklin = str(Path(tempdir, "input.mtz"))
287
+ xyzout = str(Path(tempdir, "output.cif"))
288
+ write_mmcif(xyzin, self.current_structure)
289
+ write_mtz(hklin, [self.current_fphi_best], ["FWT,PHWT"])
290
+ fix_side_chains([xyzin, hklin, xyzout])
291
+ if os.path.exists(xyzout):
292
+ structure = gemmi.read_structure(xyzout)
293
+ write_mmcif(self.path("current.cif"), structure)
294
+ self.refmac(structure, cycles=5, auto_accept=False)
246
295
 
247
296
  def findwaters(self, dummy=False):
248
297
  if dummy and self.args.disable_dummy_atoms:
@@ -258,8 +307,8 @@ class ModelCraftXray(Pipeline):
258
307
  self.refmac(result.structure, cycles=10, auto_accept=False)
259
308
 
260
309
  def process_cycle_output(self, result):
261
- _print_refmac_result(result)
262
- model_stats = ModelStats(result.structure)
310
+ self._print_refmac_result(result)
311
+ model_stats = ModelStats(result.structure, self.monlib)
263
312
  stats = {
264
313
  "cycle": self.cycle,
265
314
  "residues": model_stats.residues,
@@ -308,13 +357,12 @@ class ModelCraftXray(Pipeline):
308
357
  except FileNotFoundError:
309
358
  pass
310
359
 
311
-
312
- def _print_refmac_result(result):
313
- model_stats = ModelStats(result.structure)
314
- print("")
315
- print(f"Residues: {model_stats.residues:6d}")
316
- print(f"Protein: {model_stats.protein:6d}")
317
- print(f"Nucleic: {model_stats.nucleic:6d}")
318
- print(f"Waters: {model_stats.waters:6d}")
319
- print(f"R-work: {result.rwork:6.4f}")
320
- print(f"R-free: {result.rfree:6.4f}", flush=True)
360
+ def _print_refmac_result(self, result):
361
+ model_stats = ModelStats(result.structure, self.monlib)
362
+ print("")
363
+ print(f"Residues: {model_stats.residues:6d}")
364
+ print(f"Protein: {model_stats.protein:6d}")
365
+ print(f"Nucleic: {model_stats.nucleic:6d}")
366
+ print(f"Waters: {model_stats.waters:6d}")
367
+ print(f"R-work: {result.rwork:6.4f}")
368
+ print(f"R-free: {result.rfree:6.4f}", flush=True)
modelcraft/monlib.py CHANGED
@@ -1,55 +1,58 @@
1
- import functools
2
1
  import os
3
- import gemmi
4
-
5
-
6
- @functools.lru_cache(maxsize=None)
7
- def _path(code: str) -> str:
8
- directory = os.path.join(os.environ["CLIBD_MON"], code[0].lower())
9
- single = os.path.join(directory, f"{code.upper()}.cif")
10
- double = os.path.join(directory, f"{code.upper()}_{code.upper()}.cif")
11
- return double if os.path.exists(double) else single
12
-
13
-
14
- @functools.lru_cache(maxsize=None)
15
- def atom_ids(code: str) -> set:
16
- return {atom.id for atom in chemcomp(code).atoms}
17
-
18
-
19
- @functools.lru_cache(maxsize=None)
20
- def chemcomp(code: str) -> gemmi.ChemComp:
21
- doc = gemmi.cif.read(_path(code))
22
- return gemmi.make_chemcomp_from_block(doc[-1])
23
-
24
-
25
- @functools.lru_cache(maxsize=None)
26
- def in_library(code: str) -> bool:
27
- return os.path.exists(_path(code))
28
-
29
-
30
- @functools.lru_cache(maxsize=None)
31
- def group(code: str) -> gemmi.ChemComp.Group:
32
- if in_library(code):
33
- doc = gemmi.cif.read(_path(code))
34
- monlib = gemmi.MonLib()
35
- monlib.read_monomer_doc(doc)
36
- return monlib.monomers[code].group
37
- return None
38
-
39
-
40
- @functools.lru_cache(maxsize=None)
41
- def is_protein(code: str) -> bool:
42
- return group(code) in {
43
- gemmi.ChemComp.Group.Peptide,
44
- gemmi.ChemComp.Group.PPeptide,
45
- gemmi.ChemComp.Group.MPeptide,
46
- }
2
+ import sys
47
3
 
4
+ import gemmi
48
5
 
49
- @functools.lru_cache(maxsize=None)
50
- def is_nucleic(code: str) -> bool:
51
- return group(code) in {
52
- gemmi.ChemComp.Group.Dna,
53
- gemmi.ChemComp.Group.Rna,
54
- gemmi.ChemComp.Group.DnaRna,
55
- }
6
+ from .sequence import DNA_CODES, PROTEIN_CODES, RNA_CODES
7
+
8
+
9
class MonLib(gemmi.MonLib):
    """A ``gemmi.MonLib`` pre-loaded with the monomers a job needs.

    Definitions are read from an optional user restraint file first and
    then from the monomer library directory named by the ``CLIBD_MON``
    environment variable.
    """

    def __init__(self, resnames, libin: str = "", include_standard: bool = False):
        """Load monomer definitions for ``resnames``.

        Args:
            resnames: Iterable of monomer codes to load.
            libin: Path to an extra monomer CIF file, read before the
                library directory. Skipped when empty.
            include_standard: Also load every code listed in
                ``PROTEIN_CODES``, ``RNA_CODES`` and ``DNA_CODES`` plus
                ``MSE`` and ``HOH``.

        Raises:
            KeyError: If ``CLIBD_MON`` is not set in the environment.
            ValueError: If ``read_monomer_lib`` reports failure.
        """
        super().__init__()
        if libin:
            self.read_monomer_cif(libin)
        if include_standard:
            standard = {
                *PROTEIN_CODES.values(),
                *RNA_CODES.values(),
                *DNA_CODES.values(),
                "MSE",
                "HOH",
            }
            resnames = set(resnames) | standard
        library_dir = os.environ["CLIBD_MON"]
        loaded = self.read_monomer_lib(library_dir, list(resnames), logging=sys.stderr)
        if not loaded:
            raise ValueError("Please create definitions for missing monomers.")

    def __contains__(self, code: str):
        """Return whether a definition for ``code`` has been loaded."""
        return code in self.monomers

    def __getitem__(self, code: str):
        """Return the loaded definition for ``code`` or raise ``KeyError``."""
        if code in self:
            return self.monomers[code]
        raise KeyError(f"Monomer {code} not in this monomer library instance")

    def atom_ids(self, code: str):
        """Return the set of atom ids in the definition of ``code``."""
        return {atom.id for atom in self[code].atoms}

    def group(self, code: str):
        """Return the group of ``code``, or ``Group.Null`` if it is not loaded."""
        if code in self:
            return self[code].group
        return gemmi.ChemComp.Group.Null

    def is_nucleic(self, code: str) -> bool:
        """Return whether ``code`` belongs to a DNA, RNA or DNA/RNA group."""
        nucleic_groups = {
            gemmi.ChemComp.Group.Dna,
            gemmi.ChemComp.Group.Rna,
            gemmi.ChemComp.Group.DnaRna,
        }
        return self.group(code) in nucleic_groups

    def is_protein(self, code: str) -> bool:
        """Return whether ``code`` belongs to one of the peptide groups."""
        peptide_groups = {
            gemmi.ChemComp.Group.Peptide,
            gemmi.ChemComp.Group.PPeptide,
            gemmi.ChemComp.Group.MPeptide,
        }
        return self.group(code) in peptide_groups

    def volume(self, code: str):
        """Return 18 for every non-hydrogen atom in the definition of ``code``."""
        heavy_atoms = [atom for atom in self[code].atoms if not atom.is_hydrogen()]
        return 18 * len(heavy_atoms)

    def weight(self, code: str):
        """Return the summed element weight of all atoms defined for ``code``."""
        total = 0
        for atom in self[code].atoms:
            total += atom.el.weight
        return total
modelcraft/pdbe.py ADDED
@@ -0,0 +1,54 @@
1
+ import multiprocessing
2
+ import re
3
+
4
+ import requests
5
+
6
+ _MULTIPROCESSING_LOCK = multiprocessing.Lock()
7
+ _SERVER = "https://www.ebi.ac.uk/pdbe"
8
+
9
+
10
def _response_json(url, data=None):
    """Request ``url`` and return the decoded JSON body.

    A GET is sent when ``data`` is ``None``; otherwise ``data`` is sent as
    the body of a POST. Both use a 30 second timeout, and an HTTP error
    status raises through ``raise_for_status``. The request runs while
    holding the module-level multiprocessing lock.
    """
    with _MULTIPROCESSING_LOCK:
        print("Requesting:", url)
        response = (
            requests.get(url, timeout=30)
            if data is None
            else requests.post(url, data=data, timeout=30)
        )
        response.raise_for_status()
        return response.json()
19
+
20
+
21
def molecule_dicts(entry_id: str) -> list:
    """Return the PDBe molecule records for an entry.

    The entry id is first replaced by its superseding entry, if any. When
    any record has the molecule type "carbohydrate polymer", every record
    also receives a ``carb_codes`` key looked up by its ``entity_id``
    (``None`` when there is no match).
    """
    entry_id = _superceeding_entry(entry_id)
    payload = _response_json(_SERVER + "/api/pdb/entry/molecules/" + entry_id)
    molecules = payload[entry_id]
    has_carbohydrate = any(
        molecule["molecule_type"] == "carbohydrate polymer" for molecule in molecules
    )
    if has_carbohydrate:
        codes_by_entity = _carb_codes(entry_id)
        for molecule in molecules:
            molecule["carb_codes"] = codes_by_entity.get(molecule["entity_id"])
    return molecules
31
+
32
+
33
def _superceeding_entry(entry_id: str) -> str:
    """Return the id of the entry that replaces ``entry_id``, if any.

    The id is lower-cased before the status lookup. When the first status
    record has a non-empty ``superceded_by`` list its last element is
    returned; otherwise the lower-cased input id is returned.
    """
    entry_id = entry_id.lower()
    payload = _response_json(_SERVER + "/api/pdb/entry/status/" + entry_id)
    status = payload[entry_id][0]
    replacements = status.get("superceded_by", [])
    if len(replacements) == 0:
        return entry_id
    return replacements[-1]
39
+
40
+
41
def _carb_codes(entry: str) -> dict:
    """Return carbohydrate component counts for each entity of an entry.

    Queries the PDBe search endpoint for the ``carb_compound_id_entity``
    field and parses each value of the form ``CODE(COPIES)_ENTITY``.

    Args:
        entry: PDB entry id used in the ``pdb_id:`` query.

    Returns:
        A dict mapping entity id (int) to a dict of component code to
        number of copies (int). Empty when nothing could be parsed.
    """
    url = _SERVER + "/search/pdb/select?"
    request_data = {
        "q": "pdb_id:" + entry,
        "fl": "carb_compound_id_entity",
        "wt": "json",
    }
    response = _response_json(url, data=request_data)
    codes = {}
    for doc in response["response"]["docs"]:
        # A returned document may not carry the requested field at all;
        # treat that as "no carbohydrates" instead of raising KeyError.
        for line in doc.get("carb_compound_id_entity", []):
            match = re.match(r"(.+)\((\d+)\)_(\d+)", line)
            if match is None:
                # Skip values that do not follow CODE(COPIES)_ENTITY rather
                # than failing the whole lookup with AttributeError.
                continue
            code, copies, entity = match.groups()
            codes.setdefault(int(entity), {})[code] = int(copies)
    return codes
modelcraft/pipeline.py CHANGED
@@ -55,7 +55,7 @@ class Pipeline:
55
55
  def write_report(self):
56
56
  if self.json_name:
57
57
  self.seconds["total"] = time.time() - self.start_time
58
- with open(self.path(self.json_name), "w") as report_file:
58
+ with open(self.path(self.json_name), "w", encoding="utf-8") as report_file:
59
59
  json.dump(self.report, report_file, indent=4)
60
60
 
61
61
  def terminate(self, reason: str):
modelcraft/prune.py ADDED
@@ -0,0 +1,69 @@
1
+ import gemmi
2
+
3
+ from .monlib import MonLib
4
+ from .reflections import DataItem
5
+ from .structure import remove_isolated_fragments
6
+ from .validation import validate
7
+
8
+
9
def prune(
    structure: gemmi.Structure,
    fphi_best: DataItem,
    fphi_diff: DataItem,
    fphi_calc: DataItem,
    residues: bool = True,
    chain_threshold: float = -2,
    residue_threshold: float = -5,
    monlib: MonLib = None,
) -> gemmi.Structure:
    """Delete poorly scoring chains and residues from a copy of a structure.

    The input structure is cloned and validated; the validation metrics are
    used as a table with ``Chain``, ``SeqId`` and ``Score`` columns
    (presumably a pandas DataFrame returned by ``validate``). Pruning then
    happens in two stages:

    1. Chains are visited from lowest to highest mean score. A chain is
       deleted when its mean score is below ``chain_threshold``, it has at
       most 20 scored residues, and deleting it keeps the running total of
       deleted residues within 20% of the scored residues.
    2. If ``residues`` is true, the lowest scoring residues below
       ``residue_threshold`` are deleted (at most 20% of those remaining),
       followed by any fragments of length 1 left behind.

    Args:
        structure: Model to prune. It is not modified.
        fphi_best: Map coefficients passed to ``validate``.
        fphi_diff: Map coefficients passed to ``validate``.
        fphi_calc: Map coefficients passed to ``validate``.
        residues: Whether to run the residue stage after the chain stage.
        chain_threshold: Mean score below which a chain may be deleted.
        residue_threshold: Score below which a residue may be deleted.
        monlib: Monomer library. When omitted, one is built from the
            residue names in the first model.

    Returns:
        The pruned copy, or ``None`` when nothing was deleted.
    """
    print("Performing validation for pruning", flush=True)
    structure = structure.clone()
    monlib = monlib or MonLib(structure[0].get_all_residue_names())
    metrics = validate(structure, fphi_best, fphi_diff, fphi_calc, monlib)

    max_deleted = int(len(metrics) * 0.2)
    num_deleted = 0
    grouped = metrics.groupby("Chain")
    means = grouped.mean(numeric_only=True)
    sizes = grouped.size()  # computed once instead of on every iteration
    for chain_name in means.sort_values("Score").index:
        score = means.loc[chain_name, "Score"]
        count = int(sizes.loc[chain_name])
        print(
            f"Chain {chain_name} has a score of {score} over {count} residues",
            flush=True,
        )
        if (
            score < chain_threshold
            and count <= 20
            and num_deleted + count <= max_deleted
        ):
            print("Deleting chain", chain_name, flush=True)
            del structure[0][chain_name]
            # Count the residues just removed. The previous code added
            # num_deleted to itself, so the total stayed at zero: the 20%
            # cap was never cumulative and chain deletions were reported
            # as "nothing pruned".
            num_deleted += count
            metrics = metrics[metrics["Chain"] != chain_name]

    if not residues:
        return structure if num_deleted > 0 else None

    max_deleted = int(len(metrics) * 0.2)
    # Build a new sorted frame rather than sorting a filtered slice in place.
    worst = metrics[metrics["Score"] < residue_threshold]
    worst = worst.sort_values("Score").head(max_deleted)
    if len(worst) == 0:
        return structure if num_deleted > 0 else None

    print(
        f"Deleting {len(worst)} residues with scores < {residue_threshold}",
        flush=True,
    )
    to_delete = set(zip(worst["Chain"], worst["SeqId"]))
    for chain in structure[0]:
        # Walk backwards so deleting by index does not shift pending indices.
        for i, residue in reversed(list(enumerate(chain))):
            if (chain.name, str(residue.seqid)) in to_delete:
                del chain[i]

    print("Removing isolated residues (if any)", flush=True)
    for chain in structure[0]:
        remove_isolated_fragments(chain, monlib, max_length=1)

    return structure
modelcraft/reflections.py CHANGED
@@ -1,6 +1,8 @@
1
- from typing import Iterator, Iterable, List, Optional, Union
2
1
  import itertools
3
2
  import re
3
+ from functools import partial
4
+ from typing import Iterable, Iterator, List, Optional, Union
5
+
4
6
  import gemmi
5
7
  import numpy
6
8
  import pandas
@@ -104,6 +106,14 @@ class DataItem(gemmi.Mtz):
104
106
  data = numpy.array(self, copy=copy)
105
107
  return pandas.DataFrame(data=data, columns=self.column_labels())
106
108
 
109
def map(self, spacing: float = 1.0, size=None) -> gemmi.FloatGrid:
    """Transform the F/PHI columns of this item into a map.

    When ``size`` is given it is passed on as the exact grid size.
    Otherwise the grid is chosen through a sample rate equal to the
    high-resolution limit divided by ``spacing``.

    Raises:
        ValueError: If the column types of this item are not ``"FP"``.
    """
    if self.types != "FP":
        raise ValueError("DataItem must contain F and PHI columns")
    f_label = self.label(0)
    phi_label = self.label(1)
    if size is not None:
        return self.transform_f_phi_to_map(f_label, phi_label, exact_size=size)
    sample_rate = self.resolution_high() / spacing
    return self.transform_f_phi_to_map(f_label, phi_label, sample_rate=sample_rate)
+
107
117
  @classmethod
108
118
  def search(cls, mtz: gemmi.Mtz, types: str, sequential: bool = True):
109
119
  types = list(types)