modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +38 -4
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.2.dist-info/LICENSE +0 -504
  88. modelcraft-5.0.2.dist-info/METADATA +0 -48
  89. modelcraft-5.0.2.dist-info/RECORD +0 -82
  90. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/__init__.py CHANGED
@@ -1,16 +1,11 @@
1
- __version__ = "5.0.2"
1
+ __version__ = "6.0.0"
2
2
 
3
3
  from .cell import max_distortion as max_cell_distortion
4
- from .cell import remove_scale
5
- from .cell import update_cell
6
- from .contents import AsuContents
7
- from .contents import PolymerType
8
- from .geometry import rmsz
4
+ from .cell import remove_scale, update_cell
5
+ from .contents import AsuContents, PolymerType
9
6
  from .jobs.acedrg import Acedrg
10
7
  from .jobs.buccaneer import Buccaneer
11
8
  from .jobs.comit import Comit
12
- from .jobs.coot import FixSideChains
13
- from .jobs.coot import Prune
14
9
  from .jobs.ctruncate import CTruncate
15
10
  from .jobs.emda import EmdaMapMask
16
11
  from .jobs.findwaters import FindWaters
@@ -19,26 +14,21 @@ from .jobs.molrep import Molrep
19
14
  from .jobs.nautilus import Nautilus
20
15
  from .jobs.parrot import Parrot
21
16
  from .jobs.phasematch import PhaseMatch
22
- from .jobs.refmac import Refmac
23
- from .jobs.refmac import RefmacMapToMtz
24
- from .jobs.servalcat import ServalcatNemap
25
- from .jobs.servalcat import ServalcatRefine
26
- from .jobs.servalcat import ServalcatTrim
17
+ from .jobs.refmac import Refmac, RefmacMapToMtz
18
+ from .jobs.servalcat import ServalcatNemap, ServalcatRefine, ServalcatTrim
27
19
  from .jobs.sheetbend import Sheetbend
20
+ from .monlib import MonLib
28
21
  from .pipeline import Pipeline
29
- from .reflections import DataItem
30
- from .reflections import write_mtz
31
- from .scripts.contents import _entry_contents as entry_contents
22
+ from .reflections import DataItem, write_mtz
32
23
  from .scripts.modelcraft import main as run
33
- from .solvent import solvent_fraction
34
- from .structure import contains_residue
35
- from .structure import ModelStats
36
- from .structure import read_structure
37
- from .structure import remove_non_library_atoms
38
- from .structure import remove_non_protein
39
- from .structure import remove_residues
40
- from .structure import write_mmcif
41
-
24
+ from .structure import (
25
+ ModelStats,
26
+ contains_residue,
27
+ read_structure,
28
+ remove_non_protein,
29
+ remove_residues,
30
+ write_mmcif,
31
+ )
42
32
 
43
33
  __all__ = [
44
34
  "Acedrg",
@@ -49,33 +39,28 @@ __all__ = [
49
39
  "CTruncate",
50
40
  "DataItem",
51
41
  "EmdaMapMask",
52
- "entry_contents",
53
42
  "FindWaters",
54
- "FixSideChains",
55
43
  "FreeRFlag",
56
44
  "max_cell_distortion",
57
45
  "ModelStats",
58
46
  "Molrep",
47
+ "MonLib",
59
48
  "Nautilus",
60
49
  "Parrot",
61
50
  "PhaseMatch",
62
51
  "Pipeline",
63
52
  "PolymerType",
64
- "Prune",
65
53
  "read_structure",
66
54
  "Refmac",
67
55
  "RefmacMapToMtz",
68
- "remove_non_library_atoms",
69
56
  "remove_non_protein",
70
57
  "remove_residues",
71
58
  "remove_scale",
72
- "rmsz",
73
59
  "run",
74
60
  "ServalcatNemap",
75
61
  "ServalcatRefine",
76
62
  "ServalcatTrim",
77
63
  "Sheetbend",
78
- "solvent_fraction",
79
64
  "update_cell",
80
65
  "write_mmcif",
81
66
  "write_mtz",
modelcraft/__main__.py CHANGED
@@ -1,5 +1,4 @@
1
1
  from .scripts.modelcraft import main
2
2
 
3
-
4
3
  if __name__ == "__main__":
5
4
  main()
modelcraft/arguments.py CHANGED
@@ -1,15 +1,16 @@
1
- from typing import List, Optional
2
1
  import argparse
3
2
  import os
4
3
  import sys
4
+ from typing import List, Optional
5
+
5
6
  import gemmi
6
7
  import numpy as np
8
+
7
9
  from . import __version__
8
10
  from .contents import AsuContents
9
11
  from .reflections import DataItem
10
12
  from .structure import read_structure
11
13
 
12
-
13
14
  _PROG = None
14
15
  if os.path.basename(sys.argv[0]) == "__main__.py":
15
16
  _PROG = f"{sys.executable} -m modelcraft"
@@ -23,12 +24,16 @@ _GROUP.add_argument(
23
24
  required=True,
24
25
  metavar="X",
25
26
  help=(
26
- "A file with a description of the assymetric unit contents, "
27
+ "A file with a description of the full asymmetric unit contents, "
27
28
  "either as a sequence file (in FASTA or PIR format) "
28
29
  "with both protein and nucleic acid sequences, "
29
30
  "or a more detailed contents file in JSON format. "
30
31
  "Example JSON files for existing PDB entries "
31
- "can be created using the modelcraft-contents script."
32
+ "can be created using the modelcraft-contents script. "
33
+ "Currently, the only advantage of using the JSON format "
34
+ "is a more accurate calculation of the solvent fraction "
35
+ "for Parrot density modification in the X-ray pipeline. "
36
+ "Otherwise, the solvent fraction will be calculated using Matthews probability."
32
37
  ),
33
38
  )
34
39
  _GROUP = _PARENT.add_argument_group("optional arguments (shared)")
@@ -181,6 +186,16 @@ _GROUP.add_argument(
181
186
  "This is not required if the MTZ only contains one free-R flag."
182
187
  ),
183
188
  )
189
+ _GROUP.add_argument(
190
+ "--freerflag-value",
191
+ type=int,
192
+ metavar="X",
193
+ help=(
194
+ "Value of the free-R flag to identify reflections as free. "
195
+ "It defaults to the lowest flag with <50% of reflections. "
196
+ "If the chosen flag is not 0 it will changed to match the CCP4 convention."
197
+ ),
198
+ )
184
199
  _GROUP.add_argument(
185
200
  "--unbiased",
186
201
  action="store_true",
@@ -387,9 +402,22 @@ def _parse_freerflag(args: argparse.Namespace, mtz: gemmi.Mtz):
387
402
  else:
388
403
  args.freer = _item_from_label(mtz, args.freerflag_label, ["I"])
389
404
  values = list(args.freer.columns[-1])
390
- percentage = values.count(0) / len(values) * 100
391
- if percentage == 0 or percentage > 50:
392
- _PARSER.error(f"{percentage}% of the reflections are in the free set (flag 0)")
405
+ if args.freerflag_value is None:
406
+ for value in sorted(set(values)):
407
+ fraction = values.count(value) / len(values)
408
+ if fraction < 0.5:
409
+ args.freerflag_value = value
410
+ break
411
+ else:
412
+ _PARSER.error("No suitable value found for the free-R flag")
413
+ else:
414
+ percentage = values.count(args.freerflag_value) / len(values) * 100
415
+ if percentage == 0 or percentage > 50:
416
+ _PARSER.error(f"{percentage}% of the reflections are in the free set")
417
+ if args.freerflag_value != 0:
418
+ print("Changing the free-R flag to match CCP4 convention", flush=True)
419
+ values = [0 if v == args.freerflag_value else 1 for v in values]
420
+ args.freer.array[:, -1] = values
393
421
 
394
422
 
395
423
  def _parse_phases(args: argparse.Namespace, mtz: gemmi.Mtz):
modelcraft/combine.py CHANGED
@@ -1,23 +1,29 @@
1
1
  import gemmi
2
+
2
3
  from .jobs.refmac import RefmacResult
3
- from .monlib import is_protein, is_nucleic
4
+ from .monlib import MonLib
5
+ from .structure import remove_isolated_fragments
4
6
 
5
7
 
6
- def combine_results(buccaneer: RefmacResult, nautilus: RefmacResult) -> gemmi.Structure:
8
+ def combine_results(
9
+ buccaneer: RefmacResult, nautilus: RefmacResult, monlib: MonLib
10
+ ) -> gemmi.Structure:
7
11
  structure = buccaneer.structure.clone()
8
12
  for i, chain in reversed(list(enumerate(structure[0]))):
9
- if _is_nucleic_chain(chain):
13
+ if _is_nucleic_chain(chain, monlib):
10
14
  del structure[0][i]
11
- chains_to_add, clashing_to_remove = _resolve_clashes(structure, buccaneer, nautilus)
15
+ chains_to_add, clashing_to_remove = _resolve_clashes(
16
+ structure, buccaneer, nautilus, monlib
17
+ )
12
18
  for chain in structure[0]:
13
- protein = _is_protein_chain(chain)
19
+ protein = _is_protein_chain(chain, monlib)
14
20
  any_removed = False
15
21
  for i, residue in reversed(list(enumerate(chain))):
16
22
  if _key(chain, residue) in clashing_to_remove:
17
23
  del chain[i]
18
24
  any_removed = True
19
25
  if protein and any_removed:
20
- _remove_isolated_fragments(chain, _are_joined_protein, min_length=6)
26
+ remove_isolated_fragments(chain, monlib, max_length=5)
21
27
  structure.remove_empty_chains()
22
28
  for chain in chains_to_add:
23
29
  structure[0].add_chain(chain, unique_name=True)
@@ -25,7 +31,10 @@ def combine_results(buccaneer: RefmacResult, nautilus: RefmacResult) -> gemmi.St
25
31
 
26
32
 
27
33
  def _resolve_clashes(
28
- structure: gemmi.Structure, buccaneer: RefmacResult, nautilus: RefmacResult
34
+ structure: gemmi.Structure,
35
+ buccaneer: RefmacResult,
36
+ nautilus: RefmacResult,
37
+ monlib: MonLib,
29
38
  ) -> tuple:
30
39
  chains_to_add = []
31
40
  clashing_to_remove = set()
@@ -33,7 +42,7 @@ def _resolve_clashes(
33
42
  naut_scores = _scores(nautilus)
34
43
  search = gemmi.NeighborSearch(structure, max_radius=1).populate()
35
44
  for chain in nautilus.structure[0]:
36
- if _is_nucleic_chain(chain):
45
+ if _is_nucleic_chain(chain, monlib):
37
46
  residues_to_remove = set()
38
47
  for keys, clashing in _clashing_zones(chain, search, structure):
39
48
  if _mean_score(keys, naut_scores) < _mean_score(clashing, bucc_scores):
@@ -44,7 +53,7 @@ def _resolve_clashes(
44
53
  for i, residue in reversed(list(enumerate(chain))):
45
54
  if _key(chain, residue) in residues_to_remove:
46
55
  del chain[i]
47
- _remove_isolated_fragments(chain, _are_joined_nucleic, min_length=2)
56
+ remove_isolated_fragments(chain, monlib, max_length=1)
48
57
  if len(chain) > 0:
49
58
  chains_to_add.append(chain)
50
59
  return chains_to_add, clashing_to_remove
@@ -74,12 +83,12 @@ def _key(chain: gemmi.Chain, residue: gemmi.Residue) -> tuple:
74
83
  return (chain.name, residue.seqid.num, residue.seqid.icode)
75
84
 
76
85
 
77
- def _is_nucleic_chain(chain: gemmi.Chain) -> bool:
78
- return len(chain) > 1 and all(is_nucleic(res.name) for res in chain)
86
+ def _is_nucleic_chain(chain: gemmi.Chain, monlib: MonLib) -> bool:
87
+ return len(chain) > 1 and all(monlib.is_nucleic(res.name) for res in chain)
79
88
 
80
89
 
81
- def _is_protein_chain(chain: gemmi.Chain) -> bool:
82
- return len(chain) > 1 and all(is_protein(res.name) for res in chain)
90
+ def _is_protein_chain(chain: gemmi.Chain, monlib: MonLib) -> bool:
91
+ return len(chain) > 1 and all(monlib.is_protein(res.name) for res in chain)
83
92
 
84
93
 
85
94
  def _clashing_zones(
@@ -106,31 +115,3 @@ def _clashing_zones(
106
115
 
107
116
  def _mean_score(keys: set, scores: dict) -> float:
108
117
  return sum(scores[key] for key in keys) / len(keys)
109
-
110
-
111
- def _remove_isolated_fragments(chain: gemmi.Chain, are_joined, min_length: int):
112
- to_remove = []
113
- fragment = []
114
- for i, residue in enumerate(chain):
115
- if i > 0 and are_joined(chain[i - 1], residue):
116
- fragment.append(i)
117
- else:
118
- if len(fragment) < min_length:
119
- to_remove.extend(fragment)
120
- fragment = [i]
121
- if len(fragment) < min_length:
122
- to_remove.extend(fragment)
123
- for i in reversed(to_remove):
124
- del chain[i]
125
-
126
-
127
- def _are_joined_protein(res1: gemmi.Residue, res2: gemmi.Residue) -> bool:
128
- return "C" in res1 and "N" in res2 and res1["C"][0].pos.dist(res2["N"][0].pos) < 2.5
129
-
130
-
131
- def _are_joined_nucleic(res1: gemmi.Residue, res2: gemmi.Residue) -> bool:
132
- return (
133
- "O3'" in res1
134
- and "P" in res2
135
- and res1["O3'"][0].pos.dist(res2["P"][0].pos) < 2.5
136
- )