modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +38 -4
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.2.dist-info/LICENSE +0 -504
  88. modelcraft-5.0.2.dist-info/METADATA +0 -48
  89. modelcraft-5.0.2.dist-info/RECORD +0 -82
  90. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
@@ -1,218 +1,8 @@
1
1
  import argparse
2
- import functools
3
- import math
4
- import os
5
- import re
6
2
  import sys
7
- import requests
8
- from ..contents import AsuContents, Carb, Ligand, Polymer, PolymerType
9
- from ..environ import setup_environ
10
-
11
-
12
- def _response_json(url, data=None):
13
- print("Requesting:", url)
14
- if data is None:
15
- response = requests.get(url)
16
- else:
17
- response = requests.post(url, data=data)
18
- if response.status_code != 200:
19
- raise ConnectionError(response.text)
20
- return response.json()
21
-
22
-
23
- def _add_smiles(contents: AsuContents) -> None:
24
- codes = contents.monomer_codes()
25
- codes = {code for code in codes if not _in_library(code)}
26
- for code in sorted(codes):
27
- path = os.path.join(os.environ["CLIBD_MON"], code[0].lower(), code + ".cif")
28
- if not os.path.exists(path):
29
- contents.smiles[code] = _smiles(code)
30
-
31
-
32
- @functools.lru_cache(maxsize=None)
33
- def _buffers() -> set:
34
- path = os.path.join(os.environ["CCP4"], "share", "pisa", "agents.dat")
35
- agents = set()
36
- with open(path) as stream:
37
- for line in stream:
38
- if line[0] != "#" and "," in line:
39
- code = line.split(",")[0]
40
- agents.add(code)
41
- return agents
42
-
43
-
44
- def _carb_codes(entry: str) -> dict:
45
- url = "https://www.ebi.ac.uk/pdbe/search/pdb/select?"
46
- query = "pdb_id:" + entry
47
- filter_list = "carb_compound_id_entity"
48
- request_data = {"q": query, "fl": filter_list, "wt": "json"}
49
- json = _response_json(url, data=request_data)
50
- docs = json["response"]["docs"]
51
- codes = {}
52
- for doc in docs:
53
- for line in doc["carb_compound_id_entity"]:
54
- match = re.match(r"(.+)\((\d+)\)_(\d+)", line)
55
- code, copies, entity = match.groups()
56
- codes.setdefault(int(entity), {})[code] = int(copies)
57
- return codes
58
-
59
-
60
- def _carb_from_pdbe_molecule_dict(mol: dict) -> Carb:
61
- codes = mol["carb_codes"]
62
- length = sum(codes.values())
63
- stoichiometry = mol["number_of_copies"] // length
64
- return Carb(codes=codes, stoichiometry=stoichiometry)
65
-
66
-
67
- def _divide_stoichiometry(contents: AsuContents):
68
- stoichiometry = []
69
- for item in (
70
- contents.proteins
71
- + contents.rnas
72
- + contents.dnas
73
- + contents.carbs
74
- + contents.ligands
75
- ):
76
- if item.stoichiometry is not None:
77
- stoichiometry.append(item.stoichiometry)
78
- divisor = stoichiometry[0]
79
- if len(stoichiometry) > 1:
80
- divisor = functools.reduce(math.gcd, stoichiometry)
81
- contents.copies *= divisor
82
- for item in (
83
- contents.proteins
84
- + contents.rnas
85
- + contents.dnas
86
- + contents.carbs
87
- + contents.ligands
88
- ):
89
- item.stoichiometry //= divisor
90
-
91
-
92
- def _entry_contents(entry: str) -> AsuContents:
93
- contents = AsuContents()
94
- contents.copies = 1
95
- for mol in _pdbe_molecules(entry):
96
- if "sequence" in mol:
97
- polymer = _polymer_from_pdbe_molecule_dict(mol)
98
- contents.add_polymer(polymer)
99
- if mol["molecule_type"] == "carbohydrate polymer":
100
- carb = _carb_from_pdbe_molecule_dict(mol)
101
- contents.carbs.append(carb)
102
- if mol["molecule_type"] == "bound":
103
- ligand = _ligand_from_pdbe_molecule_dict(mol)
104
- if _is_buffer(ligand.code):
105
- contents.buffers.append(ligand.code)
106
- elif ligand.code not in ("UNL", "UNX"):
107
- contents.ligands.append(ligand)
108
- _divide_stoichiometry(contents)
109
- _add_smiles(contents)
110
- return contents
111
3
 
112
-
113
- @functools.lru_cache(maxsize=None)
114
- def _in_library(code: str) -> bool:
115
- path = os.path.join(os.environ["CLIBD_MON"], code[0].lower(), code + ".cif")
116
- return os.path.exists(path)
117
-
118
-
119
- @functools.lru_cache(maxsize=None)
120
- def _is_buffer(code: str) -> float:
121
- return code.upper() in _buffers()
122
-
123
-
124
- def _ligand_from_pdbe_molecule_dict(mol: dict) -> Ligand:
125
- return Ligand(code=mol["chem_comp_ids"][0], stoichiometry=mol["number_of_copies"])
126
-
127
-
128
- def _modifications_in_pdbe_molecule_dict(mol: dict) -> list:
129
- indices = {}
130
- for index, mod in mol["pdb_sequence_indices_with_multiple_residues"].items():
131
- code1 = mod["one_letter_code"]
132
- code3 = mod["three_letter_code"]
133
- if code3 not in ("DA", "DC", "DG", "DT"):
134
- key = code1, code3
135
- indices.setdefault(key, []).append(index)
136
- modifications = []
137
- for key in indices:
138
- code1, code3 = key
139
- total = mol["sequence"].count(code1)
140
- if code1 == "M" and mol["sequence"][0] == "M":
141
- total -= 1
142
- if len(indices[key]) >= total:
143
- modifications.append(f"{code1}->{code3}")
144
- else:
145
- modifications.extend(f"{index}->{code3}" for index in indices[key])
146
- return modifications
147
-
148
-
149
- def _pdbe_molecules(entry: str) -> list:
150
- entry = entry.lower()
151
- url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/status/" + entry
152
- try:
153
- json = _response_json(url)
154
- except ConnectionError:
155
- sys.exit(f"Cannot determine the status of entry {entry}")
156
- superceded_by = json[entry][0].get("superceded_by", [])
157
- if len(superceded_by) > 0:
158
- entry = superceded_by[-1]
159
- url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/molecules/" + entry
160
- try:
161
- json = _response_json(url)
162
- except ConnectionError:
163
- sys.exit(f"No molecule information found for entry {entry}")
164
- mols = json[entry]
165
- if any(mol["molecule_type"] == "carbohydrate polymer" for mol in mols):
166
- codes = _carb_codes(entry)
167
- for mol in mols:
168
- mol["carb_codes"] = codes.get(mol["entity_id"])
169
- return mols
170
-
171
-
172
- def _polymer_from_pdbe_molecule_dict(mol: dict) -> Polymer:
173
- polymer_type = {
174
- "polypeptide(l)": PolymerType.PROTEIN,
175
- "polyribonucleotide": PolymerType.RNA,
176
- "polydeoxyribonucleotide": PolymerType.DNA,
177
- }.get(mol["molecule_type"].lower(), None)
178
- return Polymer(
179
- sequence=mol["sequence"],
180
- stoichiometry=mol["number_of_copies"],
181
- polymer_type=polymer_type,
182
- modifications=_modifications_in_pdbe_molecule_dict(mol),
183
- )
184
-
185
-
186
- @functools.lru_cache(maxsize=None)
187
- def _smiles(code: str) -> str:
188
- query = (
189
- "{\n"
190
- ' chem_comp(comp_id: "%s") {\n'
191
- " pdbx_chem_comp_descriptor {\n"
192
- " comp_id\n"
193
- " descriptor\n"
194
- " program\n"
195
- " program_version\n"
196
- " type\n"
197
- " }\n"
198
- " }\n"
199
- "}" % code
200
- )
201
- url = "https://data.rcsb.org/graphql?query=" + requests.utils.quote(query)
202
- json = _response_json(url)
203
- descriptors = json["data"]["chem_comp"]["pdbx_chem_comp_descriptor"]
204
- canonical = None
205
- smiles = None
206
- for descriptor in descriptors:
207
- if descriptor["type"] == "SMILES_CANONICAL":
208
- if descriptor["program"] == "OpenEye OEToolkits":
209
- return descriptor["descriptor"]
210
- canonical = descriptor["descriptor"]
211
- elif descriptor["type"] == "SMILES":
212
- smiles = descriptor["descriptor"]
213
- if canonical is None and smiles is None:
214
- raise RuntimeError("Could not get SMILES from RCSB for " + code)
215
- return canonical or smiles
4
+ from ..contents import AsuContents
5
+ from ..environ import setup_environ
216
6
 
217
7
 
218
8
  def main(argument_list=None):
@@ -221,10 +11,10 @@ def main(argument_list=None):
221
11
  argument_list = sys.argv[1:]
222
12
  description = "Create a contents JSON file for a PDB entry"
223
13
  parser = argparse.ArgumentParser(description=description)
224
- parser.add_argument("entry", help="PDB entry ID")
225
- parser.add_argument("contents", help="Path for the contents JSON")
14
+ parser.add_argument("entry_id", help="PDB entry ID")
15
+ parser.add_argument("contents", help="Path to write the contents JSON")
226
16
  args = parser.parse_args(argument_list)
227
- contents = _entry_contents(entry=args.entry)
17
+ contents = AsuContents.from_pdbe(args.entry_id)
228
18
  contents.write_json_file(args.contents)
229
19
 
230
20
 
@@ -1,9 +1,12 @@
1
1
  import argparse
2
2
  import sys
3
+
3
4
  import gemmi
5
+
4
6
  from ..contents import AsuContents
5
7
  from ..environ import setup_environ
6
- from ..solvent import _contents_volume, _copies_options, _volume_components
8
+ from ..monlib import MonLib
9
+ from ..solvent import copies_options
7
10
 
8
11
 
9
12
  def main(argument_list=None):
@@ -13,39 +16,44 @@ def main(argument_list=None):
13
16
  parser = argparse.ArgumentParser()
14
17
  parser.add_argument("contents", help="Path to contents file")
15
18
  parser.add_argument("mtz", help="Path to MTZ file")
19
+ parser.add_argument("--libin", help="Path to custom restraint dictionary")
16
20
  args = parser.parse_args(argument_list)
21
+
17
22
  contents = AsuContents.from_file(args.contents)
18
23
  mtz = gemmi.read_mtz_file(args.mtz)
24
+ monlib = MonLib(contents.monomer_codes(), args.libin, include_standard=True)
19
25
 
20
26
  cell = mtz.cell
21
27
  asu_volume = cell.volume / len(mtz.spacegroup.operations())
22
28
  print("## MTZ\n")
23
29
  print(
24
- "Cell %.3f %.3f %.3f %.2f %.2f %.2f"
25
- % (cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma)
30
+ f"Cell {cell.a:.3f} {cell.b:.3f} {cell.c:.3f}"
31
+ f" {cell.alpha:.2f} {cell.beta:.2f} {cell.gamma:.2f}"
26
32
  )
27
- print("Spacegroup ", mtz.spacegroup.hm)
28
- print("ASU Volume %.0f" % asu_volume)
29
- print("Resolution %.2f - %.2f" % (mtz.resolution_low(), mtz.resolution_high()))
33
+ print(f"Spacegroup {mtz.spacegroup.hm}")
34
+ print(f"ASU Volume {asu_volume:.0f}")
35
+ print(f"Resolution {mtz.resolution_low():.2f} - {mtz.resolution_high():.2f}")
30
36
  print("")
31
37
 
32
38
  print("## Components\n")
33
39
  print("| Description | Stoichiometry | Volume |")
34
40
  print("|----------------------------------------------|---------------|----------|")
35
- for component in _volume_components(contents):
36
- description = component.description
37
- stoichiometry = component.stoichiometry
38
- assumed = "(assumed)" if component.stoichiometry_assumed else ""
39
- volume = component.volume
41
+ for component in contents.components():
42
+ stoichiometry = component.stoichiometry or 1
43
+ assumed = "(assumed)" if component.stoichiometry is None else ""
44
+ volume = component.volume(monlib)
40
45
  print(
41
- "| %44s | %9s %3d | %8.0f |"
42
- % (description[:44], assumed, stoichiometry, volume)
46
+ f"| {str(component)[:44]:44s} "
47
+ f"| {assumed:9s} {stoichiometry:3d} "
48
+ f"| {volume:8.0f} |"
43
49
  )
44
50
  print("|----------------------------------------------|---------------|----------|")
45
- print("| %44s | | %8.0f |" % ("Total", _contents_volume(contents)))
51
+ print(f"| {'Total':44s} | | {contents.volume(monlib):8.0f} |")
46
52
  print("")
47
53
 
48
- options = _copies_options(contents, mtz)
54
+ options = copies_options(
55
+ contents, cell, mtz.spacegroup, mtz.resolution_high(), monlib
56
+ )
49
57
  print("## Copies\n")
50
58
  if len(options) == 0:
51
59
  print("Contents are too big to fit into the asymmetric unit")
@@ -54,8 +62,9 @@ def main(argument_list=None):
54
62
  print("|--------|------------------|-------------|")
55
63
  for option in options:
56
64
  print(
57
- "| %6d | %16.3f | %11.3f |"
58
- % (option.copies, option.solvent, option.probability)
65
+ f"| {option.copies:6d} "
66
+ f"| {option.solvent:16.3f} "
67
+ f"| {option.probability:11.3f} |"
59
68
  )
60
69
  print("")
61
70
 
@@ -1,4 +1,5 @@
1
1
  import sys
2
+
2
3
  from ..arguments import parse
3
4
  from ..environ import setup_environ
4
5
  from ..modelcraftem import ModelCraftEm
@@ -0,0 +1,141 @@
1
+ "Add missing side chains to a protein model"
2
+
3
+ import argparse
4
+ import sys
5
+ from os import environ
6
+ from pathlib import Path
7
+ from shutil import rmtree
8
+
9
+ import coot_headless_api
10
+ import gemmi
11
+
12
+ from ..environ import setup_environ
13
+
14
+
15
def _parse_args(argument_list):
    """Parse the command-line arguments of the side-chain script.

    Args:
        argument_list: Sequence of argument strings, or ``None`` to use the
            arguments of the running process (``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` with ``structure``, ``mtz``, ``output``,
        ``model_index``, ``f_label`` and ``phi_label`` attributes.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "structure",
        help="Input structure in PDB, mmCIF, mmJSON format",
    )
    parser.add_argument(
        "mtz",
        help="MTZ file amplitudes and phases (FWT and PHWT by default)",
    )
    parser.add_argument(
        "output",
        help="Path to write the output structure with added side chains",
    )
    parser.add_argument(
        "--model-index",
        type=int,
        default=0,
        metavar="N",
        help="Index of the model to analyse (with 0 being the first model)",
    )
    parser.add_argument(
        "--f_label",
        default="FWT",
        help="Column label for structure factor amplitudes",
    )
    parser.add_argument(
        "--phi_label",
        default="PHWT",
        help="Column label for structure factor phases",
    )
    # Only fall back to the process arguments when nothing was passed at all.
    # Testing truthiness would also replace an explicit empty list, so a
    # caller passing [] would silently get whatever is in sys.argv.
    if argument_list is None:
        argument_list = sys.argv[1:]
    return parser.parse_args(argument_list)
50
+
51
+
52
# Side chain atom names from the gamma position onwards, keyed by residue
# name. Residue names that are absent from this table have no expected atoms.
SIDE_CHAIN_ATOMS = {
    "ARG": {"CG", "CD", "NE", "CZ", "NH1", "NH2"},
    "ASN": {"CG", "OD1", "ND2"},
    "ASP": {"CG", "OD1", "OD2"},
    "CYS": {"SG"},
    "GLN": {"CG", "CD", "OE1", "NE2"},
    "GLU": {"CG", "CD", "OE1", "OE2"},
    "HIS": {"CG", "ND1", "CD2", "CE1", "NE2"},
    "ILE": {"CG1", "CG2", "CD1"},
    "LEU": {"CG", "CD1", "CD2"},
    "LYS": {"CG", "CD", "CE", "NZ"},
    "MET": {"CG", "SD", "CE"},
    "MSE": {"CG", "SE", "CE"},
    "PHE": {"CG", "CD1", "CD2", "CE1", "CE2", "CZ"},
    "PRO": {"CG", "CD"},
    "SER": {"OG"},
    "THR": {"OG1", "CG2"},
    "TRP": {"CG", "CD1", "CD2", "NE1", "CE2", "CE3", "CZ2", "CZ3", "CH2"},
    "TYR": {"CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"},
    "VAL": {"CG1", "CG2"},
}


def has_full_side_chain(residue: "gemmi.Residue") -> bool:
    """Check if a residue has all side chain atoms from gamma onwards.

    Args:
        residue: Object with a ``name`` attribute that iterates over atoms
            carrying a ``name`` attribute (a ``gemmi.Residue``).

    Returns:
        True when every atom listed for the residue name in
        ``SIDE_CHAIN_ATOMS`` is present. Residue names without an entry
        expect no atoms and are therefore always complete.
    """
    expected = SIDE_CHAIN_ATOMS.get(residue.name, set())
    built = {atom.name for atom in residue}
    # Subset test, not proper-superset: a residue is complete as soon as
    # nothing expected is missing, even if it contains no further atoms.
    return expected <= built
80
+
81
+
82
def any_missing_side_chains(structure: gemmi.Structure, model_index: int = 0) -> bool:
    """Check if any residue in a model has missing side chain atoms.

    Args:
        structure: Structure to inspect.
        model_index: Index of the model to inspect. Defaults to 0 (the first
            model), which is what earlier callers implicitly got.

    Returns:
        True if at least one residue of the chosen model fails
        ``has_full_side_chain``.
    """
    return any(
        not has_full_side_chain(residue)
        for chain in structure[model_index]
        for residue in chain
    )
89
+
90
+
91
def cif_path(name: str):
    """Find the dictionary CIF file of a monomer below ``$CLIBD_MON``.

    Looks in the sub-directory named after the lower-cased first character
    of ``name`` for ``<name>.cif`` and then for ``<name>_<name>.cif``.

    Args:
        name: Monomer (component) code.

    Returns:
        The path of the first existing file as a string, or ``None`` if
        neither file exists or ``name`` is empty.

    Raises:
        KeyError: If the ``CLIBD_MON`` environment variable is not set.
    """
    if not name:
        # An empty code has no first character to select a sub-directory;
        # report it like any other code without a dictionary.
        return None
    directory = Path(environ["CLIBD_MON"]) / name[0].lower()
    for filename in (f"{name}.cif", f"{name}_{name}.cif"):
        candidate = directory / filename
        if candidate.exists():
            return str(candidate)
    return None
100
+
101
+
102
def main(argument_list=None):
    """Add missing side chains to the residues of a model using Coot.

    Reads the structure, finds the residues of the selected model that fail
    ``has_full_side_chain`` and, for each of them, refines, fills the partial
    residue, fits a rotamer and refines again against the map read from the
    MTZ file. The result is written to the output path. If no residue is
    incomplete, a message is printed and nothing is written.

    Args:
        argument_list: Command-line arguments, or ``None`` to use those of
            the running process.
    """
    backup_path = Path("coot-backup")
    backup_existed_before = backup_path.exists()
    setup_environ()
    args = _parse_args(argument_list)
    structure = gemmi.read_structure(args.structure)
    # Decide what to rebuild from the model the user selected, so that the
    # "nothing to do" check and the rebuilding loop look at the same model.
    incomplete = [
        (chain.name, residue.seqid.num, residue.seqid.icode)
        for chain in structure[args.model_index]
        for residue in chain
        if not has_full_side_chain(residue)
    ]
    if not incomplete:
        print("No missing side chains detected, no action taken")
        return
    try:
        mc = coot_headless_api.molecules_container_t(True)
        mc.set_use_gemmi(False)
        mc.set_make_backups(False)
        imol = mc.read_coordinates(args.structure)
        non_standard = mc.non_standard_residue_types_in_model(imol)
        for comp_id in non_standard:
            if (path := cif_path(comp_id)) is None:
                print("WARNING: No CIF file found for non-standard residue", comp_id)
                continue
            mc.import_cif_dictionary(path, imol)
        imap = mc.read_mtz(args.mtz, args.f_label, args.phi_label, "", False, False)
        mc.set_imol_refinement_map(imap)
        mc.set_use_torsion_restraints(True)
        mc.set_use_rama_plot_restraints(True)
        for chain_name, num, icode in incomplete:
            # A blank insertion code is passed to Coot as an empty string.
            icode = "" if icode == " " else icode
            mc.refine_residues(imol, chain_name, num, icode, "", "TRIPLE", 1000)
            mc.fill_partial_residue(imol, chain_name, num, icode)
            mc.auto_fit_rotamer(imol, chain_name, num, icode, "", imap)
            mc.refine_residues(imol, chain_name, num, icode, "", "TRIPLE", 1000)
        mc.write_coordinates(imol, args.output)
    finally:
        # Remove the backup directory only if this run created it, and do so
        # even when one of the Coot calls above raised.
        if not backup_existed_before and backup_path.exists():
            rmtree(backup_path, ignore_errors=True)
138
+
139
+
140
+ if __name__ == "__main__":
141
+ main()
@@ -0,0 +1,81 @@
1
+ import argparse
2
+ import sys
3
+
4
+ import gemmi
5
+ from tabulate import tabulate
6
+
7
+ from ..environ import setup_environ
8
+ from ..monlib import MonLib
9
+ from ..reflections import DataItem
10
+ from ..validation import validate
11
+
12
+
13
def _parse_args(argument_list):
    """Parse the command-line arguments of the validation script.

    Args:
        argument_list: Sequence of argument strings, or ``None`` to use the
            arguments of the running process (``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` with ``structure``, ``mtz``, ``format``,
        ``libin``, ``sort`` and ``model_index`` attributes.
    """
    parser = argparse.ArgumentParser(
        # The module has no docstring, so __doc__ alone would leave the help
        # output without any description.
        description=__doc__ or "Calculate validation metrics for a structure",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "structure",
        help="Input structure in PDB, mmCIF, mmJSON format",
    )
    parser.add_argument(
        "mtz",
        help=(
            "MTZ file from Refmac with standard output column labels "
            "(the output MTZ from ModelCraft meets this requirement)."
        ),
    )
    parser.add_argument(
        "--format",
        default="table",
        choices=["table", "csv"],
        help="Print the results as a human-readable table or a CSV file",
    )
    parser.add_argument(
        "--libin",
        metavar="PATH",
        help="Path to a custom restraint dictionary in CIF format",
    )
    parser.add_argument(
        "--sort",
        action="store_true",
        help="Order the output with the worse scoring residues first",
    )
    parser.add_argument(
        "--model-index",
        type=int,
        default=0,
        metavar="N",
        help="Index of the model to analyse (with 0 being the first model)",
    )
    # Only fall back to the process arguments when nothing was passed at all;
    # an explicit empty list must not be replaced by sys.argv.
    if argument_list is None:
        argument_list = sys.argv[1:]
    return parser.parse_args(argument_list)
53
+
54
+
55
def main(argument_list=None):
    """Validate a structure against a Refmac MTZ file and print the metrics.

    Reads the structure and the MTZ file, wraps the ``FWT,PHWT``,
    ``DELFWT,PHDELWT`` and ``FC_ALL,PHIC_ALL`` columns as data items and
    passes them to ``validate`` together with a monomer library built for
    the residue names of the selected model. The result is printed either as
    a formatted table with an extra ``Sig`` column or as CSV.

    Args:
        argument_list: Command-line arguments, or ``None`` to use those of
            the running process.
    """
    setup_environ()
    options = _parse_args(argument_list)

    model = gemmi.read_structure(options.structure, format=gemmi.CoorFormat.Detect)
    reflections = gemmi.read_mtz_file(options.mtz)
    best, diff, calc = (
        DataItem(reflections, labels)
        for labels in ("FWT,PHWT", "DELFWT,PHDELWT", "FC_ALL,PHIC_ALL")
    )

    names = model[options.model_index].get_all_residue_names()
    library = MonLib(names, options.libin)
    # NOTE(review): the result is used like a pandas DataFrame (sort_values,
    # apply, to_csv) - confirm against validate().
    table = validate(model, best, diff, calc, library, options.model_index)
    if options.sort:
        table.sort_values("Score", ascending=True, inplace=True)

    if options.format != "table":
        print(table.to_csv(index=False))
        return
    # One "+" per whole unit of negative score, capped at five.
    table["Sig"] = table["Score"].apply(lambda score: "+" * min(5, -int(score)))
    print(tabulate(table, headers="keys", showindex=False, floatfmt=".1f"))
78
+
79
+
80
+ if __name__ == "__main__":
81
+ main()
modelcraft/sequence.py ADDED
@@ -0,0 +1,106 @@
1
# One-letter to three-letter residue codes for proteins.
PROTEIN_CODES = {
    "A": "ALA",
    "B": "ASX",
    "C": "CYS",
    "D": "ASP",
    "E": "GLU",
    "F": "PHE",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "K": "LYS",
    "L": "LEU",
    "M": "MET",
    "N": "ASN",
    "O": "PYL",
    "P": "PRO",
    "Q": "GLN",
    "R": "ARG",
    "S": "SER",
    "T": "THR",
    "U": "SEC",
    "V": "VAL",
    "W": "TRP",
    "X": "UNK",
    "Y": "TYR",
    "Z": "GLX",
}

# One-letter to residue codes for DNA.
DNA_CODES = {
    "A": "DA",
    "C": "DC",
    "G": "DG",
    "I": "DI",
    "N": "DN",
    "T": "DT",
    "U": "DU",
    "X": "DN",
}

# One-letter to residue codes for RNA.
RNA_CODES = {
    "A": "A",
    "C": "C",
    "G": "G",
    "I": "I",
    "N": "N",
    "U": "U",
    "X": "N",
}

# One-letter codes that occur in PROTEIN_CODES but in neither nucleic acid
# table; any of them marks a sequence as protein.
_PROTEIN_ONLY_LETTERS = frozenset("DEFHKLMNPQRSVWY")


class PolymerType:
    """A kind of polymer together with its one-letter code table.

    The three instances ``PROTEIN``, ``DNA`` and ``RNA`` are attached to the
    class directly after its definition.
    """

    # Declared as annotations only; the instances are assigned below, once
    # the class exists.
    PROTEIN: "PolymerType"
    DNA: "PolymerType"
    RNA: "PolymerType"

    def __init__(self, name: str, codes: dict[str, str]):
        """Store the display name and the one-letter code table."""
        self.name = name
        self.codes = codes

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name!r})"

    def parse(self, sequence: str) -> list[str]:
        """Translate a one-letter sequence into a list of residue codes.

        Characters that are not in the code table are translated like "X".
        """
        unknown = self.codes["X"]
        return [self.codes.get(c, unknown) for c in sequence]

    @classmethod
    def guess(cls, sequence: str):
        """Guess the polymer type from the letters used in a sequence.

        A sequence is taken as protein if it contains a letter that only
        proteins use, or if it consists solely of "A" or solely of "G".
        Otherwise it is RNA if it contains "U", DNA if it contains "T", and
        RNA if it contains neither.
        """
        codes = set(sequence)
        if codes & _PROTEIN_ONLY_LETTERS or codes in ({"A"}, {"G"}):
            return cls.PROTEIN
        if "U" in codes:
            return cls.RNA
        if "T" in codes:
            return cls.DNA
        return cls.RNA


PolymerType.PROTEIN = PolymerType("Protein", PROTEIN_CODES)
PolymerType.DNA = PolymerType("DNA", DNA_CODES)
PolymerType.RNA = PolymerType("RNA", RNA_CODES)
78
+
79
+
80
# Two-letter type codes that may follow ">" in a PIR header (">P1;...").
PIR_CODES = {"D1", "DC", "DL", "F1", "N1", "N3", "P1", "RC", "RL", "XX"}


def sequences_in_file(contents: str) -> list:
    """Extract the sequences from the text of a FASTA or PIR file.

    A line starting with ">" begins a new sequence. If that header is a PIR
    header (a code from ``PIR_CODES`` followed by ";"), the line after it is
    skipped as well. Lines starting with ";" are ignored. Only alphabetic
    characters are kept, and once a line ends with "*" the remaining lines
    up to the next header are ignored.

    Args:
        contents: Full text of the sequence file.

    Returns:
        The non-empty sequences as strings, in file order.
    """
    sequences = []
    current = []  # characters of the sequence being read
    skip_next = False  # the line after a PIR header is skipped
    terminated = False  # a "*" ended the current sequence
    for line in contents.splitlines():
        if skip_next:
            skip_next = False
            continue
        if line.startswith(">"):
            if current:
                sequences.append("".join(current))
                current = []
            # Slice instead of index so that a header of exactly three
            # characters (e.g. ">P1") does not raise IndexError.
            skip_next = line[1:3] in PIR_CODES and line[3:4] == ";"
            terminated = False
        elif not line.startswith(";") and not terminated:
            current.extend(c for c in line if c.isalpha())
            terminated = line.endswith("*")
    if current:
        sequences.append("".join(current))
    return sequences