modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +38 -4
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.2.dist-info/LICENSE +0 -504
  88. modelcraft-5.0.2.dist-info/METADATA +0 -48
  89. modelcraft-5.0.2.dist-info/RECORD +0 -82
  90. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/contents.py CHANGED
@@ -1,63 +1,43 @@
1
- import enum
1
+ import abc
2
+ import functools
2
3
  import json
3
-
4
- PROTEIN_CODES = {
5
- "A": "ALA",
6
- "B": "ASX",
7
- "C": "CYS",
8
- "D": "ASP",
9
- "E": "GLU",
10
- "F": "PHE",
11
- "G": "GLY",
12
- "H": "HIS",
13
- "I": "ILE",
14
- "K": "LYS",
15
- "L": "LEU",
16
- "M": "MET",
17
- "N": "ASN",
18
- "O": "PYL",
19
- "P": "PRO",
20
- "Q": "GLN",
21
- "R": "ARG",
22
- "S": "SER",
23
- "T": "THR",
24
- "U": "SEC",
25
- "V": "VAL",
26
- "W": "TRP",
27
- "X": "UNK",
28
- "Y": "TYR",
29
- "Z": "GLX",
30
- }
31
-
32
- RNA_CODES = {
33
- "A": "A",
34
- "C": "C",
35
- "G": "G",
36
- "I": "I",
37
- "U": "U",
38
- "X": "N",
39
- }
40
-
41
- DNA_CODES = {
42
- "A": "DA",
43
- "C": "DC",
44
- "G": "DG",
45
- "I": "DI",
46
- "T": "DT",
47
- "U": "DU",
48
- "X": "DN",
49
- }
50
-
51
- PIR_CODES = {"D1", "DC", "DL", "F1", "N1", "N3", "P1", "RC", "RL", "XX"}
52
-
53
-
54
- class PolymerType(enum.Enum):
55
- PROTEIN = "PROTEIN"
56
- RNA = "RNA"
57
- DNA = "DNA"
58
-
59
-
60
- class Polymer:
4
+ import math
5
+
6
+ from . import pdbe
7
+ from .monlib import MonLib
8
+ from .sequence import PolymerType, sequences_in_file
9
+
10
+ BUFFERS = {"12P", "144", "15P", "16D", "1BO", "1PS", "2OS", "3CO", "3NI", "ACA", "ACN"}
11
+ BUFFERS |= {"ACT", "ACY", "AG", "AGC", "AL", "AZI", "B3P", "B7G", "BA", "BCN", "BE7"}
12
+ BUFFERS |= {"BEQ", "BGC", "BMA", "BNG", "BOG", "BR", "BRO", "BTB", "BTC", "BU1", "BU2"}
13
+ BUFFERS |= {"BU3", "C10", "C15", "C8E", "CA", "CAC", "CBM", "CBX", "CCN", "CD", "CE1"}
14
+ BUFFERS |= {"CIT", "CL", "CLO", "CM", "CM5", "CN", "CO", "CPS", "CRY", "CS", "CU"}
15
+ BUFFERS |= {"CU1", "CXE", "CYN", "CYS", "DDQ", "DHD", "DIA", "DIO", "DMF", "DMS", "DMU"}
16
+ BUFFERS |= {"DMX", "DOX", "DPR", "DR6", "DXG", "EDO", "EEE", "EGL", "EOH", "ETF", "F"}
17
+ BUFFERS |= {"FCL", "FCY", "FE", "FE2", "FLO", "FMT", "FRU", "GBL", "GCD", "GLC", "GLO"}
18
+ BUFFERS |= {"GLY", "GOL", "GPX", "HEZ", "HG", "HTG", "HTO", "ICI", "ICT", "IDO", "IDT"}
19
+ BUFFERS |= {"IOD", "IOH", "IPA", "IPH", "JEF", "K", "LAK", "LAT", "LBT", "LDA", "LI"}
20
+ BUFFERS |= {"LMT", "MA4", "MAN", "MG", "MG8", "MHA", "MN", "MN3", "MOH", "MPD", "MPO"}
21
+ BUFFERS |= {"MRD", "MRY", "MTL", "N8E", "NA", "NCO", "NH4", "NHE", "NI", "NO3", "OTE"}
22
+ BUFFERS |= {"P33", "P4C", "PB", "PDO", "PE4", "PE7", "PE8", "PEU", "PG5", "PG6", "PGE"}
23
+ BUFFERS |= {"PGO", "PGQ", "PGR", "PIG", "PIN", "POL", "RB", "SAL", "SBT", "SCN", "SDS"}
24
+ BUFFERS |= {"SO4", "SOR", "SPD", "SPK", "SPM", "SR", "SUC", "SUL", "SYL", "TAR", "TAU"}
25
+ BUFFERS |= {"TBU", "TEP", "TFP", "TLA", "TMA", "TRE", "TRS", "TRT", "UMQ", "UNX", "URE"}
26
+ BUFFERS |= {"XPE", "Y1", "YT3", "ZN", "ZN2"}
27
+
28
+
29
+ @functools.cache
30
+ def is_buffer(code: str) -> bool:
31
+ return code.upper() in BUFFERS
32
+
33
+
34
+ class Component(abc.ABC):
35
+ @abc.abstractmethod
36
+ def volume(self, monlib: MonLib):
37
+ pass
38
+
39
+
40
+ class Polymer(Component):
61
41
  def __init__(
62
42
  self,
63
43
  sequence: str,
@@ -67,29 +47,54 @@ class Polymer:
67
47
  ):
68
48
  self.sequence = sequence.upper()
69
49
  self.stoichiometry = stoichiometry
70
- self.type = polymer_type or guess_sequence_type(self.sequence)
50
+ self.type = polymer_type or PolymerType.guess(self.sequence)
71
51
  self.modifications = modifications or []
72
52
 
73
- def __eq__(self, other) -> bool:
74
- if isinstance(other, Polymer):
75
- return (
76
- self.sequence == other.sequence
77
- and self.type == other.type
78
- and self.modifications == other.modifications
79
- )
80
- return NotImplemented
53
+ def __str__(self) -> str:
54
+ s = f"{self.type.name} with {len(self.sequence)} residues: "
55
+ if len(self.sequence) > 9:
56
+ s += f"{self.sequence[:3]}...{self.sequence[-3:]}"
57
+ else:
58
+ s += f"{self.sequence:9}"
59
+ return s
81
60
 
82
61
  @classmethod
83
- def from_json(cls, component: dict) -> "Polymer":
62
+ def from_json(cls, component: dict, polymer_type: PolymerType) -> "Polymer":
84
63
  return cls(
85
64
  sequence=component["sequence"],
86
65
  stoichiometry=component.get("stoichiometry"),
66
+ polymer_type=polymer_type,
87
67
  modifications=component.get("modifications"),
88
68
  )
89
69
 
70
+ @classmethod
71
+ def from_pdbe(cls, mol: dict, polymer_type: PolymerType) -> "Polymer":
72
+ mod_indices = {}
73
+ for index, mod in mol["pdb_sequence_indices_with_multiple_residues"].items():
74
+ code1 = mod["one_letter_code"]
75
+ code3 = mod["three_letter_code"]
76
+ if code3 not in ("DA", "DC", "DG", "DT"):
77
+ key = code1, code3
78
+ mod_indices.setdefault(key, []).append(index)
79
+ modifications = []
80
+ for (code1, code3), indices in mod_indices.items():
81
+ total = mol["sequence"].count(code1)
82
+ if code1 == "M" and mol["sequence"][0] == "M":
83
+ total -= 1
84
+ if len(indices) >= total:
85
+ modifications.append(f"{code1}->{code3}")
86
+ else:
87
+ modifications.extend(f"{index}->{code3}" for index in indices)
88
+ return cls(
89
+ sequence=mol["sequence"],
90
+ stoichiometry=mol["number_of_copies"],
91
+ polymer_type=polymer_type,
92
+ modifications=modifications,
93
+ )
94
+
90
95
  @classmethod
91
96
  def from_sequence_file(cls, path: str, polymer_type: PolymerType = None):
92
- with open(path) as stream:
97
+ with open(path, encoding="utf-8") as stream:
93
98
  contents = stream.read()
94
99
  for sequence in sequences_in_file(contents=contents):
95
100
  yield cls(sequence=sequence, polymer_type=polymer_type)
@@ -102,7 +107,7 @@ class Polymer:
102
107
  }
103
108
 
104
109
  def residue_codes(self, modified: bool = True) -> list:
105
- codes = [code1_to_code3(code1, self.type) for code1 in self.sequence]
110
+ codes = self.type.parse(self.sequence)
106
111
  if modified:
107
112
  for mod in self.modifications:
108
113
  source, code = mod.split("->")
@@ -118,16 +123,27 @@ class Polymer:
118
123
  def is_selenomet(self) -> bool:
119
124
  return "M->MSE" in self.modifications
120
125
 
126
+ def weight(self, monlib: MonLib) -> float:
127
+ codes = self.residue_codes(modified=False)
128
+ weight = sum(monlib.weight(code) for code in codes)
129
+ weight -= monlib.weight("HOH") * (len(codes) - 1)
130
+ return weight
131
+
132
+ def volume(self, monlib: MonLib) -> float:
133
+ density = 1.35 if self.type == PolymerType.PROTEIN else 2.0
134
+ return self.weight(monlib) / (density * 0.602214)
135
+
121
136
 
122
- class Carb:
137
+ class Carb(Component):
123
138
  def __init__(self, codes: dict, stoichiometry: int = None):
124
139
  self.codes = codes
125
140
  self.stoichiometry = stoichiometry
126
141
 
127
- def __eq__(self, other) -> bool:
128
- if isinstance(other, Carb):
129
- return self.codes == other.codes
130
- return NotImplemented
142
+ def __str__(self) -> str:
143
+ s = "Carb:"
144
+ for code, count in self.codes.items():
145
+ s += f" {count}x{code}"
146
+ return s
131
147
 
132
148
  @classmethod
133
149
  def from_json(cls, component: dict) -> "Carb":
@@ -136,19 +152,30 @@ class Carb:
136
152
  stoichiometry=component.get("stoichiometry"),
137
153
  )
138
154
 
155
+ @classmethod
156
+ def from_pdbe(cls, mol: dict) -> "Carb":
157
+ codes = mol["carb_codes"]
158
+ length = sum(codes.values())
159
+ stoichiometry = mol["number_of_copies"] // length
160
+ return cls(codes=codes, stoichiometry=stoichiometry)
161
+
139
162
  def to_json(self) -> dict:
140
163
  return {"codes": self.codes, "stoichiometry": self.stoichiometry}
141
164
 
165
+ def volume(self, monlib: MonLib) -> float:
166
+ monomers = sum(self.codes.values())
167
+ volume = sum(monlib.volume(code) for code in self.codes)
168
+ volume -= monomers * monlib.volume("HOH")
169
+ return volume
170
+
142
171
 
143
- class Ligand:
172
+ class Ligand(Component):
144
173
  def __init__(self, code: str, stoichiometry: int = None):
145
174
  self.code = code
146
175
  self.stoichiometry = stoichiometry
147
176
 
148
- def __eq__(self, other) -> bool:
149
- if isinstance(other, Ligand):
150
- return self.code == other.code
151
- return NotImplemented
177
+ def __str__(self) -> str:
178
+ return f"Ligand: {self.code}"
152
179
 
153
180
  @classmethod
154
181
  def from_json(cls, component: dict) -> "Ligand":
@@ -157,9 +184,16 @@ class Ligand:
157
184
  stoichiometry=component.get("stoichiometry"),
158
185
  )
159
186
 
187
+ @classmethod
188
+ def from_pdbe(cls, mol: dict) -> "Ligand":
189
+ return cls(code=mol["chem_comp_ids"][0], stoichiometry=mol["number_of_copies"])
190
+
160
191
  def to_json(self) -> dict:
161
192
  return {"code": self.code, "stoichiometry": self.stoichiometry}
162
193
 
194
+ def volume(self, monlib: MonLib) -> float:
195
+ return monlib.volume(self.code)
196
+
163
197
 
164
198
  class AsuContents:
165
199
  def __init__(
@@ -171,7 +205,6 @@ class AsuContents:
171
205
  carbs: list = None,
172
206
  ligands: list = None,
173
207
  buffers: list = None,
174
- smiles: dict = None,
175
208
  ):
176
209
  self.copies = copies
177
210
  self.proteins = proteins or []
@@ -180,7 +213,6 @@ class AsuContents:
180
213
  self.carbs = carbs or []
181
214
  self.ligands = ligands or []
182
215
  self.buffers = buffers or []
183
- self.smiles = smiles or {}
184
216
 
185
217
  @classmethod
186
218
  def from_file(cls, path: str) -> "AsuContents":
@@ -190,31 +222,26 @@ class AsuContents:
190
222
 
191
223
  @classmethod
192
224
  def from_json_file(cls, path: str) -> "AsuContents":
193
- contents = cls()
194
- with open(path) as stream:
195
- contents_json = json.load(stream)
196
- contents.copies = contents_json.get("copies")
197
- for obj in contents_json.get("proteins") or []:
198
- polymer = Polymer.from_json(obj)
199
- polymer.type = PolymerType.PROTEIN
200
- contents.proteins.append(polymer)
201
- for obj in contents_json.get("rnas") or []:
202
- polymer = Polymer.from_json(obj)
203
- polymer.type = PolymerType.RNA
204
- contents.rnas.append(polymer)
205
- for obj in contents_json.get("dnas") or []:
206
- polymer = Polymer.from_json(obj)
207
- polymer.type = PolymerType.DNA
208
- contents.dnas.append(polymer)
209
- for obj in contents_json.get("carbs") or []:
210
- carb = Carb.from_json(obj)
211
- contents.carbs.append(carb)
212
- for obj in contents_json.get("ligands") or []:
213
- ligand = Ligand.from_json(obj)
214
- contents.ligands.append(ligand)
215
- contents.buffers = contents_json.get("buffers") or []
216
- contents.smiles = contents_json.get("smiles") or []
217
- return contents
225
+ with open(path, encoding="utf-8") as stream:
226
+ contents = json.load(stream)
227
+ return cls(
228
+ copies=contents.get("copies"),
229
+ proteins=[
230
+ Polymer.from_json(obj, PolymerType.PROTEIN)
231
+ for obj in contents.get("proteins", [])
232
+ ],
233
+ rnas=[
234
+ Polymer.from_json(obj, PolymerType.RNA)
235
+ for obj in contents.get("rnas", [])
236
+ ],
237
+ dnas=[
238
+ Polymer.from_json(obj, PolymerType.DNA)
239
+ for obj in contents.get("dnas", [])
240
+ ],
241
+ carbs=[Carb.from_json(obj) for obj in contents.get("carbs", [])],
242
+ ligands=[Ligand.from_json(obj) for obj in contents.get("ligands", [])],
243
+ buffers=contents.get("buffers", []),
244
+ )
218
245
 
219
246
  @classmethod
220
247
  def from_sequence_file(
@@ -233,20 +260,67 @@ class AsuContents:
233
260
  if polymer.type == PolymerType.DNA:
234
261
  self.dnas.append(polymer)
235
262
 
263
+ @classmethod
264
+ def from_pdbe(cls, entry_id: str) -> "AsuContents":
265
+ contents = cls(copies=1)
266
+ for mol in pdbe.molecule_dicts(entry_id):
267
+ molecule_type = mol["molecule_type"].lower()
268
+ if "polypeptide" in molecule_type:
269
+ protein = Polymer.from_pdbe(mol, PolymerType.PROTEIN)
270
+ contents.proteins.append(protein)
271
+ elif "polyribonucleotide" in molecule_type:
272
+ rna = Polymer.from_pdbe(mol, PolymerType.RNA)
273
+ contents.rnas.append(rna)
274
+ elif "polydeoxyribonucleotide" in molecule_type:
275
+ dna = Polymer.from_pdbe(mol, PolymerType.DNA)
276
+ contents.dnas.append(dna)
277
+ elif "carbohydrate" in molecule_type:
278
+ carb = Carb.from_pdbe(mol)
279
+ contents.carbs.append(carb)
280
+ elif "bound" in molecule_type:
281
+ ligand = Ligand.from_pdbe(mol)
282
+ if is_buffer(ligand.code):
283
+ contents.buffers.append(ligand.code)
284
+ else:
285
+ contents.ligands.append(ligand)
286
+ contents.divide_stoichiometry()
287
+ return contents
288
+
289
+ def components(self) -> list[Component]:
290
+ return self.proteins + self.rnas + self.dnas + self.carbs + self.ligands
291
+
292
+ def divide_stoichiometry(self):
293
+ counts = []
294
+ for component in self.components():
295
+ if component.stoichiometry is not None:
296
+ counts.append(component.stoichiometry)
297
+ if len(counts) > 0:
298
+ if len(counts) > 1:
299
+ divisor = functools.reduce(math.gcd, counts)
300
+ else:
301
+ divisor = counts[0]
302
+ if divisor > 1:
303
+ self.copies *= divisor
304
+ for component in self.components():
305
+ component.stoichiometry //= divisor
306
+
236
307
  def monomer_codes(self) -> set:
237
308
  codes = set()
238
309
  for polymer in self.proteins + self.rnas + self.dnas:
239
- codes.update(set(polymer.residue_codes(modified=True)))
310
+ codes |= set(polymer.residue_codes(modified=True))
240
311
  for carb in self.carbs:
241
- codes.update(set(carb.codes.keys()))
312
+ codes |= set(carb.codes.keys())
242
313
  for ligand in self.ligands:
243
314
  codes.add(ligand.code)
244
- codes.update(set(self.buffers))
315
+ codes |= set(self.buffers)
245
316
  return codes
246
317
 
247
318
  def is_selenomet(self) -> bool:
248
319
  return len(self.proteins) > 0 and all(p.is_selenomet() for p in self.proteins)
249
320
 
321
+ def volume(self, monlib: MonLib) -> float:
322
+ return sum(c.volume(monlib) * (c.stoichiometry or 1) for c in self.components())
323
+
250
324
  def to_json(self) -> list:
251
325
  return {
252
326
  "copies": self.copies,
@@ -256,68 +330,18 @@ class AsuContents:
256
330
  "carbs": [carb.to_json() for carb in self.carbs],
257
331
  "ligands": [ligand.to_json() for ligand in self.ligands],
258
332
  "buffers": self.buffers,
259
- "smiles": self.smiles,
260
333
  }
261
334
 
262
335
  def write_json_file(self, path: str) -> None:
263
- with open(path, "w") as stream:
336
+ with open(path, "w", encoding="utf-8") as stream:
264
337
  json.dump(self.to_json(), stream, indent=2)
265
338
 
266
339
  def write_sequence_file(
267
340
  self, path: str, types: list = None, line_length: int = 60
268
341
  ) -> None:
269
- with open(path, "w") as stream:
342
+ with open(path, "w", encoding="utf-8") as stream:
270
343
  for polymer in self.proteins + self.rnas + self.dnas:
271
344
  if types is None or polymer.type in types:
272
- stream.write(f">{polymer.type.value}\n")
345
+ stream.write(f">{polymer.type.name}\n")
273
346
  for i in range(0, len(polymer.sequence), line_length):
274
347
  stream.write(polymer.sequence[i : i + line_length] + "\n")
275
-
276
-
277
- def code1_to_code3(code1: str, polymer_type: PolymerType) -> str:
278
- return {
279
- PolymerType.PROTEIN: PROTEIN_CODES.get(code1) or PROTEIN_CODES["X"],
280
- PolymerType.RNA: RNA_CODES.get(code1) or RNA_CODES["X"],
281
- PolymerType.DNA: DNA_CODES.get(code1) or DNA_CODES["X"],
282
- }[polymer_type]
283
-
284
-
285
- def guess_sequence_type(sequence: str) -> PolymerType:
286
- codes = set(sequence)
287
- if "U" in codes:
288
- return PolymerType.RNA
289
- if codes & set("DEFHIKLMNPQRSVWY"):
290
- return PolymerType.PROTEIN
291
- if codes == {"A"}:
292
- return PolymerType.PROTEIN
293
- if codes == {"G"}:
294
- return PolymerType.PROTEIN
295
- if "T" in codes:
296
- return PolymerType.DNA
297
- return PolymerType.RNA
298
-
299
-
300
- def sequences_in_file(contents: str) -> list:
301
- sequence = ""
302
- sequences = []
303
- skip_line = False
304
- skip_lines = False
305
- lines = contents.splitlines(keepends=False)
306
- for line in lines:
307
- if skip_line:
308
- skip_line = False
309
- continue
310
- if line[:1] == ">":
311
- if len(sequence) > 0:
312
- sequences.append(sequence)
313
- sequence = ""
314
- if line[1:3] in PIR_CODES and line[3:4] == ";":
315
- skip_line = True
316
- skip_lines = False
317
- elif line[:1] != ";" and not skip_lines:
318
- sequence += "".join(c for c in line if c.isalpha())
319
- if line[-1:] == "*":
320
- skip_lines = True
321
- if len(sequence) > 0:
322
- sequences.append(sequence)
323
- return sequences
modelcraft/environ.py CHANGED
@@ -6,10 +6,3 @@ def setup_environ():
6
6
  if variable not in os.environ:
7
7
  raise EnvironmentError(variable + " environment variable not set")
8
8
  os.environ["LD_LIBRARY_PATH"] = os.environ["CLIB"]
9
- os.environ["COOT_N_THREADS"] = "1"
10
- os.environ["GOTO_NUM_THREADS"] = "1"
11
- os.environ["MKL_NUM_THREADS"] = "1"
12
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
13
- os.environ["OMP_NUM_THREADS"] = "1"
14
- os.environ["OPENBLAS_NUM_THREADS"] = "1"
15
- os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
modelcraft/geometry.py CHANGED
@@ -1,32 +1,44 @@
1
- import math
2
- import os
1
+ import collections
2
+
3
3
  import gemmi
4
- from modelcraft.monlib import in_library
4
+ import numpy as np
5
+
6
+ from .monlib import MonLib
7
+
8
+
9
+ def per_residue_geometry_rmsz(
10
+ structure: gemmi.Structure, monlib: MonLib, model_index: int = 0
11
+ ) -> dict:
12
+ atom_zs = _atom_zs(structure, monlib, model_index)
13
+ rv = {}
14
+ for chain in structure[model_index]:
15
+ for residue in chain:
16
+ zs = np.concatenate([atom_zs.get(atom.serial, []) for atom in residue])
17
+ rmsz = np.sqrt(np.mean(np.square(zs))) if len(zs) > 0 else np.nan
18
+ rv[(chain.name, str(residue.seqid))] = rmsz
19
+ return rv
5
20
 
6
21
 
7
- def rmsz(structure: gemmi.Structure) -> float:
8
- codes = [code for code in structure[0].get_all_residue_names() if in_library(code)]
9
- monlib = gemmi.read_monomer_lib(os.environ["CLIBD_MON"], codes)
10
- devnull = open(os.devnull, "w")
11
- topology = gemmi.prepare_topology(structure, monlib, warnings=devnull)
12
- num_of_squares = 0
13
- sum_of_squares = 0.0
14
- for bond in topology.bonds:
15
- num_of_squares += 1
16
- sum_of_squares += bond.calculate_z() ** 2
17
- for angle in topology.angles:
18
- num_of_squares += 1
19
- sum_of_squares += angle.calculate_z() ** 2
20
- for torsion in topology.torsions:
21
- if torsion.restr.esd > 0:
22
- num_of_squares += 1
23
- sum_of_squares += torsion.calculate_z() ** 2
24
- for plane in topology.planes:
22
+ def _atom_zs(structure: gemmi.Structure, monlib: MonLib, model_index: int) -> dict:
23
+ structure.assign_serial_numbers()
24
+ topo = gemmi.prepare_topology(structure, monlib, model_index)
25
+ atom_zs = collections.defaultdict(list)
26
+ for bond in topo.bonds:
27
+ z = bond.calculate_z()
28
+ for atom in bond.atoms:
29
+ atom_zs[atom.serial].append(z)
30
+ for angle in topo.angles:
31
+ z = angle.calculate_z()
32
+ for atom in angle.atoms:
33
+ atom_zs[atom.serial].append(z)
34
+ for torsion in topo.torsions:
35
+ if torsion.restr.esd > 0: # Some torsions are only restrained by planes
36
+ z = torsion.calculate_z()
37
+ for atom in torsion.atoms:
38
+ atom_zs[atom.serial].append(z)
39
+ for plane in topo.planes:
25
40
  best_plane = gemmi.find_best_plane(plane.atoms)
26
- max_z = 0
27
41
  for atom in plane.atoms:
28
- distance = gemmi.get_distance_from_plane(atom.pos, best_plane)
29
- max_z = max(distance / plane.restr.esd, max_z)
30
- num_of_squares += 1
31
- sum_of_squares += max_z ** 2
32
- return math.sqrt(sum_of_squares / num_of_squares)
42
+ z = gemmi.get_distance_from_plane(atom.pos, best_plane) / plane.restr.esd
43
+ atom_zs[atom.serial].append(z)
44
+ return atom_zs
modelcraft/job.py CHANGED
@@ -4,8 +4,9 @@ import shutil
4
4
  import subprocess
5
5
  import textwrap
6
6
  import time
7
+
7
8
  from .pipeline import Pipeline
8
- from .utils import random_id
9
+ from .utils import puid
9
10
 
10
11
 
11
12
  class Job(abc.ABC):
@@ -22,13 +23,13 @@ class Job(abc.ABC):
22
23
  if self._exe_path is None:
23
24
  raise ValueError(f"Executable '{self._exe_name}' not found")
24
25
  if pipeline is None:
25
- self._directory = f"job_{self._exe_name}_{random_id(length=20)}"
26
+ self._directory = f"job_{self._exe_name}_{puid(length=20)}"
26
27
  else:
27
28
  self._directory = pipeline.next_job_directory(self._exe_name)
28
29
  pipeline.report_job_start(self._exe_name)
29
30
  os.makedirs(self._directory, exist_ok=True)
30
31
  self._setup()
31
- with open(self._path("script.sh"), "w") as stream:
32
+ with open(self._path("script.sh"), "w", encoding="utf-8") as stream:
32
33
  stream.write(self._script())
33
34
  os.chmod(self._path("script.sh"), 0o755)
34
35
  start_time = time.time()
@@ -56,8 +57,8 @@ class Job(abc.ABC):
56
57
  pass
57
58
 
58
59
  def _run_subprocess(self):
59
- with open(self._path("stdout.txt"), "w") as out_stream:
60
- with open(self._path("stderr.txt"), "w") as err_stream:
60
+ with open(self._path("stdout.txt"), "w", encoding="utf-8") as out_stream:
61
+ with open(self._path("stderr.txt"), "w", encoding="utf-8") as err_stream:
61
62
  process = subprocess.Popen(
62
63
  args=[self._exe_path] + self._args,
63
64
  stdin=subprocess.PIPE if self._stdin else None,
modelcraft/jobs/acedrg.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import dataclasses
2
+
2
3
  import gemmi
4
+
3
5
  from ..job import Job
4
6
 
5
7
 
@@ -1,11 +1,14 @@
1
1
  import dataclasses
2
2
  import os
3
3
  import xml.etree.ElementTree as ET
4
+
4
5
  import gemmi
5
- from ..contents import AsuContents, PolymerType, PROTEIN_CODES
6
+
7
+ from ..contents import AsuContents
6
8
  from ..job import Job
7
9
  from ..reflections import DataItem, write_mtz
8
- from ..structure import consecutive_residues, read_structure, write_mmcif
10
+ from ..sequence import PROTEIN_CODES, PolymerType
11
+ from ..structure import read_structure, write_mmcif
9
12
 
10
13
 
11
14
  @dataclasses.dataclass
@@ -124,12 +127,27 @@ class Buccaneer(Job):
124
127
  )
125
128
 
126
129
 
127
- def _known_structure_ids(structure: gemmi.Structure) -> list:
130
+ def _known_structure_ids(structure: gemmi.Structure):
128
131
  "Known structure IDs for ligands (but not modified residues) with a CA atom"
129
132
  protein_residue_names = set(PROTEIN_CODES.values()) | {"MSE", "UNK"}
130
133
  for chain in structure[0]:
131
- for residues in consecutive_residues(chain):
134
+ for residues in _consecutive_residues(chain):
132
135
  if not any(res.name in protein_residue_names for res in residues):
133
136
  for residue in residues:
134
137
  if "CA" in residue:
135
138
  yield f"/{chain.name}/{str(residue.seqid)}/*/:1.0"
139
+
140
+
141
+ def _consecutive_residues(chain: gemmi.Chain):
142
+ "Iterate through lists of residues with consecutive seqnums (first conformer only)"
143
+ consecutive = []
144
+ last_seqnum = None
145
+ for residue in chain.first_conformer():
146
+ if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
147
+ consecutive.append(residue)
148
+ else:
149
+ yield consecutive
150
+ consecutive = [residue]
151
+ last_seqnum = residue.seqid.num
152
+ if len(consecutive) > 0:
153
+ yield consecutive
modelcraft/jobs/comit.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import dataclasses
2
+
2
3
  import gemmi
4
+
3
5
  from ..job import Job
4
6
  from ..reflections import DataItem, write_mtz
5
7