modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. modelcraft/__init__.py +16 -31
  2. modelcraft/__main__.py +0 -1
  3. modelcraft/arguments.py +35 -7
  4. modelcraft/combine.py +22 -41
  5. modelcraft/contents.py +188 -164
  6. modelcraft/environ.py +0 -7
  7. modelcraft/geometry.py +39 -27
  8. modelcraft/job.py +6 -5
  9. modelcraft/jobs/acedrg.py +2 -0
  10. modelcraft/jobs/buccaneer.py +22 -4
  11. modelcraft/jobs/comit.py +2 -0
  12. modelcraft/jobs/ctruncate.py +3 -1
  13. modelcraft/jobs/emda.py +2 -0
  14. modelcraft/jobs/findwaters.py +2 -0
  15. modelcraft/jobs/freerflag.py +2 -0
  16. modelcraft/jobs/libg.py +2 -0
  17. modelcraft/jobs/molrep.py +2 -0
  18. modelcraft/jobs/nautilus.py +28 -14
  19. modelcraft/jobs/nucleofind.py +88 -0
  20. modelcraft/jobs/parrot.py +13 -2
  21. modelcraft/jobs/phasematch.py +2 -1
  22. modelcraft/jobs/refmac.py +3 -1
  23. modelcraft/jobs/servalcat.py +38 -4
  24. modelcraft/jobs/sheetbend.py +2 -0
  25. modelcraft/modelcraftem.py +49 -6
  26. modelcraft/modelcraftxray.py +90 -42
  27. modelcraft/monlib.py +55 -52
  28. modelcraft/pdbe.py +54 -0
  29. modelcraft/pipeline.py +1 -1
  30. modelcraft/prune.py +69 -0
  31. modelcraft/reflections.py +11 -1
  32. modelcraft/scripts/contents.py +5 -215
  33. modelcraft/scripts/copies.py +26 -17
  34. modelcraft/scripts/modelcraft.py +1 -0
  35. modelcraft/scripts/sidechains.py +141 -0
  36. modelcraft/scripts/validate.py +81 -0
  37. modelcraft/sequence.py +106 -0
  38. modelcraft/solvent.py +42 -113
  39. modelcraft/structure.py +64 -41
  40. modelcraft/tests/ccp4/__init__.py +7 -11
  41. modelcraft/tests/ccp4/test_acedrg.py +2 -0
  42. modelcraft/tests/ccp4/test_arguments.py +3 -0
  43. modelcraft/tests/ccp4/test_buccaneer.py +3 -2
  44. modelcraft/tests/ccp4/test_cell.py +4 -1
  45. modelcraft/tests/ccp4/test_comit.py +2 -0
  46. modelcraft/tests/ccp4/test_contents.py +99 -17
  47. modelcraft/tests/ccp4/test_copies.py +1 -0
  48. modelcraft/tests/ccp4/test_ctruncate.py +2 -0
  49. modelcraft/tests/ccp4/test_findwaters.py +2 -0
  50. modelcraft/tests/ccp4/test_freerflag.py +2 -0
  51. modelcraft/tests/ccp4/test_libg.py +1 -0
  52. modelcraft/tests/ccp4/test_molrep.py +3 -0
  53. modelcraft/tests/ccp4/test_monlib.py +75 -45
  54. modelcraft/tests/ccp4/test_nautilus.py +5 -3
  55. modelcraft/tests/ccp4/test_nucleofind.py +62 -0
  56. modelcraft/tests/ccp4/test_parrot.py +3 -1
  57. modelcraft/tests/ccp4/test_phasematch.py +2 -0
  58. modelcraft/tests/ccp4/test_prune.py +17 -0
  59. modelcraft/tests/ccp4/test_reflections.py +110 -1
  60. modelcraft/tests/ccp4/test_refmac.py +3 -0
  61. modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
  62. modelcraft/tests/ccp4/test_servalcat.py +52 -0
  63. modelcraft/tests/ccp4/test_sheetbend.py +4 -3
  64. modelcraft/tests/ccp4/test_sidechains.py +25 -0
  65. modelcraft/tests/ccp4/test_solvent.py +12 -26
  66. modelcraft/tests/ccp4/test_structure.py +1 -0
  67. modelcraft/tests/ccp4/test_validation.py +19 -0
  68. modelcraft/tests/ccp4/test_xray.py +12 -6
  69. modelcraft/tests/ccpem/test_em.py +3 -0
  70. modelcraft/tests/ccpem/test_emda.py +2 -0
  71. modelcraft/tests/ccpem/test_refmac.py +1 -0
  72. modelcraft/tests/ccpem/test_servalcat.py +4 -3
  73. modelcraft/utils.py +16 -4
  74. modelcraft/validation.py +101 -0
  75. modelcraft-6.0.0.dist-info/METADATA +76 -0
  76. modelcraft-6.0.0.dist-info/RECORD +85 -0
  77. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
  78. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
  79. modelcraft/coot/prune.py +0 -1085
  80. modelcraft/coot/sidechains.py +0 -68
  81. modelcraft/jobs/acorn.py +0 -114
  82. modelcraft/jobs/coot.py +0 -104
  83. modelcraft/tests/ccp4/test_coot.py +0 -29
  84. modelcraft/tests/ccp4/test_geometry.py +0 -20
  85. modelcraft/tests/unittests/__init__.py +0 -0
  86. modelcraft/tests/unittests/test_reflections.py +0 -101
  87. modelcraft-5.0.2.dist-info/LICENSE +0 -504
  88. modelcraft-5.0.2.dist-info/METADATA +0 -48
  89. modelcraft-5.0.2.dist-info/RECORD +0 -82
  90. {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/coot/prune.py DELETED
@@ -1,1085 +0,0 @@
1
- import math
2
-
3
- # CONSTS
4
-
5
# Maps an upper-case element symbol to its atomic number (H = 1 ... OG = 118).
# The symbols are listed in order of increasing atomic number, one periodic
# table row (or part of a row) per line, so the position gives the number.
atomic_numbers = {
    symbol: number
    for number, symbol in enumerate(
        (
            "H HE "
            "LI BE B C N O F NE "
            "NA MG AL SI P S CL AR "
            "K CA SC TI V CR MN FE CO NI CU ZN GA GE AS SE BR KR "
            "RB SR Y ZR NB MO TC RU RH PD AG CD IN SN SB TE I XE "
            "CS BA LA CE PR ND PM SM EU GD TB DY HO ER TM YB LU "
            "HF TA W RE OS IR PT AU HG TL PB BI PO AT RN "
            "FR RA AC TH PA U NP PU AM CM BK CF ES FM MD NO LR "
            "RF DB SG BH HS MT DS RG CN NH FL MC LV TS OG"
        ).split(),
        1,
    )
}
125
-
126
# Residue names that are accepted as protein: the 20 standard amino acids
# plus MSE and UNK.
protein_residues = set(
    (
        "ALA ARG ASN ASP CYS GLN GLU GLY HIS ILE LEU "
        "LYS MET MSE PHE PRO SER THR TRP TYR UNK VAL"
    ).split()
)
150
-
151
# Atom names that are classified as main chain; note that CB is included.
# NOTE(review): runs of spaces may have been collapsed in this view of the
# file - confirm the exact padding of these names against the original.
main_chain_atoms = {" N ", " CA ", " C ", " O ", " CB "}
152
-
153
# Intra-residue bonding table: maps an atom name to the set of atom names it
# may be bonded to within the same residue. The table is symmetric by
# construction (each bond is listed under both of its atoms).
# The selenium name ("SE ") is the only one written without a leading space.
# NOTE(review): runs of spaces may have been collapsed in this view of the
# file - confirm the exact padding of these names against the original.
bonded = {
    " C ": {" CA ", " O ", " OXT"},
    " CA ": {" C ", " CB ", " N "},
    " CB ": {" CA ", " CG ", " CG1", " CG2", " OG ", " OG1", " SG "},
    " CD ": {" CE ", " CG ", " NE ", " NE2", " OE1", " OE2"},
    " CD1": {" CE1", " CG ", " CG1", " NE1"},
    " CD2": {" CE2", " CE3", " CG ", " NE2"},
    " CE ": {" CD ", " NZ ", " SD ", "SE "},
    " CE1": {" CD1", " CZ ", " ND1", " NE2"},
    " CE2": {" CD2", " CZ ", " CZ2", " NE1"},
    " CE3": {" CD2", " CZ3"},
    " CG ": {" CB ", " CD ", " CD1", " CD2", " ND1", " ND2", " OD1", " OD2", " SD ", "SE ",},
    " CG1": {" CB ", " CD1"},
    " CG2": {" CB "},
    " CH2": {" CZ2", " CZ3"},
    " CZ ": {" CE1", " CE2", " NE ", " NH1", " NH2", " OH "},
    " CZ2": {" CE2", " CH2"},
    " CZ3": {" CE3", " CH2"},
    " N ": {" CA "},
    " ND1": {" CE1", " CG "},
    " ND2": {" CG "},
    " NE ": {" CD ", " CZ "},
    " NE1": {" CD1", " CE2"},
    " NE2": {" CD ", " CD2", " CE1"},
    " NH1": {" CZ "},
    " NH2": {" CZ "},
    " NZ ": {" CE "},
    " O ": {" C "},
    " OD1": {" CG "},
    " OD2": {" CG "},
    " OE1": {" CD "},
    " OE2": {" CD "},
    " OG ": {" CB "},
    " OG1": {" CB "},
    " OH ": {" CZ "},
    " OXT": {" C "},
    " SD ": {" CE ", " CG "},
    " SG ": {" CB "},
    "SE ": {" CE ", " CG "},
}
193
-
194
- # UTILS
195
-
196
-
197
def mean(values):
    """Return the arithmetic mean of ``values`` as a float.

    Raises ``ZeroDivisionError`` when ``values`` is empty.
    """
    total = float(sum(values))
    count = len(values)
    return total / count
199
-
200
-
201
def median(values):
    """Return the median of ``values``, or ``None`` when it is empty.

    With an odd number of values the middle value itself is returned; with an
    even number the result is the sum of the two middle values divided by 2.0.
    The input is not modified.
    """
    if len(values) < 1:
        return None
    ordered = sorted(values)
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    return sum(ordered[middle - 1 : middle + 1]) / 2.0
209
-
210
-
211
def median_absolute_deviation(values):
    """Return the median of the absolute deviations of ``values`` from their median."""
    centre = median(values)
    return median([abs(item - centre) for item in values])
215
-
216
-
217
def dot(xyz1, xyz2):
    """Return the dot product of the first three components of two vectors."""
    ax, ay, az = xyz1[0], xyz1[1], xyz1[2]
    bx, by, bz = xyz2[0], xyz2[1], xyz2[2]
    return ax * bx + ay * by + az * bz
219
-
220
-
221
def cross(xyz1, xyz2):
    """Return the cross product ``xyz1 x xyz2`` as a new 3-element list."""
    ax, ay, az = xyz1[0], xyz1[1], xyz1[2]
    bx, by, bz = xyz2[0], xyz2[1], xyz2[2]
    return [
        ay * bz - az * by,
        az * bx - ax * bz,
        ax * by - ay * bx,
    ]
227
-
228
-
229
def magnitude(xyz):
    """Return the Euclidean length of the first three components of ``xyz``."""
    squared_length = xyz[0] ** 2 + xyz[1] ** 2 + xyz[2] ** 2
    return squared_length ** 0.5
231
-
232
-
233
def unit(xyz):
    """Return ``xyz`` scaled to unit length as a new 3-element list.

    Raises ``ZeroDivisionError`` for a zero-length vector.
    """
    norm = magnitude(xyz)
    return [component / norm for component in (xyz[0], xyz[1], xyz[2])]
236
-
237
-
238
def subtract(xyz1, xyz2):
    """Return the component-wise difference ``xyz1 - xyz2`` as a 3-element list."""
    return [xyz1[axis] - xyz2[axis] for axis in range(3)]
240
-
241
-
242
def distance(xyz1, xyz2):
    """Return the Euclidean distance between two points."""
    return magnitude(subtract(xyz1, xyz2))
245
-
246
-
247
def angle(xyz1, xyz2, xyz3):
    """Return the angle in degrees at ``xyz2`` between ``xyz1`` and ``xyz3``.

    The result lies in the range [0, 180].

    Raises ``ZeroDivisionError`` if ``xyz2`` coincides with ``xyz1`` or
    ``xyz3`` (one of the two vectors then has zero length).
    """
    v1 = subtract(xyz2, xyz1)
    v2 = subtract(xyz2, xyz3)
    cosine = dot(v1, v2) / (magnitude(v1) * magnitude(v2))
    # Floating-point rounding can leave the cosine of (anti)parallel vectors
    # marginally outside [-1, 1] (e.g. -1.0000000000000002), for which
    # math.acos raises "ValueError: math domain error". Clamp it first.
    cosine = max(-1.0, min(1.0, cosine))
    return math.degrees(math.acos(cosine))
252
-
253
-
254
def torsion(xyz1, xyz2, xyz3, xyz4):
    """Return the torsion (dihedral) angle in degrees defined by four points.

    The angle is measured about the ``xyz2`` -> ``xyz3`` axis and lies in the
    range [-180, 180].

    Raises ``ZeroDivisionError`` if ``xyz2`` and ``xyz3`` coincide, because
    the axis vector cannot be normalised.
    """
    b1 = subtract(xyz2, xyz1)
    b2 = subtract(xyz3, xyz2)
    b3 = subtract(xyz4, xyz3)
    # Normals of the planes (xyz1, xyz2, xyz3) and (xyz2, xyz3, xyz4).
    n1 = cross(b1, b2)
    n2 = cross(b2, b3)
    m1 = cross(n1, n2)
    y = dot(m1, unit(b2))
    x = dot(n1, n2)
    # math.atan2 is bounded to [-pi, pi], so the result is already within
    # [-180, 180] and needs no wrapping.
    return math.degrees(math.atan2(y, x))
267
-
268
-
269
def halfway(xyz1, xyz2):
    """Return the midpoint of two points as a new 3-element list."""
    return [(xyz1[axis] + xyz2[axis]) / 2 for axis in range(3)]
274
-
275
-
276
def attached_atoms(atom, residue):
    """Yield the atoms that ``atom`` is bonded to.

    The peptide-bond partner in the neighbouring residue is yielded first
    (the previous residue's C for an N atom, the next residue's N for a C
    atom), followed by every partner listed in ``bonded`` that is present in
    ``residue``.

    Raises ``KeyError`` if ``atom.name`` has no entry in ``bonded``.
    """
    name = atom.name
    previous = residue.prev
    following = residue.next
    if previous is not None and name == " N ":
        yield previous.atoms[" C "]
    if following is not None and name == " C ":
        yield following.atoms[" N "]
    present = residue.atoms
    for partner in bonded[name]:
        if partner in present:
            yield present[partner]
284
-
285
-
286
- # MACHINE LEARNING
287
-
288
# Parameters of the two correctness predictors: "main" (main chain) and
# "side" (side chain). For each predictor:
#   "medians"               - substituted for features that are None
#   "scaler.mean_"          - subtracted from each feature
#   "scaler.scale_"         - each centred feature is divided by this
#   "regressor.coefs_"      - one weight matrix per layer, indexed
#                             [layer][input][output]
#   "regressor.intercepts_" - one bias vector per layer
# "main" has 12 features, "side" has 9; both have one hidden layer of 10
# units and a single output.
# NOTE(review): the key names look like scikit-learn StandardScaler /
# MLPRegressor attributes - presumably exported from a fitted model; confirm.
training_data = {
    "main": {
        "medians": [
            0.8093730211257935,
            0.0,
            0.3797053494361412,
            0.2948472222222224,
            1.1603445955372607,
            0.07967734307025365,
            -1.04405097291337,
            -0.7666229846405788,
            0.09301174988214152,
            0.2074185971626753,
            1.9998366832733154,
            7.473087887125843,
        ],
        "scaler.mean_": [
            0.7635879979927991,
            0.0119850162929945,
            0.47070085149410484,
            0.8779728019083975,
            1.4010333429538593,
            -0.07809440994354622,
            -1.1634877907354524,
            -0.9231595065864178,
            0.06630610233126723,
            0.7376180259790496,
            1.9333955416970983,
            9.00267277208097,
        ],
        "scaler.scale_": [
            0.15510631635324296,
            0.10881808524983969,
            0.49424003745942746,
            2.1942551160797694,
            1.298509344613297,
            0.8836945495954351,
            0.9345174344061477,
            0.9874691652931548,
            1.2037765763286472,
            1.1729051296356117,
            0.42936735360011963,
            6.84475636137809,
        ],
        "regressor.coefs_": [
            [
                [
                    -0.13040818548223823,
                    -0.22295218610758016,
                    -0.16258080220962234,
                    0.5988690897292985,
                    0.29886699802544087,
                    -0.2138536188789283,
                    0.17640972642170277,
                    0.8323631686040229,
                    0.8080535193667213,
                    -0.64199269078319,
                ],
                [
                    0.3371557445052779,
                    -0.021990355769565446,
                    0.3393433644319566,
                    0.7031031102737114,
                    -1.5952677749631456,
                    -0.6291226390140611,
                    -0.16470505242375907,
                    0.8959152619464158,
                    0.8165087694148113,
                    1.3192349998470725,
                ],
                [
                    -0.0019874340519655917,
                    0.04016420712930254,
                    -0.0016519722530829112,
                    -0.05753723016959708,
                    -1.0970634376604886,
                    0.13781915021293673,
                    -0.043726103959200804,
                    0.010119141446418834,
                    -0.09415567058366356,
                    0.10675615201134665,
                ],
                [
                    -0.15553908141483186,
                    0.7815309718136879,
                    -0.58301702342861,
                    -0.3074194496122189,
                    -0.708058122322275,
                    0.01629181598284882,
                    -0.17713645375637574,
                    -0.5780953291256867,
                    -0.08330249685202995,
                    -0.046797521299526336,
                ],
                [
                    -0.16382881259387705,
                    0.1590019158219002,
                    -0.15271138566338957,
                    0.004631275745636504,
                    0.14085902736133887,
                    0.044487235857714004,
                    -0.22845244910340506,
                    0.03361790942261084,
                    -0.09431223857684323,
                    0.1894309630932748,
                ],
                [
                    0.4600605680220205,
                    1.0475854981548196,
                    0.28528470975660497,
                    -0.07122257966372195,
                    -0.17193073732931968,
                    -0.563441753208365,
                    -0.6552700463525175,
                    -0.08483854601112313,
                    -0.17645291495517423,
                    -0.44659761042818563,
                ],
                [
                    -0.32437585347959863,
                    -0.40621672156363603,
                    -0.22786021141673057,
                    -0.006696637890890252,
                    0.5191491920623607,
                    0.16595743564965426,
                    0.27911102055393494,
                    0.2640726988201516,
                    -0.0016347547075830314,
                    0.24770754766817038,
                ],
                [
                    0.3728665898401538,
                    0.07700714533797552,
                    0.2311708633268853,
                    -0.1401187037969537,
                    0.30234045086128297,
                    -0.24458025060957192,
                    0.4302767405048504,
                    -0.3703955591237981,
                    0.3484976393563264,
                    -0.5594717372377868,
                ],
                [
                    0.00937493962029786,
                    -0.04694394839682417,
                    -0.028893614058871878,
                    0.27141499045973605,
                    -0.07941767649526653,
                    -0.03911716545463574,
                    -0.12103549705527984,
                    0.2148437699515711,
                    0.08720611510332744,
                    -0.13302591593197644,
                ],
                [
                    0.16013205898626096,
                    0.35382721838123166,
                    0.05844089574619886,
                    -0.2090590614354888,
                    0.06453171825149609,
                    -1.172220172769279,
                    -0.14500654761975515,
                    -0.35695026385344086,
                    0.7073129963007073,
                    -1.1972037303520695,
                ],
                [
                    0.12999852919812566,
                    -0.37003217528080845,
                    0.3596837609653873,
                    0.30447584223950475,
                    -0.5844046567044845,
                    1.099321801469266,
                    0.28973479295322985,
                    0.6123382299120337,
                    -0.8254996440849733,
                    0.3860328709153097,
                ],
                [
                    -0.013864646575082658,
                    0.33883378654775376,
                    -0.0432656081821551,
                    -0.03831855907564628,
                    -0.17009170626798892,
                    0.09755681267845129,
                    -0.14149706478955423,
                    0.039883119397616046,
                    -0.11814265819588236,
                    0.13867097327343803,
                ],
            ],
            [
                [0.4113990249648804],
                [-0.1635521743014414],
                [-0.3233742954955824],
                [-0.2832881263154175],
                [0.10184600411275878],
                [0.26297418395412697],
                [-0.38667783069289213],
                [0.18806411489930078],
                [0.2650096089001783],
                [-0.3432819700082653],
            ],
        ],
        "regressor.intercepts_": [
            [
                0.05426873271484922,
                -0.17185888723743994,
                0.49670571655441914,
                -0.26895026535845296,
                1.2489563308304976,
                -0.21430979896458663,
                -1.0229946977640407,
                0.43986781805151526,
                0.32517336900734384,
                -1.1981611190470425,
            ],
            [0.2009473880274795],
        ],
    },
    "side": {
        "medians": [
            0.7128432393074036,
            0.3930308684784101,
            0.12138492565055738,
            0.7697625388342151,
            -0.01155913420195338,
            -0.7577415671212863,
            -0.5053093547058974,
            1.9498454332351685,
            36.654449462890625,
        ],
        "scaler.mean_": [
            0.6699939922392967,
            0.4861278093279958,
            0.5598238115764408,
            1.0098633759640239,
            -0.09092168866565035,
            -0.8734938716432699,
            -0.5939212054114583,
            1.903840522636135,
            41.034051026184834,
        ],
        "scaler.scale_": [
            0.1922931323119439,
            0.5148420550064197,
            1.547688727501242,
            1.6216833496523257,
            0.8535355005798227,
            1.063733997556021,
            0.946219729526465,
            0.42543155678705524,
            29.99679803225501,
        ],
        "regressor.coefs_": [
            [
                [
                    0.056302872646141214,
                    -0.42725673599695374,
                    0.23328615715807235,
                    -0.25692317829492684,
                    0.116828236758043,
                    1.2384242768052554,
                    0.6748323891517939,
                    0.14291234817220463,
                    0.21380028409294824,
                    0.07679070847654368,
                ],
                [
                    -0.46153688659142184,
                    -0.004544981320490466,
                    -0.2964031182985578,
                    0.09456605767558321,
                    -0.06435341411291445,
                    0.12673028609583875,
                    -0.2719367005541086,
                    0.29679974643773216,
                    0.11486700383265894,
                    0.23185475607178951,
                ],
                [
                    0.33853298205315957,
                    0.9079813683093967,
                    0.09346553254580311,
                    -0.025784490716986762,
                    -0.3475803251546684,
                    -0.04639625178653906,
                    -0.500881871296081,
                    0.7809152729815941,
                    0.406203973798232,
                    -0.3284891104603179,
                ],
                [
                    -0.2088543439928225,
                    0.47556925150407825,
                    -0.19039046477157073,
                    0.20421072806892146,
                    -0.23471331429141476,
                    -0.08725373460798075,
                    -0.0811783537165059,
                    0.4593053196175982,
                    0.4351302763377739,
                    0.11837686827471676,
                ],
                [
                    -0.32138852798617806,
                    0.11322678209797299,
                    -0.07514559826050492,
                    0.13497722508180546,
                    0.2308583983764714,
                    0.11180021860598335,
                    -0.6957881978609368,
                    0.0005190646203973872,
                    -0.2880602141275108,
                    -0.13675732785562053,
                ],
                [
                    0.06423107339757392,
                    0.08120627709915425,
                    0.29375027114694363,
                    -0.15903515362475384,
                    -0.038018112519918414,
                    0.2893167384125542,
                    0.14378196316887357,
                    0.04488068580666965,
                    0.1342358964548304,
                    0.00855401074520736,
                ],
                [
                    -0.06837070927792858,
                    -0.45192674269454314,
                    0.059932235100582196,
                    -0.15614805983481528,
                    -0.3859760254395101,
                    -0.3429573286705649,
                    0.6838364312078055,
                    -0.347976240289938,
                    0.049432609557722565,
                    -0.28055309729723454,
                ],
                [
                    0.6431046107009092,
                    0.49740363903294654,
                    0.2748436188374371,
                    -0.18326862373951083,
                    -0.2069737876487165,
                    -0.2999102167712396,
                    -0.1590891264716532,
                    0.13233313109972625,
                    -0.030493374720953297,
                    -0.324131016869104,
                ],
                [
                    -0.44478515534248564,
                    -0.22066143889313278,
                    -0.31968023354915465,
                    0.025341827810811048,
                    -0.08457818649830318,
                    0.16910787319016476,
                    0.38203483276154415,
                    -0.10511631297976766,
                    0.08750071031831075,
                    0.2747264545695668,
                ],
            ],
            [
                [0.10508025998686506],
                [-0.2691991244124654],
                [0.21154928243402793],
                [0.6059699335423194],
                [-0.3520197843417094],
                [0.1473370411925756],
                [0.3586311702546054],
                [0.1428546809034874],
                [-0.2921184613487701],
                [0.20088114454313125],
            ],
        ],
        "regressor.intercepts_": [
            [
                0.5652480230183061,
                -0.14461022167946497,
                0.0734880322671333,
                0.774381965871222,
                -0.5810353861392875,
                -0.6447073648626959,
                0.6469258828055443,
                -0.8684424314182713,
                0.42510685510971963,
                -0.0666156988876174,
            ],
            [0.12043980294087006],
        ],
    },
}
683
-
684
-
685
def main_features(model, res):
    """Return the 12 main-chain feature values for residue ``res``.

    The order of the returned list is significant: it has to line up with
    the per-feature medians, means and scales that are applied to it.
    The next residue's CA difference z-score is ``None`` when there is no
    next residue; other entries may also be ``None`` if they were never set.
    """
    backbone = res.main_chain_atoms
    density_zscores = [atom.density_zscore for atom in backbone]
    following = res.next
    if following is None:
        next_ca_diff = None
    else:
        next_ca_diff = following.atoms[" CA "].diff_zscore
    return [
        res.main_chain_correlation,
        1 if res.has_pepflip_peak else 0,
        max(atom.max_overlap for atom in backbone),
        max(atom.bfactor_zscore for atom in backbone),
        max(atom.bchange_zscore for atom in backbone),
        mean(density_zscores),
        min(atom.diff_zscore for atom in backbone),
        min(density_zscores),
        next_ca_diff,
        res.ramachandran_score,
        model.resolution,
        res.twistedness,
    ]
700
-
701
-
702
def side_features(model, res):
    """Return the 9 side-chain feature values for residue ``res``.

    The order of the returned list is significant: it has to line up with
    the per-feature medians, means and scales that are applied to it.
    ``res`` must have at least one side-chain atom.
    """
    side_chain = res.side_chain_atoms
    density_zscores = [atom.density_zscore for atom in side_chain]
    return [
        res.side_chain_correlation,
        max(atom.max_overlap for atom in side_chain),
        max(atom.bfactor_zscore for atom in side_chain),
        max(atom.bchange_zscore for atom in side_chain),
        mean(density_zscores),
        min(atom.diff_zscore for atom in side_chain),
        min(density_zscores),
        model.resolution,
        res.rotamer_score,
    ]
714
-
715
-
716
def add_medians(X, medians):
    """Return a copy of ``X`` with every ``None`` replaced by the median at the same index."""
    return [medians[index] if value is None else value for index, value in enumerate(X)]
718
-
719
-
720
def scale(X, means, scales):
    """Return ``X`` standardised element-wise as ``(x - mean) / scale``."""
    return [(value - means[index]) / scales[index] for index, value in enumerate(X)]
722
-
723
-
724
def predict(X, coefs, intercepts):
    """Evaluate a small fully connected network and return its first output.

    X: input feature values
    coefs: one weight matrix per layer, indexed ``coefs[layer][input][output]``
    intercepts: one bias vector per layer

    Every layer except the last applies ``tanh`` to its outputs; the last
    layer is left linear. With no layers at all the first input is returned.
    """
    activations = list(X)
    last_layer = len(coefs) - 1
    for layer in range(len(coefs)):
        biases = intercepts[layer]
        outputs = list(biases)
        for j in range(len(biases)):
            # Accumulate onto the bias in input order.
            for k in range(len(activations)):
                outputs[j] += activations[k] * coefs[layer][k][j]
            if layer < last_layer:
                outputs[j] = math.tanh(outputs[j])
        activations = outputs
    return activations[0]
736
-
737
-
738
- # ATOM / RESIDUE / CHAIN / MODEL
739
-
740
-
741
class Atom:
    """One atom of a residue together with its map density values."""

    def __init__(self, model, residue, atom_info):
        """Build the atom from one ``atom_info`` record.

        ``atom_info`` is indexed as ``[[name, alt_conf],
        [occupancy, bfactor, element], point]``. Densities are sampled at
        ``point`` from ``model.imap`` and ``model.imap_diff`` and are also
        stored divided by the atomic number. ``residue`` is not used.

        Raises ``KeyError`` if the element is not in ``atomic_numbers``.
        """
        labels = atom_info[0]
        properties = atom_info[1]

        self.name = labels[0]
        self.alt_conf = labels[1]

        self.occupancy = properties[0]
        self.bfactor = properties[1]
        self.element = properties[2].strip()
        self.atomic_number = atomic_numbers[self.element]

        on_main_chain = self.name in main_chain_atoms
        self.is_main_chain = on_main_chain
        self.is_side_chain = not on_main_chain

        self.point = atom_info[2]

        self.density = density_at_point(model.imap, *self.point)
        self.density_norm = self.density / self.atomic_number
        self.diff_density = density_at_point(model.imap_diff, *self.point)
        self.diff_norm = self.diff_density / self.atomic_number

        # Updated later with the largest overlap volume found for this atom.
        self.max_overlap = 0
757
-
758
-
759
class Residue:
    """One protein residue, its atoms and its per-residue metrics."""

    def __init__(self, model, spec, name):
        """Load the residue identified by ``spec`` from ``model.imol``.

        ``spec`` is ``[chain, resno, ins_code]``. Every atom that is created
        is also appended to ``model.atoms``. If two atoms share a name, the
        later one replaces the earlier one in ``self.atoms`` (both remain in
        ``model.atoms``).
        """
        chain_id, resno, ins_code = spec[0], spec[1], spec[2]
        self.spec = spec
        self.name = name
        self.chain = chain_id
        self.resno = resno
        self.ins_code = ins_code

        # Neighbours and geometry scores are filled in later by the model.
        self.next = None
        self.prev = None
        self.twistedness = None
        self.ramachandran_score = None
        self.rotamer_score = rotamer_score(model.imol, chain_id, resno, ins_code, "")

        self.atoms = {}
        for atom_info in residue_info_py(model.imol, *spec):
            new_atom = Atom(model, self, atom_info)
            model.atoms.append(new_atom)
            self.atoms[new_atom.name] = new_atom

        main_chain = []
        side_chain = []
        for member in self.atoms.values():
            if member.is_main_chain:
                main_chain.append(member)
            if member.is_side_chain:
                side_chain.append(member)
        self.main_chain_atoms = main_chain
        self.side_chain_atoms = side_chain

        # A side chain can only be removed if there is one.
        self.truncatable = len(side_chain) > 0
779
-
780
-
781
class Chain:
    """Residues of one chain and their main-chain correctness scores."""

    def __init__(self):
        """Start with no residues and no correctness scores."""
        self.residues, self.correctnesses = [], []
785
-
786
-
787
class Model:
    """Protein residues of one molecule annotated with validation metrics.

    The molecule and maps are read through functions that are not defined in
    this file (``n_chains``, ``residue_name``, ``data_resolution`` and so
    on) - presumably the Coot scripting API available as globals; confirm.
    """

    def __init__(self, imol, imap, imap_diff):
        """Load the protein residues of ``imol`` and compute every metric.

        imol: molecule handle passed to the external model functions
        imap: map handle used for densities, correlations and the resolution
        imap_diff: map handle used for difference densities and peak search
        """
        self.imol = imol
        self.imap = imap
        self.imap_diff = imap_diff
        self.resolution = data_resolution(self.imap)
        self.residues = []
        # Lookup from (chain, resno, ins_code) to the Residue object.
        self.residue_spec_dict = {}
        self.atoms = []
        for ichain in range(n_chains(imol)):
            chain_id = chain_id_py(imol, ichain)
            # NOTE(review): argument order (chain_id, imol) differs from the
            # other calls; presumably it matches the external API - confirm.
            for serial_num in range(chain_n_residues(chain_id, imol)):
                resno = seqnum_from_serial_number(imol, chain_id, serial_num)
                ins_code = insertion_code_from_serial_number(imol, chain_id, serial_num)
                spec = [chain_id, resno, ins_code]
                name = residue_name(imol, *spec)
                # Anything that is not a known protein residue is ignored.
                if name in protein_residues:
                    residue = Residue(self, spec, name)
                    self.residues.append(residue)
                    self.residue_spec_dict[tuple(spec)] = residue
        # The passes below build on each other: z-scores need the B-factor
        # changes, peptide-flip peaks need the connections, correctness
        # needs every feature, and chains collect the correctness scores.
        self.set_connections()
        self.set_correlations()
        self.set_bond_changes()
        self.set_zscores()
        self.set_ramachandran()
        self.set_overlaps()
        self.set_pepflip_peaks()
        self.set_correctness()
        self.set_chains()

    def set_connections(self):
        """Link consecutive residues that are peptide bonded and set twistedness.

        Two residues that are adjacent in ``self.residues`` are linked when
        the C of the first is within 1.7 of the N of the second. The
        twistedness of the bond is how far the omega torsion is from planar
        (0 or 180 degrees); each residue keeps the larger value of its bonds.
        """
        for i in range(len(self.residues) - 1):
            res1 = self.residues[i]
            res2 = self.residues[i + 1]
            if " CA " not in res1.atoms or " C " not in res1.atoms:
                continue
            if " N " not in res2.atoms or " CA " not in res2.atoms:
                continue
            point1 = res1.atoms[" C "].point
            point2 = res2.atoms[" N "].point
            if distance(point1, point2) < 1.7:
                res1.next = res2
                res2.prev = res1
                xyz1 = res1.atoms[" CA "].point
                xyz2 = res1.atoms[" C "].point
                xyz3 = res2.atoms[" N "].point
                xyz4 = res2.atoms[" CA "].point
                omega = abs(torsion(xyz1, xyz2, xyz3, xyz4))
                twistedness = min(omega, 180 - omega)
                # NOTE(review): omega is an absolute value, so this guard
                # only fires if torsion() returns a magnitude above 180 -
                # it looks unreachable; confirm.
                if twistedness < 0:
                    raise Exception("Negative twistedness between %s and %s" % (res1.spec, res2.spec))
                res1.twistedness = max(twistedness, 0 if res1.twistedness is None else res1.twistedness)
                res2.twistedness = max(twistedness, 0 if res2.twistedness is None else res2.twistedness)

    def set_correlations(self):
        """Set ``main_chain_correlation`` and ``side_chain_correlation`` on residues."""
        # NOTE(review): 1 and 3 are atom-mask codes of the external
        # correlation function; presumably main chain and side chain - confirm.
        self.set_correlation("main_chain_correlation", 1)
        self.set_correlation("side_chain_correlation", 3)

    def set_bond_changes(self):
        """Set ``bchange`` on every atom.

        ``bchange`` is the largest relative B-factor increase of the atom
        over any of the atoms attached to it.

        Raises ``Exception`` if an atom has no attached atoms.
        """
        for residue in self.residues:
            for atom in residue.atoms.values():
                # Sentinel that is replaced as soon as one attached atom is seen.
                atom.bchange = -99999
                for attached in attached_atoms(atom, residue):
                    # NOTE(review): an attached atom with a B-factor of 0
                    # raises ZeroDivisionError here.
                    bchange = (atom.bfactor - attached.bfactor) / attached.bfactor
                    atom.bchange = max(bchange, atom.bchange)
                if atom.bchange == -99999:
                    raise Exception("Residue %s atom %s is not bonded to anything" % (residue.spec, atom.name))

    def set_correlation(self, attr, mask):
        """Store the per-residue map correlation for ``mask`` as attribute ``attr``."""
        specs = [residue.spec for residue in self.residues]
        for correlation in map_to_model_correlation_per_residue_py(self.imol, specs, mask, self.imap):
            # The first element of the returned spec is skipped; the rest is
            # the (chain, resno, ins_code) key.
            residue = self.residue_spec_dict[tuple(correlation[0][1:])]
            setattr(residue, attr, correlation[1])

    # Uses a modified Z-score calculated using medians
    # Boris Iglewicz and David Hoaglin
    # Volume 16: How to Detect and Handle Outliers
    # The ASQC Basic References in Quality Control: Statistical Techniques
    # Edited by Edward F. Mykytka, Ph.D.
    # Page 11
    # 1993
    def set_zscores(self):
        """Set modified z-scores for density, difference density, B-factor and bchange.

        Main-chain and side-chain atoms are scored against separate medians
        and median absolute deviations (MAD). The score is 0 when the MAD is 0.
        """

        def set_zscore(attr, zscore_attr):
            """Store the modified z-score of ``attr`` as ``zscore_attr`` on every atom."""
            main_values = [getattr(atom, attr) for atom in self.atoms if atom.is_main_chain]
            side_values = [getattr(atom, attr) for atom in self.atoms if atom.is_side_chain]
            main_median = median(main_values)
            side_median = median(side_values)
            main_mad = median_absolute_deviation(main_values)
            side_mad = median_absolute_deviation(side_values)
            for residue in self.residues:
                for atom in residue.atoms.values():
                    value = getattr(atom, attr)
                    if atom.is_main_chain:
                        zscore = 0 if main_mad == 0 else 0.6745 * (value - main_median) / main_mad
                    else:
                        zscore = 0 if side_mad == 0 else 0.6745 * (value - side_median) / side_mad
                    setattr(atom, zscore_attr, zscore)

        set_zscore("density_norm", "density_zscore")
        set_zscore("diff_norm", "diff_zscore")
        set_zscore("bfactor", "bfactor_zscore")
        set_zscore("bchange", "bchange_zscore")

    def set_ramachandran(self):
        """Set ``ramachandran_score`` on the residues that have one."""
        # NOTE(review): element 5 of the external result is taken to be the
        # per-residue list - confirm against the API documentation.
        for item in all_molecule_ramachandran_score_py(self.imol)[5]:
            spec = item[1][1:]
            if tuple(spec) in self.residue_spec_dict:
                residue = self.residue_spec_dict[tuple(spec)]
                residue.ramachandran_score = item[2]

    def set_overlaps(self):
        """Set ``max_overlap`` on each atom to its largest reported overlap volume."""
        for overlap in molecule_atom_overlaps_py(self.imol):
            # Each overlap involves two atoms; update both if they are known.
            for n in (1, 2):
                spec = tuple(overlap["atom-%d-spec" % n][1:4])
                if spec in self.residue_spec_dict:
                    atom_name = overlap["atom-%d-spec" % n][4]
                    atom = self.residue_spec_dict[spec].atoms[atom_name]
                    atom.max_overlap = max(atom.max_overlap, overlap["overlap-volume"])

    def set_pepflip_peaks(self):
        """Set ``has_pepflip_peak`` on every residue.

        A residue is flagged when a difference-map peak lies within fixed
        distance and angle limits of its own or the previous peptide.
        """
        # NOTE(review): 4.62567528 is the peak threshold passed to the
        # external peak search; its units are not visible here - confirm.
        peaks = map_peaks_around_molecule_py(self.imap_diff, 4.62567528, False, self.imol)

        def has_o_moving_peak(residue):
            """Return True if a peak fits the limits around this residue's C, O and CA atoms."""
            if residue.next is None:
                return False
            if " O " not in residue.atoms:
                return False
            ca1 = residue.atoms[" CA "].point
            c = residue.atoms[" C "].point
            o = residue.atoms[" O "].point
            ca2 = residue.next.atoms[" CA "].point
            for peak in peaks:
                point = peak[1]
                cd = distance(point, c)
                if cd < 1.09769614 or cd > 3.22217608:
                    continue
                if angle(o, c, point) < 75.45064459:
                    continue
                ca1d = distance(point, ca1)
                if ca1d < 0.66596873 or ca1d > 2.8910501:
                    continue
                ca2d = distance(point, ca2)
                if ca2d < 2.23908466 or ca2d > 3.84813939:
                    continue
                return True
            return False

        def has_n_moving_peak(residue):
            """Return True if the previous residue is flagged and a peak lies near its O."""
            # Relies on the previous residue having been processed already.
            if residue.prev is None or not residue.prev.has_pepflip_peak:
                return False
            if " O " not in residue.prev.atoms:
                return False
            o = residue.prev.atoms[" O "].point
            ca = residue.atoms[" CA "].point
            for peak in peaks:
                point = peak[1]
                if distance(point, o) > 0.52516368:
                    continue
                if distance(point, ca) > 2.76397302:
                    continue
                return True
            return False

        for residue in self.residues:
            residue.has_pepflip_peak = has_o_moving_peak(residue) or has_n_moving_peak(residue)

    def set_correctness(self):
        """Predict ``main_chain_correctness`` and ``side_chain_correctness``.

        Features are filled with medians where missing, standardised and
        passed to the predictor described by ``training_data``. The
        side-chain score is only set on residues that have a side chain.
        Nothing is set when ``training_data`` is empty.
        """
        if len(training_data) == 0:
            return
        for res in self.residues:
            X_main = main_features(self, res)
            X_main = add_medians(X_main, training_data["main"]["medians"])
            X_main = scale(X_main, training_data["main"]["scaler.mean_"], training_data["main"]["scaler.scale_"],)
            res.main_chain_correctness = predict(
                X_main, training_data["main"]["regressor.coefs_"], training_data["main"]["regressor.intercepts_"],
            )
            if res.truncatable:
                X_side = side_features(self, res)
                X_side = add_medians(X_side, training_data["side"]["medians"])
                X_side = scale(X_side, training_data["side"]["scaler.mean_"], training_data["side"]["scaler.scale_"],)
                res.side_chain_correctness = predict(
                    X_side, training_data["side"]["regressor.coefs_"], training_data["side"]["regressor.intercepts_"],
                )

    def set_chains(self):
        """Group residues by chain ID and set each chain's mean correctness.

        ``chain.correctness`` is only set when at least one residue of the
        chain has a main-chain correctness score.
        """
        self.chains = {}
        for residue in self.residues:
            chain_id = residue.chain
            if chain_id not in self.chains:
                self.chains[chain_id] = Chain()
            chain = self.chains[chain_id]
            chain.residues.append(residue)
            if hasattr(residue, "main_chain_correctness"):
                chain.correctnesses.append(residue.main_chain_correctness)
        for chain in self.chains.values():
            if len(chain.correctnesses) > 0:
                chain.correctness = mean(chain.correctnesses)
984
-
985
-
986
- # SCRIPTING
987
-
988
-
989
def prune(
    imol,
    imap,
    imap_diff,
    chains=True,
    chain_threshold="auto",
    max_chain_fraction=0.2,
    max_chain_length=20,
    residues=True,
    residue_threshold="auto",
    max_residue_fraction=0.2,
    remove_isolated_residues=True,
    sidechains=True,
    sidechain_threshold="auto",
    max_sidechain_fraction=0.2,
):
    """Delete poorly scoring chains, residues and side chains from a molecule.

    The molecule is scored once and then pruned in three optional stages,
    each of which works on what the previous stage left behind:

    1. chains: whole chains of at most ``max_chain_length`` residues whose
       mean main-chain correctness is below ``chain_threshold``
    2. residues: residues whose main-chain correctness is below
       ``residue_threshold`` (worst first) and, if
       ``remove_isolated_residues`` is set, residues left without a
       surviving neighbour on either side
    3. sidechains: side chains whose correctness is below
       ``sidechain_threshold`` (worst first)

    A threshold of "auto" is derived from the median score: 0.2 times the
    median for chains and 0.5 times the median for residues and side chains.
    Each stage deletes at most its ``max_*_fraction`` of the candidates
    (residues removed only for being isolated are not counted).

    imol, imap, imap_diff: molecule, map and difference-map handles.
    Progress is printed with the prefix "ML_CORRECTNESS:". Returns ``None``.
    """
    model = Model(imol, imap, imap_diff)

    if len(model.chains) < 1:
        return

    if chains:
        main_median = median([r.main_chain_correctness for r in model.residues])
        if chain_threshold == "auto":
            chain_threshold = main_median * 0.2
        print(
            "ML_CORRECTNESS: Deleting chains (up to %d residues long) with scores < %.3f"
            % (max_chain_length, chain_threshold)
        )
        print("ML_CORRECTNESS: Up to %.0f%% of residues will be deleted" % (max_chain_fraction * 100))
        max_deleted = len(model.residues) * max_chain_fraction
        deleted = 0
        remaining = []
        for chain_id in sorted(model.chains):
            chain = model.chains[chain_id]
            if (
                chain.correctness < chain_threshold
                and len(chain.residues) <= max_chain_length
                and deleted + len(chain.residues) <= max_deleted
            ):
                deleted += len(chain.residues)
                delete_chain(imol, chain_id)
            else:
                remaining.extend(chain.residues)
        print("ML_CORRECTNESS: Deleted %.0f%% of residues" % (float(deleted) / len(model.residues) * 100))
    else:
        remaining = list(model.residues)

    if len(remaining) < 1:
        return

    if residues:
        main_median = median([r.main_chain_correctness for r in remaining])
        if residue_threshold == "auto":
            residue_threshold = main_median * 0.5
        print("ML_CORRECTNESS: Deleting residues with scores < %.3f" % residue_threshold)
        print("ML_CORRECTNESS: Up to %.0f%% of residues will be deleted" % (max_residue_fraction * 100))
        max_deleted = len(remaining) * max_residue_fraction
        deleted = 0
        # Worst residues first so the cap is spent on the lowest scores.
        remaining.sort(key=lambda r: r.main_chain_correctness)
        for residue in remaining:
            residue.delete = False
            if residue.main_chain_correctness < residue_threshold and deleted + 1 <= max_deleted:
                deleted += 1
                residue.delete = True
        if remove_isolated_residues:

            def _is_gone(neighbour):
                # A neighbour without a "delete" flag is not in `remaining`,
                # i.e. it was removed with its chain, so it counts as deleted
                # (previously this raised AttributeError).
                return neighbour is None or getattr(neighbour, "delete", True)

            for residue in remaining:
                if _is_gone(residue.prev) and _is_gone(residue.next):
                    residue.delete = True
        for residue in remaining:
            if residue.delete:
                delete_residue(imol, residue.chain, residue.resno, residue.ins_code)
        print("ML_CORRECTNESS: Deleted %.0f%% of residues" % (float(deleted) / len(remaining) * 100))
        remaining = [r for r in remaining if not r.delete]

    # Only residues that still have a side chain can be truncated.
    remaining = [r for r in remaining if r.truncatable]
    if len(remaining) < 1:
        return

    if sidechains:
        side_median = median([r.side_chain_correctness for r in model.residues if r.truncatable])
        if sidechain_threshold == "auto":
            sidechain_threshold = side_median * 0.5
        print("ML_CORRECTNESS: Deleting sidechains with scores < %.3f" % sidechain_threshold)
        print("ML_CORRECTNESS: Up to %.0f%% of sidechains will be deleted" % (max_sidechain_fraction * 100))
        max_deleted = len(remaining) * max_sidechain_fraction
        deleted = 0
        remaining.sort(key=lambda r: r.side_chain_correctness)
        for residue in remaining:
            # "<=" matches the chain and residue stages: the cap is inclusive
            # ("up to"). The previous "<" stopped one deletion short.
            if residue.side_chain_correctness < sidechain_threshold and deleted + 1 <= max_deleted:
                deleted += 1
                delete_residue_sidechain(imol, residue.chain, residue.resno, residue.ins_code, 0)
        print("ML_CORRECTNESS: Deleted %.0f%% of sidechains" % (float(deleted) / len(remaining) * 100))