servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +1162 -0
- servalcat/refine/refine_geom.py +245 -0
- servalcat/refine/refine_spa.py +400 -0
- servalcat/refine/refine_xtal.py +339 -0
- servalcat/refine/spa.py +151 -0
- servalcat/refine/xtal.py +312 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +191 -0
- servalcat/refmac/refmac_keywords.py +660 -0
- servalcat/refmac/refmac_wrapper.py +423 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +488 -0
- servalcat/spa/fsc.py +391 -0
- servalcat/spa/localcc.py +197 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +979 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1629 -0
- servalcat/utils/fileio.py +836 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +811 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +933 -0
- servalcat/utils/refmac.py +759 -0
- servalcat/utils/restraints.py +888 -0
- servalcat/utils/symmetry.py +298 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +262 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1954 -0
- servalcat/xtal/twin.py +316 -0
- servalcat-0.4.131.dist-info/METADATA +60 -0
- servalcat-0.4.131.dist-info/RECORD +45 -0
- servalcat-0.4.131.dist-info/WHEEL +6 -0
- servalcat-0.4.131.dist-info/entry_points.txt +4 -0
- servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import gemmi
|
|
10
|
+
import numpy
|
|
11
|
+
import subprocess
|
|
12
|
+
import shlex
|
|
13
|
+
import json
|
|
14
|
+
import copy
|
|
15
|
+
import re
|
|
16
|
+
import os
|
|
17
|
+
import string
|
|
18
|
+
import itertools
|
|
19
|
+
import tempfile
|
|
20
|
+
from servalcat.utils import logger
|
|
21
|
+
from servalcat.utils import fileio
|
|
22
|
+
|
|
23
|
+
# Version banner line of a Refmac log; group 1 is the version string.
re_version = re.compile(r"#.* Refmac *version ([^ ]+) ")
# Log lines that look like a warning or an error (case-insensitive).
re_error = re.compile(r'(warn|error *[:]|error *==|^error)', flags=re.IGNORECASE)
# Header line that opens an outlier listing in the log.
re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
|
|
26
|
+
|
|
27
|
+
def check_version(exe="refmac5"):
    """
    Run ``exe -i`` and parse the Refmac version from its output.

    Returns a tuple of ints (one per dot-separated component of the
    version string), or an empty tuple when the program could not be
    run or no parsable version was found.  Failures are reported through
    the logger and are never raised to the caller.
    """
    output = ""
    try:
        output = subprocess.check_output([exe, "-i"], universal_newlines=True)
    except OSError:
        logger.writeln("Error: Cannot execute {}".format(exe))
    except subprocess.CalledProcessError as e:
        # check_output raises on a non-zero exit status; whatever was
        # printed is still worth parsing and showing in the error report
        logger.writeln("Error: {} exited with status {}".format(exe, e.returncode))
        output = e.output or ""

    ver = ()
    r_ver = re.search("Program: .* version ([^ ]+)", output)
    if r_ver:
        logger.writeln("Refmac version: {}".format(r_ver.group(1)))
        try:
            ver = tuple(map(int, r_ver.group(1).split(".")))
        except ValueError:
            # non-numeric component: fall through to the failure report below
            ver = ()
    if not ver:
        logger.writeln("\nError: failed to check the Refmac version. The raw output:\n")
        logger.writeln(output)
    return ver
|
|
42
|
+
# check_version()
|
|
43
|
+
|
|
44
|
+
def ensure_ccp4scr():
    """
    Make sure the CCP4_SCR environment variable names a usable directory.

    If CCP4_SCR is set and is an existing directory, a temporary file is
    created in it to check writability.  If it is set but is not a
    directory, the directory is created.  When the variable is unset, or
    either of these steps fails with OSError, CCP4_SCR is replaced by a
    fresh directory from tempfile.mkdtemp() and the new value is logged.
    """
    tmpdir = os.environ.get("CCP4_SCR")
    if tmpdir:
        if os.path.isdir(tmpdir):
            try:
                # creating (and immediately closing) a file proves writability
                with tempfile.TemporaryFile(dir=tmpdir):
                    pass
                return
            except OSError:
                logger.writeln("Warning: cannot write files in CCP4_SCR= {}".format(tmpdir))
        else:
            try:
                os.makedirs(tmpdir)
                return
            except OSError:
                # only filesystem errors are expected here; anything else
                # (e.g. KeyboardInterrupt) must propagate
                logger.writeln("Warning: cannot create CCP4_SCR= {}".format(tmpdir))

    os.environ["CCP4_SCR"] = tempfile.mkdtemp(prefix="ccp4tmp")
    logger.writeln("Updated CCP4_SCR= {}".format(os.environ["CCP4_SCR"]))
|
|
63
|
+
# ensure_ccp4scr()
|
|
64
|
+
|
|
65
|
+
def external_restraints_json_to_keywords(json_in):
    """
    Translate a JSON list of external-restraint entries into keyword text.

    json_in is the path of a JSON file holding a list of mappings.  For
    each mapping the recognised keys (use, dmax, weight_scale,
    weight_gmwt, file) are emitted in that fixed order, one keyword line
    per key that is present.  The lines are returned as a single string
    terminated by a newline.
    """
    with open(json_in) as handle:
        entries = json.load(handle)

    # key in the JSON entry -> template of the emitted keyword line
    templates = (("use", "EXTERNAL USE {}"),
                 ("dmax", "EXTERNAL DMAX {0}"),
                 ("weight_scale", "EXTERNAL WEIGHT SCALE {0}"),
                 ("weight_gmwt", "EXTERNAL WEIGHT GMWT {0}"))

    lines = []
    for entry in entries:
        for key, template in templates:
            if key in entry:
                lines.append(template.format(entry[key]))
        if "file" in entry:
            # "@" prefix followed by the file name
            lines.append("@" + entry["file"])

    return "\n".join(lines) + "\n"
|
|
81
|
+
# external_restraints_json_to_keywords()
|
|
82
|
+
|
|
83
|
+
def read_tls_file(tlsin):
    """
    Read TLS group definitions from the text file tlsin.

    Returns a list with one dict per "TLS" record, with keys:
      title  - text following the TLS keyword
      ranges - list of strings, one per RANGE record
      origin - gemmi.Position built from the ORIGIN record, or None
      T, L   - list of up to 6 floats, or None when the record is absent
      S      - list of up to 9 floats, or None when the record is absent

    Raises ValueError when a RANGE/ORIGIN/T/L/S record cannot be parsed
    or appears before any TLS record.
    """
    # TODO sort out L/S units - currently use Refmac tlsin/out as is
    # TODO change to gemmi::TlsGroup?

    # number of values kept for each tensor record
    n_values = {"T": 6, "L": 6, "S": 9}

    def problem(line, cause):
        # uniform error for every malformed record, keeping the cause chained
        raise ValueError("Problem with TLS file: {}".format(line)) from cause

    groups = []
    with open(tlsin) as ifs:
        for l in ifs:
            l = l.strip()
            if l.startswith("TLS"):
                title = l[4:]
                groups.append(dict(title=title, ranges=[], origin=None, T=None, L=None, S=None))
            elif l.startswith("RANG"):
                try:
                    r = l[l.index(" "):].strip()
                    groups[-1]["ranges"].append(r)
                except (ValueError, IndexError) as e:
                    # no argument after the keyword, or no TLS record yet
                    problem(l, e)
            elif l.startswith("ORIG"):
                try:
                    groups[-1]["origin"] = gemmi.Position(*(float(x) for x in l.split()[1:]))
                except Exception as e:
                    # Exception (not a bare except) so that KeyboardInterrupt
                    # and SystemExit are not turned into ValueError
                    problem(l, e)
            elif l[:2] in ("T ", "L ", "S "):
                key = l[0]
                try:
                    groups[-1][key] = [float(x) for x in l.split()[1:1 + n_values[key]]]
                except Exception as e:
                    problem(l, e)

    return groups
|
|
119
|
+
# read_tls_file()
|
|
120
|
+
|
|
121
|
+
def write_tls_file(groups, tlsout):
    """
    Write TLS groups to the text file tlsout.

    groups is a list of dicts with the keys title, ranges, origin, T, L
    and S.  For every group a TLS record and its RANGE records are
    written, followed by ORIGIN and the T/L/S records for those entries
    that are not None.  Numbers are formatted as {:8.4f}.
    """
    def number_row(values):
        return " ".join("{:8.4f}".format(v) for v in values)

    with open(tlsout, "w") as out:
        for group in groups:
            out.write("TLS {}\n".format(group["title"]))
            for rng in group["ranges"]:
                out.write("RANGE {}\n".format(rng))

            origin = group["origin"]
            if origin is not None:
                out.write("ORIGIN ")
                out.write(number_row(origin.tolist()))
                out.write("\n")

            for key in ("T", "L", "S"):
                tensor = group[key]
                if tensor is None:
                    continue
                # keyword padded to 4 characters, then the values
                out.write("{:4s}".format(key))
                out.write(number_row(tensor))
                out.write("\n")
|
|
136
|
+
# write_tls_file()
|
|
137
|
+
|
|
138
|
+
class FixForRefmac:
    """
    Workaround for Refmac limitations
    - microheterogeneity
    - residue number > 9999

    The fix_* methods edit the structure in place and append the list of
    [old id, new id] pairs they made to self.fixes, where an id is a
    (chain name, sequence number, insertion code) tuple.  modify_back()
    replays self.fixes in reverse to restore the original ids.

    XXX fix external restraints accordingly
    TODO fix _struct_conf, _struct_sheet_range, _pdbx_struct_sheet_hbond
    """
    def __init__(self):
        self.MAXNUM = 9999      # largest sequence number left untouched
        self.fixes = []         # one list of [old id, new id] per applied fix
        self.resn_old_new = []  # st.shortened_ccd_codes saved by fix_long_resnames()
        self.res_labels = []    # per chain: (subchain, entity_id, label_seq) per residue
        self.entities = None    # copy of st.entities made by store_res_labels()

    def fix_before_topology(self, st, topo, fix_microheterogeneity=True, fix_resimax=True, fix_nonpolymer=True, add_gaps=False):
        """Apply the selected workarounds to st (first model only), in a fixed order."""
        # chain names already in use; new_chain_id() avoids and extends this set
        self.chainids = set(chain.name for chain in st[0])
        if fix_microheterogeneity:
            self.fix_microheterogeneity(st, topo)
        if add_gaps:
            self.add_gaps(st, topo)
        if fix_resimax: # This modifies chains, so topo will be broken
            self.fix_too_large_seqnum(st, topo)
        if fix_nonpolymer: # This modifies chains, so topo will be broken
            self.fix_nonpolymer(st)

    def new_chain_id(self, original_chain_id):
        """Return original_chain_id plus the smallest counter (from 1) giving an unused name, and reserve it."""
        # decide new chain ID
        for i in itertools.count(start=1):
            new_id = "{}{}".format(original_chain_id, i)
            if new_id not in self.chainids:
                self.chainids.add(new_id)
                return new_id

    @staticmethod
    def _icode_key(icode):
        """Fold an insertion code the way connection partners are matched (bit 0x20 set)."""
        # "A" and "a" compare equal and "\0" is treated like " "
        if len(icode) != 1:
            return icode
        return chr(ord(icode) | 0x20)

    def fix_metadata(self, st, changedict):
        """
        Rename the partners of st.connections according to changedict.

        changedict maps (chain name, seqnum, icode) to the new
        (chain name, seqnum, icode).  Insertion codes are compared after
        folding with _icode_key() on BOTH sides; folding only the
        connection side would make every upper-case insertion code miss
        its entry.
        """
        # fix connections
        lookup = {(key[0], key[1], self._icode_key(key[2])): new
                  for key, new in changedict.items()}
        for con in st.connections:
            for aa in (con.partner1, con.partner2):
                seqid = aa.res_id.seqid
                changeto = lookup.get((aa.chain_name, seqid.num, self._icode_key(seqid.icode)))
                if changeto is not None:
                    aa.chain_name = changeto[0]
                    aa.res_id.seqid.num = changeto[1]
                    aa.res_id.seqid.icode = changeto[2]

    def add_gaps(self, st, topo):
        """Append a "gap" connection between consecutive polymer residues that topo does not link."""
        # Refmac (as of 5.8.0352) has a bug that makes two links for IAS (IAS-pept and usual TRANS/CIS)
        # However this implementation is even more harmful.. if gap is inserted to real gaps then necessary p link is also gone!
        for chain in st[0]:
            rs = chain.get_polymer()
            for i in range(1, len(rs)):
                res0 = rs[i-1]
                res = rs[i]
                links = topo.links_to_previous(res)
                if len(links) == 0 or links[0].link_id in ("gap", "?"):
                    con = gemmi.Connection()
                    con.asu = gemmi.Asu.Same
                    con.type = gemmi.ConnectionType.Unknown
                    con.link_id = "gap"
                    con.partner1 = gemmi.AtomAddress(chain.name, res0.seqid, res0.name, "", "\0")
                    con.partner2 = gemmi.AtomAddress(chain.name, res.seqid, res.name, "", "\0")
                    logger.writeln("Refmac workaround (gap link): {}".format(con))
                    st.connections.append(con)

    def fix_microheterogeneity(self, st, topo):
        """
        Give the alternative residues of each microheterogeneous position distinct insertion codes.

        The first residue of a group keeps its id; the others receive
        unused upper-case insertion codes.  The links the group had to
        its neighbours (taken from topo) are written as explicit
        connections, and "gap" connections are added between the
        members of a group.  The id changes are appended to self.fixes.
        """
        mh_res = []
        chains = []
        icodes = {} # to avoid overlaps
        modifications = [] # return value

        # Check if microheterogeneity exists
        for chain in st[0]:
            for rg in chain.get_polymer().residue_groups():
                if len(rg) > 1:
                    ress = [r for r in rg]
                    chains.append(chain.name)
                    mh_res.append(ress)
                    ress_str = "/".join([str(r) for r in ress])
                    logger.writeln("Microheterogeneity detected in chain {}: {}".format(chain.name, ress_str))

        if not mh_res: return

        # insertion codes already used, per chain and sequence number
        for chain in st[0]:
            for res in chain:
                if res.seqid.icode != " ":
                    icodes.setdefault(chain.name, {}).setdefault(res.seqid.num, []).append(res.seqid.icode)

        def append_links(bond, prr, toappend):
            # Appends [atom of the other residue, atom belonging to prr],
            # which is the order the connection-building code below relies on.
            if bond is None: return # link without a bond: nothing to record
            atoms = bond.atoms
            assert len(atoms) == 2
            found = None
            for i in range(2):
                if any(atoms[i] == ra for ra in prr): found = i
            if found is not None:
                # index with `found`, not the leftover loop variable
                toappend.append([atoms[1-found], atoms[found]])
        # append_links()

        mh_res_all = sum(mh_res, [])
        mh_link = {}

        # Check links
        for chain in st[0]:
            for res in chain:
                # If this residue is microheterogeneous
                if res in mh_res_all:
                    for link in topo.links_to_previous(res):
                        mh_link.setdefault(id(res), []).append([link.res1, "prev", link.link_id, []])
                        append_links(topo.first_bond_in_link(link), link.res1, mh_link[id(res)][-1][-1])

                # Check if previous residue(s) is microheterogeneous
                for link in topo.links_to_previous(res):
                    prr = link.res1
                    if prr in mh_res_all:
                        mh_link.setdefault(id(prr), []).append([res, "next", link.link_id, []])
                        append_links(topo.first_bond_in_link(link), prr, mh_link[id(prr)][-1][-1])

        # Change IDs
        for chain_name, rr in zip(chains, mh_res):
            chars = string.ascii_uppercase
            # avoid already used inscodes
            if chain_name in icodes and rr[0].seqid.num in icodes[chain_name]:
                used_codes = set(icodes[chain_name][rr[0].seqid.num])
                chars = list(filter(lambda x: x not in used_codes, chars))
            for ir, r in enumerate(rr[1:]):
                modifications.append([(chain_name, r.seqid.num, r.seqid.icode),
                                      (chain_name, r.seqid.num, chars[ir])])
                r.seqid.icode = chars[ir]

        logger.writeln("DEBUG: mh_link= {}".format(mh_link))
        # Update connections (LINKR)
        for chain_name, rr in zip(chains, mh_res):
            for r in rr:
                for p in mh_link.get(id(r), []):
                    # p = [neighbour residue, "prev" or "next", link id, atom pairs]
                    for atoms in p[-1]:
                        con = gemmi.Connection()
                        con.asu = gemmi.Asu.Same
                        con.type = gemmi.ConnectionType.Covale
                        con.link_id = p[2]
                        if p[1] == "prev":
                            p1 = gemmi.AtomAddress(chain_name, p[0].seqid, p[0].name, atoms[1].name, atoms[1].altloc)
                            p2 = gemmi.AtomAddress(chain_name, r.seqid, r.name, atoms[0].name, atoms[0].altloc)
                        else:
                            p1 = gemmi.AtomAddress(chain_name, r.seqid, r.name, atoms[1].name, atoms[1].altloc)
                            p2 = gemmi.AtomAddress(chain_name, p[0].seqid, p[0].name, atoms[0].name, atoms[0].altloc)

                        con.partner1 = p1
                        con.partner2 = p2
                        logger.writeln(" Adding link: {}".format(con))
                        st.connections.append(con)
            for r1, r2 in itertools.combinations(rr, 2):
                for a1 in set([a.altloc for a in r1]):
                    for a2 in set([a.altloc for a in r2]):
                        con = gemmi.Connection()
                        con.asu = gemmi.Asu.Same
                        con.link_id = "gap"
                        # XXX altloc will be ignored when atom does not match.. grrr
                        con.partner1 = gemmi.AtomAddress(chain_name, r1.seqid, r1.name, "", a1)
                        con.partner2 = gemmi.AtomAddress(chain_name, r2.seqid, r2.name, "", a2)
                        st.connections.append(con)

        self.fixes.append(modifications)
    # fix_microheterogeneity()

    def fix_nonpolymer(self, st):
        """Move the residues outside chain.get_polymer() into a new chain, for chains holding both kinds."""
        # Refmac (as of 5.8.0352) has a bug that links non-neighbouring nucleotides
        # It only happens with mmCIF file
        newchains = []
        changes = []
        for chain in st[0]:
            polymer = chain.get_polymer()
            if len(polymer) == len(chain): continue
            if len(polymer) == 0: continue
            del_idxes = []
            newchains.append(gemmi.Chain(self.new_chain_id(chain.name)))
            logger.writeln("Refmac workaround (nonpolymer-fix) {} => {} ({} residues)".format(chain.name, newchains[-1].name,
                                                                                         len(chain) - len(polymer)))
            for i, res in enumerate(chain):
                if res in polymer: continue
                newchains[-1].add_residue(res)
                del_idxes.append(i)
                changes.append([(chain.name, res.seqid.num, res.seqid.icode),
                                (newchains[-1].name, newchains[-1][-1].seqid.num, newchains[-1][-1].seqid.icode)])
            # delete from the end so the remaining indices stay valid
            for i in reversed(del_idxes):
                del chain[i]

        for c in newchains:
            st[0].add_chain(c)
        if changes:
            st.remove_empty_chains()
            self.fix_metadata(st, dict(changes))
            self.fixes.append(changes)

    def fix_too_large_seqnum(self, st, topo):
        """Move residues numbered above self.MAXNUM into new chains, renumbered to fit."""
        # Refmac cannot handle residue id > 9999
        # What to do:
        # - move to new chains
        # - modify link records (and others?)
        # - add link record if needed
        newchains = []
        changes = []

        for chain in st[0]:
            # default: an empty chain has nothing to move
            maxseqnum = max((r.seqid.num for r in chain), default=self.MAXNUM)
            if maxseqnum <= self.MAXNUM: continue
            offset = 0
            del_idxes = []
            for ires, res in enumerate(chain):
                if res.seqid.num <= self.MAXNUM: continue
                if res.seqid.num - offset > self.MAXNUM:
                    # start a new chain in which this residue becomes number 1
                    newchains.append(gemmi.Chain(self.new_chain_id(chain.name)))
                    offset = res.seqid.num - 1
                    # need to keep link to previous residue if exists
                    for link in topo.links_to_previous(res):
                        logger.writeln("Link: {} {} {} alt= {} {}".format(link.link_id, link.res1, link.res2,
                                                                      link.alt1, link.alt2))

                        con = gemmi.Connection()
                        con.type = gemmi.ConnectionType.Covale
                        con.link_id = link.link_id
                        bond = topo.first_bond_in_link(link)
                        if bond is not None:
                            # old ids are used here; fix_metadata() below renames them
                            con.partner1 = gemmi.AtomAddress(chain.name, link.res1.seqid, link.res1.name, bond.atoms[0].name, bond.atoms[0].altloc)
                            con.partner2 = gemmi.AtomAddress(chain.name, link.res2.seqid, link.res2.name, bond.atoms[1].name, bond.atoms[1].altloc)
                            st.connections.append(con)

                newchains[-1].add_residue(res)
                newchains[-1][-1].seqid.num -= offset
                del_idxes.append(ires)
                changes.append([(chain.name, res.seqid.num, res.seqid.icode),
                                (newchains[-1].name, newchains[-1][-1].seqid.num, newchains[-1][-1].seqid.icode)])
                logger.writeln("Refmac workaround (too large seq) {} => {} {}".format(changes[-1][0], changes[-1][1], res.name))

            # delete from the end so the remaining indices stay valid
            for i in reversed(del_idxes):
                del chain[i]

        for c in newchains:
            st[0].add_chain(c)
        if changes:
            st.remove_empty_chains()
            self.fix_metadata(st, dict(changes))
            self.fixes.append(changes)

    def fix_long_resnames(self, st):
        """Shorten CCD codes in st and remember the mapping for modify_back()."""
        # this function should be called separately (after preparing topology)
        st.shorten_ccd_codes()
        self.resn_old_new = list(st.shortened_ccd_codes)

    def store_res_labels(self, st):
        """Save st.entities and every residue's (subchain, entity_id, label_seq) for modify_back()."""
        self.res_labels = []
        self.entities = gemmi.EntityList(st.entities)
        for chain in st[0]:
            self.res_labels.append([])
            for res in chain:
                self.res_labels[-1].append((res.subchain, res.entity_id, res.label_seq))

    def fix_model(self, st, changedict):
        """Rename residues (and afterwards their chains) of st according to changedict, then the connections."""
        chain_newid = set()
        for chain in st[0]:
            for res in chain:
                changeto = changedict.get((chain.name, res.seqid.num, res.seqid.icode))
                if changeto is not None:
                    logger.writeln("back: {} {} to {}".format(chain.name, res.seqid, changeto))
                    # the chain is renamed only after the scan, because
                    # chain.name is part of the lookup key
                    chain_newid.add((chain, changeto[0]))
                    res.seqid.num = changeto[1]
                    res.seqid.icode = changeto[2]

        for chain, newid in chain_newid:
            chain.name = newid
        st.merge_chain_parts()
        self.fix_metadata(st, changedict)

    def modify_back(self, st):
        """Undo the recorded fixes on st, most recent first, then restore residue names and labels."""
        for fix in reversed(self.fixes):
            # swap each [old, new] pair so that the mapping goes new -> old
            reschanges = dict([x[::-1] for x in fix])
            self.fix_model(st, reschanges)

        if self.resn_old_new:
            st.shortened_ccd_codes = self.resn_old_new
            st.restore_full_ccd_codes()

        if self.res_labels:
            st.entities = self.entities
            assert len(self.res_labels) == len(st[0])
            for ic, chain in enumerate(st[0]):
                assert len(self.res_labels[ic]) == len(chain)
                for ir, res in enumerate(chain):
                    res.subchain, res.entity_id, res.label_seq = self.res_labels[ic][ir]
|
|
433
|
+
|
|
434
|
+
class Refmac:
    """
    Builds the command line and keyword input for REFMAC5, runs it and
    collects a per-cycle summary from its log output.

    Settings are plain attributes.  They can be given as keyword
    arguments to the constructor; the special keyword "args" takes an
    argparse-like object that is read by init_from_args().
    """
    def __init__(self, **kwargs):
        self.prefix = "refmac"  # stem of every file written by this object
        self.hklin = self.xyzin = ""
        self.source = "electron"
        self.lab_f = None
        self.lab_sigf = None
        self.lab_phi = None
        self.libin = None
        self.tlsin = None
        self.hydrogen = "all"
        self.hout = False
        self.ncycle = 10
        self.tlscycle = 0
        self.resolution = None
        self.weight_matrix = None
        self.weight_auto_scale = None
        self.bfactor = None
        self.jellybody = None
        self.jellybody_sigma, self.jellybody_dmax = 0.01, 4.2
        self.ncsr = None
        self.shake = None
        self.keyword_files = []
        self.keywords = []
        self.external_restraints_json = None
        self.exe = "refmac5"
        self.monlib_path = None
        self.keep_chain_ids = False
        self.show_log = False # summary only if false
        self.global_mode = kwargs.get("global_mode")

        # keyword arguments override the defaults, in the order given
        for k in kwargs:
            if k == "args":
                self.init_from_args(kwargs["args"])
            else:
                setattr(self, k, kwargs[k])

        ensure_ccp4scr()
    # __init__()

    def init_from_args(self, args):
        """Copy the settings from the attributes of args (command-line style namespace)."""
        self.hklin = args.mtz
        self.xyzin = args.model
        self.libin = args.ligand
        self.tlsin = args.tlsin
        self.ncycle = args.ncycle
        self.tlscycle = args.tlscycle
        self.lab_f = args.lab_f
        self.lab_phi = args.lab_phi
        self.lab_sigf = args.lab_sigf
        self.hydrogen = args.hydrogen
        self.hout = args.hout
        self.ncsr = args.ncsr
        self.bfactor = args.bfactor
        self.jellybody = args.jellybody
        self.jellybody_sigma, self.jellybody_dmax = args.jellybody_params
        self.resolution = args.resolution
        self.weight_auto_scale = args.weight_auto_scale
        self.keyword_files = args.keyword_file
        self.keywords = args.keywords
        self.external_restraints_json = args.external_restraints_json
        self.exe = args.exe
        self.show_log = args.show_refmac_log
        self.monlib_path = args.monlib
    # init_from_args()

    def copy(self, **kwargs):
        """Return a deep copy of this object with the given attributes replaced."""
        ret = copy.deepcopy(self)
        for k in kwargs:
            setattr(ret, k, kwargs[k])

        return ret
    # copy()

    def set_libin(self, ligands):
        """Set libin from a list of ligand cif files; several files are merged into one first."""
        if not ligands: return
        if len(ligands) > 1:
            mcif = "merged_ligands.cif" # XXX directory!
            logger.writeln("Merging ligand cif files: {}".format(ligands))
            fileio.merge_ligand_cif(ligands, mcif)
            self.libin = mcif
        else:
            self.libin = ligands[0]
    # set_libin()

    def make_keywords(self):
        """Return the keyword input (text fed to Refmac's stdin) for the current settings."""
        out = []  # chunks of keyword text, each ending with a newline

        labin = []
        if self.lab_f: labin.append("FP={}".format(self.lab_f))
        if self.lab_sigf: labin.append("SIGFP={}".format(self.lab_sigf))
        if self.lab_phi: labin.append("PHIB={}".format(self.lab_phi))
        if labin:
            out.append("labin {}\n".format(" ".join(labin)))

        out.append("make hydr {}\n".format(self.hydrogen))
        out.append("make hout {}\n".format("yes" if self.hout else "no"))

        # global_mode "spa" takes precedence over self.source
        if self.global_mode == "spa":
            out.append("solvent no\n")
            out.append("scale lssc isot\n")
            out.append("source em mb\n")
        elif self.source == "electron":
            out.append("source ec mb\n")
        elif self.source == "neutron":
            out.append("source n\n")

        out.append("ncycle {}\n".format(self.ncycle))
        if self.resolution is not None:
            out.append("reso {}\n".format(self.resolution))
        # an explicit matrix weight wins over the auto-weight scale
        if self.weight_matrix is not None:
            out.append("weight matrix {}\n".format(self.weight_matrix))
        elif self.weight_auto_scale is not None:
            out.append("weight auto {:.2e}\n".format(self.weight_auto_scale))
        else:
            out.append("weight auto\n")

        if self.bfactor is not None:
            out.append("bfactor set {}\n".format(self.bfactor))
        if self.jellybody:
            out.append("ridge dist sigma {:.3e}\n".format(self.jellybody_sigma))
            out.append("ridge dist dmax {:.2e}\n".format(self.jellybody_dmax))
        if self.ncsr:
            out.append("ncsr {}\n".format(self.ncsr))
        if self.shake:
            out.append("rand {}\n".format(self.shake))
        if self.tlscycle > 0:
            out.append("refi tlsc {}\n".format(self.tlscycle))
            out.append("tlsout addu\n")
        if self.keep_chain_ids:
            out.append("pdbo keep auth\n")

        if self.external_restraints_json:
            out.append(external_restraints_json_to_keywords(self.external_restraints_json))

        if self.keyword_files:
            for f in self.keyword_files:
                out.append("@{}\n".format(f))

        # user keywords go last
        if self.keywords:
            out.append("\n".join(self.keywords).strip() + "\n")

        return "".join(out)
    # make_keywords()

    def xyzout(self): return self.prefix + ".pdb"
    def hklout(self): return self.prefix + ".mtz"
    def tlsout(self): return self.prefix + ".tls"

    def make_cmd(self):
        """Return the Refmac command line as a list of arguments."""
        cmd = [self.exe]
        cmd.extend(["hklin", self.hklin])
        cmd.extend(["hklout", self.hklout()])
        cmd.extend(["xyzin", self.xyzin])
        cmd.extend(["xyzout", self.xyzout()])
        if self.libin:
            cmd.extend(["libin", self.libin])
        if self.tlsin:
            cmd.extend(["tlsin", self.tlsin])
        if self.tlscycle > 0:
            cmd.extend(["tlsout", self.tlsout()])
        if self.source == "neutron":
            # requires the CLIBD environment variable (KeyError otherwise)
            cmd.extend(["atomsf", os.path.join(os.environ["CLIBD"], "atomsf_neutron.lib")])

        return cmd
    # make_cmd()

    def run_refmac(self, write_summary_json=True):
        """
        Run Refmac and return a summary dict parsed from its output.

        Writes <prefix>.inp (keyword input) and <prefix>.log (full
        output), and <prefix>_summary.json when write_summary_json is
        true.  The returned dict has "version" and "cycles", a list of
        per-cycle dicts.  Raises RuntimeError if the output model or
        reflection file does not exist afterwards.
        """
        cmd = self.make_cmd()
        stdin = self.make_keywords()
        with open(self.prefix+".inp", "w") as ofs: ofs.write(stdin)

        logger.writeln("Running REFMAC5..")
        logger.writeln("{} <<__eof__ > {}".format(" ".join(shlex.quote(x) for x in cmd), self.prefix+".log"))
        logger.write(stdin)
        logger.writeln("__eof__")

        # work on a copy: CLIBD_MON is meant for the child process only and
        # must not leak into this process's environment
        env = os.environ.copy()
        if self.monlib_path: env["CLIBD_MON"] = os.path.join(self.monlib_path, "") # should end with /

        re_cycle_table = re.compile("Cycle *([0-9]+). Rfactor analysis")
        re_actual_weight = re.compile("Actual weight *([^ ]+) *is applied to the X-ray term")
        # column name in the final table -> key in the summary
        key_name = dict(rmsBOND="rms_bond", zBOND="rmsz_bond",
                        rmsANGL="rms_angle", zANGL="rmsz_angle",
                        rmsCHIRAL="rms_chiral")

        cycle = 0
        rmsbond = ""
        rmsangle = ""
        log_delay = []
        # with show_log the raw log is printed as it comes, so the summary
        # lines are held back and printed together at the end
        summary_write = log_delay.append if self.show_log else logger.writeln
        outlier_flag = False
        last_table_flag = False
        last_table_keys = []
        occ_flag = False
        occ_cycles = 0
        ret = {"version":None,
               "cycles": [{"cycle":i} for i in range(self.ncycle+self.tlscycle+1)],
               } # metadata

        with subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              universal_newlines=True, env=env) as p, \
             open(self.prefix+".log", "w") as log: # closed even if parsing fails
            p.stdin.write(stdin)
            p.stdin.close()

            for l in iter(p.stdout.readline, ""):
                log.write(l)

                if self.show_log:
                    print(l, end="")

                r_ver = re_version.search(l)
                if r_ver:
                    ret["version"] = r_ver.group(1)
                    summary_write("Starting Refmac {} (PID: {})".format(r_ver.group(1), p.pid))

                # print error/warning
                r_err = re_error.search(l)
                if r_err:
                    if self.global_mode == "spa":
                        # these two messages are not reported in spa mode
                        if "Figure of merit of phases has not been assigned" in l:
                            continue
                        elif "They will be assumed to be equal to 1.0" in l:
                            continue
                    summary_write(l.rstrip())

                # print outliers
                r_outl = re_outlier_start.search(l)
                if r_outl:
                    outlier_flag = True
                    summary_write(l.rstrip())
                elif outlier_flag:
                    if l.strip() == "" or "monitored" in l or "dev=" in l or "sigma=" in l.lower() or "sigma.=" in l:
                        summary_write(l.rstrip())
                    else:
                        outlier_flag = False

                # keep track of the current cycle number
                if "TLS refinement cycle" in l:
                    cycle = int(l.split()[-1])
                elif "----Group occupancy refinement----" in l:
                    occ_flag = True
                    occ_cycles += 1
                    cycle += 1
                elif "CGMAT cycle number =" in l:
                    cycle = int(l[l.index("=")+1:]) + self.tlscycle + occ_cycles
                    occ_flag = False

                r_cycle = re_cycle_table.search(l)
                if r_cycle: cycle = int(r_cycle.group(1))

                # grow the list if more cycles appear than were preallocated
                for i in range(len(ret["cycles"]), cycle):
                    ret["cycles"].append({"cycle":i})

                if "Overall R factor =" in l and cycle > 0:
                    rfac = l[l.index("=")+1:].strip()
                    if self.global_mode != "spa":
                        ret["cycles"][cycle-1]["r_factor"] = rfac
                        summary_write(" cycle= {:3d} R= {}".format(cycle-1, rfac))
                elif "Average Fourier shell correlation =" in l and cycle > 0:
                    fsc = l[l.index("=")+1:].strip()
                    if occ_flag:
                        note = "(occupancy)"
                    elif cycle == 1:
                        note = "(initial)"
                    elif cycle <= self.tlscycle+1:
                        note = "(TLS)"
                    elif cycle > self.ncycle + occ_cycles + self.tlscycle:
                        note = "(final)"
                    else:
                        note = ""

                    if self.global_mode == "spa":
                        ret["cycles"][cycle-1]["fsc_average"] = fsc
                        summary_write(" cycle= {:3d} FSCaverage= {} {}".format(cycle-1, fsc, note))
                elif "Rms BondLength" in l:
                    rmsbond = l
                elif "Rms BondAngle" in l:
                    rmsangle = l

                r_actual_weight = re_actual_weight.search(l)
                if r_actual_weight:
                    ret["cycles"][cycle-1]["actual_weight"] = r_actual_weight.group(1)

                # Final table
                if " Ncyc Rfact Rfree FOM" in l:
                    last_table_flag = True
                    last_table_keys = l.split()
                    if last_table_keys[-1] == "$$": del last_table_keys[-1]
                elif last_table_flag:
                    if "$$ Final results $$" in l:
                        last_table_flag = False
                        continue
                    sp = l.split()
                    # a data row has exactly one value per column
                    if len(sp) == len(last_table_keys) and sp[0] != "$$":
                        cyc = int(sp[last_table_keys.index("Ncyc")])
                        for k in key_name:
                            if k in last_table_keys:
                                ret["cycles"][cyc][key_name[k]] = sp[last_table_keys.index(k)]
                            else:
                                logger.error("table does not have key {}?".format(k))

            retcode = p.wait()

        if log_delay:
            logger.writeln("== Summary of Refmac ==")
            logger.writeln("\n".join(log_delay))

        if rmsbond:
            logger.writeln(" Initial Final")
            logger.writeln(rmsbond.rstrip())
            logger.writeln(rmsangle.rstrip())

        logger.writeln("REFMAC5 finished with exit code= {}".format(retcode))

        if write_summary_json:
            with open("{}_summary.json".format(self.prefix), "w") as f:
                json.dump(ret, f, indent=True)

        # TODO check timestamp
        if not os.path.isfile(self.xyzout()) or not os.path.isfile(self.hklout()):
            raise RuntimeError("REFMAC5 did not produce output files. Check {}".format(self.prefix+".log"))

        return ret
|
|
758
|
+
# run_refmac()
|
|
759
|
+
# class Refmac
|