servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +1162 -0
- servalcat/refine/refine_geom.py +245 -0
- servalcat/refine/refine_spa.py +400 -0
- servalcat/refine/refine_xtal.py +339 -0
- servalcat/refine/spa.py +151 -0
- servalcat/refine/xtal.py +312 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +191 -0
- servalcat/refmac/refmac_keywords.py +660 -0
- servalcat/refmac/refmac_wrapper.py +423 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +488 -0
- servalcat/spa/fsc.py +391 -0
- servalcat/spa/localcc.py +197 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +979 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1629 -0
- servalcat/utils/fileio.py +836 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +811 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +933 -0
- servalcat/utils/refmac.py +759 -0
- servalcat/utils/restraints.py +888 -0
- servalcat/utils/symmetry.py +298 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +262 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1954 -0
- servalcat/xtal/twin.py +316 -0
- servalcat-0.4.131.dist-info/METADATA +60 -0
- servalcat-0.4.131.dist-info/RECORD +45 -0
- servalcat-0.4.131.dist-info/WHEEL +6 -0
- servalcat-0.4.131.dist-info/entry_points.txt +4 -0
- servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
servalcat/spa/fsc.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import os
|
|
10
|
+
import gemmi
|
|
11
|
+
import numpy
|
|
12
|
+
import pandas
|
|
13
|
+
from servalcat.utils import logger
|
|
14
|
+
from servalcat import spa
|
|
15
|
+
from servalcat.spa.run_refmac import determine_b_before_mask
|
|
16
|
+
from servalcat import utils
|
|
17
|
+
|
|
18
|
+
def add_arguments(parser):
    """Register the command-line options of the FSC calculation on *parser*.

    Exactly one of ``--map``, ``--halfmaps`` or ``--mtz`` must be given
    (they form a required mutually exclusive group).  Symmetry-related
    options are added by ``utils.symmetry.add_symmetry_args``.

    Parameters
    ----------
    parser : argparse.ArgumentParser (or sub-parser)
        Parser to be populated in place.  Nothing is returned.
    """
    parser.description = 'FSC calculation'

    parser.add_argument('--model',
                        help="")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--map',
                       help='Input map file(s)')
    group.add_argument("--halfmaps", nargs=2)
    group.add_argument('--mtz',
                       help='Input mtz file.')
    parser.add_argument('--labin', nargs=2,
                        help='label (F and PHI) for mtz')
    parser.add_argument('--pixel_size', type=float,
                        help='Override pixel size (A)')
    parser.add_argument('--mask', help='Mask file')
    parser.add_argument('--mask_radius',
                        type=float, default=3,
                        help='calculate mask from model if provided')
    parser.add_argument('--mask_soft_edge',
                        type=float, default=0,
                        help='Add soft edge to model mask.')
    parser.add_argument('--mask_model', action='store_true',
                        help='Apply mask to model density')
    parser.add_argument("--b_before_mask", type=float,
                        help="when model-based mask is used: sharpening B value for sharpen-mask-unsharpen procedure. By default it is determined automatically.")
    parser.add_argument('--no_sharpen_before_mask', action='store_true',
                        help='when model-based mask is used: by default half maps are sharpened before masking by std of signal and unsharpened after masking. This option disables it.')
    utils.symmetry.add_symmetry_args(parser) # add --pg etc
    parser.add_argument('-d', '--resolution',
                        type=float,
                        help='Default: Nyquist')
    # The seed is handed to numpy.random.seed(), which only accepts integers;
    # parsing it as float made every explicitly given seed fail at run time.
    parser.add_argument('--random_seed', type=int, default=1234,
                        help="random seed for phase randomized FSC")
    parser.add_argument("-s", "--source", choices=["electron", "xray", "neutron", "custom"], default="electron")
    parser.add_argument('-o', '--fsc_out',
                        default="fsc.dat",
                        help='')
    parser.add_argument('--csv', action='store_true',
                        help="Write csv file")
    parser.add_argument('--keep_charges', action='store_true',
                        help="Use scattering factor for charged atoms. Use it with care.")

# add_arguments()
|
|
62
|
+
|
|
63
|
+
def parse_args(arg_list):
    """Parse *arg_list* with a fresh parser configured by add_arguments().

    Parameters
    ----------
    arg_list : list of str
        Command-line tokens, without the program name.

    Returns
    -------
    argparse.Namespace
        The parsed options.
    """
    # This module does not import argparse at file level, so bring it into
    # scope here; otherwise the call below fails with NameError.
    import argparse

    parser = argparse.ArgumentParser()
    add_arguments(parser)
    return parser.parse_args(arg_list)
# parse_args()
|
|
68
|
+
|
|
69
|
+
def write_loggraph(stats, labs_fc, log_out):
    """Write the per-bin statistics table *stats* as a loggraph file.

    Columns of *stats* are grouped into graphs by their name prefix
    (half-map FSC, half-map CC / mean cos, map-model FSC, map-model CC /
    mean cos, power).  Power columns are written as natural logarithms.
    A graph is emitted only when at least one of its columns is present;
    the "ncoeffs" graph is always emitted.

    Parameters
    ----------
    stats : pandas.DataFrame
        Table indexed by bin; must contain "d_min" and "ncoeffs".
    labs_fc : list of str
        Labels of the model structure factors used in column names.
    log_out : str
        Path of the file to write.
    """
    fsc_prefixes = tuple("fsc_" + fc for fc in labs_fc)
    cc_prefixes = tuple(pre + fc for fc in labs_fc for pre in ("cc_", "mcos_"))

    # str.startswith() with an empty tuple is False, so no model labels
    # means no model columns are selected.
    model_fsc_cols = [col for col in stats if col.startswith(fsc_prefixes)]
    model_cc_cols = [col for col in stats if col.startswith(cc_prefixes)]
    power_cols = [col for col in stats if col.startswith("power_")]

    half_fsc_candidates = ("fsc_half_unmasked", "fsc_half_masked",
                           "fsc_half_masked_rand", "fsc_half_masked_corrected")
    half_fsc_cols = [col for col in half_fsc_candidates if col in stats]
    if len(half_fsc_cols) == 0 and "fsc_half" in stats:
        half_fsc_cols = ["fsc_half"]
    half_cc_cols = [col for col in ("cc_half", "mcos_half") if col in stats]

    table = stats.copy()
    table.insert(0, "bin", stats.index)
    for col in power_cols:
        table[col] = numpy.log(table[col])

    if len(half_fsc_cols) > 1:
        half_fsc_title = "Phase randomized FSC"
    else:
        half_fsc_title = "Half map FSC"

    candidates = [(half_fsc_title, half_fsc_cols),
                  ("Half map amplitude CC and Mean(cos(dphi))", half_cc_cols),
                  ("Map-model FSC", model_fsc_cols),
                  ("Map-model amplitude CC and Mean(cos(dphi))", model_cc_cols),
                  ("log(Power)", power_cols)]
    title_labs = [(title, cols) for title, cols in candidates if cols]
    title_labs.append(("number of Fourier coefficients", ["ncoeffs"]))

    with open(log_out, "w") as ofs:
        text = utils.make_loggraph_str(table, main_title="FSC", title_labs=title_labs,
                                       s2=1./table["d_min"]**2)
        ofs.write(text)
# write_loggraph()
|
|
100
|
+
|
|
101
|
+
def fsc_average(n, fsc):
    """Return the average of *fsc* weighted by *n*, ignoring undefined bins.

    A bin contributes only when both its weight and its FSC value are not
    NaN.  Previously the weight of a bin with NaN FSC was still added to
    the denominator, which biased the average toward zero.

    Parameters
    ----------
    n : array-like
        Weights, e.g. number of Fourier coefficients per bin.
    fsc : array-like
        Per-bin values, same length as *n*.

    Returns
    -------
    float
        Weighted mean; NaN when no bin has both values defined.
    """
    weights = numpy.asarray(n, dtype=float)
    values = numpy.asarray(fsc, dtype=float)
    usable = ~(numpy.isnan(weights) | numpy.isnan(values))
    if not usable.any():
        return numpy.nan
    return numpy.sum(weights[usable] * values[usable]) / numpy.sum(weights[usable])
# fsc_average()
|
|
104
|
+
|
|
105
|
+
def randomized_f(f):
    """Return a copy of complex coefficients *f* with randomized phases.

    Amplitudes are kept; each phase is drawn uniformly from [0, 2*pi)
    using the global numpy random state.
    """
    n_coef = len(f)
    angles = numpy.pi * numpy.random.uniform(0, 2, size=n_coef)
    unit_phasors = numpy.cos(angles) + 1j*numpy.sin(angles)
    return numpy.abs(f) * unit_phasors
# randomized_f()
|
|
110
|
+
|
|
111
|
+
def calc_fsc(hkldata, labs=None, fs=None):
    """Return the per-bin FSC between two sets of complex coefficients.

    The two sets are given either as two column labels of ``hkldata.df``
    (*labs*) or directly as two arrays (*fs*); *labs* takes precedence.
    For every bin of the "stat" binning the real part of the complex
    correlation coefficient is computed.

    Returns
    -------
    list of float
        One value per bin, in the order yielded by ``hkldata.binned("stat")``.
    """
    if labs is None:
        assert fs is not None and len(fs) == 2
    else:
        assert len(labs) == 2
        fs = [hkldata.df[name].to_numpy() for name in labs]

    first, second = fs[0], fs[1]
    return [numpy.real(numpy.corrcoef(first[sel], second[sel])[1, 0])
            for _, sel in hkldata.binned("stat")]
# calc_fsc()
|
|
124
|
+
|
|
125
|
+
def calc_phase_randomized_fsc(hkldata, mask, labs_half, labs_half_masked, randomize_fsc_at=0.8):
    """Compute half-map FSC curves with a phase-randomization mask correction.

    Steps performed:
      1. FSC of unmasked (*labs_half*) and masked (*labs_half_masked*)
         half-map coefficients per "stat" bin.
      2. From the first bin whose unmasked FSC drops below
         *randomize_fsc_at*, phases of the unmasked coefficients are
         randomized; the result is multiplied by *mask* in real space and
         transformed back, giving "fsc_half_masked_rand".
      3. "fsc_half_masked_corrected" is the masked FSC up to two bins past
         the randomization start, and (FSCt - FSCn) / (1 - FSCn) beyond.
         When no bin falls below the threshold nothing was randomized and
         the masked FSC is used for every bin.
      4. The resolution is taken from the last bin before the corrected
         FSC first drops below 0.143 (999 if the very first bin does).

    Parameters
    ----------
    hkldata : object providing ``df``, ``binned_df``, ``binned()``,
        ``fft_map()`` and ``miller_array()`` as used below.
    mask : grid-like with ``shape``, multiplied onto the map array.
    labs_half, labs_half_masked : sequences of two column labels.
    randomize_fsc_at : float
        Unmasked FSC threshold that starts the randomization.

    Returns
    -------
    (pandas.DataFrame, float)
        Per-bin statistics and the estimated resolution.
    """
    stats = hkldata.binned_df["stat"][["d_min", "d_max"]].copy()
    stats["fsc_half_unmasked"] = calc_fsc(hkldata, labs=labs_half)
    stats["fsc_half_masked"] = calc_fsc(hkldata, labs=labs_half_masked)
    stats["ncoeffs"] = 0

    # Randomize F
    f_rands = [numpy.copy(hkldata.df[labs_half[i]]) for i in range(2)]
    rand_start_bin = None
    for i_bin, idxes in hkldata.binned("stat"):
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        fsc_half = stats["fsc_half_unmasked"][i_bin]
        if rand_start_bin is None and fsc_half < randomize_fsc_at:
            rand_start_bin = i_bin
            logger.writeln(" randomize phase beyond {:.2f} A (bin {})".format(stats["d_max"][i_bin], i_bin))

        if rand_start_bin is not None:
            for i in range(2):
                f_rands[i][idxes] = randomized_f(hkldata.df[labs_half[i]].to_numpy()[idxes])

    # Multiply mask
    for i in range(2):
        g = hkldata.fft_map(data=f_rands[i], grid_size=mask.shape)
        g.array[:] *= mask
        fg = gemmi.transform_map_to_f_phi(g)
        f_rands[i] = fg.get_value_by_hkl(hkldata.miller_array())

    # Calc randomized fsc
    stats["fsc_half_masked_rand"] = calc_fsc(hkldata, fs=f_rands)

    # Calc corrected fsc
    # If the unmasked FSC never dropped below the threshold, no phases were
    # randomized, so there is nothing to correct: keep the masked FSC.
    if rand_start_bin is None:
        first_corrected_bin = None
    else:
        first_corrected_bin = rand_start_bin + 2 # RELION way
    stats["fsc_half_masked_corrected"] = 0.
    for i_bin in stats.index:
        if first_corrected_bin is None or i_bin < first_corrected_bin:
            stats.loc[i_bin, "fsc_half_masked_corrected"] = stats["fsc_half_masked"][i_bin]
        else:
            fscn = stats["fsc_half_masked_rand"][i_bin]
            fsct = stats["fsc_half_masked"][i_bin]
            stats.loc[i_bin, "fsc_half_masked_corrected"] = (fsct - fscn) / (1. - fscn)

    global_res = 999.
    for i_bin in stats.index:
        if stats["fsc_half_masked_corrected"][i_bin] < 0.143:
            break
        global_res = 1./(0.5*(1./stats["d_min"][i_bin]+1./stats["d_max"][i_bin])) # definition is slightly different from RELION

    logger.writeln("resolution from mask corrected FSC = {:.2f} A".format(global_res))

    return stats, global_res
# calc_maskphase_randomized_fsc()
|
|
175
|
+
|
|
176
|
+
def calc_fsc_all(hkldata, labs_fc, lab_f, labs_half=None,
                 labs_half_nomask=None, mask=None): # TODO name changed
    """Collect per-bin agreement statistics between map and model(s).

    For each "stat" bin the table receives the number of coefficients,
    the mean power of *lab_f* and of every model label, and per model
    label the FSC, amplitude CC, mean cos(phase difference) and complex
    R value against *lab_f*.  When *labs_half* is given, half-map
    statistics and model-vs-half-map FSCs are added.  When both *mask* and
    *labs_half_nomask* are given, the half-map FSC columns come from
    calc_phase_randomized_fsc() instead of a plain "fsc_half" column.

    Returns
    -------
    pandas.DataFrame
        One row per bin.
    """
    if labs_half:
        assert len(labs_half) == 2
    if labs_half_nomask:
        assert len(labs_half_nomask) == 2 # only used when mask is not None

    def complex_cc(a, b):
        # real part of the complex correlation coefficient (FSC)
        return numpy.real(numpy.corrcoef(a, b)[1, 0])

    def amplitude_cc(a, b):
        return numpy.corrcoef(numpy.abs(a), numpy.abs(b))[1, 0]

    def mean_cos_dphi(a, b):
        # f1*f2.conj()/abs(f1)/abs(f2) is much faster, but in case zero..
        return numpy.mean(numpy.cos(numpy.angle(a) - numpy.angle(b)))

    def mean_power(a):
        return numpy.average(numpy.abs(a)**2)

    def in_bin(label, sel):
        return hkldata.df[label].to_numpy()[sel]

    half_fsc_done = mask is not None and bool(labs_half_nomask)
    if half_fsc_done:
        stats, global_res = calc_phase_randomized_fsc(hkldata, mask,
                                                      labs_half=labs_half_nomask,
                                                      labs_half_masked=labs_half)
    else:
        stats = hkldata.binned_df["stat"][["d_min", "d_max"]].copy()

    # Create the columns up front so that their order in the table is fixed.
    stats["ncoeffs"] = 0
    new_columns = ["power_{}".format(lab_f)]
    for lab in labs_fc:
        new_columns.extend(fmt.format(lab) for fmt in ("power_{}", "fsc_{}_full", "Rcmplx_{}_full",
                                                       "cc_{}_full", "mcos_{}_full"))
    if labs_half:
        if not half_fsc_done:
            new_columns.append("fsc_half")
        new_columns.extend(["cc_half", "mcos_half"])
        for lab in labs_fc:
            new_columns.extend(fmt.format(lab) for fmt in ("fsc_{}_half1", "fsc_{}_half2"))
    for column in new_columns:
        stats[column] = 0.

    for i_bin, idxes in hkldata.binned("stat"):
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        Fo = in_bin(lab_f, idxes)
        stats.loc[i_bin, "power_{}".format(lab_f)] = mean_power(Fo)

        F1 = F2 = None
        if labs_half:
            F1 = in_bin(labs_half[0], idxes)
            F2 = in_bin(labs_half[1], idxes)
            if not half_fsc_done:
                stats.loc[i_bin, "fsc_half"] = complex_cc(F1, F2)
            half_cc = amplitude_cc(F1, F2)
            half_mcos = mean_cos_dphi(F1, F2)
            stats.loc[i_bin, "cc_half"] = half_cc
            stats.loc[i_bin, "mcos_half"] = half_mcos

        for labfc in labs_fc:
            Fc = in_bin(labfc, idxes)
            fsc_model = complex_cc(Fo, Fc)
            cc_model = amplitude_cc(Fo, Fc)
            mcos_model = mean_cos_dphi(Fo, Fc)
            # least-squares scale of Fc onto Fo, then complex R value
            scale = numpy.sum(numpy.real(Fo * numpy.conj(Fc)))/numpy.sum(numpy.abs(Fc)**2)
            rcmplx_model = numpy.sum(numpy.abs(Fo-scale*Fc))/numpy.sum(numpy.abs(Fo))
            stats.loc[i_bin, "fsc_{}_full".format(labfc)] = fsc_model
            stats.loc[i_bin, "cc_{}_full".format(labfc)] = cc_model
            stats.loc[i_bin, "mcos_{}_full".format(labfc)] = mcos_model
            stats.loc[i_bin, "Rcmplx_{}_full".format(labfc)] = rcmplx_model
            stats.loc[i_bin, "power_{}".format(labfc)] = mean_power(Fc)
            if labs_half:
                stats.loc[i_bin, "fsc_{}_half1".format(labfc)] = complex_cc(F1, Fc)
                stats.loc[i_bin, "fsc_{}_half2".format(labfc)] = complex_cc(F2, Fc)
    return stats
# calc_fsc_all()
|
|
237
|
+
|
|
238
|
+
def main(args):
    """Run the FSC calculation described by the parsed command-line *args*.

    Input is read from half maps, a single map, or map coefficients in an
    mtz file.  If a model is given, model structure factors are calculated
    and, unless a mask file is given or ``--mask_radius`` is not positive,
    a mask is generated from the model.  Fourier coefficients are computed
    for the unmasked and (when a mask exists) masked maps, statistics are
    collected with calc_fsc_all(), and the table is written to
    ``args.fsc_out`` plus a loggraph ``.log`` file (and ``.csv`` on request).

    Raises
    ------
    SystemExit
        On inconsistent options or input (see messages below).
    """
    if args.b_before_mask is not None and args.model is None:
        raise SystemExit("--b_before_mask can be only used with --model.")

    numpy.random.seed(args.random_seed)
    if args.mask:
        logger.writeln("Input mask file: {}".format(args.mask))
        mask = utils.fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    if args.halfmaps:
        maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
        unit_cell = maps[0][0].unit_cell
    elif args.map:
        maps = [utils.fileio.read_ccp4_map(args.map, pixel_size=args.pixel_size)]
        unit_cell = maps[0][0].unit_cell
    elif args.mtz:
        # Both labels are indexed below; fail with a clear message instead
        # of a TypeError on None.
        if args.labin is None:
            raise SystemExit("Error: --labin is required when --mtz is given.")
        # The file name comes from --mtz (was an undefined name before).
        mtz = utils.fileio.read_mmhkl(args.mtz)
        if mask is not None and mask.unit_cell != mtz.cell:
            raise SystemExit("Error: Inconsistent unit cell between mtz and mask")
        gr = mtz.transform_f_phi_to_map(f=args.labin[0],
                                        phi=args.labin[1],
                                        exact_size=mask.shape if mask is not None else (0,0,0),
                                        sample_rate=3 if mask is None else 0)
        maps = [[gr, [0,0,0]]]
        unit_cell = mtz.cell # TODO check cell of given label
        d_min = numpy.min(mtz.make_d_array()[~numpy.isnan(mtz.column_with_label(args.labin[0]).array)])
        if args.resolution is None:
            args.resolution = d_min
        elif args.resolution < d_min:
            raise SystemExit("Error: --resolution ({}) is higher than actual resolution in mtz ({:.2f}).".format(args.resolution, d_min))
    else:
        raise SystemExit("Error: No input map/mtz found.")

    if args.resolution is None:
        args.resolution = utils.maps.nyquist_resolution(maps[0][0])
        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))

    if args.model:
        st = utils.fileio.read_structure(args.model)
        st.cell = unit_cell
        st.spacegroup_hm = "P1"
        ccu = utils.model.CustomCoefUtil()
        if not args.keep_charges:
            utils.model.remove_charge([st])
        if args.source == "custom":
            ccu.read_from_cif(st, args.model)
            ccu.show_info()
            ccu.set_coeffs(st)
        utils.symmetry.update_ncs_from_args(args, st, map_and_start=maps[0])
        st_expanded = st.clone()
        if len(st.ncs) > 0:
            utils.model.expand_ncs(st_expanded)
        if mask is None and args.mask_radius > 0:
            # XXX if helical..
            if args.twist is not None:
                logger.writeln("Generating all helical copies in the box")
                st_for_mask = st.clone()
                utils.symmetry.update_ncs_from_args(args, st_for_mask, map_and_start=maps[0], filter_contacting=True)
                utils.model.expand_ncs(st_for_mask)
            else:
                st_for_mask = st_expanded
            mask = utils.maps.mask_from_model(st_for_mask, args.mask_radius, soft_edge=args.mask_soft_edge, grid=maps[0][0])
            #utils.maps.write_ccp4_map("mask_from_model.ccp4", mask)
            if not args.no_sharpen_before_mask and args.b_before_mask is None:
                args.b_before_mask = determine_b_before_mask(st_for_mask, maps, maps[0][1], mask, args.resolution)
    else:
        st_expanded = None

    # Pass 0: coefficients of the maps as read ("_nomask").
    # Pass 1: coefficients after masking ("_mask"), only when a mask exists.
    hkldata = None
    for j in range(2):
        if j == 1:
            if mask is None: break
            if args.b_before_mask is None:
                # modifies original data
                for ma in maps: ma[0].array[:] *= mask
            else:
                maps = utils.maps.sharpen_mask_unsharpen(maps, mask, args.resolution, b=args.b_before_mask)
        lab_suffix = ["_nomask", "_mask"][j]
        for i, m in enumerate(maps):
            if len(maps) == 2:
                lab = "F_map{}".format(i+1)
            else:
                lab = "FP"
            f_grid = gemmi.transform_map_to_f_phi(m[0])
            if hkldata is None:
                asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
                hkldata = utils.hkl.hkldata_from_asu_data(asudata, lab + lab_suffix)
            else:
                hkldata.df[lab + lab_suffix] = f_grid.get_value_by_hkl(hkldata.miller_array())

    if len(maps) == 2:
        # full-map coefficients are the average of the two half maps
        hkldata.df["FP_nomask"] = (hkldata.df.F_map1_nomask + hkldata.df.F_map2_nomask) * 0.5
        if mask is not None:
            hkldata.df["FP_mask"] = (hkldata.df.F_map1_mask + hkldata.df.F_map2_mask) * 0.5

    if len(maps) == 2:
        labs_half = ["F_map1_nomask", "F_map2_nomask"]
        if mask is not None:
            labs_half_masked = ["F_map1_mask", "F_map2_mask"]
        else:
            labs_half_masked = []
    else:
        labs_half, labs_half_masked = [], []
    lab_f = "FP_nomask" if mask is None else "FP_mask"
    labs_fc = []
    if st_expanded is not None:
        labs_fc.append("FC")
        hkldata.df[labs_fc[-1]] = utils.model.calc_fc_fft(st_expanded, args.resolution - 1e-6, source=args.source,
                                                          miller_array=hkldata.miller_array())
        if args.mask_model and mask is not None:
            # Same sharpen - mask - unsharpen treatment as applied to the maps.
            if args.b_before_mask is None:
                normalizer = 1.
            else:
                normalizer = hkldata.debye_waller_factors(b_iso=args.b_before_mask)
            g = hkldata.fft_map(data=hkldata.df[labs_fc[-1]] / normalizer, grid_size=mask.shape)
            g.array[:] *= mask
            fg = gemmi.transform_map_to_f_phi(g)
            hkldata.df[labs_fc[-1]] = fg.get_value_by_hkl(hkldata.miller_array()) * normalizer

    hkldata.setup_relion_binning("stat")
    stats = calc_fsc_all(hkldata, labs_fc=labs_fc, lab_f=lab_f,
                         labs_half=labs_half_masked if mask is not None else labs_half,
                         labs_half_nomask=labs_half, mask=mask)
    with open(args.fsc_out, "w") as ofs:
        if args.mask:
            ofs.write("# Mask= {}\n".format(args.mask))
        # labs_fc is filled exactly when a model was read, so test it
        # directly rather than indexing it based on args.model.
        if labs_fc:
            ofs.write("# {} from {}\n".format(labs_fc[0], args.model))

        ofs.write(stats.to_string(index=False, index_names=False)+"\n")
        for k in stats:
            if k.startswith("fsc_FC_"):
                logger.writeln("# FSCaverage of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)
            if k.startswith("Rcmplx_FC_"):
                logger.writeln("# Average of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)

    logger.writeln("Data file: {}".format(args.fsc_out))

    if args.csv:
        csv_out = os.path.splitext(args.fsc_out)[0] + ".csv"
        stats.to_csv(csv_out)
        logger.writeln("CSV file: {}".format(csv_out))

    log_out = os.path.splitext(args.fsc_out)[0] + ".log"
    write_loggraph(stats, labs_fc, log_out)
    logger.writeln("Run loggraph {} to see plots.".format(log_out))
# main()
|
|
387
|
+
|
|
388
|
+
if __name__ == "__main__":
    # Script entry point: parse the command line (without the program
    # name) and run the FSC calculation.
    import sys
    cli_tokens = sys.argv[1:]
    args = parse_args(cli_tokens)
    main(args)
|
servalcat/spa/localcc.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import gemmi
|
|
10
|
+
import numpy
|
|
11
|
+
import pandas
|
|
12
|
+
import os
|
|
13
|
+
import json
|
|
14
|
+
import argparse
|
|
15
|
+
from servalcat.utils import logger
|
|
16
|
+
from servalcat import utils
|
|
17
|
+
|
|
18
|
+
def add_arguments(parser):
    """Register the local-correlation command-line options on *parser*.

    ``--halfmaps`` is mandatory, and exactly one of ``--kernel`` (radius in
    pixels) or ``--kernel_ang`` (radius in Angstrom) must be supplied.
    The parser is modified in place; nothing is returned.
    """
    parser.description = 'Calculate real space local correlation map from half maps and model'

    parser.add_argument("--halfmaps", required=True, nargs=2, help="Input half map files")
    parser.add_argument('--pixel_size', type=float, help='Override pixel size (A)')

    # The kernel size is given either in pixels or in Angstrom, never both.
    kernel_group = parser.add_mutually_exclusive_group(required=True)
    kernel_group.add_argument("--kernel", type=int, help="Kernel radius in pixel")
    kernel_group.add_argument("--kernel_ang", type=float,
                              help="Kernel radius in Angstrom (hard sphere)")

    parser.add_argument('--mask', help="mask file")
    parser.add_argument('--model', help='Input atomic model file')
    parser.add_argument('--resolution', type=float, help='default: nyquist resolution')
    parser.add_argument("-s", "--source",
                        choices=["electron", "xray", "neutron", "custom"],
                        default="electron")
    parser.add_argument("--trim", action='store_true', help="Write trimmed map")
    parser.add_argument('-o', '--output_prefix', default="ccmap", help="default: %(default)s")
# add_arguments()
|
|
40
|
+
|
|
41
|
+
def parse_args(arg_list):
    """Build a parser with add_arguments() and parse *arg_list* with it.

    Returns the resulting argparse.Namespace.
    """
    cli = argparse.ArgumentParser()
    add_arguments(cli)
    namespace = cli.parse_args(arg_list)
    return namespace
# parse_args()
|
|
46
|
+
|
|
47
|
+
def setup_coeffs_for_halfmap_cc(maps, d_min, mask=None, st=None):
    """Prepare weighted half-map coefficients for the local half-map CC.

    The maps are (optionally masked and) Fourier transformed, binned with
    the "ml" binning, and noise/signal variances are estimated from the
    half maps.  In every bin both half-map coefficient sets are multiplied
    by sqrt(FSChalf / (2*var_noise + var_signal)), with
    FSChalf = FSCfull / (2 - FSCfull).  Weighting stops at the first bin
    with negative FSCfull; coefficients from there on stay zero.
    The *st* argument is not used.

    Returns
    -------
    hkldata
        With added columns "F_map1w" and "F_map2w", and the per-bin column
        "w2_half_varsignal".
    """
    hkldata = utils.maps.mask_and_fft_maps(maps, d_min, mask)
    hkldata.setup_relion_binning("ml")
    utils.maps.calc_noise_var_from_halfmaps(hkldata)

    n_refl = len(hkldata.df.index)
    half1 = hkldata.df.F_map1.to_numpy()
    half2 = hkldata.df.F_map2.to_numpy()
    weighted1 = numpy.zeros(n_refl, dtype=complex)
    weighted2 = numpy.zeros(n_refl, dtype=complex)

    logger.writeln("Calculating weights for half map correlation.")
    logger.writeln(" weight = sqrt(FSChalf / (2*var_noise + var_signal))")
    hkldata.binned_df["ml"]["w2_half_varsignal"] = 0.
    for i_bin, sel in hkldata.binned("ml"):
        bins = hkldata.binned_df["ml"]
        fsc_full = bins.FSCfull[i_bin]
        if fsc_full < 0:
            # stop here so that higher resolution are all zero
            break
        fsc_half = fsc_full / (2 - fsc_full)
        var_total = 2 * bins.var_noise[i_bin] + bins.var_signal[i_bin]
        weight = numpy.sqrt(fsc_half / var_total)
        bins.loc[i_bin, "w2_half_varsignal"] = fsc_half / var_total * bins.var_signal[i_bin]
        weighted1[sel] = half1[sel] * weight
        weighted2[sel] = half2[sel] * weight

    hkldata.df["F_map1w"] = weighted1
    hkldata.df["F_map2w"] = weighted2

    return hkldata
# setup_coeffs_for_halfmap_cc()
|
|
77
|
+
|
|
78
|
+
def add_coeffs_for_model_cc(hkldata, st, source="electron"):
    """Add weighted map and model coefficients for the local map-model CC.

    Model structure factors "FC" are calculated from *st*.  In every "ml"
    bin, FP is scaled by sqrt(FSCfull / var(FP)) and FC by
    sqrt(FSCfull / var(FC)).  Weighting stops at the first bin with
    negative FSCfull; coefficients from there on stay zero.

    Adds columns "FC", "FPw", "FCw" to ``hkldata.df`` and "w_mapmodel_c",
    "w_mapmodel_o", "var_fc" to the "ml" bin table.  Returns None.
    """
    hkldata.df["FC"] = utils.model.calc_fc_fft(st, d_min=hkldata.d_min_max()[0]-1e-6,
                                               source=source, miller_array=hkldata.miller_array())
    n_refl = len(hkldata.df.index)
    model_weighted = numpy.zeros(n_refl, dtype=complex)
    map_weighted = numpy.zeros(n_refl, dtype=complex)
    map_coefs = hkldata.df.FP.to_numpy()
    model_coefs = hkldata.df.FC.to_numpy()

    logger.writeln("Calculating weights for map-model correlation.")
    logger.writeln(" weight for Fo = sqrt(FSCfull / var(Fo))")
    logger.writeln(" weight for Fc = sqrt(FSCfull / var(Fc))")
    for column in ("w_mapmodel_c", "w_mapmodel_o", "var_fc"):
        hkldata.binned_df["ml"][column] = 0.

    for i_bin, sel in hkldata.binned("ml"):
        bins = hkldata.binned_df["ml"]
        fsc_full = bins.FSCfull[i_bin]
        if fsc_full < 0:
            break
        var_model = numpy.var(model_coefs[sel])
        w_model = numpy.sqrt(fsc_full / var_model)
        w_map = numpy.sqrt(fsc_full / numpy.var(map_coefs[sel]))
        model_weighted[sel] = model_coefs[sel] * w_model
        map_weighted[sel] = map_coefs[sel] * w_map
        bins.loc[i_bin, "w_mapmodel_c"] = w_model
        bins.loc[i_bin, "w_mapmodel_o"] = w_map
        bins.loc[i_bin, "var_fc"] = var_model

    hkldata.df["FPw"] = map_weighted
    hkldata.df["FCw"] = model_weighted
# add_coeffs_for_model_cc()
|
|
108
|
+
|
|
109
|
+
def model_stats(st, modelcc_map, halfcc_map, loggraph_out=None, json_out=None):
    """Tabulate per-residue averages of the two local correlation maps.

    For every residue of the first model of *st*, both maps are
    interpolated at each atom position and averaged over the residue.
    A column sqrt(2*CC_half / (1 + CC_half)) is added.

    Parameters
    ----------
    st : structure; ``st[0]`` is iterated as chains -> residues -> atoms.
    modelcc_map, halfcc_map : objects providing ``interpolate_value(pos)``.
    loggraph_out : str or None
        If given, one loggraph table per chain is written to this path.
    json_out : str or None
        If given, the table is written there as JSON records.

    Returns
    -------
    pandas.DataFrame
        Columns: chain, seqid, resn, CC_mapmodel, CC_halfmap, sqrt_CC_full.
    """
    chain_names, seqids, res_names, cc_model, cc_half = [], [], [], [], []
    for chain in st[0]:
        for res in chain:
            model_values = [modelcc_map.interpolate_value(atom.pos) for atom in res]
            half_values = [halfcc_map.interpolate_value(atom.pos) for atom in res]
            chain_names.append(chain.name)
            seqids.append(str(res.seqid))
            res_names.append(res.name)
            cc_model.append(numpy.mean(model_values))
            cc_half.append(numpy.mean(half_values))

    df = pandas.DataFrame(dict(chain=chain_names, seqid=seqids, resn=res_names,
                               CC_mapmodel=cc_model, CC_halfmap=cc_half))
    df["sqrt_CC_full"] = numpy.sqrt(2 * df.CC_halfmap / (1 + df.CC_halfmap))

    if loggraph_out is not None:
        # Text that follows the "$TABLE" title of every chain.
        graph_header = "\n".join(["",
                                  "$GRAPHS",
                                  ": average correlations :A:2,4,5,6:",
                                  "$$",
                                  "chain seqid resn CC(map,model) CC_half sqrt(CC_full)",
                                  "$$",
                                  "$$",
                                  ""])
        with open(loggraph_out, "w") as ofs:
            for chain_name, chain_rows in df.groupby("chain", sort=False):
                ofs.write("$TABLE: Chain {} :".format(chain_name))
                ofs.write(graph_header)
                ofs.write(chain_rows.to_string(header=False, index=False))
                ofs.write("\n\n")

    if json_out is not None:
        df.to_json(json_out, orient="records", indent=2)
    return df
# model_stats()
|
|
141
|
+
|
|
142
|
+
def main(args):
    """Calculate local correlation maps from half maps (and a model).

    Reads the half maps and optional mask, prepares weighted coefficients,
    computes the local half-map CC map with either a hard-sphere kernel
    (``--kernel_ang``) or a raised-cosine kernel (``--kernel``), logs its
    min/max and writes it to "<prefix>_half.mrc".  When a model is given,
    the map-model CC map is computed and written to "<prefix>_model.mrc",
    and per-residue statistics go to "<prefix>_byresidue.log" / ".json".
    """
    maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
    grid_shape = maps[0][0].shape
    mask = utils.fileio.read_ccp4_map(args.mask)[0] if args.mask else None

    d_min = args.resolution
    if d_min is None:
        d_min = utils.maps.nyquist_resolution(maps[0][0])

    hkldata = setup_coeffs_for_halfmap_cc(maps, d_min, mask)
    use_ang_kernel = args.kernel is None
    if use_ang_kernel:
        prefix = "{}_r{}A".format(args.output_prefix, args.kernel_ang)
        knl = hkldata.hard_sphere_kernel(r_ang=args.kernel_ang, grid_size=grid_shape)
    else:
        prefix = "{}_r{}px".format(args.output_prefix, args.kernel)
        knl = utils.maps.raised_cosine_kernel(args.kernel)
    cc_method = "simple" if use_ang_kernel else "scipy"
    extent_mask = mask if args.trim else None

    def values_in_mask(cc_map):
        # restrict to mask > 0.5 when a mask is available
        if mask is None:
            return cc_map
        return cc_map.array[mask.array>0.5]

    halfcc_map = utils.maps.local_cc(hkldata.fft_map("F_map1w", grid_size=grid_shape),
                                     hkldata.fft_map("F_map2w", grid_size=grid_shape),
                                     knl, method=cc_method)
    half_values = values_in_mask(halfcc_map)
    logger.writeln("Half map CC: min/max= {:.4f} {:.4f}".format(numpy.min(half_values), numpy.max(half_values)))
    utils.maps.write_ccp4_map(prefix+"_half.mrc", halfcc_map, hkldata.cell, hkldata.sg,
                              mask_for_extent=extent_mask)

    if not args.model:
        return

    st = utils.fileio.read_structure(args.model)
    utils.model.remove_charge([st])
    ccu = utils.model.CustomCoefUtil()
    if args.source == "custom":
        ccu.read_from_cif(st, args.model)
        ccu.show_info()
        ccu.set_coeffs(st)
    utils.model.expand_ncs(st)
    st.cell = hkldata.cell
    st.spacegroup_hm = hkldata.sg.xhm()
    add_coeffs_for_model_cc(hkldata, st, args.source)
    modelcc_map = utils.maps.local_cc(hkldata.fft_map("FPw", grid_size=grid_shape),
                                      hkldata.fft_map("FCw", grid_size=grid_shape),
                                      knl, method=cc_method)
    model_values = values_in_mask(modelcc_map)
    logger.writeln("Model-map CC: min/max= {:.4f} {:.4f}".format(numpy.min(model_values), numpy.max(model_values)))
    utils.maps.write_ccp4_map(prefix+"_model.mrc", modelcc_map, hkldata.cell, hkldata.sg,
                              mask_for_extent=extent_mask)
    model_stats(st, modelcc_map, halfcc_map,
                loggraph_out=prefix+"_byresidue.log",
                json_out=prefix+"_byresidue.json")
# main()
|
|
193
|
+
|
|
194
|
+
if __name__ == "__main__":
    # Script entry point: parse the command line (without the program
    # name) and run the local correlation calculation.
    import sys
    cli_tokens = sys.argv[1:]
    args = parse_args(cli_tokens)
    main(args)
|