servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +1162 -0
  7. servalcat/refine/refine_geom.py +245 -0
  8. servalcat/refine/refine_spa.py +400 -0
  9. servalcat/refine/refine_xtal.py +339 -0
  10. servalcat/refine/spa.py +151 -0
  11. servalcat/refine/xtal.py +312 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +191 -0
  14. servalcat/refmac/refmac_keywords.py +660 -0
  15. servalcat/refmac/refmac_wrapper.py +423 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +488 -0
  18. servalcat/spa/fsc.py +391 -0
  19. servalcat/spa/localcc.py +197 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +979 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1629 -0
  27. servalcat/utils/fileio.py +836 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +811 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +933 -0
  33. servalcat/utils/refmac.py +759 -0
  34. servalcat/utils/restraints.py +888 -0
  35. servalcat/utils/symmetry.py +298 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +262 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1954 -0
  40. servalcat/xtal/twin.py +316 -0
  41. servalcat-0.4.131.dist-info/METADATA +60 -0
  42. servalcat-0.4.131.dist-info/RECORD +45 -0
  43. servalcat-0.4.131.dist-info/WHEEL +6 -0
  44. servalcat-0.4.131.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
servalcat/spa/fsc.py ADDED
@@ -0,0 +1,391 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import os
10
+ import gemmi
11
+ import numpy
12
+ import pandas
13
+ from servalcat.utils import logger
14
+ from servalcat import spa
15
+ from servalcat.spa.run_refmac import determine_b_before_mask
16
+ from servalcat import utils
17
+
18
def add_arguments(parser):
    """Register the command-line options of the FSC calculation on *parser*.

    Exactly one of --map, --halfmaps or --mtz must be given (they form a
    required mutually exclusive group).  Symmetry-related options are added
    by utils.symmetry.add_symmetry_args().

    Parameters
    ----------
    parser : argparse.ArgumentParser (or sub-parser)
        Parser that receives the description and the options.
    """
    parser.description = 'FSC calculation'

    parser.add_argument('--model',
                        help="")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--map',
                       help='Input map file(s)')
    group.add_argument("--halfmaps", nargs=2)
    group.add_argument('--mtz',
                       help='Input mtz file.')
    parser.add_argument('--labin', nargs=2,
                        help='label (F and PHI) for mtz')
    parser.add_argument('--pixel_size', type=float,
                        help='Override pixel size (A)')
    parser.add_argument('--mask', help='Mask file')
    parser.add_argument('--mask_radius',
                        type=float, default=3,
                        help='calculate mask from model if provided')
    parser.add_argument('--mask_soft_edge',
                        type=float, default=0,
                        help='Add soft edge to model mask.')
    parser.add_argument('--mask_model', action='store_true',
                        help='Apply mask to model density')
    parser.add_argument("--b_before_mask", type=float,
                        help="when model-based mask is used: sharpening B value for sharpen-mask-unsharpen procedure. By default it is determined automatically.")
    parser.add_argument('--no_sharpen_before_mask', action='store_true',
                        help='when model-based mask is used: by default half maps are sharpened before masking by std of signal and unsharpened after masking. This option disables it.')
    utils.symmetry.add_symmetry_args(parser) # add --pg etc
    parser.add_argument('-d', '--resolution',
                        type=float,
                        help='Default: Nyquist')
    # The seed is handed to numpy.random.seed(), which only accepts integers;
    # parsing it as float made every user-supplied seed fail at run time.
    parser.add_argument('--random_seed', type=int, default=1234,
                        help="random seed for phase randomized FSC")
    parser.add_argument("-s", "--source", choices=["electron", "xray", "neutron", "custom"], default="electron")
    parser.add_argument('-o', '--fsc_out',
                        default="fsc.dat",
                        help='')
    parser.add_argument('--csv', action='store_true',
                        help="Write csv file")
    parser.add_argument('--keep_charges', action='store_true',
                        help="Use scattering factor for charged atoms. Use it with care.")

# add_arguments()
62
+
63
def parse_args(arg_list):
    """Parse *arg_list* with a fresh parser carrying this module's options.

    Parameters
    ----------
    arg_list : list of str
        Command-line tokens (without the program name).

    Returns
    -------
    argparse.Namespace
        The parsed options.
    """
    # argparse is not imported at module level in this file, so import it
    # here; otherwise the next line fails with NameError.
    import argparse
    parser = argparse.ArgumentParser()
    add_arguments(parser)
    return parser.parse_args(arg_list)
# parse_args()
68
+
69
def write_loggraph(stats, labs_fc, log_out):
    """Write the binned statistics table to *log_out* in loggraph format.

    Columns of *stats* are grouped into graphs by their name prefixes
    (half-map FSC, half-map CC/mean cos, map-model FSC, map-model CC/mean cos,
    power).  Empty groups are omitted; the "ncoeffs" graph is always added.
    Power columns are written as their natural logarithm.
    """
    columns = list(stats)
    fsc_prefixes = tuple("fsc_" + fc for fc in labs_fc)
    cc_prefixes = tuple(head + fc for fc in labs_fc for head in ("cc_", "mcos_"))

    model_fsc_cols = [c for c in columns if c.startswith(fsc_prefixes)]
    model_cc_cols = [c for c in columns if c.startswith(cc_prefixes)]
    power_cols = [c for c in columns if c.startswith("power_")]
    half_fsc_cols = [c for c in ("fsc_half_unmasked", "fsc_half_masked",
                                 "fsc_half_masked_rand", "fsc_half_masked_corrected")
                     if c in stats]
    half_cc_cols = [c for c in ("cc_half", "mcos_half") if c in stats]
    if not half_fsc_cols and "fsc_half" in stats:
        half_fsc_cols = ["fsc_half"]

    # Work on a copy so that the caller's table keeps the raw power values.
    table = stats.copy()
    table.insert(0, "bin", stats.index)
    for col in power_cols:
        table[col] = numpy.log(table[col])

    half_fsc_title = "Phase randomized FSC" if len(half_fsc_cols) > 1 else "Half map FSC"
    candidates = [(half_fsc_title, half_fsc_cols),
                  ("Half map amplitude CC and Mean(cos(dphi))", half_cc_cols),
                  ("Map-model FSC", model_fsc_cols),
                  ("Map-model amplitude CC and Mean(cos(dphi))", model_cc_cols),
                  ("log(Power)", power_cols)]
    title_labs = [(title, cols) for title, cols in candidates if cols]
    title_labs.append(("number of Fourier coefficients", ["ncoeffs"]))

    with open(log_out, "w") as ofs:
        ofs.write(utils.make_loggraph_str(table, main_title="FSC", title_labs=title_labs,
                                          s2=1./table["d_min"]**2))
# write_loggraph()
100
+
101
def fsc_average(n, fsc):
    """Return the average of *fsc* weighted by *n*.

    NaN terms are skipped independently in the numerator (n * fsc) and in the
    denominator (n), because both sums use numpy.nansum.
    """
    weighted = n * fsc
    total_weight = numpy.nansum(n)
    return numpy.nansum(weighted) / total_weight
# fsc_average()
104
+
105
def randomized_f(f):
    """Return coefficients with the amplitudes of *f* and random phases.

    One phase per element is drawn uniformly from [0, 2*pi) using the global
    numpy.random state.
    """
    random_phase = numpy.random.uniform(0, 2, size=len(f)) * numpy.pi
    unit_vectors = numpy.cos(random_phase) + 1j*numpy.sin(random_phase)
    amplitudes = numpy.abs(f)
    return amplitudes * unit_vectors
# randomized_f()
110
+
111
def calc_fsc(hkldata, labs=None, fs=None):
    """Return the per-bin correlation between two sets of coefficients.

    The two arrays are taken from columns *labs* of hkldata.df when *labs* is
    given, otherwise from *fs* directly.  For every bin of the "stat" binning
    the real part of the off-diagonal element of numpy.corrcoef is collected.

    Returns a list with one value per bin, in the order produced by
    hkldata.binned("stat").
    """
    if labs is None:
        assert fs is not None and len(fs) == 2
    else:
        assert len(labs) == 2
        fs = [hkldata.df[lab].to_numpy() for lab in labs]

    return [numpy.real(numpy.corrcoef(fs[0][sel], fs[1][sel])[1,0])
            for _, sel in hkldata.binned("stat")]
# calc_fsc()
124
+
125
def calc_phase_randomized_fsc(hkldata, mask, labs_half, labs_half_masked, randomize_fsc_at=0.8):
    """Calculate half-map FSC with a phase-randomization correction for the mask.

    Parameters
    ----------
    hkldata
        Reflection data with a "stat" binning, columns *labs_half* and
        *labs_half_masked* in hkldata.df, and fft_map()/miller_array().
    mask
        Mask that is multiplied onto the maps of the phase-randomized
        coefficients.
    labs_half : sequence of two column names
        Unmasked half-map coefficients.
    labs_half_masked : sequence of two column names
        Masked half-map coefficients.
    randomize_fsc_at : float
        Phases are randomized from the first bin whose unmasked FSC is below
        this value onwards.

    Returns
    -------
    (stats, global_res)
        stats is a table with d_min, d_max, fsc_half_unmasked,
        fsc_half_masked, ncoeffs, fsc_half_masked_rand and
        fsc_half_masked_corrected.  global_res is derived from the last bin
        before the corrected FSC drops below 0.143 (999. if the first bin is
        already below).
    """
    stats = hkldata.binned_df["stat"][["d_min", "d_max"]].copy()
    stats["fsc_half_unmasked"] = calc_fsc(hkldata, labs=labs_half)
    stats["fsc_half_masked"] = calc_fsc(hkldata, labs=labs_half_masked)
    stats["ncoeffs"] = 0

    # Randomize F
    f_rands = [numpy.copy(hkldata.df[labs_half[i]]) for i in range(2)]
    rand_start_bin = None
    for i_bin, idxes in hkldata.binned("stat"):
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        fsc_half = stats["fsc_half_unmasked"][i_bin]
        if rand_start_bin is None and fsc_half < randomize_fsc_at:
            rand_start_bin = i_bin
            logger.writeln(" randomize phase beyond {:.2f} A (bin {})".format(stats["d_max"][i_bin], i_bin))

        if rand_start_bin is not None:
            for i in range(2):
                f_rands[i][idxes] = randomized_f(hkldata.df[labs_half[i]].to_numpy()[idxes])

    if rand_start_bin is None:
        # The unmasked FSC never fell below the threshold, so no phases were
        # randomized and no correction can be estimated.
        logger.writeln(" WARNING: unmasked half map FSC never falls below {}; "
                       "masked FSC is not corrected".format(randomize_fsc_at))

    # Multiply mask
    for i in range(2):
        g = hkldata.fft_map(data=f_rands[i], grid_size=mask.shape)
        g.array[:] *= mask
        fg = gemmi.transform_map_to_f_phi(g)
        f_rands[i] = fg.get_value_by_hkl(hkldata.miller_array())

    # Calc randomized fsc
    stats["fsc_half_masked_rand"] = calc_fsc(hkldata, fs=f_rands)

    # Calc corrected fsc
    stats["fsc_half_masked_corrected"] = 0.
    for i_bin in stats.index:
        # RELION way: bins up to two past the randomization start are kept as
        # they are.  Without any randomization start every bin is kept
        # (previously this compared against None + 2 and raised TypeError).
        if rand_start_bin is None or i_bin < rand_start_bin + 2:
            stats.loc[i_bin, "fsc_half_masked_corrected"] = stats["fsc_half_masked"][i_bin]
        else:
            fscn = stats["fsc_half_masked_rand"][i_bin]
            fsct = stats["fsc_half_masked"][i_bin]
            stats.loc[i_bin, "fsc_half_masked_corrected"] = (fsct - fscn) / (1. - fscn)

    global_res = 999.
    for i_bin in stats.index:
        if stats["fsc_half_masked_corrected"][i_bin] < 0.143:
            break
        global_res = 1./(0.5*(1./stats["d_min"][i_bin]+1./stats["d_max"][i_bin])) # definition is slightly different from RELION

    logger.writeln("resolution from mask corrected FSC = {:.2f} A".format(global_res))

    return stats, global_res
# calc_maskphase_randomized_fsc()
175
+
176
def calc_fsc_all(hkldata, labs_fc, lab_f, labs_half=None,
                 labs_half_nomask=None, mask=None): # TODO name changed
    """Calculate per-bin statistics between map, model and half-map coefficients.

    For every bin of the "stat" binning the table receives the number of
    coefficients, the mean squared amplitude ("power_*") of *lab_f* and of
    every label in *labs_fc*, and for each model label the complex
    correlation (fsc_*_full), the amplitude correlation (cc_*_full), the mean
    cosine of the phase difference (mcos_*_full) and a scaled complex R value
    (Rcmplx_*_full).  When *labs_half* is given, half-map statistics and
    model-vs-half-map FSC are added as well.  When both *mask* and
    *labs_half_nomask* are given, the half-map FSC columns come from
    calc_phase_randomized_fsc() instead of a plain "fsc_half" column.

    Returns the statistics table (one row per bin).
    """
    if labs_half:
        assert len(labs_half) == 2
    if labs_half_nomask:
        assert len(labs_half_nomask) == 2 # only used when mask is not None

    def complex_cc(a, b):
        return numpy.real(numpy.corrcoef(a, b)[1,0])

    def amplitude_cc(a, b):
        return numpy.corrcoef(numpy.abs(a), numpy.abs(b))[1,0]

    def mean_cos_dphi(a, b):
        # f1*f2.conj()/abs(f1)/abs(f2) is much faster, but in case zero..
        return numpy.mean(numpy.cos(numpy.angle(a) - numpy.angle(b)))

    half_fsc_done = bool(mask is not None and labs_half_nomask)
    if half_fsc_done:
        stats, _ = calc_phase_randomized_fsc(hkldata, mask,
                                             labs_half=labs_half_nomask,
                                             labs_half_masked=labs_half)
    else:
        stats = hkldata.binned_df["stat"][["d_min", "d_max"]].copy()

    # Create all output columns up front; the order here is the column order
    # of the resulting table.
    float_columns = ["power_{}".format(lab_f)]
    for lab in labs_fc:
        float_columns.extend(tmpl.format(lab) for tmpl in ("power_{}", "fsc_{}_full",
                                                           "Rcmplx_{}_full", "cc_{}_full",
                                                           "mcos_{}_full"))
    if labs_half:
        if not half_fsc_done:
            float_columns.append("fsc_half")
        float_columns.extend(["cc_half", "mcos_half"])
        for lab in labs_fc:
            float_columns.extend(["fsc_{}_half1".format(lab), "fsc_{}_half2".format(lab)])

    stats["ncoeffs"] = 0
    for column in float_columns:
        stats[column] = 0.

    for i_bin, idxes in hkldata.binned("stat"):
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        Fo = hkldata.df[lab_f].to_numpy()[idxes]
        stats.loc[i_bin, "power_{}".format(lab_f)] = numpy.average(numpy.abs(Fo)**2)

        F1 = F2 = None
        if labs_half:
            F1 = hkldata.df[labs_half[0]].to_numpy()[idxes]
            F2 = hkldata.df[labs_half[1]].to_numpy()[idxes]
            if not half_fsc_done:
                stats.loc[i_bin, "fsc_half"] = complex_cc(F1, F2)
            stats.loc[i_bin, "cc_half"] = amplitude_cc(F1, F2)
            stats.loc[i_bin, "mcos_half"] = mean_cos_dphi(F1, F2)

        for labfc in labs_fc:
            Fc = hkldata.df[labfc].to_numpy()[idxes]
            stats.loc[i_bin, "fsc_{}_full".format(labfc)] = complex_cc(Fo, Fc)
            stats.loc[i_bin, "cc_{}_full".format(labfc)] = amplitude_cc(Fo, Fc)
            stats.loc[i_bin, "mcos_{}_full".format(labfc)] = mean_cos_dphi(Fo, Fc)
            # Scale factor applied to Fc before taking the complex R value
            scale = numpy.sum(numpy.real(Fo * numpy.conj(Fc)))/numpy.sum(numpy.abs(Fc)**2)
            stats.loc[i_bin, "Rcmplx_{}_full".format(labfc)] = numpy.sum(numpy.abs(Fo-scale*Fc))/numpy.sum(numpy.abs(Fo))
            stats.loc[i_bin, "power_{}".format(labfc)] = numpy.average(numpy.abs(Fc)**2)
            if labs_half:
                stats.loc[i_bin, "fsc_{}_half1".format(labfc)] = complex_cc(F1, Fc)
                stats.loc[i_bin, "fsc_{}_half2".format(labfc)] = complex_cc(F2, Fc)
    return stats
# calc_fsc_all()
237
+
238
def main(args):
    """Run the FSC calculation described by the parsed options *args*.

    Steps: read the mask (optional) and the input map(s) or mtz, decide the
    resolution limit, read the model (optional) and build a model-based mask
    when no mask file was given, Fourier transform the unmasked and masked
    maps, calculate model structure factors, compute the binned statistics
    with calc_fsc_all() and write them to args.fsc_out (plus optional csv and
    a loggraph .log file).

    Raises
    ------
    SystemExit
        For inconsistent options or inputs (see the messages below).
    """
    if args.b_before_mask is not None and args.model is None:
        raise SystemExit("--b_before_mask can be only used with --model.")

    # numpy.random.seed() does not accept floats; normalise whatever the
    # parser (or a programmatic caller) supplied.  None is passed through.
    seed = args.random_seed
    if seed is not None:
        seed = int(seed)
    numpy.random.seed(seed)
    if args.mask:
        logger.writeln("Input mask file: {}".format(args.mask))
        mask = utils.fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    if args.halfmaps:
        maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
        unit_cell = maps[0][0].unit_cell
    elif args.map:
        maps = [utils.fileio.read_ccp4_map(args.map, pixel_size=args.pixel_size)]
        unit_cell = maps[0][0].unit_cell
    elif args.mtz:
        if args.labin is None:
            # labin is indexed below; fail with a clear message instead of TypeError
            raise SystemExit("Error: --labin is required when --mtz is given.")
        # The original referred to an undefined name (hklin) here.
        mtz = utils.fileio.read_mmhkl(args.mtz)
        if mask is not None and mask.unit_cell != mtz.cell:
            raise SystemExit("Error: Inconsistent unit cell between mtz and mask")
        gr = mtz.transform_f_phi_to_map(f=args.labin[0],
                                        phi=args.labin[1],
                                        exact_size=mask.shape if mask is not None else (0,0,0),
                                        sample_rate=3 if mask is None else 0)
        maps = [[gr, [0,0,0]]]
        unit_cell = mtz.cell # TODO check cell of given label
        d_min = numpy.min(mtz.make_d_array()[~numpy.isnan(mtz.column_with_label(args.labin[0]).array)])
        if args.resolution is None:
            args.resolution = d_min
        elif args.resolution < d_min:
            raise SystemExit("Error: --resolution ({}) is higher than actual resolution in mtz ({:.2f}).".format(args.resolution, d_min))
    else:
        raise SystemExit("Error: No input map/mtz found.")

    if args.resolution is None:
        args.resolution = utils.maps.nyquist_resolution(maps[0][0])
        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))

    if args.model:
        st = utils.fileio.read_structure(args.model)
        st.cell = unit_cell
        st.spacegroup_hm = "P1"
        ccu = utils.model.CustomCoefUtil()
        if not args.keep_charges:
            utils.model.remove_charge([st])
        if args.source == "custom":
            ccu.read_from_cif(st, args.model)
            ccu.show_info()
            ccu.set_coeffs(st)
        utils.symmetry.update_ncs_from_args(args, st, map_and_start=maps[0])
        st_expanded = st.clone()
        if len(st.ncs) > 0:
            utils.model.expand_ncs(st_expanded)
        if mask is None and args.mask_radius > 0:
            # XXX if helical..
            if args.twist is not None:
                logger.writeln("Generating all helical copies in the box")
                st_for_mask = st.clone()
                utils.symmetry.update_ncs_from_args(args, st_for_mask, map_and_start=maps[0], filter_contacting=True)
                utils.model.expand_ncs(st_for_mask)
            else:
                st_for_mask = st_expanded
            mask = utils.maps.mask_from_model(st_for_mask, args.mask_radius, soft_edge=args.mask_soft_edge, grid=maps[0][0])
            #utils.maps.write_ccp4_map("mask_from_model.ccp4", mask)
            if not args.no_sharpen_before_mask and args.b_before_mask is None:
                args.b_before_mask = determine_b_before_mask(st_for_mask, maps, maps[0][1], mask, args.resolution)
    else:
        st_expanded = None

    # Pass 0 transforms the unmasked maps, pass 1 (only with a mask) the
    # masked ones; the columns get the suffix "_nomask" or "_mask".
    hkldata = None
    for j in range(2):
        if j == 1:
            if mask is None: break
            if args.b_before_mask is None:
                # modifies original data
                for ma in maps: ma[0].array[:] *= mask
            else:
                maps = utils.maps.sharpen_mask_unsharpen(maps, mask, args.resolution, b=args.b_before_mask)
        lab_suffix = ["_nomask", "_mask"][j]
        for i, m in enumerate(maps):
            if len(maps) == 2:
                lab = "F_map{}".format(i+1)
            else:
                lab = "FP"
            f_grid = gemmi.transform_map_to_f_phi(m[0])
            if hkldata is None:
                asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
                hkldata = utils.hkl.hkldata_from_asu_data(asudata, lab + lab_suffix)
            else:
                hkldata.df[lab + lab_suffix] = f_grid.get_value_by_hkl(hkldata.miller_array())

    if len(maps) == 2:
        # Full-map coefficients are the average of the two half maps
        hkldata.df["FP_nomask"] = (hkldata.df.F_map1_nomask + hkldata.df.F_map2_nomask) * 0.5
        if mask is not None:
            hkldata.df["FP_mask"] = (hkldata.df.F_map1_mask + hkldata.df.F_map2_mask) * 0.5

    if len(maps) == 2:
        labs_half = ["F_map1_nomask", "F_map2_nomask"]
        if mask is not None:
            labs_half_masked = ["F_map1_mask", "F_map2_mask"]
        else:
            labs_half_masked = []
    else:
        labs_half, labs_half_masked = [], []
    lab_f = "FP_nomask" if mask is None else "FP_mask"
    labs_fc = []
    if st_expanded is not None:
        labs_fc.append("FC")
        hkldata.df[labs_fc[-1]] = utils.model.calc_fc_fft(st_expanded, args.resolution - 1e-6, source=args.source,
                                                          miller_array=hkldata.miller_array())
        if args.mask_model and mask is not None:
            if args.b_before_mask is None:
                normalizer = 1.
            else:
                normalizer = hkldata.debye_waller_factors(b_iso=args.b_before_mask)
            g = hkldata.fft_map(data=hkldata.df[labs_fc[-1]] / normalizer, grid_size=mask.shape)
            g.array[:] *= mask
            fg = gemmi.transform_map_to_f_phi(g)
            hkldata.df[labs_fc[-1]] = fg.get_value_by_hkl(hkldata.miller_array()) * normalizer

    hkldata.setup_relion_binning("stat")
    stats = calc_fsc_all(hkldata, labs_fc=labs_fc, lab_f=lab_f,
                         labs_half=labs_half_masked if mask is not None else labs_half,
                         labs_half_nomask=labs_half, mask=mask)
    with open(args.fsc_out, "w") as ofs:
        if args.mask:
            ofs.write("# Mask= {}\n".format(args.mask))
        # labs_fc is non-empty exactly when a model was read; testing it
        # directly avoids indexing an empty list.
        if labs_fc:
            ofs.write("# {} from {}\n".format(labs_fc[0], args.model))

        ofs.write(stats.to_string(index=False, index_names=False)+"\n")
        for k in stats:
            if k.startswith("fsc_FC_"):
                logger.writeln("# FSCaverage of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)
            if k.startswith("Rcmplx_FC_"):
                logger.writeln("# Average of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)

    logger.writeln("Data file: {}".format(args.fsc_out))

    if args.csv:
        csv_out = os.path.splitext(args.fsc_out)[0] + ".csv"
        stats.to_csv(csv_out)
        logger.writeln("CSV file: {}".format(csv_out))

    log_out = os.path.splitext(args.fsc_out)[0] + ".log"
    write_loggraph(stats, labs_fc, log_out)
    logger.writeln("Run loggraph {} to see plots.".format(log_out))
# main()
387
+
388
if __name__ == "__main__":
    # Script entry point: parse the command line (program name dropped)
    # and run the FSC calculation.
    import sys
    args = parse_args(sys.argv[1:])
    main(args)
@@ -0,0 +1,197 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import gemmi
10
+ import numpy
11
+ import pandas
12
+ import os
13
+ import json
14
+ import argparse
15
+ from servalcat.utils import logger
16
+ from servalcat import utils
17
+
18
def add_arguments(parser):
    """Register the command-line options of the local correlation job on *parser*.

    --halfmaps is required, and exactly one of --kernel / --kernel_ang must
    be given (required mutually exclusive group).
    """
    parser.description = 'Calculate real space local correlation map from half maps and model'
    add = parser.add_argument

    add("--halfmaps", required=True, nargs=2, help="Input half map files")
    add('--pixel_size', type=float, help='Override pixel size (A)')

    # Kernel size: either in pixels or in Angstrom, never both.
    kernel_group = parser.add_mutually_exclusive_group(required=True)
    kernel_group.add_argument("--kernel", type=int, help="Kernel radius in pixel")
    kernel_group.add_argument("--kernel_ang", type=float,
                              help="Kernel radius in Angstrom (hard sphere)")

    add('--mask', help="mask file")
    add('--model', help='Input atomic model file')
    add('--resolution', type=float, help='default: nyquist resolution')
    add("-s", "--source", default="electron",
        choices=["electron", "xray", "neutron", "custom"])
    add("--trim", action='store_true', help="Write trimmed map")
    add('-o', '--output_prefix', default="ccmap", help="default: %(default)s")
# add_arguments()
40
+
41
def parse_args(arg_list):
    """Parse *arg_list* with a new parser that carries this module's options."""
    cli = argparse.ArgumentParser()
    add_arguments(cli)
    parsed = cli.parse_args(arg_list)
    return parsed
# parse_args()
46
+
47
def setup_coeffs_for_halfmap_cc(maps, d_min, mask=None, st=None):
    """Prepare weighted half-map coefficients for local correlation.

    The maps are (optionally masked and) Fourier transformed, binned with the
    "ml" binning and noise variances are estimated from the half maps.  In
    every bin both half-map coefficients are multiplied by
    sqrt(FSChalf / (2*var_noise + var_signal)); from the first bin with a
    negative FSCfull onwards the weighted coefficients stay zero.

    Adds columns F_map1w / F_map2w to hkldata.df and the per-bin column
    w2_half_varsignal to the "ml" bin table, and returns hkldata.
    The *st* argument is not used.
    """
    hkldata = utils.maps.mask_and_fft_maps(maps, d_min, mask)
    hkldata.setup_relion_binning("ml")
    utils.maps.calc_noise_var_from_halfmaps(hkldata)

    n_refl = len(hkldata.df.index)
    half1_weighted = numpy.zeros(n_refl, dtype=complex)
    half2_weighted = numpy.zeros(n_refl, dtype=complex)
    half1 = hkldata.df.F_map1.to_numpy()
    half2 = hkldata.df.F_map2.to_numpy()

    logger.writeln("Calculating weights for half map correlation.")
    logger.writeln(" weight = sqrt(FSChalf / (2*var_noise + var_signal))")
    bin_table = hkldata.binned_df["ml"]
    bin_table["w2_half_varsignal"] = 0.
    for i_bin, idxes in hkldata.binned("ml"):
        fsc_full = bin_table.FSCfull[i_bin]
        if fsc_full < 0:
            # stop here so that higher resolution are all zero
            break
        fsc_half = fsc_full / (2 - fsc_full)
        var_signal = bin_table.var_signal[i_bin]
        var_observed = 2 * bin_table.var_noise[i_bin] + var_signal
        weight = numpy.sqrt(fsc_half / var_observed)
        bin_table.loc[i_bin, "w2_half_varsignal"] = fsc_half / var_observed * var_signal
        half1_weighted[idxes] = half1[idxes] * weight
        half2_weighted[idxes] = half2[idxes] * weight

    hkldata.df["F_map1w"] = half1_weighted
    hkldata.df["F_map2w"] = half2_weighted

    return hkldata
# setup_coeffs_for_halfmap_cc()
77
+
78
def add_coeffs_for_model_cc(hkldata, st, source="electron"):
    """Add model coefficients and weighted map/model coefficients to *hkldata*.

    FC is calculated from *st* for the reflections of hkldata.  In every "ml"
    bin, FP is scaled by sqrt(FSCfull / var(FP)) and FC by
    sqrt(FSCfull / var(FC)); from the first bin with a negative FSCfull
    onwards the weighted coefficients stay zero.

    Adds columns FC, FPw and FCw to hkldata.df and the per-bin columns
    w_mapmodel_c, w_mapmodel_o and var_fc to the "ml" bin table.
    Returns nothing.
    """
    hkldata.df["FC"] = utils.model.calc_fc_fft(st, d_min=hkldata.d_min_max()[0]-1e-6,
                                               source=source, miller_array=hkldata.miller_array())
    n_refl = len(hkldata.df.index)
    fc_weighted = numpy.zeros(n_refl, dtype=complex)
    fp_weighted = numpy.zeros(n_refl, dtype=complex)
    fp = hkldata.df.FP.to_numpy()
    fc = hkldata.df.FC.to_numpy()

    logger.writeln("Calculating weights for map-model correlation.")
    logger.writeln(" weight for Fo = sqrt(FSCfull / var(Fo))")
    logger.writeln(" weight for Fc = sqrt(FSCfull / var(Fc))")
    bin_table = hkldata.binned_df["ml"]
    for column in ("w_mapmodel_c", "w_mapmodel_o", "var_fc"):
        bin_table[column] = 0.
    for i_bin, idxes in hkldata.binned("ml"):
        fsc_full = bin_table.FSCfull[i_bin]
        if fsc_full < 0:
            break
        var_fc = numpy.var(fc[idxes])
        weight_c = numpy.sqrt(fsc_full / var_fc)
        weight_o = numpy.sqrt(fsc_full / numpy.var(fp[idxes]))
        fc_weighted[idxes] = fc[idxes] * weight_c
        fp_weighted[idxes] = fp[idxes] * weight_o
        bin_table.loc[i_bin, "w_mapmodel_c"] = weight_c
        bin_table.loc[i_bin, "w_mapmodel_o"] = weight_o
        bin_table.loc[i_bin, "var_fc"] = var_fc

    hkldata.df["FPw"] = fp_weighted
    hkldata.df["FCw"] = fc_weighted
# add_coeffs_for_model_cc()
108
+
109
def model_stats(st, modelcc_map, halfcc_map, loggraph_out=None, json_out=None):
    """Tabulate per-residue averages of the two local correlation maps.

    For every residue of the first model of *st*, both maps are interpolated
    at each atom position and the values averaged.  The resulting table has
    the columns chain, seqid, resn, CC_mapmodel, CC_halfmap and sqrt_CC_full
    (= sqrt(2*CC_halfmap / (1 + CC_halfmap))).

    When *loggraph_out* is given, one loggraph table per chain is written
    there; when *json_out* is given, the table is written as JSON records.
    Returns the pandas.DataFrame.
    """
    def residue_mean(grid, residue):
        return numpy.mean([grid.interpolate_value(atom.pos) for atom in residue])

    records = dict(chain=[], seqid=[], resn=[], CC_mapmodel=[], CC_halfmap=[])
    for chain in st[0]:
        for residue in chain:
            cc_model = residue_mean(modelcc_map, residue)
            cc_half = residue_mean(halfcc_map, residue)
            records["chain"].append(chain.name)
            records["seqid"].append(str(residue.seqid))
            records["resn"].append(residue.name)
            records["CC_mapmodel"].append(cc_model)
            records["CC_halfmap"].append(cc_half)

    df = pandas.DataFrame(records)
    df["sqrt_CC_full"] = numpy.sqrt(2 * df.CC_halfmap / (1 + df.CC_halfmap))

    if loggraph_out is not None:
        graph_header = """
$GRAPHS
: average correlations :A:2,4,5,6:
$$
chain seqid resn CC(map,model) CC_half sqrt(CC_full)
$$
$$
"""
        with open(loggraph_out, "w") as ofs:
            for chain_name, rows in df.groupby("chain", sort=False):
                ofs.write("$TABLE: Chain {} :".format(chain_name))
                ofs.write(graph_header)
                ofs.write(rows.to_string(header=False, index=False))
                ofs.write("\n\n")

    if json_out is not None:
        df.to_json(json_out, orient="records", indent=2)
    return df
# model_stats()
141
+
142
def main(args):
    """Calculate local correlation maps as described by the parsed options *args*.

    A half-map local correlation map is always written to
    "<prefix>_half.mrc".  When a model is given, a map-model local
    correlation map ("<prefix>_model.mrc") and per-residue statistics
    ("<prefix>_byresidue.log" / ".json") are written as well.  The prefix is
    the output prefix followed by the kernel size.
    """
    maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
    grid_shape = maps[0][0].shape
    mask = utils.fileio.read_ccp4_map(args.mask)[0] if args.mask else None
    d_min = utils.maps.nyquist_resolution(maps[0][0]) if args.resolution is None else args.resolution

    hkldata = setup_coeffs_for_halfmap_cc(maps, d_min, mask)

    # The kernel is given either in Angstrom (hard sphere) or in pixels
    # (raised cosine); the choice also selects the local_cc method.
    kernel_in_angstrom = args.kernel is None
    if kernel_in_angstrom:
        prefix = "{}_r{}A".format(args.output_prefix, args.kernel_ang)
        knl = hkldata.hard_sphere_kernel(r_ang=args.kernel_ang, grid_size=grid_shape)
    else:
        prefix = "{}_r{}px".format(args.output_prefix, args.kernel)
        knl = utils.maps.raised_cosine_kernel(args.kernel)
    cc_method = "simple" if kernel_in_angstrom else "scipy"
    extent_mask = mask if args.trim else None

    def local_cc_of(label1, label2):
        return utils.maps.local_cc(hkldata.fft_map(label1, grid_size=grid_shape),
                                   hkldata.fft_map(label2, grid_size=grid_shape),
                                   knl, method=cc_method)

    def report_and_write(cc_map, message, filename):
        # min/max are taken inside the mask when there is one
        values = cc_map.array[mask.array>0.5] if mask is not None else cc_map
        logger.writeln(message.format(numpy.min(values), numpy.max(values)))
        utils.maps.write_ccp4_map(filename, cc_map, hkldata.cell, hkldata.sg,
                                  mask_for_extent=extent_mask)

    halfcc_map = local_cc_of("F_map1w", "F_map2w")
    report_and_write(halfcc_map, "Half map CC: min/max= {:.4f} {:.4f}", prefix+"_half.mrc")

    if not args.model:
        return

    st = utils.fileio.read_structure(args.model)
    utils.model.remove_charge([st])
    ccu = utils.model.CustomCoefUtil()
    if args.source == "custom":
        ccu.read_from_cif(st, args.model)
        ccu.show_info()
        ccu.set_coeffs(st)
    utils.model.expand_ncs(st)
    st.cell = hkldata.cell
    st.spacegroup_hm = hkldata.sg.xhm()
    add_coeffs_for_model_cc(hkldata, st, args.source)
    modelcc_map = local_cc_of("FPw", "FCw")
    report_and_write(modelcc_map, "Model-map CC: min/max= {:.4f} {:.4f}", prefix+"_model.mrc")
    model_stats(st, modelcc_map, halfcc_map, loggraph_out=prefix+"_byresidue.log", json_out=prefix+"_byresidue.json")
# main()
193
+
194
if __name__ == "__main__":
    # Script entry point: parse the command line (program name dropped)
    # and run the local correlation calculation.
    import sys
    args = parse_args(sys.argv[1:])
    main(args)