servalcat 0.4.99__cp310-cp310-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-310-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
servalcat/spa/fsc.py ADDED
@@ -0,0 +1,385 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import os
10
+ import gemmi
11
+ import numpy
12
+ import pandas
13
+ from servalcat.utils import logger
14
+ from servalcat import spa
15
+ from servalcat.spa.run_refmac import determine_b_before_mask
16
+ from servalcat import utils
17
+
18
def add_arguments(parser):
    """Register the command-line options of the FSC calculation on ``parser``.

    Exactly one of --map, --halfmaps or --mtz must be given (they form a
    required mutually exclusive group).  Symmetry-related options are added
    by ``utils.symmetry.add_symmetry_args``.

    Args:
        parser: an ``argparse.ArgumentParser`` (or sub-parser) to populate.
    """
    parser.description = 'FSC calculation'

    parser.add_argument('--model',
                        help="")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--map',
                       help='Input map file(s)')
    group.add_argument("--halfmaps", nargs=2)
    group.add_argument('--mtz',
                       help='Input mtz file.')
    parser.add_argument('--labin', nargs=2,
                        help='label (F and PHI) for mtz')
    parser.add_argument('--pixel_size', type=float,
                        help='Override pixel size (A)')
    parser.add_argument('--mask', help='Mask file')
    parser.add_argument('--mask_radius',
                        type=float, default=3,
                        help='calculate mask from model if provided')
    parser.add_argument('--mask_soft_edge',
                        type=float, default=0,
                        help='Add soft edge to model mask.')
    parser.add_argument('--mask_model', action='store_true',
                        help='Apply mask to model density')
    parser.add_argument("--b_before_mask", type=float,
                        help="when model-based mask is used: sharpening B value for sharpen-mask-unsharpen procedure. By default it is determined automatically.")
    parser.add_argument('--no_sharpen_before_mask', action='store_true',
                        help='when model-based mask is used: by default half maps are sharpened before masking by std of signal and unsharpened after masking. This option disables it.')
    utils.symmetry.add_symmetry_args(parser) # add --pg etc
    parser.add_argument('-d', '--resolution',
                        type=float,
                        help='Default: Nyquist')
    # main() hands this value to numpy.random.seed(), which rejects floats.
    # It used to be parsed with type=float, so any seed given on the command
    # line (e.g. "--random_seed 5" -> 5.0) crashed; only the int default worked.
    parser.add_argument('--random_seed', type=int, default=1234,
                        help="random seed for phase randomized FSC")
    parser.add_argument('-o', '--fsc_out',
                        default="fsc.dat",
                        help='')
    parser.add_argument('--csv', action='store_true',
                        help="Write csv file")
    parser.add_argument('--keep_charges', action='store_true',
                        help="Use scattering factor for charged atoms. Use it with care.")

# add_arguments()
61
+
62
def parse_args(arg_list):
    """Parse ``arg_list`` with a fresh parser populated by ``add_arguments``.

    Args:
        arg_list: list of command-line tokens (e.g. ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args``.
    """
    # argparse is not imported at module level in this file, so the original
    # body raised NameError; import it locally to keep the fix self-contained.
    import argparse

    parser = argparse.ArgumentParser()
    add_arguments(parser)
    return parser.parse_args(arg_list)
# parse_args()
67
+
68
def write_loggraph(stats, labs_fc, log_out):
    """Write the per-bin statistics table as a loggraph file.

    Columns of ``stats`` are grouped into graphs by their name prefix
    (half-map FSC, half-map CC/mean cos, map-model FSC, map-model CC/mean cos,
    power); power columns are written as their natural logarithm.  The text is
    produced by ``utils.make_loggraph_str`` and written to ``log_out``.

    Args:
        stats: pandas DataFrame indexed by bin; must contain "d_min" and
            "ncoeffs" columns.
        labs_fc: model structure-factor labels used to recognise map-model
            columns (e.g. "fsc_<lab>...").
        log_out: output file name.
    """
    columns = list(stats)
    fsc_prefixes = tuple("fsc_" + fc for fc in labs_fc)
    cc_prefixes = tuple(p + fc for fc in labs_fc for p in ("cc_", "mcos_"))

    model_fsc_cols = [c for c in columns if c.startswith(fsc_prefixes)]
    model_cc_cols = [c for c in columns if c.startswith(cc_prefixes)]
    power_cols = [c for c in columns if c.startswith("power_")]
    half_fsc_cols = [c for c in ("fsc_half_unmasked", "fsc_half_masked",
                                 "fsc_half_masked_rand", "fsc_half_masked_corrected")
                     if c in stats]
    half_cc_cols = [c for c in ("cc_half", "mcos_half") if c in stats]
    if not half_fsc_cols and "fsc_half" in stats:
        # no phase-randomized columns: fall back to the plain half-map FSC
        half_fsc_cols = ["fsc_half"]

    table = stats.copy()
    table.insert(0, "bin", stats.index)
    for col in power_cols:
        table[col] = numpy.log(table[col])

    if len(half_fsc_cols) > 1:
        half_title = "Phase randomized FSC"
    else:
        half_title = "Half map FSC"
    candidates = [(half_title, half_fsc_cols),
                  ("Half map amplitude CC and Mean(cos(dphi))", half_cc_cols),
                  ("Map-model FSC", model_fsc_cols),
                  ("Map-model amplitude CC and Mean(cos(dphi))", model_cc_cols),
                  ("log(Power)", power_cols)]
    # keep only graphs that actually have columns, in the fixed order above
    title_labs = [(title, cols) for title, cols in candidates if cols]
    title_labs.append(("number of Fourier coefficients", ["ncoeffs"]))

    text = utils.make_loggraph_str(table, main_title="FSC", title_labs=title_labs,
                                   s2=1./table["d_min"]**2)
    with open(log_out, "w") as ofs:
        ofs.write(text)
# write_loggraph()
99
+
100
def fsc_average(n, fsc):
    """Return the ``n``-weighted mean of ``fsc``, with NaNs treated as zero.

    NaN products are dropped from the numerator by ``nansum``; the denominator
    is the NaN-ignoring sum of ``n`` itself.
    """
    weighted_total = numpy.nansum(n * fsc)
    weight_total = numpy.nansum(n)
    return weighted_total / weight_total
# fsc_average()
103
+
104
def randomized_f(f):
    """Return coefficients with the amplitudes of ``f`` and random phases.

    Phases are drawn uniformly from [0, 2*pi) using numpy's global random
    state, so results depend on ``numpy.random.seed``.
    """
    n_coeffs = len(f)
    angles = numpy.random.uniform(0, 2, size=n_coeffs) * numpy.pi
    unit_phasor = numpy.cos(angles) + 1j*numpy.sin(angles)
    return numpy.abs(f) * unit_phasor
# randomized_f()
109
+
110
def calc_fsc(hkldata, labs=None, fs=None):
    """Return the per-bin correlation between two sets of complex coefficients.

    Args:
        hkldata: object providing ``df`` (columns looked up by label) and
            ``binned()`` yielding ``(bin, indices)`` pairs.
        labs: two column labels in ``hkldata.df``; takes precedence over ``fs``.
        fs: two arrays of coefficients, used when ``labs`` is None.

    Returns:
        A list with one value per bin: the real part of the off-diagonal
        element of ``numpy.corrcoef`` for that bin.
    """
    if labs is None:
        assert fs is not None and len(fs) == 2
    else:
        assert len(labs) == 2
        fs = [hkldata.df[lab].to_numpy() for lab in labs]

    first, second = fs[0], fs[1]
    return [numpy.real(numpy.corrcoef(first[sel], second[sel])[1,0])
            for _, sel in hkldata.binned()]
# calc_fsc()
123
+
124
def calc_phase_randomized_fsc(hkldata, mask, labs_half, labs_half_masked, randomize_fsc_at=0.8):
    """Compute half-map FSC with a phase-randomization correction for the mask.

    The unmasked half-map FSC decides the first bin where it drops below
    ``randomize_fsc_at``; from that bin on, the phases of the unmasked half-map
    coefficients are randomized.  The randomized maps are multiplied by
    ``mask``, transformed back, and their FSC is used to correct the masked
    FSC as ``(fsc_masked - fsc_rand) / (1 - fsc_rand)`` starting two bins
    after the randomization start.

    Args:
        hkldata: data object providing ``df``, ``binned_df``, ``binned()``,
            ``fft_map()`` and ``miller_array()``.
        mask: mask values multiplied into the map grid; ``mask.shape`` is used
            as the FFT grid size.
        labs_half: two labels of unmasked half-map coefficients.
        labs_half_masked: two labels of masked half-map coefficients.
        randomize_fsc_at: unmasked FSC threshold that triggers randomization.

    Returns:
        ``(stats, global_res)``: the per-bin DataFrame and the resolution at
        which the corrected FSC first falls below 0.143 (999. if the very
        first bin is already below it).
    """
    stats = hkldata.binned_df[["d_min", "d_max"]].copy()
    stats["fsc_half_unmasked"] = calc_fsc(hkldata, labs=labs_half)
    stats["fsc_half_masked"] = calc_fsc(hkldata, labs=labs_half_masked)
    stats["ncoeffs"] = 0

    # Randomize F
    half_fs = [hkldata.df[labs_half[i]].to_numpy() for i in range(2)]
    f_rands = [numpy.copy(f) for f in half_fs]
    rand_start_bin = None
    for i_bin, idxes in hkldata.binned():
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        fsc_half = stats["fsc_half_unmasked"][i_bin]
        if rand_start_bin is None and fsc_half < randomize_fsc_at:
            rand_start_bin = i_bin
            logger.writeln(" randomize phase beyond {:.2f} A (bin {})".format(stats["d_max"][i_bin], i_bin))

        if rand_start_bin is not None:
            for i in range(2):
                f_rands[i][idxes] = randomized_f(half_fs[i][idxes])

    if rand_start_bin is None:
        # The unmasked FSC never dropped below the threshold, so nothing was
        # randomized.  The original code crashed below on "None + 2".
        logger.writeln(" FSC is never below {}; phases were not randomized and no correction is applied".format(randomize_fsc_at))

    # Multiply mask
    for i in range(2):
        g = hkldata.fft_map(data=f_rands[i], grid_size=mask.shape)
        g.array[:] *= mask
        fg = gemmi.transform_map_to_f_phi(g)
        f_rands[i] = fg.get_value_by_hkl(hkldata.miller_array())

    # Calc randomized fsc
    stats["fsc_half_masked_rand"] = calc_fsc(hkldata, fs=f_rands)

    # Calc corrected fsc
    stats["fsc_half_masked_corrected"] = 0.
    for i_bin in stats.index:
        fsct = stats["fsc_half_masked"][i_bin]
        if rand_start_bin is None or i_bin < rand_start_bin + 2: # RELION way
            corrected = fsct
        else:
            fscn = stats["fsc_half_masked_rand"][i_bin]
            corrected = (fsct - fscn) / (1. - fscn)
        stats.loc[i_bin, "fsc_half_masked_corrected"] = corrected

    global_res = 999.
    for i_bin in stats.index:
        if stats["fsc_half_masked_corrected"][i_bin] < 0.143:
            break
        global_res = 1./(0.5*(1./stats["d_min"][i_bin]+1./stats["d_max"][i_bin])) # definition is slightly different from RELION

    logger.writeln("resolution from mask corrected FSC = {:.2f} A".format(global_res))

    return stats, global_res
# calc_phase_randomized_fsc()
174
+
175
def calc_fsc_all(hkldata, labs_fc, lab_f, labs_half=None,
                 labs_half_nomask=None, mask=None): # TODO name changed
    """Build the per-bin table of FSC, CC, mean cos(dphi), R and power values.

    When ``mask`` and ``labs_half_nomask`` are both given, the half-map FSC
    columns come from ``calc_phase_randomized_fsc``; otherwise a plain
    "fsc_half" column is computed (if ``labs_half`` is given).

    Args:
        hkldata: data object providing ``df``, ``binned_df`` and ``binned()``.
        labs_fc: labels of model coefficients compared against ``lab_f``.
        lab_f: label of the observed (full map) coefficients.
        labs_half: optional pair of half-map labels.
        labs_half_nomask: optional pair of unmasked half-map labels
            (only used when ``mask`` is not None).
        mask: optional mask passed on to ``calc_phase_randomized_fsc``.

    Returns:
        pandas DataFrame with one row per bin.
    """
    if labs_half:
        assert len(labs_half) == 2
    if labs_half_nomask:
        assert len(labs_half_nomask) == 2 # only used when mask is not None

    def pick(lab, sel):
        return hkldata.df[lab].to_numpy()[sel]

    def complex_cc(x, y):
        return numpy.real(numpy.corrcoef(x, y)[1,0])

    def amplitude_cc(x, y):
        return numpy.corrcoef(numpy.abs(x), numpy.abs(y))[1,0]

    def mean_cos_dphi(x, y):
        # f1*f2.conj()/abs(f1)/abs(f2) is much faster, but in case zero..
        return numpy.mean(numpy.cos(numpy.angle(x) - numpy.angle(y)))

    half_fsc_done = mask is not None and bool(labs_half_nomask)
    if half_fsc_done:
        stats, _ = calc_phase_randomized_fsc(hkldata, mask,
                                             labs_half=labs_half_nomask,
                                             labs_half_masked=labs_half)
    else:
        stats = hkldata.binned_df[["d_min", "d_max"]].copy()

    # Create every output column up front; this fixes the column order.
    stats["ncoeffs"] = 0
    new_cols = ["power_{}".format(lab_f)]
    for lab in labs_fc:
        new_cols.extend(tmpl.format(lab) for tmpl in ("power_{}", "fsc_{}_full", "Rcmplx_{}_full",
                                                      "cc_{}_full", "mcos_{}_full"))
    if labs_half:
        if not half_fsc_done:
            new_cols.append("fsc_half")
        new_cols.extend(["cc_half", "mcos_half"])
        for lab in labs_fc:
            new_cols.extend(["fsc_{}_half1".format(lab), "fsc_{}_half2".format(lab)])
    for col in new_cols:
        stats[col] = 0.

    for i_bin, idxes in hkldata.binned():
        stats.loc[i_bin, "ncoeffs"] = len(idxes)
        Fo = pick(lab_f, idxes)
        stats.loc[i_bin, "power_{}".format(lab_f)] = numpy.average(numpy.abs(Fo)**2)

        F1 = F2 = None
        if labs_half:
            F1 = pick(labs_half[0], idxes)
            F2 = pick(labs_half[1], idxes)
            if not half_fsc_done:
                stats.loc[i_bin, "fsc_half"] = complex_cc(F1, F2)
            stats.loc[i_bin, "cc_half"] = amplitude_cc(F1, F2)
            stats.loc[i_bin, "mcos_half"] = mean_cos_dphi(F1, F2)

        for labfc in labs_fc:
            Fc = pick(labfc, idxes)
            fsc_model = complex_cc(Fo, Fc)
            cc_model = amplitude_cc(Fo, Fc)
            mcos_model = mean_cos_dphi(Fo, Fc)
            # scale of Fc against Fo used in the complex R factor
            D = numpy.sum(numpy.real(Fo * numpy.conj(Fc)))/numpy.sum(numpy.abs(Fc)**2)
            rcmplx_model = numpy.sum(numpy.abs(Fo-D*Fc))/numpy.sum(numpy.abs(Fo))
            per_model = (("fsc_{}_full", fsc_model),
                         ("cc_{}_full", cc_model),
                         ("mcos_{}_full", mcos_model),
                         ("Rcmplx_{}_full", rcmplx_model),
                         ("power_{}", numpy.average(numpy.abs(Fc)**2)))
            for tmpl, value in per_model:
                stats.loc[i_bin, tmpl.format(labfc)] = value
            if labs_half:
                stats.loc[i_bin, "fsc_{}_half1".format(labfc)] = complex_cc(F1, Fc)
                stats.loc[i_bin, "fsc_{}_half2".format(labfc)] = complex_cc(F2, Fc)
    return stats
# calc_fsc_all()
236
+
237
def main(args):
    """Run the FSC calculation described by the parsed arguments.

    Reads the input (half maps, a single map, or an mtz file), optionally
    reads or builds a mask and a model, computes structure factors with and
    without mask, evaluates per-bin statistics with ``calc_fsc_all`` and
    writes the table (``args.fsc_out``), an optional CSV, and a loggraph file.

    Args:
        args: namespace with the options defined in ``add_arguments``;
            ``args.resolution`` and ``args.b_before_mask`` may be updated
            in place.

    Raises:
        SystemExit: on inconsistent or missing input options.
    """
    if args.b_before_mask is not None and args.model is None:
        raise SystemExit("--b_before_mask can be only used with --model.")

    numpy.random.seed(args.random_seed)
    if args.mask:
        logger.writeln("Input mask file: {}".format(args.mask))
        mask = utils.fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    if args.halfmaps:
        maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
        unit_cell = maps[0][0].unit_cell
    elif args.map:
        maps = [utils.fileio.read_ccp4_map(args.map, pixel_size=args.pixel_size)]
        unit_cell = maps[0][0].unit_cell
    elif args.mtz:
        if not args.labin:
            # previously failed later with an opaque TypeError on args.labin[0]
            raise SystemExit("Error: --labin is required when --mtz is given.")
        # was read_mmhkl(hklin): "hklin" is not defined anywhere in this module
        mtz = utils.fileio.read_mmhkl(args.mtz)
        if mask is not None and mask.unit_cell != mtz.cell:
            raise SystemExit("Error: Inconsistent unit cell between mtz and mask")
        gr = mtz.transform_f_phi_to_map(f=args.labin[0],
                                        phi=args.labin[1],
                                        exact_size=mask.shape if mask is not None else (0,0,0),
                                        sample_rate=3 if mask is None else 0)
        maps = [[gr, [0,0,0]]]
        unit_cell = mtz.cell # TODO check cell of given label
        d_min = numpy.min(mtz.make_d_array()[~numpy.isnan(mtz.column_with_label(args.labin[0]).array)])
        if args.resolution is None:
            args.resolution = d_min
        elif args.resolution < d_min:
            raise SystemExit("Error: --resolution ({}) is higher than actual resolution in mtz ({:.2f}).".format(args.resolution, d_min))
    else:
        raise SystemExit("Error: No input map/mtz found.")

    if args.resolution is None:
        args.resolution = utils.maps.nyquist_resolution(maps[0][0])
        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))

    if args.model:
        st = utils.fileio.read_structure(args.model)
        st.cell = unit_cell
        st.spacegroup_hm = "P1"
        if not args.keep_charges:
            utils.model.remove_charge([st])
        utils.symmetry.update_ncs_from_args(args, st, map_and_start=maps[0])
        st_expanded = st.clone()
        if len(st.ncs) > 0:
            utils.model.expand_ncs(st_expanded)
        if mask is None and args.mask_radius > 0:
            # XXX if helical..
            if args.twist is not None:
                logger.writeln("Generating all helical copies in the box")
                st_for_mask = st.clone()
                utils.symmetry.update_ncs_from_args(args, st_for_mask, map_and_start=maps[0], filter_contacting=True)
                utils.model.expand_ncs(st_for_mask)
            else:
                st_for_mask = st_expanded
            mask = utils.maps.mask_from_model(st_for_mask, args.mask_radius, soft_edge=args.mask_soft_edge, grid=maps[0][0])
            #utils.maps.write_ccp4_map("mask_from_model.ccp4", mask)
            if not args.no_sharpen_before_mask and args.b_before_mask is None:
                args.b_before_mask = determine_b_before_mask(st_for_mask, maps, maps[0][1], mask, args.resolution)
    else:
        st_expanded = None

    # Pass 0 transforms the maps as given; pass 1 (only with a mask) applies
    # the mask first, so both "_nomask" and "_mask" columns end up in hkldata.
    hkldata = None
    for j in range(2):
        if j == 1:
            if mask is None: break
            if args.b_before_mask is None:
                # modifies original data
                for ma in maps: ma[0].array[:] *= mask
            else:
                maps = utils.maps.sharpen_mask_unsharpen(maps, mask, args.resolution, b=args.b_before_mask)
        lab_suffix = ["_nomask", "_mask"][j]
        for i, m in enumerate(maps):
            if len(maps) == 2:
                lab = "F_map{}".format(i+1)
            else:
                lab = "FP"
            f_grid = gemmi.transform_map_to_f_phi(m[0])
            if hkldata is None:
                asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
                hkldata = utils.hkl.hkldata_from_asu_data(asudata, lab + lab_suffix)
            else:
                hkldata.df[lab + lab_suffix] = f_grid.get_value_by_hkl(hkldata.miller_array())

    if len(maps) == 2:
        # full map = average of the two half maps
        hkldata.df["FP_nomask"] = (hkldata.df.F_map1_nomask + hkldata.df.F_map2_nomask) * 0.5
        if mask is not None:
            hkldata.df["FP_mask"] = (hkldata.df.F_map1_mask + hkldata.df.F_map2_mask) * 0.5

    if len(maps) == 2:
        labs_half = ["F_map1_nomask", "F_map2_nomask"]
        if mask is not None:
            labs_half_masked = ["F_map1_mask", "F_map2_mask"]
        else:
            labs_half_masked = []
    else:
        labs_half, labs_half_masked = [], []
    lab_f = "FP_nomask" if mask is None else "FP_mask"
    labs_fc = []
    if st_expanded is not None:
        labs_fc.append("FC")
        hkldata.df[labs_fc[-1]] = utils.model.calc_fc_fft(st_expanded, args.resolution - 1e-6, source="electron",
                                                          miller_array=hkldata.miller_array())
        if args.mask_model and mask is not None:
            if args.b_before_mask is None:
                normalizer = 1.
            else:
                normalizer = hkldata.debye_waller_factors(b_iso=args.b_before_mask)
            g = hkldata.fft_map(data=hkldata.df[labs_fc[-1]] / normalizer, grid_size=mask.shape)
            g.array[:] *= mask
            fg = gemmi.transform_map_to_f_phi(g)
            hkldata.df[labs_fc[-1]] = fg.get_value_by_hkl(hkldata.miller_array()) * normalizer

    hkldata.setup_relion_binning()
    stats = calc_fsc_all(hkldata, labs_fc=labs_fc, lab_f=lab_f,
                         labs_half=labs_half_masked if mask is not None else labs_half,
                         labs_half_nomask=labs_half, mask=mask)
    with open(args.fsc_out, "w") as ofs:
        if args.mask:
            ofs.write("# Mask= {}\n".format(args.mask))
        if args.model is not None:
            ofs.write("# {} from {}\n".format(labs_fc[0], args.model))

        ofs.write(stats.to_string(index=False, index_names=False)+"\n")
        for k in stats:
            if k.startswith("fsc_FC_"):
                logger.writeln("# FSCaverage of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)
            if k.startswith("Rcmplx_FC_"):
                logger.writeln("# Average of {} = {:.4f}".format(k, fsc_average(stats.ncoeffs, stats[k])), fs=ofs)

    logger.writeln("Data file: {}".format(args.fsc_out))

    if args.csv:
        csv_out = os.path.splitext(args.fsc_out)[0] + ".csv"
        stats.to_csv(csv_out)
        logger.writeln("CSV file: {}".format(csv_out))

    log_out = os.path.splitext(args.fsc_out)[0] + ".log"
    write_loggraph(stats, labs_fc, log_out)
    logger.writeln("Run loggraph {} to see plots.".format(log_out))
# main()
381
+
382
if __name__ == "__main__":
    # Script entry point: parse the command line and run the FSC calculation.
    import sys
    main(parse_args(sys.argv[1:]))
servalcat/spa/localcc.py ADDED
@@ -0,0 +1,188 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import gemmi
10
+ import numpy
11
+ import pandas
12
+ import os
13
+ import argparse
14
+ from servalcat.utils import logger
15
+ from servalcat import utils
16
+
17
def add_arguments(parser):
    """Register the options of the local correlation map tool on ``parser``.

    --halfmaps is required, and exactly one of --kernel (radius in pixels) or
    --kernel_ang (radius in Angstrom) must be given.

    Args:
        parser: an ``argparse.ArgumentParser`` (or sub-parser) to populate.
    """
    parser.description = 'Calculate real space local correlation map from half maps and model'
    parser.add_argument("--halfmaps", nargs=2, required=True,
                        help="Input half map files")
    parser.add_argument('--pixel_size', type=float,
                        help='Override pixel size (A)')

    # the kernel size is given either in pixels or in Angstrom, never both
    kernel_choice = parser.add_mutually_exclusive_group(required=True)
    kernel_choice.add_argument("--kernel", type=int,
                               help="Kernel radius in pixel")
    kernel_choice.add_argument("--kernel_ang", type=float,
                               help="Kernel radius in Angstrom (hard sphere)")

    parser.add_argument('--mask', help="mask file")
    parser.add_argument('--model', help='Input atomic model file')
    parser.add_argument('--resolution', type=float,
                        help='default: nyquist resolution')
    parser.add_argument("--trim", action='store_true',
                        help="Write trimmed map")
    parser.add_argument('-o', '--output_prefix', default="ccmap",
                        help="default: %(default)s")
# add_arguments()
38
+
39
def parse_args(arg_list):
    """Parse ``arg_list`` with a fresh parser populated by ``add_arguments``.

    Args:
        arg_list: list of command-line tokens (e.g. ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    cli = argparse.ArgumentParser()
    add_arguments(cli)
    parsed = cli.parse_args(arg_list)
    return parsed
# parse_args()
44
+
45
def setup_coeffs_for_halfmap_cc(maps, d_min, mask=None, st=None):
    """Prepare weighted half-map coefficients for the local half-map CC.

    Per bin the weight is ``sqrt(FSChalf / (2*var_noise + var_signal))`` with
    ``FSChalf = FSCfull / (2 - FSCfull)``.  Weighting stops at the first bin
    with negative FSCfull, so that bin and all following ones keep zero
    coefficients.  Adds columns "F_map1w"/"F_map2w" to ``hkldata.df`` and
    "w2_half_varsignal" to ``hkldata.binned_df``.

    Args:
        maps: half maps passed to ``utils.maps.mask_and_fft_maps``.
        d_min: resolution limit passed to ``utils.maps.mask_and_fft_maps``.
        mask: optional mask passed to ``utils.maps.mask_and_fft_maps``.
        st: not used by this function.

    Returns:
        The hkldata object holding the original and weighted coefficients.
    """
    hkldata = utils.maps.mask_and_fft_maps(maps, d_min, mask)
    hkldata.setup_relion_binning()
    # presumably fills the FSCfull / var_noise / var_signal columns read below
    # -- confirm against utils.maps
    utils.maps.calc_noise_var_from_halfmaps(hkldata)

    n_coeffs = len(hkldata.df.index)
    half1 = hkldata.df.F_map1.to_numpy()
    half2 = hkldata.df.F_map2.to_numpy()
    half1_weighted = numpy.zeros(n_coeffs, dtype=complex)
    half2_weighted = numpy.zeros(n_coeffs, dtype=complex)

    logger.writeln("Calculating weights for half map correlation.")
    logger.writeln(" weight = sqrt(FSChalf / (2*var_noise + var_signal))")
    binned = hkldata.binned_df
    binned["w2_half_varsignal"] = 0.
    for i_bin, idxes in hkldata.binned():
        fscfull = binned.FSCfull[i_bin]
        if fscfull < 0:
            break # stop here so that higher resolution are all zero
        fsc = fscfull / (2 - fscfull)
        var_signal = binned.var_signal[i_bin]
        var_fo = 2 * binned.var_noise[i_bin] + var_signal
        squared_weight = fsc / var_fo
        w = numpy.sqrt(squared_weight)
        binned.loc[i_bin, "w2_half_varsignal"] = squared_weight * var_signal
        half1_weighted[idxes] = half1[idxes] * w
        half2_weighted[idxes] = half2[idxes] * w

    hkldata.df["F_map1w"] = half1_weighted
    hkldata.df["F_map2w"] = half2_weighted

    return hkldata
# setup_coeffs_for_halfmap_cc()
75
+
76
def add_coeffs_for_model_cc(hkldata, st):
    """Add model coefficients and weighted Fo/Fc columns for the map-model CC.

    Calculates "FC" from ``st`` and, per bin, scales FP and FC by
    ``sqrt(FSCfull / var(F))``.  Weighting stops at the first bin with
    negative FSCfull, so that bin and all following ones keep zero
    coefficients.  Adds "FC", "FPw", "FCw" to ``hkldata.df`` and
    "w_mapmodel_c", "w_mapmodel_o", "var_fc" to ``hkldata.binned_df``.

    Args:
        hkldata: data object with "FP" in ``df`` and "FSCfull" in ``binned_df``.
        st: structure passed to ``utils.model.calc_fc_fft``.
    """
    d_min = hkldata.d_min_max()[0]-1e-6
    hkldata.df["FC"] = utils.model.calc_fc_fft(st, d_min=d_min,
                                               source="electron", miller_array=hkldata.miller_array())
    n_coeffs = len(hkldata.df.index)
    fc_weighted = numpy.zeros(n_coeffs, dtype=complex)
    fp_weighted = numpy.zeros(n_coeffs, dtype=complex)
    fp_all = hkldata.df.FP.to_numpy()
    fc_all = hkldata.df.FC.to_numpy()

    logger.writeln("Calculating weights for map-model correlation.")
    logger.writeln(" weight for Fo = sqrt(FSCfull / var(Fo))")
    logger.writeln(" weight for Fc = sqrt(FSCfull / var(Fc))")
    binned = hkldata.binned_df
    for col in ("w_mapmodel_c", "w_mapmodel_o", "var_fc"):
        binned[col] = 0.
    for i_bin, idxes in hkldata.binned():
        fscfull = binned.FSCfull[i_bin]
        if fscfull < 0:
            break
        fc_bin, fp_bin = fc_all[idxes], fp_all[idxes]
        var_fc = numpy.var(fc_bin)
        wc = numpy.sqrt(fscfull / var_fc)
        wo = numpy.sqrt(fscfull / numpy.var(fp_bin))
        fc_weighted[idxes] = fc_bin * wc
        fp_weighted[idxes] = fp_bin * wo
        binned.loc[i_bin, "w_mapmodel_c"] = wc
        binned.loc[i_bin, "w_mapmodel_o"] = wo
        binned.loc[i_bin, "var_fc"] = var_fc

    hkldata.df["FPw"] = fp_weighted
    hkldata.df["FCw"] = fc_weighted
# add_coeffs_for_model_cc()
106
+
107
def model_stats(st, modelcc_map, halfcc_map, loggraph_out=None):
    """Tabulate per-residue mean local correlations of the first model.

    For every residue in ``st[0]`` the two correlation maps are interpolated
    at each atom position and averaged over the residue's atoms.

    Args:
        st: structure; only its first model ``st[0]`` is used.
        modelcc_map: map-model correlation map providing ``interpolate_value``.
        halfcc_map: half-map correlation map providing ``interpolate_value``.
        loggraph_out: optional file name; when given, one loggraph table per
            chain is written there.

    Returns:
        pandas DataFrame with columns chain, seqid, resn, CC_mapmodel,
        CC_halfmap and sqrt_CC_full (= sqrt(2*CC_half / (1 + CC_half))).
    """
    def residue_mean(cc_map, residue):
        return numpy.mean([cc_map.interpolate_value(a.pos) for a in residue])

    columns = dict(chain=[], seqid=[], resn=[], CC_mapmodel=[], CC_halfmap=[])
    for chain in st[0]:
        for res in chain:
            mapmodel_cc = residue_mean(modelcc_map, res)
            half_cc = residue_mean(halfcc_map, res)
            row = dict(chain=chain.name, seqid=str(res.seqid), resn=res.name,
                       CC_mapmodel=mapmodel_cc, CC_halfmap=half_cc)
            for key, value in row.items():
                columns[key].append(value)

    df = pandas.DataFrame(columns)
    df["sqrt_CC_full"] = numpy.sqrt(2 * df.CC_halfmap / (1 + df.CC_halfmap))
    if loggraph_out is None:
        return df

    table_header = "\n$GRAPHS\n: average correlations :A:2,4,5,6:\n$$\nchain seqid resn CC(map,model) CC_half sqrt(CC_full)\n$$\n$$\n"
    with open(loggraph_out, "w") as ofs:
        for chain_name, rows in df.groupby("chain", sort=False):
            ofs.write("$TABLE: Chain {} :".format(chain_name))
            ofs.write(table_header)
            ofs.write(rows.to_string(header=False, index=False))
            ofs.write("\n\n")
    return df
# model_stats()
137
+
138
def main(args):
    """Compute and write local correlation maps from half maps (and a model).

    Writes "<prefix>_half.mrc"; when --model is given also writes
    "<prefix>_model.mrc" and the per-residue table "<prefix>_byresidue.log".
    The prefix is built from ``args.output_prefix`` and the kernel radius.

    Args:
        args: namespace with the options defined in ``add_arguments``.
    """
    maps = utils.fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
    grid_shape = maps[0][0].shape
    mask = utils.fileio.read_ccp4_map(args.mask)[0] if args.mask else None

    if args.resolution is not None:
        d_min = args.resolution
    else:
        d_min = utils.maps.nyquist_resolution(maps[0][0])

    hkldata = setup_coeffs_for_halfmap_cc(maps, d_min, mask)

    # --kernel (pixels) uses a raised-cosine kernel with the "scipy" method;
    # --kernel_ang uses a hard-sphere kernel with the "simple" method.
    pixel_kernel = args.kernel is not None
    if pixel_kernel:
        prefix = "{}_r{}px".format(args.output_prefix, args.kernel)
        knl = utils.maps.raised_cosine_kernel(args.kernel)
    else:
        prefix = "{}_r{}A".format(args.output_prefix, args.kernel_ang)
        knl = hkldata.hard_sphere_kernel(r_ang=args.kernel_ang, grid_size=grid_shape)
    cc_method = "scipy" if pixel_kernel else "simple"

    def local_cc_between(lab1, lab2):
        return utils.maps.local_cc(hkldata.fft_map(lab1, grid_size=grid_shape),
                                   hkldata.fft_map(lab2, grid_size=grid_shape),
                                   knl, method=cc_method)

    def report_and_write(cc_map, message, filename):
        # min/max are reported inside the mask when one is given
        values = cc_map.array[mask.array>0.5] if mask is not None else cc_map
        logger.writeln(message.format(numpy.min(values), numpy.max(values)))
        utils.maps.write_ccp4_map(filename, cc_map, hkldata.cell, hkldata.sg,
                                  mask_for_extent=mask if args.trim else None)

    halfcc_map = local_cc_between("F_map1w", "F_map2w")
    report_and_write(halfcc_map, "Half map CC: min/max= {:.4f} {:.4f}", prefix+"_half.mrc")

    if not args.model:
        return

    st = utils.fileio.read_structure(args.model)
    utils.model.remove_charge([st])
    utils.model.expand_ncs(st)
    st.cell = hkldata.cell
    st.spacegroup_hm = hkldata.sg.xhm()
    add_coeffs_for_model_cc(hkldata, st)
    modelcc_map = local_cc_between("FPw", "FCw")
    report_and_write(modelcc_map, "Model-map CC: min/max= {:.4f} {:.4f}", prefix+"_model.mrc")
    model_stats(st, modelcc_map, halfcc_map, loggraph_out=prefix+"_byresidue.log")
# main()
184
+
185
if __name__ == "__main__":
    # Script entry point: parse the command line and compute the local CC maps.
    import sys
    main(parse_args(sys.argv[1:]))