servalcat-0.4.60-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of servalcat might be problematic.
Files changed (44)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
servalcat/utils/hkl.py ADDED
@@ -0,0 +1,699 @@
+ """
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
+ MRC Laboratory of Molecular Biology
+
+ This software is released under the
+ Mozilla Public License, version 2.0; see LICENSE.
+ """
+ from __future__ import absolute_import, division, print_function, generators
+ import numpy
+ import numpy.lib.recfunctions
+ import scipy.optimize
+ import pandas
+ import gemmi
+ from servalcat.utils import logger
+
+ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex128)
+ to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
+
+ def r_factor(fo, fc):
+     if fo.size == 0:
+         return numpy.nan
+     return numpy.nansum(numpy.abs(fo-fc)) / numpy.nansum(fo)
+ def correlation(obs, calc):
+     if obs.size == 0:
+         return numpy.nan
+     sel = numpy.isfinite(obs)
+     return numpy.corrcoef(obs[sel], calc[sel])[0,1]
+
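For orientation (not part of the released file), r_factor implements R = Σ|Fo−Fc| / ΣFo and correlation a Pearson coefficient over the finite observations. A minimal sketch with made-up numbers:

import numpy
from servalcat.utils import hkl

fo = numpy.array([10., 20., 30.])
fc = numpy.array([11., 19., 28.])
print(hkl.r_factor(fo, fc))     # (1+1+2)/60 ≈ 0.067
print(hkl.correlation(fo, fc))  # Pearson r of the finite pairs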
+ def df_from_asu_data(asu_data, label):
+     df = pandas.DataFrame(data=asu_data.miller_array,
+                           columns=["H","K","L"])
+     if asu_data.value_array.dtype.names == ('value', 'sigma'):
+         df[label] = to64(asu_data.value_array["value"])
+         df["SIG"+label] = to64(asu_data.value_array["sigma"])
+     else:
+         df[label] = to64(asu_data.value_array)
+     return df
+
+ def df_from_raw(miller_array, value_array, label):
+     df = pandas.DataFrame(data=miller_array,
+                           columns=["H","K","L"])
+     df[label] = to64(value_array)
+     return df
+
+ def hkldata_from_asu_data(asu_data, label):
+     df = df_from_asu_data(asu_data, label)
+     return HklData(asu_data.unit_cell, asu_data.spacegroup, df)
+ # hkldata_from_asu_data()
+
+ def mtz_find_data_columns(mtz, require_sigma=True):
+     # anomalous (+)/(-) pairs (types K/M and L/G) are detected only together with their sigmas
+     col_types = {x.label:x.type for x in mtz.columns}
+     ret = {"J": [], "F": [], "K": [], "G": []}
+     for col in col_types:
+         typ = col_types[col]
+         if typ in ("J", "F"):
+             ret[typ].append([col])
+             sig = "SIG" + col
+             if col_types.get(sig) == "Q":
+                 ret[typ][-1].append(sig)
+             elif require_sigma:
+                 ret[typ].pop()
+         elif typ in ("K", "G") and col.endswith("(+)"):
+             # we always need sigma - right?
+             col_minus = col.replace("(+)", "(-)")
+             sig_type = {"K": "M", "G": "L"}[typ]
+             if (col_types.get(col_minus) == typ and
+                 col_types.get("SIG"+col) == sig_type and
+                 col_types.get("SIG"+col_minus) == sig_type):
+                 ret[typ].append([col, "SIG"+col, col_minus, "SIG"+col_minus])
+     return ret
+ # mtz_find_data_columns()
+
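A hedged usage sketch (the file name is hypothetical): mtz_find_data_columns groups J/F columns with their Q-type sigmas, plus anomalous K/G pairs carrying both sigmas, and mtz_find_free_columns returns integer columns whose labels match common free-flag names.

import gemmi
from servalcat.utils import hkl

mtz = gemmi.read_mtz_file("data.mtz")        # hypothetical input
data_cols = hkl.mtz_find_data_columns(mtz)
# e.g. {"J": [["I", "SIGI"]], "F": [["FP", "SIGFP"]], "K": [], "G": []}
free_cols = hkl.mtz_find_free_columns(mtz)   # e.g. ["FreeR_flag"]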
+ def mtz_find_free_columns(mtz):
+     col_types = {x.label:x.type for x in mtz.columns}
+     free_names = ("FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag")
+     ret = []
+     for col in col_types:
+         typ = col_types[col]
+         if typ == "I" and col in free_names:
+             ret.append(col)
+     return ret
+ # mtz_find_free_columns()
+
+ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
+     assert type(mtz) == gemmi.Mtz
+     notfound = set(labels) - set(mtz.column_labels())
+     if notfound:
+         raise RuntimeError("MTZ columns not found: {}".format(" ".join(notfound)))
+     col_types = {x.label:x.type for x in mtz.columns}
+     if require_types:
+         mismatches = [l for l,r in zip(labels, require_types) if r is not None and r != col_types[l]]
+         if mismatches:
+             raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
+
+     df = pandas.DataFrame(data=numpy.array(mtz, copy=False), columns=mtz.column_labels())
+     df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
+     df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
+     for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
+         del df[lab]
+
+     if newlabels is not None:
+         assert len(newlabels) == len(labels)
+         for i in range(1, len(newlabels)):
+             if newlabels[i] == "": # means this is phase and should be transferred to previous column
+                 assert col_types.get(labels[i]) == "P"
+                 assert col_types.get(labels[i-1]) == "F"
+                 ph = numpy.deg2rad(df[labels[i]])
+                 df[labels[i-1]] = df[labels[i-1]] * (numpy.cos(ph) + 1j * numpy.sin(ph))
+                 del df[labels[i]]
+
+         df.rename(columns={x:y for x,y in zip(labels, newlabels) if y != ""}, inplace=True)
+
+     return HklData(mtz.cell, mtz.spacegroup, df)
+ # hkldata_from_mtz()
+
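Continuing the sketch above (labels illustrative), an HklData is built from selected columns, optionally checking their MTZ types; an empty entry in newlabels folds a phase (P) column into the preceding amplitude (F) column as a complex value:

hkldata = hkl.hkldata_from_mtz(mtz, ["FP", "SIGFP"],
                               newlabels=["FP", "SIGFP"],
                               require_types=["F", "Q"])
# map coefficients as a single complex column named "FWT":
# hkl.hkldata_from_mtz(mtz, ["FWT", "PHWT"], newlabels=["FWT", ""])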
+ def blur_mtz(mtz, B):
+     # modify given mtz object
+
+     s2 = mtz.make_1_d2_array()
+     k2 = numpy.exp(-B*s2/2)
+     k = numpy.exp(-B*s2/4)
+     i_labs = [c.label for c in mtz.columns if c.type in "JK"]
+     f_labs = [c.label for c in mtz.columns if c.type in "FDG"]
+     for labs in i_labs, f_labs:
+         for l in labs:
+             sl = "SIG"+l
+             if sl in mtz.column_labels(): labs.append(sl)
+
+     if i_labs:
+         logger.writeln("Intensities: {}".format(" ".join(i_labs)))
+         logger.writeln(" exp(-B*s^2/2) will be multiplied (B= {:.2f})".format(B))
+     if f_labs:
+         logger.writeln("Amplitudes: {}".format(" ".join(f_labs)))
+         logger.writeln(" exp(-B*s^2/4) will be multiplied (B= {:.2f})".format(B))
+
+     for l in i_labs:
+         c = mtz.column_with_label(l)
+         c.array[:] *= k2
+     for l in f_labs:
+         c = mtz.column_with_label(l)
+         c.array[:] *= k
+ # blur_mtz()
+
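The scale factors follow the Debye-Waller convention with s² = 1/d²: intensities are multiplied by exp(−B·s²/2) and amplitudes by exp(−B·s²/4). A worked number, assuming B = 20 Å² at d = 2 Å:

s2 = 1 / 2.0**2                    # 0.25 1/Å²
k_amp = numpy.exp(-20 * s2 / 4)    # ≈ 0.287, applied to F (and SIGF)
k_int = numpy.exp(-20 * s2 / 2)    # ≈ 0.082, applied to I (and SIGI)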
+ def mtz_selected(mtz, columns):
+     """
+     creates a new mtz object having specified `columns` of `mtz`
+     """
+     columns = ["H", "K", "L"] + columns # TODO make sure no duplicates
+     col_dict = {x.label:x for x in mtz.columns}
+     col_idxes = {x.label:i for i, x in enumerate(mtz.columns)}
+
+     notfound = list(set(columns) - set(col_idxes))
+     if notfound:
+         raise RuntimeError("specified columns not found: {}".format(str(notfound)))
+
+     # copy metadata
+     mtz2 = gemmi.Mtz()
+     for k in ("spacegroup", "cell", "history", "title"):
+         setattr(mtz2, k, getattr(mtz, k))
+
+     for ds in mtz.datasets:
+         ds2 = mtz2.add_dataset("")
+         for k in ("cell", "id", "crystal_name", "dataset_name", "project_name", "wavelength"):
+             setattr(ds2, k, getattr(ds, k))
+
+     # copy selected columns
+     for col in columns:
+         mtz2.add_column(col, col_dict[col].type,
+                         dataset_id=col_dict[col].dataset_id, expand_data=False)
+
+     idxes = [col_idxes[col] for col in columns]
+     data = numpy.array(mtz, copy=False)[:, idxes]
+     mtz2.set_data(data)
+     return mtz2
+ # mtz_selected()
+
+ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
+     sp = numpy.sort(s_array)**power
+     spmin, spmax = numpy.min(sp), numpy.max(sp)
+     n_bins = 1
+     if n_per_bin <= len(sp):
+         # Decide n_bins so that inner-shell has requested number
+         width = sp[n_per_bin - 1] - spmin
+         n_bins = int((spmax - spmin) / width)
+     if min_bins is not None:
+         n_bins = max(n_bins, min_bins)
+     if max_bins is not None:
+         n_bins = min(n_bins, max_bins)
+     return n_bins
+ # decide_n_bins()
+
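decide_n_bins chooses the bin count so that the innermost shell in s**power holds roughly n_per_bin reflections, clipped to [min_bins, max_bins]. A sketch, given an HklData instance hkldata as above:

s = 1.0 / hkldata.d_spacings().to_numpy()
n_bins = hkl.decide_n_bins(500, s)  # aim for ~500 reflections in the inner shell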
+ class HklData:
+     def __init__(self, cell, sg, df=None, binned_df=None):
+         self.cell = cell
+         self.sg = sg
+         self.df = df
+         self.binned_df = binned_df
+         self._bin_and_indices = []
+     # __init__()
+
+     def update_cell(self, cell):
+         # update d
+         pass
+
+     def switch_to_asu(self):
+         # Need to take care of phases
+         assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
+         hkl = self.miller_array().to_numpy()
+         self.sg.switch_to_asu(hkl)
+         self.df[["H","K","L"]] = hkl
+         # in some environments the type changes to int64 even though hkl's dtype is int32
+         # it causes a problem in self.debye_waller_factors()
+         self.df = self.df.astype({x: numpy.int32 for x in "HKL"})
+
+     def copy(self, d_min=None, d_max=None):
+         # FIXME we should reset_index here? after resolution truncation, max(df.index) will be larger than size.
+         if (d_min, d_max).count(None) == 2:
+             df = self.df.copy()
+             binned_df = self.binned_df.copy() if self.binned_df is not None else None
+         else:
+             if d_min is None: d_min = 0
+             if d_max is None: d_max = float("inf")
+             d = self.d_spacings()
+             sel = (d >= d_min) & (d <= d_max)
+             df = self.df[sel].copy()
+             binned_df = None # no way to keep it
+
+         return HklData(self.cell, self.sg, df, binned_df)
+     # copy()
+
+     def selected(self, sel):
+         df = self.df[sel].copy()
+         return HklData(self.cell, self.sg, df)
+
+     def merge_asu_data(self, asu_data, label, common_only=True):
+         if self.df is not None and label in self.df:
+             raise Exception("Duplicated label")
+
+         df_tmp = df_from_asu_data(asu_data, label)
+
+         if self.df is None:
+             self.df = df_tmp
+         elif common_only:
+             self.df = self.df.merge(df_tmp)
+         else:
+             self.df = self.df.merge(df_tmp, how="outer")
+     # merge_asu_data()
+
+     def miller_array(self):
+         return self.df[["H","K","L"]]
+
+     def s_array(self):
+         hkl = self.miller_array()
+         return numpy.dot(hkl, self.cell.fractionalization_matrix)
+
+     def ssq_mat(self):
+         # k_aniso = exp(-s^T B_aniso s / 4)
+         # s^T B s / 4 can be reformulated as R b where R = 1x6 matrix and b = 6x1 matrix
+         # here R for all indices is returned with shape of (6, N)
+         # x[None,:].T <= (N, 6, 1)
+         # x.T[:,None] <= (N, 1, 6) they can be matmul'ed.
+         svecs = self.s_array()
+         tmp = (0.25 * svecs[:,0]**2, 0.25 * svecs[:,1]**2, 0.25 * svecs[:,2]**2,
+                0.5 * svecs[:,0] * svecs[:,1], 0.5 * svecs[:,0] * svecs[:,2], 0.5 * svecs[:,1] * svecs[:,2])
+         return numpy.array(tmp)
+     # ssq_mat()
+
+     def debye_waller_factors(self, b_cart=None, b_iso=None):
+         if b_iso is not None:
+             s2 = 1 / self.d_spacings()**2
+             return numpy.exp(-b_iso / 4 * s2)
+         if b_cart is not None:
+             b_star = b_cart.transformed_by(self.cell.fractionalization_matrix)
+             return numpy.exp(-b_star.r_u_r(self.miller_array().to_numpy()) / 4)
+
+     def calc_d(self):
+         self.df["d"] = self.cell.calculate_d_array(self.miller_array())
+     # calc_d()
+
+     def calc_epsilon(self):
+         self.df["epsilon"] = self.sg.operations().epsilon_factor_without_centering_array(self.miller_array())
+     # calc_epsilon()
+
+     def calc_centric(self):
+         self.df["centric"] = self.sg.operations().centric_flag_array(self.miller_array()).astype(int)
+     # calc_centric()
+
+     def d_spacings(self):
+         if "d" not in self.df or self.df.d.isnull().values.any():
+             self.calc_d()
+         return self.df.d
+     # d_spacings()
+
+     def sort_by_resolution(self, ascending=False):
+         self.d_spacings()
+         self.df.sort_values("d", ascending=ascending, inplace=True)
+     # sort_by_resolution()
+
+     def d_min_max(self):
+         d = self.d_spacings()
+         return numpy.min(d), numpy.max(d)
+     # d_min_max()
+
+     def complete(self):
+         # make complete set
+         d_min, d_max = self.d_min_max()
+         all_hkl = gemmi.make_miller_array(self.cell, self.sg, d_min, d_max)
+         match = gemmi.HklMatch(self.miller_array(), all_hkl)
+         missing_hkl_df = pandas.DataFrame(all_hkl[numpy.asarray(match.pos) < 0], columns=["H","K","L"])
+         self.df = pandas.concat([self.df, missing_hkl_df])
+         logger.writeln("Completing hkldata: {} reflections were missing".format(len(missing_hkl_df.index)))
+         self.calc_d()
+     # complete()
+
+     def completeness(self, label=None):
+         if label is None:
+             n_missing = numpy.sum(self.df.isna().any(axis=1))
+         else:
+             n_missing = numpy.sum(self.df[label].isna())
+         n_all = len(self.df.index)
+         return (n_all-n_missing)/n_all
+     # completeness()
+
+     def setup_binning(self, n_bins, method=gemmi.Binner.Method.Dstar2):
+         self.df.reset_index(drop=True, inplace=True)
+         s2 = 1/self.d_spacings().to_numpy()**2
+         binner = gemmi.Binner()
+         binner.setup_from_1_d2(n_bins, method, s2, self.cell)
+         self._bin_and_indices = []
+         d_limits = 1 / numpy.sqrt(binner.limits)
+         bin_number = binner.get_bins_from_1_d2(s2)
+         d_max_all = []
+         d_min_all = []
+         for i in range(binner.size):
+             left = numpy.max(self.d_spacings()) if i == 0 else d_limits[i-1]
+             right = numpy.min(self.d_spacings()) if i == binner.size -1 else d_limits[i]
+             sel = numpy.where(bin_number==i)[0] # slow?
+             d_max_all.append(left)
+             d_min_all.append(right)
+             self._bin_and_indices.append((i, sel))
+
+         self.df["bin"] = bin_number
+         self.binned_df = pandas.DataFrame(dict(d_max=d_max_all, d_min=d_min_all), index=list(range(binner.size)))
+     # setup_binning()
+
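After setup_binning, df gains a "bin" column, binned_df records the d_max/d_min of each shell, and binned() yields (bin number, row indices) pairs; a usage sketch:

hkldata.setup_binning(20)
for i_bin, idxes in hkldata.binned():
    print(i_bin,
          hkldata.binned_df.d_max[i_bin],
          hkldata.binned_df.d_min[i_bin],
          len(idxes))  # shell resolution range and reflection count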
+     def setup_relion_binning(self, sort=False):
+         max_edge = max(self.cell.parameters[:3])
+         if sort:
+             self.sort_by_resolution()
+             self.df.reset_index(drop=True, inplace=True) # to allow numpy.array indexing
+
+         self.df["bin"] = (max_edge/self.d_spacings()+0.5).astype(int)
+         # Merge inner/outer shells if too few # TODO smarter way
+         bin_counts = []
+         bin_ranges = {}
+         modify_table = {}
+         for i_bin, g in self.df.groupby("bin", sort=True):
+             if i_bin == 0: continue # ignore DC component
+             bin_counts.append([i_bin, g.index])
+             bin_ranges[i_bin] = (numpy.max(g.d), numpy.min(g.d))
+
+         for i in range(len(bin_counts)):
+             if len(bin_counts[i][1]) < 10 and i < len(bin_counts)-1:
+                 bin_counts[i+1][1] = bin_counts[i+1][1].union(bin_counts[i][1])
+                 modify_table[bin_counts[i][0]] = bin_counts[i+1][0]
+                 logger.writeln("Bin {} only has {} data. Merging with next bin.".format(bin_counts[i][0],
+                                                                                        len(bin_counts[i][1])))
+             else: break
+
+         for i in reversed(range(len(bin_counts))):
+             if i > 0 and len(bin_counts[i][1])/len(bin_counts[i-1][1]) < 0.5:
+                 bin_counts[i-1][1] = bin_counts[i-1][1].union(bin_counts[i][1])
+                 modify_table[bin_counts[i][0]] = bin_counts[i-1][0]
+                 logger.writeln("Bin {} only has {} data. Merging with previous bin.".format(bin_counts[i][0],
+                                                                                            len(bin_counts[i][1])))
+             else: break
+
+         while True:
+             flag = True
+             for i_bin in modify_table:
+                 if modify_table[i_bin] in modify_table:
+                     modify_table[i_bin] = modify_table[modify_table[i_bin]]
+                     flag = False
+             if flag: break
+
+         for i_bin in modify_table:
+             new_bin = modify_table[i_bin]
+             self.df["bin"] = numpy.where(self.df["bin"].to_numpy() == i_bin, new_bin, self.df["bin"].to_numpy())
+             bin_ranges[new_bin] = (max(bin_ranges[i_bin][0], bin_ranges[new_bin][0]),
+                                    min(bin_ranges[i_bin][1], bin_ranges[new_bin][1]))
+
+         self._bin_and_indices = []
+         bin_all = []
+         d_max_all = []
+         d_min_all = []
+         for i_bin, indices in bin_counts:
+             if i_bin in modify_table: continue
+             #if sort: # want this, but we cannot take len() for slice. we can add ncoeffs to binned_df
+             #    self._bin_and_indices.append((i_bin, slice(numpy.min(indices), numpy.max(indices))))
+             #else:
+             self._bin_and_indices.append((i_bin, indices))
+
+             bin_all.append(i_bin)
+             d_max_all.append(bin_ranges[i_bin][0])
+             d_min_all.append(bin_ranges[i_bin][1])
+         self.binned_df = pandas.DataFrame(dict(d_max=d_max_all, d_min=d_min_all), index=bin_all)
+     # setup_relion_binning()
+
+     def binned_data_as_array(self, lab):
+         vals = numpy.zeros(len(self.df.index), dtype=self.binned_df[lab].dtype)
+         for i_bin, idxes in self.binned():
+             vals[idxes] = self.binned_df[lab][i_bin]
+         return vals
+     # binned_data_as_array()
+
+     def binned(self):
+         return self._bin_and_indices
+
+     def columns(self):
+         return [x for x in self.df.columns if x not in ("H", "K", "L")]
+
+     def merge(self, other, common_only=True):
+         self.merge_df(other, common_only)
+     # merge()
+
+     def merge_df(self, other, common_only=True):
+         # TODO check space group, cell
+         # TODO transform to asu with phase shifts
+         # TODO check column labels. same names other than HKL?
+         # FIXME the outer-merge results below are not yet stored back to self.df
+         if common_only:
+             self.df = self.df.merge(other)
+         else:
+             df = self.df.merge(other, indicator=True, how="outer")
+             df_left = df[df._merge=="left_only"]
+             df_right = df[df._merge=="right_only"]
+             df_both = df[df._merge=="both"]
+     # merge_df()
+
+     def guess_free_number(self, obs):
+         logger.writeln("Guessing test flag number")
+         sel = ~self.df[obs].isna()
+         free = self.df.loc[sel, "FREE"]
+         threshold = len(free.index) / 2
+         if free.isna().any():
+             raise RuntimeError("missing or invalid test flag")
+         counts = self.df.loc[sel, "FREE"].value_counts().sort_index()
+         logger.writeln(counts.to_string(header=False))
+         if len(counts.index) < 2:
+             raise RuntimeError("this does not appear to be a test flag")
+         good_flags = [n for n, c in counts.items() if c < threshold]
+         if len(good_flags) > 0:
+             flag_num = min(good_flags)
+         else:
+             flag_num = min(counts.index)
+         logger.writeln(" best guess: free = {}".format(flag_num))
+         return flag_num
+     # guess_free_number()
+
+     def as_numpy_arrays(self, labels, omit_nan=True):
+         tmp = self.df[labels]
+         if omit_nan: tmp = tmp[~tmp.isna().any(axis=1)]
+         return [tmp[lab].to_numpy() for lab in labels]
+     # as_numpy_arrays()
+
+     def remove_nonpositive(self, label):
+         sel = self.df[label] <= 0
+         n_bad = sel.sum()
+         if n_bad > 0:
+             logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
+             self.df = self.df[~sel]
+     # remove_nonpositive()
+
+     def remove_systematic_absences(self):
+         is_absent = self.sg.operations().systematic_absences(self.miller_array())
+         n_absent = numpy.sum(is_absent)
+         if n_absent > 0:
+             logger.writeln("Removing {} systematic absences".format(n_absent))
+             self.df = self.df[~is_absent]
+     # remove_systematic_absences()
+
+     def merge_anomalous(self, labs, newlabs):
+         assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
+         assert len(newlabs) == 2
+         # skipna=True is default, so missing value is handled nicely.
+         self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
+         self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
+
+     def as_asu_data(self, label=None, data=None, label_sigma=None):
+         if label is None: assert data is not None
+         else: assert data is None
+
+         if label_sigma is not None:
+             assert data is None
+             assert not numpy.iscomplexobj(self.df[label])
+             sigma = self.df[label_sigma]
+             data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
+                                                                      numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
+         elif data is None:
+             data = self.df[label]
+
+         if numpy.iscomplexobj(data):
+             asutype = gemmi.ComplexAsuData
+         elif issubclass(data.dtype.type, numpy.integer):
+             asutype = gemmi.IntAsuData
+         elif label_sigma is not None:
+             asutype = gemmi.ValueSigmaAsuData
+         else:
+             asutype = gemmi.FloatAsuData
+
+         return asutype(self.cell, self.sg,
+                        self.miller_array(), data)
+     # as_asu_data()
+
+     def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
+         if data is not None: data = data.astype(numpy.complex64) # we may want to keep complex128?
+         asu = self.as_asu_data(label=label, data=data)
+         if grid_size is None:
+             ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
+         else:
+             ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
+
+         return ma
+     # fft_map()
+
+     def d_eff(self, label):
+         # Effective resolution defined using FSC
+         fsc = self.binned_df[label]
+         a = 0.
+         for i_bin, idxes in self.binned():
+             a += len(idxes) * fsc[i_bin]
+
+         fac = (a/len(self.df.index))**(1/3.)
+         d_min = self.d_min_max()[0]
+         ret = d_min/fac
+         return ret
+     # d_eff()
+
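In other words, with n_i coefficients and FSC_i in shell i out of N in total, d_eff(label) evaluates

    d_eff = d_min / (Σ_i n_i · FSC_i / N)^(1/3)

so a curve with FSC = 1 everywhere gives d_eff = d_min, and d_eff grows as the FSC falls off.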
+     def hard_sphere_kernel(self, r_ang, grid_size):
+         s = 1. / self.d_spacings()
+         t = 2 * numpy.pi * s * r_ang
+         F_kernel = 3. * (-t * numpy.cos(t) + numpy.sin(t)) / t**3
+         knl = self.fft_map(data=F_kernel, grid_size=grid_size)
+         knl.array[:] += 1. / knl.unit_cell.volume # F000
+         knl.array[:] /= numpy.sum(knl.array)
+         return knl
+     # hard_sphere_kernel()
+
+     def scale_k_and_b(self, lab_ref, lab_scaled):
+         logger.writeln("Determining k, B scales between {} and {}".format(lab_ref, lab_scaled))
+         s2 = 1/self.d_spacings().to_numpy()**2
+         # determine scales that minimize (|f1|-|f2|*k*e^(-b*s2/4))^2
+         f1 = self.df[lab_ref].to_numpy()
+         f2 = self.df[lab_scaled].to_numpy()
+         if numpy.iscomplexobj(f1): f1 = numpy.abs(f1)
+         if numpy.iscomplexobj(f2): f2 = numpy.abs(f2)
+
+         sel_pos = numpy.logical_and(f1 > 0, f2 > 0) # this filters nan as well
+         f1p, f2p, s2p = f1[sel_pos], f2[sel_pos], s2[sel_pos]
+
+         # 1st step: minimize (log(|f1|)-log(|f2|*e^k*e^(-b*s2/4)))^2 starting with k=1, b=0.
+         tmp = numpy.log(f2p) - numpy.log(f1p)
+         # g = [dT/dk, dT/db]
+         g = numpy.array([2 * numpy.sum(tmp), -numpy.sum(tmp*s2p)/2])
+         H = numpy.zeros((2,2))
+         H[0,0] = 2*len(f1p)
+         H[1,1] = numpy.sum(s2p**2/8)
+         H[0,1] = H[1,0] = -numpy.sum(s2p)/2
+         x = -numpy.dot(numpy.linalg.inv(H), g)
+         k1 = numpy.exp(x[0])
+         B1 = x[1]
+         logger.writeln(" initial estimate using log: k= {:.2e} B= {:.2e}".format(k1, B1))
+         f2tmp = f2 * k1 * numpy.exp(-B1*s2/4)
+         r_step0 = r_factor(f1, f2)
+         r_step1 = r_factor(f1, f2tmp)
+         logger.writeln(" R= {:.4f} (was: {:.4f})".format(r_step1, r_step0))
+
+         # 2nd step: - minimize (|f1|-|f2|*k*e^(-b*s2/4))^2 iteratively (TODO with regularisation)
+
+         def grad2(x):
+             t = numpy.exp(-x[1]*s2/4)
+             tmp = (f1-f2*x[0]*t)*f2*t
+             return numpy.array([-2.*numpy.nansum(tmp),
+                                 0.5*x[0]*numpy.nansum(tmp*s2)])
+
+         def hess2(x):
+             h = numpy.zeros((2, 2))
+             t = numpy.exp(-x[1]*s2/4)
+             t2 = t**2
+             h[0,0] = numpy.nansum(f2**2 * t2) * 2
+             h[1,1] = numpy.nansum(f2 * s2**2/4 * (-f1/2*t + f2*x[0]*t2)) * x[0]
+             h[1,0] = numpy.nansum(f2 * s2 * (f1/2*t - f2*x[0]*t2))
+             h[0,1] = h[1,0]
+             return h
+
+         res = scipy.optimize.minimize(fun=lambda x: numpy.nansum((f1-f2*x[0]*numpy.exp(-x[1]*s2/4))**2),
+                                       jac=grad2,
+                                       hess=hess2,
+                                       method="Newton-CG",
+                                       x0=numpy.array([k1, B1]),
+                                       )
+         logger.writeln(str(res))
+         k2, B2 = res.x
+         f2tmp2 = f2 * k2 * numpy.exp(-B2*s2/4)
+         r_step2 = r_factor(f1, f2tmp2)
+         logger.writeln(" Least-square estimate: k= {:.2e} B= {:.2e}".format(k2, B2))
+         logger.writeln(" R= {:.4f}".format(r_step2))
+
+         if 0:
+             self.setup_binning(40)
+             x = []
+             y0,y1,y2,y3=[],[],[],[]
+             for i_bin, idxes in self.binned():
+                 bin_d_min = self.binned_df.d_min[i_bin]
+                 bin_d_max = self.binned_df.d_max[i_bin]
+                 x.append(1/bin_d_min**2)
+                 y0.append(numpy.average(f1[idxes]))
+                 y1.append(numpy.average(f2[idxes]))
+                 y2.append(numpy.average(f2tmp[idxes]))
+                 y3.append(numpy.average(f2tmp2[idxes]))
+
+             import matplotlib.pyplot as plt
+             plt.plot(x, y0, label="FC")
+             plt.plot(x, y1, label="FP")
+             plt.plot(x, y2, label="FP,scaled")
+             plt.plot(x, y3, label="FP,scaled2")
+             plt.legend()
+             plt.show()
+
+         if r_step2 < r_step1:
+             return k2, B2
+         else:
+             return k1, B1
+     # scale_k_and_b()
+
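A note on the first step: with t_i = log(f2_i / f1_i), the objective Σ_i (t_i + log k − B·s²_i/4)² is exactly quadratic in (log k, B), so the single Newton step −H⁻¹g from (0, 0), with

    g = (2·Σt, −Σ t·s²/2),   H = ((2N, −Σs²/2), (−Σs²/2, Σs⁴/8)),

is the closed-form minimizer in log space. The Newton-CG stage then refines (k, B) against the unlogged residual (|f1| − k·|f2|·exp(−B·s²/4))², and whichever estimate gives the lower R-factor is returned.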
+     def translation_factor(self, shift):
+         if type(shift) != gemmi.Position:
+             shift = gemmi.Position(*shift)
+         return numpy.exp(2.j*numpy.pi*numpy.dot(self.miller_array(),
+                                                 self.cell.fractionalize(shift).tolist()))
+     # translation_factor()
+     def translate(self, lab, shift):
+         # apply phase shift
+         assert numpy.iscomplexobj(self.df[lab])
+         self.df[lab] *= self.translation_factor(shift)
+     # translate()
+
+     def write_mtz(self, mtz_out, labs, types=None, phase_label_decorator=None,
+                   exclude_000=True):
+         logger.writeln("Writing MTZ file: {}".format(mtz_out))
+         if self.sg.ccp4 < 1:
+             logger.writeln("WARNING: CCP4-unsupported space group ({})".format(self.sg.xhm()))
+         if types is None: types = {}
+         if exclude_000:
+             df = self.df.query("H!=0 | K!=0 | L!=0")
+         else:
+             df = self.df
+
+         ndata = sum(2 if numpy.iscomplexobj(df[lab]) else 1 for lab in labs)
+
+         data = numpy.empty((len(df.index), ndata + 3), dtype=numpy.float32)
+         data[:,:3] = df[["H","K","L"]]
+         idx = 3
+         for lab in labs:
+             if numpy.iscomplexobj(df[lab]):
+                 data[:,idx] = numpy.abs(df[lab])
+                 data[:,idx+1] = numpy.angle(df[lab], deg=True)
+                 idx += 2
+             else:
+                 data[:,idx] = df[lab].to_numpy(numpy.float32, na_value=numpy.nan) # for nullable integers
+                 idx += 1
+
+         mtz = gemmi.Mtz()
+         mtz.spacegroup = self.sg
+         mtz.cell = self.cell
+         mtz.add_dataset('HKL_base')
+         for label in ['H', 'K', 'L']: mtz.add_column(label, 'H')
+
+         for lab in labs:
+             if numpy.iscomplexobj(df[lab]):
+                 mtz.add_column(lab, "F")
+                 if phase_label_decorator is None:
+                     plab = {"FWT": "PHWT", "DELFWT": "PHDELWT", "FAN":"PHAN"}.get(lab, "PH"+lab)
+                 else:
+                     plab = phase_label_decorator(lab)
+                 mtz.add_column(plab, "P")
+             else:
+                 typ = types.get(lab)
+                 if typ is None:
+                     if issubclass(df[lab].dtype.type, numpy.integer):
+                         typ = "I"
+                     else:
+                         typ = "R"
+                 mtz.add_column(lab, typ)
+
+         mtz.set_data(data)
+         mtz.write_to_file(mtz_out)
+     # write_mtz()
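An end-to-end sketch tying the module together (file names and labels are illustrative, not fixed by the code):

import gemmi
from servalcat.utils import hkl

mtz = gemmi.read_mtz_file("input.mtz")           # hypothetical input
hkldata = hkl.hkldata_from_mtz(mtz, ["FP", "SIGFP"])
hkldata.remove_systematic_absences()
hkldata.switch_to_asu()
hkldata.setup_binning(20)
hkldata.write_mtz("output.mtz", labs=["FP", "SIGFP"],
                  types={"FP": "F", "SIGFP": "Q"})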