servalcat 0.4.99__cp310-cp310-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-310-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
servalcat/utils/hkl.py ADDED
@@ -0,0 +1,714 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import numpy
10
+ import scipy.optimize
11
+ import pandas
12
+ import gemmi
13
+ from servalcat.utils import logger
14
+
15
# Widest dtype for each numpy "kind" code (i=int, u=uint, f=float, c=complex);
# used to promote 32-bit values coming from MTZ/gemmi arrays to 64 bit.
dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex128)
# Promote an array to the 64-bit dtype of the same kind; other kinds pass through.
to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
17
+
18
def r_factor(fo, fc):
    """Classical R-factor: sum(|fo - fc|) / sum(fo), ignoring NaN terms.

    Returns NaN when the denominator is zero (e.g. all-NaN observations).
    """
    total_obs = numpy.nansum(fo)
    return numpy.nan if total_obs == 0 else numpy.nansum(numpy.abs(fo - fc)) / total_obs
23
def correlation(obs, calc):
    """Pearson correlation between obs and calc over the finite entries of obs.

    Returns NaN when obs is empty or has no finite value.
    """
    finite = numpy.isfinite(obs)
    if not finite.any():
        return numpy.nan
    return numpy.corrcoef(obs[finite], calc[finite])[0, 1]
28
+
29
def df_from_asu_data(asu_data, label):
    """Convert a gemmi *AsuData object to a DataFrame with H,K,L columns.

    For gemmi.ValueSigmaAsuData the pair is stored as `label` and "SIG"+label;
    otherwise a single `label` column is created. Values are promoted to
    64-bit via to64().
    """
    df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
                          columns=["H","K","L"])
    if type(asu_data) is gemmi.ValueSigmaAsuData:
        df[label] = to64(asu_data.value_array[:,0])
        df["SIG"+label] = to64(asu_data.value_array[:,1])
    else:
        df[label] = to64(asu_data.value_array)
    return df
38
+
39
def df_from_raw(miller_array, value_array, label):
    """Build a DataFrame with H,K,L index columns plus one data column `label`."""
    hkl_df = pandas.DataFrame(miller_array.astype(numpy.int32), columns=["H", "K", "L"])
    hkl_df[label] = to64(value_array)
    return hkl_df
44
+
45
def hkldata_from_asu_data(asu_data, label):
    """Wrap a gemmi *AsuData object into an HklData (cell/spacegroup taken from it)."""
    df = df_from_asu_data(asu_data, label)
    return HklData(asu_data.unit_cell, asu_data.spacegroup, df)
# hkldata_from_asu_data()
49
+
50
def mtz_find_data_columns(mtz, require_sigma=True):
    """Find usable data columns in an MTZ object, grouped by column type.

    Returns a dict {"J": [...], "F": [...], "K": [...], "G": [...]} where each
    entry is a list of label groups: [value] or [value, sigma] for mean
    I (J) / F (F) columns, and [plus, sig_plus, minus, sig_minus] for
    anomalous I (K/M) / F (G/L) column quartets.

    If require_sigma is True, mean columns without a matching "SIG"+label
    column of type Q are dropped. Anomalous columns always require sigmas.
    """
    col_types = {x.label:x.type for x in mtz.columns}
    ret = {"J": [], "F": [], "K": [], "G": []}
    for col in col_types:
        typ = col_types[col]
        if typ in ("J", "F"):
            ret[typ].append([col])
            sig = "SIG" + col
            if col_types.get(sig) == "Q":
                ret[typ][-1].append(sig)
            elif require_sigma:
                ret[typ].pop()
        elif typ in ("K", "G") and col.endswith(("(+)", "plus")):
            # we always need sigma - right?
            col_minus = col.replace("(+)", "(-)") if col.endswith("(+)") else col.replace("plus", "minus")
            sig_type = {"K": "M", "G": "L"}[typ]
            # accept the quartet only when the (-) mate and both sigmas exist
            # with the expected types
            if (col_types.get(col_minus) == typ and
                col_types.get("SIG"+col) == sig_type and
                col_types.get("SIG"+col_minus) == sig_type):
                ret[typ].append([col, "SIG"+col, col_minus, "SIG"+col_minus])
    return ret
# mtz_find_data_columns()
73
+
74
def mtz_find_free_columns(mtz):
    """Return labels of integer-type (I) columns whose name looks like a free-R flag."""
    free_names = ("FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag")
    col_types = {c.label: c.type for c in mtz.columns}
    return [lab for lab, typ in col_types.items()
            if typ == "I" and lab in free_names]
# mtz_find_free_columns()
84
+
85
def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
    """Create an HklData from selected columns of a gemmi.Mtz.

    Args:
        mtz: gemmi.Mtz object
        labels: column labels to keep
        newlabels: optional renamed labels (same length); an empty string ""
            marks a phase (P) column that is combined into the preceding
            amplitude (F) column as a complex value and then dropped
        require_types: optional list of required MTZ column types per label
            (None entries are not checked)
    Raises:
        RuntimeError: when a label is missing or a column type mismatches.
    """
    assert type(mtz) == gemmi.Mtz
    notfound = set(labels) - set(mtz.column_labels())
    if notfound:
        raise RuntimeError("MTZ columns not found: {}".format(" ".join(notfound)))
    col_types = {x.label:x.type for x in mtz.columns}
    if require_types:
        mismatches = [l for l,r in zip(labels, require_types) if r is not None and r != col_types[l]]
        if mismatches:
            raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))

    df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
    df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
    df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
    # drop everything that was not requested (Miller indices always kept)
    for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
        del df[lab]

    if newlabels is not None:
        assert len(newlabels) == len(labels)
        for i in range(1, len(newlabels)):
            if newlabels[i] == "": # means this is phase and should be transferred to previous column
                assert col_types.get(labels[i]) == "P"
                assert col_types.get(labels[i-1]) == "F"
                ph = numpy.deg2rad(df[labels[i]])
                df[labels[i-1]] = df[labels[i-1]] * (numpy.cos(ph) + 1j * numpy.sin(ph))
                del df[labels[i]]

        df.rename(columns={x:y for x,y in zip(labels, newlabels) if y != ""}, inplace=True)

    return HklData(mtz.cell, mtz.spacegroup, df)
# hkldata_from_mtz()
116
+
117
def df_from_twin_data(twin_data, fc_labs):
    """Build a DataFrame of Miller indices (H,K,L) plus calculated-F columns `fc_labs`."""
    ret = pandas.DataFrame(twin_data.asu, columns=["H", "K", "L"])
    ret[fc_labs] = twin_data.f_calc
    return ret
122
+
123
def blur_mtz(mtz, B):
    """Apply B-factor blurring/sharpening to the data columns of `mtz`, in place.

    Intensity-type columns (J, K) and their sigmas are multiplied by
    exp(-B*s^2/2); amplitude-type columns (F, D, G) and their sigmas by
    exp(-B*s^2/4). A positive B blurs, a negative B sharpens.
    """
    s2 = mtz.make_1_d2_array()
    k2 = numpy.exp(-B*s2/2)
    k = numpy.exp(-B*s2/4)
    i_labs = [c.label for c in mtz.columns if c.type in "JK"]
    f_labs = [c.label for c in mtz.columns if c.type in "FDG"]
    for labs in i_labs, f_labs:
        # iterate over a snapshot: appending to `labs` while iterating it would
        # also re-scan the just-added sigma labels (probing for "SIGSIG..." columns)
        for l in tuple(labs):
            sl = "SIG"+l
            if sl in mtz.column_labels(): labs.append(sl)

    if i_labs:
        logger.writeln("Intensities: {}".format(" ".join(i_labs)))
        logger.writeln(" exp(-B*s^2/2) will be multiplied (B= {:.2f})".format(B))
    if f_labs:
        logger.writeln("Amplitudes: {}".format(" ".join(f_labs)))
        logger.writeln(" exp(-B*s^2/4) will be multiplied (B= {:.2f})".format(B))

    for l in i_labs:
        c = mtz.column_with_label(l)
        c.array[:] *= k2
    for l in f_labs:
        c = mtz.column_with_label(l)
        c.array[:] *= k
# blur_mtz()
150
+
151
def mtz_selected(mtz, columns):
    """
    creates a new mtz object having specified `columns` of `mtz`

    H,K,L are always included. Raises RuntimeError if a requested column
    does not exist. Cell/spacegroup/title/history and dataset metadata are
    copied from the input object.
    """
    columns = ["H", "K", "L"] + columns # TODO make sure no duplicates
    col_dict = {x.label:x for x in mtz.columns}
    col_idxes = {x.label:i for i, x in enumerate(mtz.columns)}

    notfound = list(set(columns) - set(col_idxes))
    if notfound:
        raise RuntimeError("specified columns not found: {}".format(str(notfound)))

    # copy metadata
    mtz2 = gemmi.Mtz()
    for k in ("spacegroup", "cell", "history", "title"):
        setattr(mtz2, k, getattr(mtz, k))

    for ds in mtz.datasets:
        ds2 = mtz2.add_dataset("")
        for k in ("cell", "id", "crystal_name", "dataset_name", "project_name", "wavelength"):
            setattr(ds2, k, getattr(ds, k))

    # copy selected columns
    for col in columns:
        mtz2.add_column(col, col_dict[col].type,
                        dataset_id=col_dict[col].dataset_id, expand_data=False)

    # copy the data for the selected columns in one slice
    idxes = [col_idxes[col] for col in columns]
    data = mtz.array[:, idxes]
    mtz2.set_data(data)
    return mtz2
# mtz_selected()
183
+
184
+ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
185
+ sp = numpy.sort(s_array)**power
186
+ spmin, spmax = numpy.min(sp), numpy.max(sp)
187
+ n_bins = 1
188
+ if n_per_bin <= len(sp):
189
+ # Decide n_bins so that inner-shell has requested number
190
+ width = sp[n_per_bin - 1] - spmin
191
+ n_bins = int((spmax - spmin) / width)
192
+ if min_bins is not None:
193
+ n_bins = max(n_bins, min_bins)
194
+ if max_bins is not None:
195
+ n_bins = min(n_bins, max_bins)
196
+ return n_bins
197
+ # decide_n_bins()
198
+
199
def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
    """FFT complex structure factors to a real-space map (gemmi grid).

    If grid_size is given, the map is computed on exactly that grid;
    otherwise the grid is chosen from sample_rate.
    """
    if data is not None:
        data = data.astype(numpy.complex64) # we may want to keep complex128?
        if type(data) is pandas.core.series.Series:
            data = data.to_numpy()
    asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
    if grid_size is None:
        ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
    else:
        ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
    return ma
# fft_map()
211
+
212
class HklData:
    """Container for reflection data.

    Holds a unit cell, a space group, and a pandas DataFrame `df` with
    Miller index columns H, K, L plus arbitrary data columns. After one of
    the setup_*binning methods has run, `binned_df` carries per-bin values
    (d_max/d_min, etc.) and `_bin_and_indices` maps each bin to its rows.
    """
    def __init__(self, cell, sg, df=None, binned_df=None):
        # cell and sg: unit-cell and space-group objects (gemmi API is used on them)
        self.cell = cell
        self.sg = sg
        self.df = df                  # per-reflection table (H, K, L + data columns)
        self.binned_df = binned_df    # per-bin table, filled by setup_*binning
        self._bin_and_indices = []    # list of (bin number, row indices) pairs
    # __init__()
220
+
221
    def update_cell(self, cell):
        # TODO not implemented yet: should store the new cell and recompute
        # the cached "d" column to match.
        # update d
        pass
224
+
225
    def switch_to_asu(self):
        """Map all Miller indices to the spacegroup's asymmetric unit, in place.

        Only valid for phase-free data: complex columns would need phase
        shifts, which are not applied here.
        """
        # Need to care phases
        assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
        hkl = self.miller_array()
        self.sg.switch_to_asu(hkl)
        self.df[["H","K","L"]] = hkl
        # in some environment type changes to int64 even though hkl's dtype is int32
        # it causes a problem in self.debye_waller_factors()
        self.df = self.df.astype({x: numpy.int32 for x in "HKL"})
234
+
235
    def copy(self, d_min=None, d_max=None):
        """Return a deep-ish copy, optionally truncated to [d_min, d_max].

        When a resolution cutoff is applied, binned_df cannot be carried over
        and is reset to None.
        """
        # FIXME we should reset_index here? after resolution truncation, max(df.index) will be larger than size.
        if (d_min, d_max).count(None) == 2:
            df = self.df.copy()
            binned_df = self.binned_df.copy() if self.binned_df is not None else None
        else:
            if d_min is None: d_min = 0
            if d_max is None: d_max = float("inf")
            d = self.d_spacings()
            sel = (d >= d_min) & (d <= d_max)
            df = self.df[sel].copy()
            binned_df = None # no way to keep it

        return HklData(self.cell, self.sg, df, binned_df)
    # copy()
250
+
251
+ def selected(self, sel):
252
+ df = self.df[sel].copy()
253
+ return HklData(self.cell, self.sg, df)
254
+
255
    def merge_asu_data(self, asu_data, label, common_only=True):
        """Merge a gemmi *AsuData object into self.df as column `label`.

        With common_only=True only reflections present in both tables are
        kept (inner join); otherwise an outer join is used.
        Raises Exception if `label` already exists.
        """
        if self.df is not None and label in self.df:
            raise Exception("Duplicated label")

        df_tmp = df_from_asu_data(asu_data, label)

        if self.df is None:
            self.df = df_tmp
        elif common_only:
            self.df = self.df.merge(df_tmp)
        else:
            self.df = self.df.merge(df_tmp, how="outer")
    # merge_asu_data()
268
+
269
+ def miller_array(self):
270
+ return self.df[["H","K","L"]].to_numpy()
271
+
272
+ def s_array(self):
273
+ hkl = self.miller_array()
274
+ return numpy.dot(hkl, self.cell.frac.mat.array)
275
+
276
    def ssq_mat(self):
        """Return the (6, N) matrix R with s^T B s / 4 = R . b for packed b.

        b is the anisotropic tensor packed as (B11, B22, B33, B12, B13, B23),
        matching the column order built below.
        """
        # k_aniso = exp(-s^T B_aniso s / 4)
        # s^T B s / 4 can be reformulated as R b where R = 1x6 matrix and b = 6x1 matrix
        # here R for all indices is returned with shape of (6, N)
        # x[None,:].T <= (N, 6, 1)
        # x.T[:,None] <= (N, 1, 6) they can be matmul'ed.
        svecs = self.s_array()
        tmp = (0.25 * svecs[:,0]**2, 0.25 * svecs[:,1]**2, 0.25 * svecs[:,2]**2,
               0.5 * svecs[:,0] * svecs[:,1], 0.5 * svecs[:,0] * svecs[:,2], 0.5 * svecs[:,1] * svecs[:,2])
        return numpy.array(tmp)
    # ssq_mat()
287
+
288
    def debye_waller_factors(self, b_cart=None, b_iso=None):
        """Per-reflection Debye-Waller factors exp(-B s^2 / 4).

        Give either b_iso (scalar isotropic B) or b_cart (anisotropic tensor
        in Cartesian space; transformed to reciprocal space here). If b_iso is
        given it takes precedence; if neither is given, returns None implicitly.
        """
        if b_iso is not None:
            s2 = 1 / self.d_spacings()**2
            return numpy.exp(-b_iso / 4 * s2)
        if b_cart is not None:
            b_star = b_cart.transformed_by(self.cell.frac.mat)
            return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
295
+
296
    def calc_d(self):
        """(Re)compute the "d" spacing column from the cell and Miller indices."""
        self.df["d"] = self.cell.calculate_d_array(self.miller_array())
    # calc_d()
299
+
300
    def calc_epsilon(self):
        """Add the "epsilon" column (multiplicity factor, centering excluded)."""
        self.df["epsilon"] = self.sg.operations().epsilon_factor_without_centering_array(self.miller_array())
    # calc_epsilon()
303
+
304
    def calc_centric(self):
        """Add the "centric" column: 1 for centric reflections, 0 for acentric."""
        self.df["centric"] = self.sg.operations().centric_flag_array(self.miller_array()).astype(int)
    # calc_centric()
307
+
308
    def d_spacings(self):
        """Return the "d" column, computing it first if absent or containing NaN."""
        if "d" not in self.df or self.df.d.isnull().values.any():
            self.calc_d()
        return self.df.d
    # d_spacings()
313
+
314
    def sort_by_resolution(self, ascending=False):
        """Sort reflections by d spacing in place (default: largest d first)."""
        self.d_spacings() # ensure the "d" column exists
        self.df.sort_values("d", ascending=ascending, inplace=True)
    # sort_by_resolution()
318
+
319
    def d_min_max(self, labs=None):
        """Return (d_min, d_max); with `labs`, only over rows where those columns are all present."""
        d = self.d_spacings()
        if labs:
            d = d[~self.df[labs].isna().any(axis=1)]
        return numpy.min(d), numpy.max(d)
    # d_min_max()
325
+
326
    def complete(self):
        """Append missing reflections within the current resolution range.

        New rows have NaN in all data columns; "d" is recomputed afterwards.
        """
        # make complete set
        d_min, d_max = self.d_min_max()
        all_hkl = gemmi.make_miller_array(self.cell, self.sg, d_min, d_max)
        match = gemmi.HklMatch(self.miller_array(), all_hkl)
        # match.pos < 0 marks indices of all_hkl not present in self.df
        missing_hkl_df = pandas.DataFrame(all_hkl[numpy.asarray(match.pos) < 0], columns=["H","K","L"])
        self.df = pandas.concat([self.df, missing_hkl_df])
        logger.writeln("Completing hkldata: {} reflections were missing".format(len(missing_hkl_df.index)))
        self.calc_d()
    # complete()
336
+
337
    def completeness(self, label=None):
        """Fraction of rows with data: in column `label`, or (if None) with no NaN anywhere."""
        if label is None:
            n_missing = numpy.sum(self.df.isna().any(axis=1))
        else:
            n_missing = numpy.sum(self.df[label].isna())
        n_all = len(self.df.index)
        return (n_all-n_missing)/n_all
    # completeness()
345
+
346
    def setup_binning(self, n_bins, method=gemmi.Binner.Method.Dstar2):
        """Assign each reflection to one of n_bins resolution bins via gemmi.Binner.

        Fills the "bin" column, self._bin_and_indices, and self.binned_df
        (d_max/d_min per bin). Resets the DataFrame index so the stored row
        indices can be used for numpy-style indexing.
        """
        self.df.reset_index(drop=True, inplace=True)
        s2 = 1/self.d_spacings().to_numpy()**2
        binner = gemmi.Binner()
        binner.setup_from_1_d2(n_bins, method, s2, self.cell)
        self._bin_and_indices = []
        d_limits = 1 / numpy.sqrt(binner.limits)
        bin_number = binner.get_bins_from_1_d2(s2)
        d_max_all = []
        d_min_all = []
        for i in range(binner.size):
            # bin edges: overall d extremes at the ends, binner limits between
            left = numpy.max(self.d_spacings()) if i == 0 else d_limits[i-1]
            right = numpy.min(self.d_spacings()) if i == binner.size -1 else d_limits[i]
            sel = numpy.where(bin_number==i)[0] # slow?
            d_max_all.append(left)
            d_min_all.append(right)
            self._bin_and_indices.append((i, sel))

        self.df["bin"] = bin_number
        self.binned_df = pandas.DataFrame(dict(d_max=d_max_all, d_min=d_min_all), index=list(range(binner.size)))
    # setup_binning()
367
+
368
    def setup_relion_binning(self, sort=False):
        """RELION-style binning: bin number = round(max_cell_edge / d).

        Underpopulated shells are merged: inner shells with fewer than 10
        reflections are merged outwards, and outer shells holding less than
        half of the previous shell are merged inwards. Fills the "bin"
        column, self._bin_and_indices, and self.binned_df (d_max/d_min).
        """
        max_edge = max(self.cell.parameters[:3])
        if sort:
            self.sort_by_resolution()
            self.df.reset_index(drop=True, inplace=True) # to allow numpy.array indexing

        self.df["bin"] = (max_edge/self.d_spacings()+0.5).astype(int)
        # Merge inner/outer shells if too few # TODO smarter way
        bin_counts = []
        bin_ranges = {}
        modify_table = {}
        for i_bin, g in self.df.groupby("bin", sort=True):
            if i_bin == 0: continue # ignore DC component
            bin_counts.append([i_bin, g.index])
            bin_ranges[i_bin] = (numpy.max(g.d), numpy.min(g.d))

        # merge sparse inner shells into the next shell
        for i in range(len(bin_counts)):
            if len(bin_counts[i][1]) < 10 and i < len(bin_counts)-1:
                bin_counts[i+1][1] = bin_counts[i+1][1].union(bin_counts[i][1])
                modify_table[bin_counts[i][0]] = bin_counts[i+1][0]
                logger.writeln("Bin {} only has {} data. Merging with next bin.".format(bin_counts[i][0],
                                                                                       len(bin_counts[i][1])))
            else: break

        # merge sparse outer shells into the previous shell
        for i in reversed(range(len(bin_counts))):
            if i > 0 and len(bin_counts[i][1])/len(bin_counts[i-1][1]) < 0.5:
                bin_counts[i-1][1] = bin_counts[i-1][1].union(bin_counts[i][1])
                modify_table[bin_counts[i][0]] = bin_counts[i-1][0]
                logger.writeln("Bin {} only has {} data. Merging with previous bin.".format(bin_counts[i][0],
                                                                                           len(bin_counts[i][1])))
            else: break

        # collapse chains of redirections (a->b, b->c becomes a->c)
        while True:
            flag = True
            for i_bin in modify_table:
                if modify_table[i_bin] in modify_table:
                    modify_table[i_bin] = modify_table[modify_table[i_bin]]
                    flag = False
            if flag: break

        # apply the merges to the "bin" column and extend the merged bin ranges
        for i_bin in modify_table:
            new_bin = modify_table[i_bin]
            self.df["bin"] = numpy.where(self.df["bin"].to_numpy() == i_bin, new_bin, self.df["bin"].to_numpy())
            bin_ranges[new_bin] = (max(bin_ranges[i_bin][0], bin_ranges[new_bin][0]),
                                   min(bin_ranges[i_bin][1], bin_ranges[new_bin][1]))

        self._bin_and_indices = []
        bin_all = []
        d_max_all = []
        d_min_all = []
        for i_bin, indices in bin_counts:
            if i_bin in modify_table: continue
            #if sort: # want this, but we cannot take len() for slice. we can add ncoeffs to binned_df
            #    self._bin_and_indices.append((i_bin, slice(numpy.min(indices), numpy.max(indices))))
            #else:
            self._bin_and_indices.append((i_bin, indices))

            bin_all.append(i_bin)
            d_max_all.append(bin_ranges[i_bin][0])
            d_min_all.append(bin_ranges[i_bin][1])
        self.binned_df = pandas.DataFrame(dict(d_max=d_max_all, d_min=d_min_all), index=bin_all)
    # setup_relion_binning()
430
+
431
    def binned_data_as_array(self, lab):
        """Expand a per-bin value (binned_df column `lab`) to a per-reflection array."""
        vals = numpy.zeros(len(self.df.index), dtype=self.binned_df[lab].dtype)
        for i_bin, idxes in self.binned():
            vals[idxes] = self.binned_df[lab][i_bin]
        return vals
    # binned_data_as_array()
437
+
438
    def binned(self):
        """Return the list of (bin number, row indices) pairs set up by setup_*binning."""
        return self._bin_and_indices
440
+
441
+ def columns(self):
442
+ return [x for x in self.df.columns if x not in "HKL"]
443
+
444
    def merge(self, other, common_only=True):
        """Merge another reflection table into self.df (delegates to merge_df)."""
        self.merge_df(other, common_only)
    # merge()
447
+
448
    def merge_df(self, other, common_only=True):
        """Merge another DataFrame into self.df on the shared columns.

        NOTE(review): the common_only=False branch computes the left/right/both
        splits but never assigns the result back to self.df — it appears
        unfinished; verify before relying on outer-join merging here.
        """
        # TODO check space group, cell
        # TODO transform to asu with phase shifts
        # TODO check column labels. same names other than HKL?
        #
        if common_only:
            self.df = self.df.merge(other)
        else:
            df = self.df.merge(other, indicator=True, how="outer")
            df_left = df[df._merge=="left_only"]
            df_right = df[df._merge=="right_only"]
            df_both = df[df._merge=="both"]
        # merge()
461
+
462
    def guess_free_number(self, obs):
        """Guess which value of the FREE column marks the test set.

        Considers only rows where column `obs` is present, then picks the
        smallest flag value whose count is below half the total (i.e. a
        minority group); falls back to the smallest flag value.
        Raises RuntimeError on missing flags or when only one flag value exists.
        """
        logger.writeln("Guessing test flag number")
        sel = ~self.df[obs].isna()
        free = self.df.loc[sel, "FREE"]
        threshold = len(free.index) / 2
        if free.isna().any():
            raise RuntimeError("missing or invalid test flag")
        counts = self.df.loc[sel, "FREE"].value_counts().sort_index()
        logger.writeln(counts.to_string(header=False))
        if len(counts.index) < 2:
            raise RuntimeError("this does not appear to be test flag")
        good_flags = [n for n, c in counts.items() if c < threshold]
        if len(good_flags) > 0:
            flag_num = min(good_flags)
        else:
            flag_num = min(counts.index)
        logger.writeln(" best guess: free = {}".format(flag_num))
        return flag_num
    # guess_free_number()
481
+
482
+ def as_numpy_arrays(self, labels, omit_nan=True):
483
+ tmp = self.df[labels]
484
+ if omit_nan: tmp = tmp[~tmp.isna().any(axis=1)]
485
+ return [tmp[lab].to_numpy() for lab in labels]
486
+ # as_numpy_arrays()
487
+
488
    def remove_nonpositive(self, label):
        """Drop reflections where column `label` is <= 0 (logging how many)."""
        sel = self.df[label] <= 0
        n_bad = sel.sum()
        if n_bad > 0:
            logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
            self.df = self.df[~sel]
    # remove_nonpositive()
495
+
496
    def remove_systematic_absences(self):
        """Drop reflections that are systematically absent in the space group."""
        is_absent = self.sg.operations().systematic_absences(self.miller_array())
        n_absent = numpy.sum(is_absent)
        if n_absent > 0:
            logger.writeln("Removing {} systematic absences".format(n_absent))
            self.df = self.df[~is_absent]
    # remove_systematic_absences()
503
+
504
+ def merge_anomalous(self, labs, newlabs):
505
+ assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
506
+ assert len(newlabs) == 2
507
+ # skipna=True is default, so missing value is handled nicely.
508
+ self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
509
+ self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
510
+
511
    def as_asu_data(self, label=None, data=None, label_sigma=None):
        """Export a column (or a raw array) as the matching gemmi *AsuData type.

        Give exactly one of `label` or `data`. With `label_sigma`, a
        gemmi.ValueSigmaAsuData is built from the value/sigma column pair.
        The AsuData class is picked from the dtype: complex -> ComplexAsuData,
        integer -> IntAsuData, otherwise FloatAsuData.
        """
        if label is None: assert data is not None
        else: assert data is None

        if label_sigma is not None:
            assert data is None
            assert not numpy.iscomplexobj(self.df[label])
            data = self.df[[label,label_sigma]].to_numpy()
        elif data is None:
            data = self.df[label]

        if numpy.iscomplexobj(data):
            asutype = gemmi.ComplexAsuData
        elif issubclass(data.dtype.type, numpy.integer):
            asutype = gemmi.IntAsuData
        elif label_sigma is not None:
            asutype = gemmi.ValueSigmaAsuData
        else:
            asutype = gemmi.FloatAsuData

        return asutype(self.cell, self.sg,
                       self.miller_array(), data)
    # as_asu_data()
534
+
535
    def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
        """FFT a complex column (`label`) or a raw array (`data`) to a real-space map."""
        if data is None:
            data = self.df[label].to_numpy()
        return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
    # fft_map()
540
+
541
    def d_eff(self, label):
        """Effective resolution from a per-bin FSC column: d_min / mean(FSC)^(1/3)."""
        # Effective resolution defined using FSC
        fsc = self.binned_df[label]
        a = 0.
        # reflection-count-weighted average of the binned FSC values
        for i_bin, idxes in self.binned():
            a += len(idxes) * fsc[i_bin]

        fac = (a/len(self.df.index))**(1/3.)
        d_min = self.d_min_max()[0]
        ret = d_min/fac
        return ret
    # d_eff()
553
+
554
    def hard_sphere_kernel(self, r_ang, grid_size):
        """Real-space kernel of a hard sphere of radius r_ang (angstrom), normalised to sum 1.

        The Fourier transform of a solid sphere, 3(sin t - t cos t)/t^3 with
        t = 2*pi*s*r, is inverse-FFTed onto the given grid; the missing F000
        term is added back before normalisation.
        """
        s = 1. / self.d_spacings()
        t = 2 * numpy.pi * s * r_ang
        F_kernel = 3. * (-t * numpy.cos(t) + numpy.sin(t)) / t**3
        knl = self.fft_map(data=F_kernel, grid_size=grid_size)
        knl.array[:] += 1. / knl.unit_cell.volume # F000
        knl.array[:] /= numpy.sum(knl.array)
        return knl
    # hard_sphere_kernel()
563
+
564
    def scale_k_and_b(self, lab_ref, lab_scaled, debug=False):
        """Estimate overall scale k and isotropic B relating two columns.

        Minimises (|f1| - |f2| * k * exp(-B*s^2/4))^2 with f1 = df[lab_ref]
        and f2 = df[lab_scaled]: first a closed-form fit in log space over
        positive pairs, then a Newton-CG refinement. Returns (k, B) of
        whichever step achieved the lower R-factor.
        """
        logger.writeln("Determining k, B scales between {} and {}".format(lab_ref, lab_scaled))
        s2 = 1/self.d_spacings().to_numpy()**2
        # determine scales that minimize (|f1|-|f2|*k*e^(-b*s2/4))^2
        f1 = self.df[lab_ref].to_numpy()
        f2 = self.df[lab_scaled].to_numpy()
        if numpy.iscomplexobj(f1): f1 = numpy.abs(f1)
        if numpy.iscomplexobj(f2): f2 = numpy.abs(f2)

        sel_pos = numpy.logical_and(f1 > 0, f2 > 0) # this filters nan as well
        f1p, f2p, s2p = f1[sel_pos], f2[sel_pos], s2[sel_pos]

        # 1st step: minimize (log(|f1|)-log(|f2|*e^k*e^(-b*s2/4)))^2 starting with k=1, b=0.
        tmp = numpy.log(f2p) - numpy.log(f1p)
        # g = [dT/dk, dT/db]
        g = numpy.array([2 * numpy.sum(tmp), -numpy.sum(tmp*s2p)/2])
        H = numpy.zeros((2,2))
        H[0,0] = 2*len(f1p)
        H[1,1] = numpy.sum(s2p**2/8)
        H[0,1] = H[1,0] = -numpy.sum(s2p)/2
        x = -numpy.dot(numpy.linalg.inv(H), g)
        k1 = numpy.exp(x[0])
        B1 = x[1]
        logger.writeln(" initial estimate using log: k= {:.2e} B= {:.2e}".format(k1, B1))
        f2tmp = f2 * k1 * numpy.exp(-B1*s2/4)
        r_step0 = r_factor(f1, f2)
        r_step1 = r_factor(f1, f2tmp)
        logger.writeln(" R= {:.4f} (was: {:.4f})".format(r_step1, r_step0))

        # 2nd step: - minimize (|f1|-|f2|*k*e^(-b*s2/4))^2 iteratively (TODO with regularisation)

        def grad2(x):
            # gradient of the least-squares target w.r.t. (k, B)
            t = numpy.exp(-x[1]*s2/4)
            tmp = (f1-f2*x[0]*t)*f2*t
            return numpy.array([-2.*numpy.nansum(tmp),
                                0.5*x[0]*numpy.nansum(tmp*s2)])

        def hess2(x):
            # Hessian of the least-squares target w.r.t. (k, B)
            h = numpy.zeros((2, 2))
            t = numpy.exp(-x[1]*s2/4)
            t2 = t**2
            h[0,0] = numpy.nansum(f2**2 * t2) * 2
            h[1,1] = numpy.nansum(f2 * s2**2/4 * (-f1/2*t + f2*x[0]*t2)) * x[0]
            h[1,0] = numpy.nansum(f2 * s2 * (f1/2*t - f2*x[0]*t2))
            h[0,1] = h[1,0]
            return h

        res = scipy.optimize.minimize(fun=lambda x: numpy.nansum((f1-f2*x[0]*numpy.exp(-x[1]*s2/4))**2),
                                      jac=grad2,
                                      hess=hess2,
                                      method="Newton-CG",
                                      x0=numpy.array([k1, B1]),
                                      )
        if debug:
            logger.writeln(str(res))
        k2, B2 = res.x
        f2tmp2 = f2 * k2 * numpy.exp(-B2*s2/4)
        r_step2 = r_factor(f1, f2tmp2)
        logger.writeln(" Least-square estimate: k= {:.2e} B= {:.2e}".format(k2, B2))
        logger.writeln(" R= {:.4f}".format(r_step2))

        if 0:
            # NOTE(review): dead debug/plotting branch; it references an
            # undefined name `hkldata` (presumably meant `self`) and would
            # NameError if ever enabled.
            self.setup_binning(40)
            x = []
            y0,y1,y2,y3=[],[],[],[]
            for i_bin, idxes in self.binned():
                bin_d_min = hkldata.binned_df.d_min[i_bin]
                bin_d_max = hkldata.binned_df.d_max[i_bin]
                x.append(1/bin_d_min**2)
                y0.append(numpy.average(f1[idxes]))
                y1.append(numpy.average(f2[idxes]))
                y2.append(numpy.average(f2tmp[idxes]))
                y3.append(numpy.average(f2tmp2[idxes]))

            import matplotlib.pyplot as plt
            plt.plot(x, y0, label="FC")
            plt.plot(x, y1, label="FP")
            plt.plot(x, y2, label="FP,scaled")
            plt.plot(x, y3, label="FP,scaled2")
            plt.legend()
            plt.show()

        # return whichever estimate gave the better R-factor
        if r_step2 < r_step1:
            return k2, B2
        else:
            return k1, B1
    # scale_k_and_b()
651
+
652
    def translation_factor(self, shift):
        """Per-reflection phase factors exp(2*pi*i * hkl . x) for a Cartesian shift."""
        if type(shift) != gemmi.Position:
            shift = gemmi.Position(*shift)
        return numpy.exp(2.j*numpy.pi*numpy.dot(self.miller_array(),
                                                self.cell.fractionalize(shift).tolist()))
    # translation_factor()
658
    def translate(self, lab, shift):
        """Apply the phase shift for a real-space translation to complex column `lab`, in place."""
        # apply phase shift
        assert numpy.iscomplexobj(self.df[lab])
        self.df[lab] *= self.translation_factor(shift)
    # translate()
663
+
664
    def write_mtz(self, mtz_out, labs, types=None, phase_label_decorator=None,
                  exclude_000=True):
        """Write columns `labs` to an MTZ file.

        Complex columns are split into amplitude (F) and phase-in-degrees (P)
        pairs; the phase label is derived from a known-label table or "PH"+lab
        unless phase_label_decorator provides it. Real columns get the type
        from `types`, defaulting to "I" for integer dtypes and "R" otherwise.
        The 0,0,0 reflection is excluded unless exclude_000=False.
        """
        logger.writeln("Writing MTZ file: {}".format(mtz_out))
        if self.sg.ccp4 < 1:
            logger.writeln("WARNING: CCP4-unsupported space group ({})".format(self.sg.xhm()))
        if types is None: types = {}
        if exclude_000:
            df = self.df.query("H!=0 | K!=0 | L!=0")
        else:
            df = self.df

        # complex columns occupy two MTZ columns (amplitude + phase)
        ndata = sum(2 if numpy.iscomplexobj(df[lab]) else 1 for lab in labs)

        data = numpy.empty((len(df.index), ndata + 3), dtype=numpy.float32)
        data[:,:3] = df[["H","K","L"]]
        idx = 3
        for lab in labs:
            if numpy.iscomplexobj(df[lab]):
                data[:,idx] = numpy.abs(df[lab])
                data[:,idx+1] = numpy.angle(df[lab], deg=True)
                idx += 2
            else:
                data[:,idx] = df[lab].to_numpy(numpy.float32, na_value=numpy.nan) # for nullable integers
                idx += 1

        mtz = gemmi.Mtz()
        mtz.spacegroup = self.sg
        mtz.cell = self.cell
        mtz.add_dataset('HKL_base')
        for label in ['H', 'K', 'L']: mtz.add_column(label, 'H')

        for lab in labs:
            if numpy.iscomplexobj(df[lab]):
                mtz.add_column(lab, "F")
                if phase_label_decorator is None:
                    plab = {"FWT": "PHWT", "DELFWT": "PHDELWT", "FAN":"PHAN"}.get(lab, "PH"+lab)
                else:
                    plab = phase_label_decorator(lab)
                mtz.add_column(plab, "P")
            else:
                typ = types.get(lab)
                if typ is None:
                    if issubclass(df[lab].dtype.type, numpy.integer):
                        typ = "I"
                    else:
                        typ = "R"
                mtz.add_column(lab, typ)

        mtz.set_data(data)
        mtz.write_to_file(mtz_out)
    # write_mtz()