servalcat-0.4.88-cp310-cp310-win_amd64.whl → servalcat-0.4.100-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/utils/hkl.py CHANGED
@@ -7,7 +7,6 @@ Mozilla Public License, version 2.0; see LICENSE.
7
7
  """
8
8
  from __future__ import absolute_import, division, print_function, generators
9
9
  import numpy
10
- import numpy.lib.recfunctions
11
10
  import scipy.optimize
12
11
  import pandas
13
12
  import gemmi
@@ -17,27 +16,28 @@ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex1
17
16
  to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
18
17
 
19
18
  def r_factor(fo, fc):
20
- if fo.size == 0:
19
+ denom = numpy.nansum(fo)
20
+ if denom == 0:
21
21
  return numpy.nan
22
- return numpy.nansum(numpy.abs(fo-fc)) / numpy.nansum(fo)
22
+ return numpy.nansum(numpy.abs(fo-fc)) / denom
23
23
  def correlation(obs, calc):
24
- if obs.size == 0:
25
- return numpy.nan
26
24
  sel = numpy.isfinite(obs)
25
+ if obs.size == 0 or numpy.all(~sel):
26
+ return numpy.nan
27
27
  return numpy.corrcoef(obs[sel], calc[sel])[0,1]
28
28
 
29
29
  def df_from_asu_data(asu_data, label):
30
- df = pandas.DataFrame(data=asu_data.miller_array,
30
+ df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
31
31
  columns=["H","K","L"])
32
- if asu_data.value_array.dtype.names == ('value', 'sigma'):
33
- df[label] = to64(asu_data.value_array["value"])
34
- df["SIG"+label] = to64(asu_data.value_array["sigma"])
32
+ if type(asu_data) is gemmi.ValueSigmaAsuData:
33
+ df[label] = to64(asu_data.value_array[:,0])
34
+ df["SIG"+label] = to64(asu_data.value_array[:,1])
35
35
  else:
36
36
  df[label] = to64(asu_data.value_array)
37
37
  return df
38
38
 
39
39
  def df_from_raw(miller_array, value_array, label):
40
- df = pandas.DataFrame(data=miller_array,
40
+ df = pandas.DataFrame(data=miller_array.astype(numpy.int32),
41
41
  columns=["H","K","L"])
42
42
  df[label] = to64(value_array)
43
43
  return df
@@ -93,7 +93,7 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
93
93
  if mismatches:
94
94
  raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
95
95
 
96
- df = pandas.DataFrame(data=numpy.array(mtz, copy=False), columns=mtz.column_labels())
96
+ df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
97
97
  df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
98
98
  df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
99
99
  for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
@@ -176,7 +176,7 @@ def mtz_selected(mtz, columns):
176
176
  dataset_id=col_dict[col].dataset_id, expand_data=False)
177
177
 
178
178
  idxes = [col_idxes[col] for col in columns]
179
- data = numpy.array(mtz, copy=False)[:, idxes]
179
+ data = mtz.array[:, idxes]
180
180
  mtz2.set_data(data)
181
181
  return mtz2
182
182
  # mtz_selected()
@@ -199,6 +199,8 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
199
199
  def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
200
200
  if data is not None:
201
201
  data = data.astype(numpy.complex64) # we may want to keep complex128?
202
+ if type(data) is pandas.core.series.Series:
203
+ data = data.to_numpy()
202
204
  asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
203
205
  if grid_size is None:
204
206
  ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
@@ -223,7 +225,7 @@ class HklData:
223
225
  def switch_to_asu(self):
224
226
  # Need to care phases
225
227
  assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
226
- hkl = self.miller_array().to_numpy()
228
+ hkl = self.miller_array()
227
229
  self.sg.switch_to_asu(hkl)
228
230
  self.df[["H","K","L"]] = hkl
229
231
  # in some environment type changes to int64 even though hkl's dtype is int32
@@ -265,11 +267,11 @@ class HklData:
265
267
  # merge_asu_data()
266
268
 
267
269
  def miller_array(self):
268
- return self.df[["H","K","L"]]
270
+ return self.df[["H","K","L"]].to_numpy()
269
271
 
270
272
  def s_array(self):
271
273
  hkl = self.miller_array()
272
- return numpy.dot(hkl, self.cell.fractionalization_matrix)
274
+ return numpy.dot(hkl, self.cell.frac.mat.array)
273
275
 
274
276
  def ssq_mat(self):
275
277
  # k_aniso = exp(-s^T B_aniso s / 4)
@@ -288,8 +290,8 @@ class HklData:
288
290
  s2 = 1 / self.d_spacings()**2
289
291
  return numpy.exp(-b_iso / 4 * s2)
290
292
  if b_cart is not None:
291
- b_star = b_cart.transformed_by(self.cell.fractionalization_matrix)
292
- return numpy.exp(-b_star.r_u_r(self.miller_array().to_numpy()) / 4)
293
+ b_star = b_cart.transformed_by(self.cell.frac.mat)
294
+ return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
293
295
 
294
296
  def calc_d(self):
295
297
  self.df["d"] = self.cell.calculate_d_array(self.miller_array())
@@ -314,8 +316,10 @@ class HklData:
314
316
  self.df.sort_values("d", ascending=ascending, inplace=True)
315
317
  # sort_by_resolution()
316
318
 
317
- def d_min_max(self):
319
+ def d_min_max(self, labs=None):
318
320
  d = self.d_spacings()
321
+ if labs:
322
+ d = d[~self.df[labs].isna().any(axis=1)]
319
323
  return numpy.min(d), numpy.max(d)
320
324
  # d_min_max()
321
325
 
@@ -489,6 +493,31 @@ class HklData:
489
493
  self.df = self.df[~sel]
490
494
  # remove_nonpositive()
491
495
 
496
+ def mask_invalid_obs_values(self, labels):
497
+ assert 1 < len(labels) < 6
498
+ assert labels[1].startswith("SIG")
499
+ def do_mask(label, target_labels):
500
+ sel = self.df[label] <= 0
501
+ n_bad = sel.sum()
502
+ if n_bad > 0:
503
+ logger.writeln("Removing {} reflections with {}<=0".format(n_bad, label))
504
+ self.df.loc[sel, target_labels] = numpy.nan
505
+ # If any element within target_labels is non-finite, mask all elements
506
+ self.df.loc[(~numpy.isfinite(self.df[target_labels])).any(axis=1), target_labels] = numpy.nan
507
+
508
+ if len(labels) < 4: # F/SIGF or I/SIGI
509
+ if labels[0].startswith("F"):
510
+ do_mask(labels[0], labels[:2]) # bad F
511
+ do_mask(labels[1], labels[:2]) # bad sigma
512
+ else: # I(+)/SIGI(+)/I(-)/SIGI(-) or F...
513
+ assert labels[3].startswith("SIG")
514
+ if labels[0].startswith("F"):
515
+ do_mask(labels[0], labels[:2]) # bad F+
516
+ do_mask(labels[2], labels[2:4]) # bad F-
517
+ do_mask(labels[1], labels[:2]) # bad sigma+
518
+ do_mask(labels[3], labels[2:4]) # bad sigma-
519
+ # mask_invalid_obs_values()
520
+
492
521
  def remove_systematic_absences(self):
493
522
  is_absent = self.sg.operations().systematic_absences(self.miller_array())
494
523
  n_absent = numpy.sum(is_absent)
@@ -497,12 +526,22 @@ class HklData:
497
526
  self.df = self.df[~is_absent]
498
527
  # remove_systematic_absences()
499
528
 
500
- def merge_anomalous(self, labs, newlabs):
529
+ def merge_anomalous(self, labs, newlabs, method="weighted"):
530
+ assert method in ("weighted", "simple")
501
531
  assert len(labs) == 4 # i+,sigi+,i-,sigi- for example
502
532
  assert len(newlabs) == 2
503
- # skipna=True is default, so missing value is handled nicely.
504
- self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
505
- self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
533
+ if method == "simple":
534
+ # skipna=True is default, so missing value is handled nicely.
535
+ self.df[newlabs[0]] = self.df[[labs[0], labs[2]]].mean(axis=1)
536
+ self.df[newlabs[1]] = self.df[[labs[1], labs[3]]].pow(2).mean(axis=1).pow(0.5)
537
+ else:
538
+ obs = self.df[[labs[0], labs[2]]].to_numpy()
539
+ weights = 1. / self.df[[labs[1], labs[3]]].to_numpy()**2
540
+ sum_w = numpy.nansum(weights, axis=1)
541
+ sum_w[sum_w == 0] = numpy.nan # mask when both are nan
542
+ self.df[newlabs[0]] = numpy.nansum(obs * weights, axis=1) / sum_w
543
+ self.df[newlabs[1]] = numpy.sqrt(1. / sum_w)
544
+ # merge_anomalous()
506
545
 
507
546
  def as_asu_data(self, label=None, data=None, label_sigma=None):
508
547
  if label is None: assert data is not None
@@ -511,9 +550,7 @@ class HklData:
511
550
  if label_sigma is not None:
512
551
  assert data is None
513
552
  assert not numpy.iscomplexobj(self.df[label])
514
- sigma = self.df[label_sigma]
515
- data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
516
- numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
553
+ data = self.df[[label,label_sigma]].to_numpy()
517
554
  elif data is None:
518
555
  data = self.df[label]
519
556
 
@@ -532,7 +569,7 @@ class HklData:
532
569
 
533
570
  def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
534
571
  if data is None:
535
- data = self.df[label]
572
+ data = self.df[label].to_numpy()
536
573
  return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
537
574
  # fft_map()
538
575
 
servalcat/utils/logger.py CHANGED
@@ -18,12 +18,15 @@ class Logger(object):
18
18
  def __init__(self, file_out=None, append=True):
19
19
  self.ofs = None
20
20
  self.stopped = False
21
+ self.prefix = ""
21
22
  if file_out:
22
23
  self.set_file(file_out, append)
23
24
  # __init__()
24
25
  def stop_logging(self): self.stopped = True
25
26
  def start_logging(self): self.stopped = False
26
-
27
+ def set_prefix(self, p): self.prefix = p
28
+ def clear_prefix(self): self.prefix = ""
29
+
27
30
  def set_file(self, file_out, append=True):
28
31
  try:
29
32
  self.ofs = open(file_out, "a" if append else "w")
@@ -33,6 +36,8 @@ class Logger(object):
33
36
 
34
37
  def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
35
38
  if self.stopped: return
39
+ if self.prefix:
40
+ l = "".join(self.prefix + x for x in l.splitlines(keepends=True))
36
41
  print(l, end=end, file=print_fs, flush=flush)
37
42
  for f in (self.ofs, fs):
38
43
  if f is not None:
@@ -69,6 +74,25 @@ close = _logger.close
69
74
  flush = _logger.flush
70
75
  stop = _logger.stop_logging
71
76
  start = _logger.start_logging
77
+ set_prefix = _logger.set_prefix
78
+ clear_prefix = _logger.clear_prefix
79
+
80
+ def with_prefix(prefix):
81
+ class WithPrefix(object): # should keep original prefix and restore?
82
+ def __enter__(self):
83
+ _logger.set_prefix(prefix)
84
+ return _logger
85
+ def __exit__(self, exc_type, exc_val, exc_tb):
86
+ _logger.clear_prefix()
87
+ return WithPrefix()
88
+
89
+ def silent():
90
+ class Silent(object):
91
+ def write(self, *args, **kwargs):
92
+ pass
93
+ def flush(self):
94
+ pass
95
+ return Silent()
72
96
 
73
97
  def dependency_versions():
74
98
  import gemmi
servalcat/utils/maps.py CHANGED
@@ -268,9 +268,9 @@ def optimize_peak(grid, ini_pos):
268
268
  logger.writeln("Finding peak using interpolation..")
269
269
  x = grid.unit_cell.fractionalize(ini_pos)
270
270
  logger.writeln(" x0: [{}, {}, {}]".format(*x.tolist()))
271
- logger.writeln(" f0: {}".format(-grid.tricubic_interpolation(x)))
271
+ logger.writeln(" f0: {}".format(-grid.interpolate_value(x, order=3)))
272
272
 
273
- res = scipy.optimize.minimize(fun=lambda x:-grid.tricubic_interpolation(gemmi.Fractional(*x)),
273
+ res = scipy.optimize.minimize(fun=lambda x:-grid.interpolate_value(gemmi.Fractional(*x), order=3),
274
274
  x0=x.tolist(),
275
275
  jac=lambda x:-numpy.array(grid.tricubic_interpolation_der(gemmi.Fractional(*x))[1:])
276
276
  )
servalcat/utils/model.py CHANGED
@@ -73,11 +73,12 @@ def remove_charge(sts):
73
73
  def check_atomsf(sts, source, mott_bethe=True):
74
74
  assert source in ("xray", "electron", "neutron")
75
75
  if source != "electron": mott_bethe = False
76
- logger.writeln("Atomic scattering factors for {}".format("electron (Mott-Bethe)" if mott_bethe else source))
76
+ logger.writeln("Atomic scattering factors for {}".format("xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source))
77
77
  if source != "xray" and not mott_bethe:
78
78
  logger.writeln(" Note that charges will be ignored")
79
79
  el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
80
80
  elems = {x[0] for x in el_charges}
81
+ tmp = {}
81
82
  if source == "xray" or mott_bethe:
82
83
  shown = set()
83
84
  for el, charge in sorted(el_charges, key=lambda x: (x[0].atomic_number, x[1])):
@@ -88,12 +89,16 @@ def check_atomsf(sts, source, mott_bethe=True):
88
89
  charge = 0
89
90
  if (el, charge) in shown: continue
90
91
  label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
91
- logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
92
92
  shown.add((el, charge))
93
+ tmp[label] = {**{f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}, "c": sf.c}
93
94
  else:
94
95
  for el in sorted(elems, key=lambda x: x.atomic_number):
95
- sf = el.c4322 if source == "electron" else el.neutron92
96
- logger.writeln(" {} {}".format(el.name, tuple(sf.get_coefs())))
96
+ if source == "electron":
97
+ tmp[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
98
+ else:
99
+ tmp[el.name] = {"a": el.neutron92.get_coefs()[0]}
100
+ with logger.with_prefix(" "):
101
+ logger.writeln(pandas.DataFrame(tmp).T.to_string())
97
102
  logger.writeln("")
98
103
  # check_atomsf()
99
104
 
@@ -347,8 +352,8 @@ def translate_into_box(st, origin=None, apply_shift=True):
347
352
  if origin is None: origin = gemmi.Position(0,0,0)
348
353
 
349
354
  # apply unit cell translations to put model into a box (unit cell)
350
- omat = numpy.array(st.cell.orthogonalization_matrix)
351
- fmat = numpy.array(st.cell.fractionalization_matrix).transpose()
355
+ omat = st.cell.orth.mat.array
356
+ fmat = st.cell.frac.mat.array.transpose()
352
357
  com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
353
358
  shift = sum([omat[:,i]*numpy.floor(1-numpy.dot(com, fmat[:,i])) for i in range(3)])
354
359
  tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
@@ -443,7 +448,7 @@ def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=
443
448
  logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
444
449
  logger.writeln(" aniso_viol= {}".format(tostr(diff)))
445
450
 
446
- mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
451
+ mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
447
452
  mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
448
453
  mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
449
454
  mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
@@ -553,7 +558,9 @@ def reset_adp(model, bfactor=None, adp_mode="iso"):
553
558
  if adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None):
554
559
  cra.atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
555
560
  elif adp_mode == "aniso":
556
- if not cra.atom.aniso.nonzero() or bfactor is not None:
561
+ if cra.atom.aniso.nonzero() and bfactor is None: # just in case
562
+ cra.atom.b_iso = numpy.mean(cra.atom.aniso.calculate_eigenvalues()) * u_to_b
563
+ else:
557
564
  u = cra.atom.b_iso * b_to_u
558
565
  cra.atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
559
566
  # reset_adp()
@@ -630,7 +637,7 @@ def to_dataframe(st):
630
637
  for cra in m.all():
631
638
  c,r,a = cra.chain, cra.residue, cra.atom
632
639
  # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
633
- app("model", m.name)
640
+ app("model", m.num)
634
641
  app("chain", c.name)
635
642
  app("resn", r.name)
636
643
  app("subchain", r.subchain)
@@ -665,8 +672,8 @@ def from_dataframe(df, st=None): # Slow!
665
672
  for i in range(len(st)):
666
673
  del st[0]
667
674
 
668
- for m_name, dm in df.groupby("model"):
669
- st.add_model(gemmi.Model(m_name))
675
+ for m_num, dm in df.groupby("model"):
676
+ st.add_model(gemmi.Model(m_num))
670
677
  m = st[-1]
671
678
  for c_name, dc in dm.groupby("chain"):
672
679
  m.add_chain(gemmi.Chain(c_name))
@@ -704,7 +711,7 @@ def from_dataframe(df, st=None): # Slow!
704
711
 
705
712
  def st_from_positions(positions, bs=None, qs=None):
706
713
  st = gemmi.Structure()
707
- st.add_model(gemmi.Model("1"))
714
+ st.add_model(gemmi.Model(1))
708
715
  st[0].add_chain(gemmi.Chain("A"))
709
716
  c = st[0][0]
710
717
  if bs is None: bs = (0. for _ in range(len(positions)))
@@ -727,7 +734,7 @@ def st_from_positions(positions, bs=None, qs=None):
727
734
 
728
735
  def invert_model(st):
729
736
  # invert x-axis
730
- A = numpy.array(st.cell.orthogonalization_matrix.tolist())
737
+ A = st.cell.orth.mat.array
731
738
  center = numpy.sum(A,axis=1) / 2
732
739
  center = gemmi.Vec3(*center)
733
740
  mat = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
@@ -742,14 +749,14 @@ def cx_to_mx(ss): #SmallStructure to Structure
742
749
  st = gemmi.Structure()
743
750
  st.spacegroup_hm = ss.spacegroup.xhm()
744
751
  st.cell = ss.cell
745
- st.add_model(gemmi.Model("1"))
752
+ st.add_model(gemmi.Model(1))
746
753
  st[-1].add_chain(gemmi.Chain("A"))
747
754
  st[-1][-1].add_residue(gemmi.Residue())
748
755
  st[-1][-1][-1].seqid.num = 1
749
756
  st[-1][-1][-1].name = "00"
750
757
 
751
758
  ruc = ss.cell.reciprocal()
752
- cif2cart = ss.cell.orthogonalization_matrix.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
759
+ cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
753
760
  as_smat33f = lambda x: gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)
754
761
 
755
762
  for site in ss.sites:
servalcat/utils/refmac.py CHANGED
@@ -22,7 +22,7 @@ from servalcat.utils import fileio
22
22
 
23
23
  re_version = re.compile("#.* Refmac *version ([^ ]+) ")
24
24
  re_error = re.compile('(warn|error *[:]|error *==|^error)', re.IGNORECASE)
25
- re_outlier_start = re.compile("\*\*\*\*.*outliers")
25
+ re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
26
26
 
27
27
  def check_version(exe="refmac5"):
28
28
  ver = ()
@@ -10,7 +10,6 @@ from servalcat.utils import logger
10
10
  from servalcat.refmac import refmac_keywords
11
11
  from servalcat import ext
12
12
  import os
13
- import io
14
13
  import gemmi
15
14
  import string
16
15
  import random
@@ -87,14 +86,13 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
87
86
  if cif_files is None:
88
87
  cif_files = []
89
88
 
89
+ monlib = gemmi.MonLib()
90
90
  if monomer_dir and not ignore_monomer_dir:
91
91
  if not os.path.isdir(monomer_dir):
92
92
  raise RuntimeError("not a directory: {}".format(monomer_dir))
93
93
 
94
94
  logger.writeln("Reading monomers from {}".format(monomer_dir))
95
- monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
96
- else:
97
- monlib = gemmi.MonLib()
95
+ monlib.read_monomer_lib(monomer_dir, resnames, logger)
98
96
 
99
97
  for f in cif_files:
100
98
  logger.writeln("Reading monomer: {}".format(f))
@@ -109,8 +107,8 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
109
107
 
110
108
  # Check if bond length values are included
111
109
  # This is to fail if cif file is e.g. from PDB website
112
- if len(atom_id_list) > 1 and not b.find_values("_chem_comp_bond.value_dist"):
113
- raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(f, name))
110
+ if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
111
+ raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")
114
112
 
115
113
  for row in b.find("_chem_link.", ["id"]):
116
114
  link_id = row.str(0)
@@ -148,7 +146,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
148
146
  logger.writeln(" it is strongly recommended to generate them using AceDRG.")
149
147
 
150
148
  if update_old_atom_names:
151
- logger.write(monlib.update_old_atom_names(st))
149
+ monlib.update_old_atom_names(st, logger)
152
150
 
153
151
  if params:
154
152
  update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))
@@ -158,6 +156,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
158
156
 
159
157
  def fix_elements_in_model(monlib, st):
160
158
  monlib_els = {m: {a.id: a.el for a in monlib.monomers[m].atoms} for m in monlib.monomers}
159
+ lookup = {x.atom: x for x in st[0].all()}
161
160
  for chain in st[0]:
162
161
  for res in chain:
163
162
  d = monlib_els.get(res.name)
@@ -167,7 +166,7 @@ def fix_elements_in_model(monlib, st):
167
166
  continue
168
167
  el = d[at.name]
169
168
  if at.element != el:
170
- logger.writeln(f"WARNING: correcting element of {st[0].get_cra(at)} to {el.name}")
169
+ logger.writeln(f"WARNING: correcting element of {lookup[at]} to {el.name}")
171
170
  at.element = el
172
171
  # correct_elements_in_model()
173
172
 
@@ -334,10 +333,9 @@ def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_err
334
333
  keywords = []
335
334
  # these checks can be done after sorting links
336
335
  logger.writeln("Creating restraints..")
337
- sio = io.StringIO()
338
- topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
339
- ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
340
- for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
336
+ with logger.with_prefix(" "):
337
+ topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
338
+ ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
341
339
  unknown_cc = set()
342
340
  link_related = set()
343
341
  nan_hydr = set()
@@ -665,34 +663,35 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
665
663
  q = [x.name for x in pols[i][1]]
666
664
  for j in range(i+1, len(pols)):
667
665
  al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
668
- if 0: # debug
669
- wrap_width = 100
670
- logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
671
- logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
672
- logger.writeln(f"match_count: {al.match_count}")
673
- s1 = gemmi.one_letter_code(q)
674
- p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
675
- p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
676
- for k in range(0, len(p1), wrap_width):
677
- logger.writeln(" seq. {}".format(p1[k:k+wrap_width]))
678
- logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
679
- logger.writeln(" model {}\n".format(p2[k:k+wrap_width]))
680
666
  if al.match_count < min_nalign: continue
681
667
  su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
682
- obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
668
+ obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1], pols[i][0].name, pols[j][0].name)
683
669
  obj.calculate_local_rms(rms_loc_nlen)
684
670
  if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
685
671
  continue
686
672
  ave_local_rms = numpy.nanmean(obj.local_rms)
687
673
  if ave_local_rms > max_rms_loc: continue
688
674
  ncslist.ncss.append(obj)
689
- al_res.append({"chain_1": "{} ({}..{})".format(pols[i][0].name, pols[i][1][0].seqid, pols[i][1][-1].seqid),
690
- "chain_2": "{} ({}..{})".format(pols[j][0].name, pols[j][1][0].seqid, pols[j][1][-1].seqid),
675
+ al_res.append({"chain_1": "{} ({}..{})".format(obj.chains[0], obj.seqids[0][0], obj.seqids[-1][0]),
676
+ "chain_2": "{} ({}..{})".format(obj.chains[1], obj.seqids[0][1], obj.seqids[-1][1]),
691
677
  "aligned": al.match_count,
692
678
  "identity": al.calculate_identity(1),
693
679
  "rms": su.rmsd,
694
680
  "ave(rmsloc)": ave_local_rms,
695
681
  })
682
+ if al_res[-1]["identity"] < 100:
683
+ wrap_width = 100
684
+ logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
685
+ logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
686
+ logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
687
+ s1 = gemmi.one_letter_code(q)
688
+ p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
689
+ p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
690
+ for k in range(0, len(p1), wrap_width):
691
+ logger.writeln(" seq1 {}".format(p1[k:k+wrap_width]))
692
+ logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
693
+ logger.writeln(" seq2 {}\n".format(p2[k:k+wrap_width]))
694
+
696
695
  ncslist.set_pairs()
697
696
  df = pandas.DataFrame(al_res)
698
697
  df.index += 1
@@ -755,7 +754,7 @@ class MetalCoordination:
755
754
  logger.writeln(" (from ener_lib)")
756
755
  else:
757
756
  logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
758
- ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
757
+ ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
759
758
  logger.writeln("")
760
759
  for i, am in enumerate(coords[metal]):
761
760
  logger.writeln(" site {}: {}".format(i+1, lookup[am]))
@@ -55,7 +55,7 @@ def ncsops_from_args(args, cell, map_and_start=None, st=None, helical_min_n=None
55
55
  start_xyz = numpy.zeros(3)
56
56
 
57
57
  if args.center is None:
58
- A = numpy.array(cell.orthogonalization_matrix.tolist())
58
+ A = cell.orth.mat.array
59
59
  center = numpy.sum(A, axis=1) / 2 #+ start_xyz
60
60
  logger.writeln("Center: {}".format(center))
61
61
  else:
@@ -156,7 +156,7 @@ def show_operators_axis_angle(ops):
156
156
  def show_ncs_operators_axis_angle(ops):
157
157
  # ops: List of gemmi.NcsOp
158
158
  for i, op in enumerate(ops):
159
- op2 = numpy.array(op.tr.mat.tolist())
159
+ op2 = op.tr.mat.array
160
160
  ax, ang = generate_operators.Rotation2AxisAngle_general(op2)
161
161
  axlab = "[{: .4f}, {: .4f}, {: .4f}]".format(*ax)
162
162
  trlab = "[{: 9.4f}, {: 9.4f}, {: 9.4f}]".format(*op.tr.vec.tolist())
@@ -210,7 +210,7 @@ def generate_helical_operators(start_xyz, center, axsym, deltaphi, deltaz, axis1
210
210
 
211
211
  def make_NcsOps_from_matrices(matrices, cell=None, center=None):
212
212
  if center is None:
213
- A = numpy.array(cell.orthogonalization_matrix.tolist())
213
+ A = cell.orth.mat.array
214
214
  center = numpy.sum(A,axis=1) / 2
215
215
 
216
216
  center = gemmi.Vec3(*center)
@@ -225,9 +225,9 @@ def make_NcsOps_from_matrices(matrices, cell=None, center=None):
225
225
  # make_NcsOps_from_matrices()
226
226
 
227
227
  def find_center_of_origin(mat, vec): # may not be unique.
228
- tmp = numpy.identity(3) - numpy.array(mat)
228
+ tmp = numpy.identity(3) - numpy.array(mat.array)
229
229
  ret = numpy.dot(numpy.linalg.pinv(tmp), vec.tolist())
230
- resid = vec.tolist() - (numpy.dot(mat, -ret) + ret)
230
+ resid = vec.tolist() - (numpy.dot(mat.array, -ret) + ret)
231
231
  return gemmi.Vec3(*ret), gemmi.Vec3(*resid)
232
232
  # find_center_of_origin()
233
233
 
@@ -101,14 +101,14 @@ def determine_Sigma_and_aniso(hkldata):
101
101
  S = hkldata.binned_df.loc[i_bin, "S"]
102
102
  f0 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
103
103
  S * hkldata.df.epsilon.to_numpy()[idxes],
104
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
104
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
105
105
  shift = numpy.exp(ll_shift_bin_S(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
106
106
  S, hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes]))
107
107
  for k in range(3):
108
108
  ss = shift**(1. / 2**k)
109
109
  f1 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
110
110
  S * ss * hkldata.df.epsilon.to_numpy()[idxes],
111
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
111
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
112
112
  #logger.writeln("bin {:3d} f0 = {:.3e} shift = {:.3e} df = {:.3e}".format(i_bin, f0, ss, f1 - f0))
113
113
  if f1 < f0:
114
114
  hkldata.binned_df.loc[i_bin, "S"] = S * ss
@@ -145,7 +145,7 @@ def ll_all_B(x, ssqmat, hkldata, adpdirs):
145
145
  for i_bin, idxes in hkldata.binned():
146
146
  ret += numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
147
147
  hkldata.binned_df.S[i_bin] * hkldata.df.epsilon.to_numpy()[idxes],
148
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
148
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
149
149
  return ret
150
150
 
151
151
  def ll_shift_bin_S(Io, sigIo, k_ani, S, c, eps, exp_trans=True):
@@ -176,10 +176,12 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):
176
176
 
177
177
  def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
178
178
  to = Io / sigo - sigo / c / k_ani**2 / S / eps
179
+ tf = numpy.zeros(Io.size)
180
+ sig1 = numpy.ones(Io.size)
179
181
  k_num = numpy.where(c == 1, 0.5, 0.)
180
- F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
182
+ F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, tf, sig1, c,
181
183
  integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
182
- Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
184
+ Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, tf, sig1, c,
183
185
  integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
184
186
  varF = Fsq - F**2
185
187
  return F, numpy.sqrt(varF)