servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/utils/refmac.py CHANGED
@@ -22,7 +22,7 @@ from servalcat.utils import fileio
22
22
 
23
23
  re_version = re.compile("#.* Refmac *version ([^ ]+) ")
24
24
  re_error = re.compile('(warn|error *[:]|error *==|^error)', re.IGNORECASE)
25
- re_outlier_start = re.compile("\*\*\*\*.*outliers")
25
+ re_outlier_start = re.compile(r"\*\*\*\*.*outliers")
26
26
 
27
27
  def check_version(exe="refmac5"):
28
28
  ver = ()
@@ -148,6 +148,8 @@ class FixForRefmac:
148
148
  self.MAXNUM = 9999
149
149
  self.fixes = []
150
150
  self.resn_old_new = []
151
+ self.res_labels = []
152
+ self.entities = None
151
153
 
152
154
  def fix_before_topology(self, st, topo, fix_microheterogeneity=True, fix_resimax=True, fix_nonpolymer=True, add_gaps=False):
153
155
  self.chainids = set(chain.name for chain in st[0])
@@ -385,6 +387,14 @@ class FixForRefmac:
385
387
  st.shorten_ccd_codes()
386
388
  self.resn_old_new = [x for x in st.shortened_ccd_codes]
387
389
 
390
+ def store_res_labels(self, st):
391
+ self.res_labels = []
392
+ self.entities = gemmi.EntityList(st.entities)
393
+ for chain in st[0]:
394
+ self.res_labels.append([])
395
+ for res in chain:
396
+ self.res_labels[-1].append((res.subchain, res.entity_id, res.label_seq))
397
+
388
398
  def fix_model(self, st, changedict):
389
399
  chain_newid = set()
390
400
  for chain in st[0]:
@@ -411,6 +421,15 @@ class FixForRefmac:
411
421
  st.shortened_ccd_codes = self.resn_old_new
412
422
  st.restore_full_ccd_codes()
413
423
 
424
+ if self.res_labels:
425
+ st.entities = self.entities
426
+ #print(f"debug {len(self.res_labels)}")
427
+ #print(f"debug {[x.name for x in st[0]]}")
428
+ assert len(self.res_labels) == len(st[0])
429
+ for ic, chain in enumerate(st[0]):
430
+ assert len(self.res_labels[ic]) == len(chain)
431
+ for ir, res in enumerate(chain):
432
+ res.subchain, res.entity_id, res.label_seq = self.res_labels[ic][ir]
414
433
 
415
434
  class Refmac:
416
435
  def __init__(self, **kwargs):
@@ -10,7 +10,6 @@ from servalcat.utils import logger
10
10
  from servalcat.refmac import refmac_keywords
11
11
  from servalcat import ext
12
12
  import os
13
- import io
14
13
  import gemmi
15
14
  import string
16
15
  import random
@@ -87,14 +86,13 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
87
86
  if cif_files is None:
88
87
  cif_files = []
89
88
 
89
+ monlib = gemmi.MonLib()
90
90
  if monomer_dir and not ignore_monomer_dir:
91
91
  if not os.path.isdir(monomer_dir):
92
92
  raise RuntimeError("not a directory: {}".format(monomer_dir))
93
93
 
94
94
  logger.writeln("Reading monomers from {}".format(monomer_dir))
95
- monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
96
- else:
97
- monlib = gemmi.MonLib()
95
+ monlib.read_monomer_lib(monomer_dir, resnames, logger)
98
96
 
99
97
  for f in cif_files:
100
98
  logger.writeln("Reading monomer: {}".format(f))
@@ -109,8 +107,8 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
109
107
 
110
108
  # Check if bond length values are included
111
109
  # This is to fail if cif file is e.g. from PDB website
112
- if len(atom_id_list) > 1 and not b.find_values("_chem_comp_bond.value_dist"):
113
- raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(f, name))
110
+ if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
111
+ raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")
114
112
 
115
113
  for row in b.find("_chem_link.", ["id"]):
116
114
  link_id = row.str(0)
@@ -148,7 +146,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
148
146
  logger.writeln(" it is strongly recommended to generate them using AceDRG.")
149
147
 
150
148
  if update_old_atom_names:
151
- logger.write(monlib.update_old_atom_names(st))
149
+ monlib.update_old_atom_names(st, logger)
152
150
 
153
151
  if params:
154
152
  update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))
@@ -158,6 +156,7 @@ def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns
158
156
 
159
157
  def fix_elements_in_model(monlib, st):
160
158
  monlib_els = {m: {a.id: a.el for a in monlib.monomers[m].atoms} for m in monlib.monomers}
159
+ lookup = {x.atom: x for x in st[0].all()}
161
160
  for chain in st[0]:
162
161
  for res in chain:
163
162
  d = monlib_els.get(res.name)
@@ -167,7 +166,7 @@ def fix_elements_in_model(monlib, st):
167
166
  continue
168
167
  el = d[at.name]
169
168
  if at.element != el:
170
- logger.writeln(f"WARNING: correcting element of {st[0].get_cra(at)} to {el.name}")
169
+ logger.writeln(f"WARNING: correcting element of {lookup[at]} to {el.name}")
171
170
  at.element = el
172
171
  # correct_elements_in_model()
173
172
 
@@ -334,10 +333,9 @@ def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_err
334
333
  keywords = []
335
334
  # these checks can be done after sorting links
336
335
  logger.writeln("Creating restraints..")
337
- sio = io.StringIO()
338
- topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
339
- ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
340
- for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
336
+ with logger.with_prefix(" "):
337
+ topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
338
+ ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
341
339
  unknown_cc = set()
342
340
  link_related = set()
343
341
  nan_hydr = set()
@@ -521,7 +519,7 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f
521
519
  cra2.residue, cra2.atom.name, cra2.atom.altloc)
522
520
  if link:
523
521
  con.link_id = link.id
524
- elif find_metal_links and con.type == gemmi.ConnectionType.MetalC:
522
+ elif con.type == gemmi.ConnectionType.MetalC:
525
523
  logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
526
524
  if cra2.atom.element.is_metal:
527
525
  inv = True # make metal first
@@ -587,6 +585,8 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f
587
585
  logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
588
586
  atoms_str,
589
587
  r.dist, max_ideal))
588
+ else:
589
+ continue
590
590
  n_found += 1
591
591
  if not will_be_added: continue
592
592
  con = gemmi.Connection()
@@ -653,12 +653,7 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
653
653
  gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid):
654
654
  polymers.setdefault(p_type, []).append((chain, rs))
655
655
 
656
- scoring = gemmi.AlignmentScoring()
657
- scoring.match = 0
658
- scoring.mismatch = -1
659
- scoring.gapo = 0
660
- scoring.gape = -1
661
-
656
+ scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
662
657
  al_res = []
663
658
  ncslist = ext.NcsList()
664
659
  for pt in polymers:
@@ -670,19 +665,33 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
670
665
  al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
671
666
  if al.match_count < min_nalign: continue
672
667
  su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
673
- obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
668
+ obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1], pols[i][0].name, pols[j][0].name)
674
669
  obj.calculate_local_rms(rms_loc_nlen)
675
- if len(obj.local_rms) == 0: continue
676
- ave_local_rms = numpy.mean(obj.local_rms)
670
+ if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
671
+ continue
672
+ ave_local_rms = numpy.nanmean(obj.local_rms)
677
673
  if ave_local_rms > max_rms_loc: continue
678
674
  ncslist.ncss.append(obj)
679
- al_res.append({"chain_1": "{} ({}..{})".format(pols[i][0].name, pols[i][1][0].seqid, pols[i][1][-1].seqid),
680
- "chain_2": "{} ({}..{})".format(pols[j][0].name, pols[j][1][0].seqid, pols[j][1][-1].seqid),
675
+ al_res.append({"chain_1": "{} ({}..{})".format(obj.chains[0], obj.seqids[0][0], obj.seqids[-1][0]),
676
+ "chain_2": "{} ({}..{})".format(obj.chains[1], obj.seqids[0][1], obj.seqids[-1][1]),
681
677
  "aligned": al.match_count,
682
678
  "identity": al.calculate_identity(1),
683
679
  "rms": su.rmsd,
684
680
  "ave(rmsloc)": ave_local_rms,
685
681
  })
682
+ if al_res[-1]["identity"] < 100:
683
+ wrap_width = 100
684
+ logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
685
+ logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
686
+ logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
687
+ s1 = gemmi.one_letter_code(q)
688
+ p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
689
+ p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
690
+ for k in range(0, len(p1), wrap_width):
691
+ logger.writeln(" seq1 {}".format(p1[k:k+wrap_width]))
692
+ logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
693
+ logger.writeln(" seq2 {}\n".format(p2[k:k+wrap_width]))
694
+
686
695
  ncslist.set_pairs()
687
696
  df = pandas.DataFrame(al_res)
688
697
  df.index += 1
@@ -745,7 +754,7 @@ class MetalCoordination:
745
754
  logger.writeln(" (from ener_lib)")
746
755
  else:
747
756
  logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
748
- ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
757
+ ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
749
758
  logger.writeln("")
750
759
  for i, am in enumerate(coords[metal]):
751
760
  logger.writeln(" site {}: {}".format(i+1, lookup[am]))
@@ -55,7 +55,7 @@ def ncsops_from_args(args, cell, map_and_start=None, st=None, helical_min_n=None
55
55
  start_xyz = numpy.zeros(3)
56
56
 
57
57
  if args.center is None:
58
- A = numpy.array(cell.orthogonalization_matrix.tolist())
58
+ A = cell.orth.mat.array
59
59
  center = numpy.sum(A, axis=1) / 2 #+ start_xyz
60
60
  logger.writeln("Center: {}".format(center))
61
61
  else:
@@ -156,7 +156,7 @@ def show_operators_axis_angle(ops):
156
156
  def show_ncs_operators_axis_angle(ops):
157
157
  # ops: List of gemmi.NcsOp
158
158
  for i, op in enumerate(ops):
159
- op2 = numpy.array(op.tr.mat.tolist())
159
+ op2 = op.tr.mat.array
160
160
  ax, ang = generate_operators.Rotation2AxisAngle_general(op2)
161
161
  axlab = "[{: .4f}, {: .4f}, {: .4f}]".format(*ax)
162
162
  trlab = "[{: 9.4f}, {: 9.4f}, {: 9.4f}]".format(*op.tr.vec.tolist())
@@ -210,7 +210,7 @@ def generate_helical_operators(start_xyz, center, axsym, deltaphi, deltaz, axis1
210
210
 
211
211
  def make_NcsOps_from_matrices(matrices, cell=None, center=None):
212
212
  if center is None:
213
- A = numpy.array(cell.orthogonalization_matrix.tolist())
213
+ A = cell.orth.mat.array
214
214
  center = numpy.sum(A,axis=1) / 2
215
215
 
216
216
  center = gemmi.Vec3(*center)
@@ -225,9 +225,9 @@ def make_NcsOps_from_matrices(matrices, cell=None, center=None):
225
225
  # make_NcsOps_from_matrices()
226
226
 
227
227
  def find_center_of_origin(mat, vec): # may not be unique.
228
- tmp = numpy.identity(3) - numpy.array(mat)
228
+ tmp = numpy.identity(3) - numpy.array(mat.array)
229
229
  ret = numpy.dot(numpy.linalg.pinv(tmp), vec.tolist())
230
- resid = vec.tolist() - (numpy.dot(mat, -ret) + ret)
230
+ resid = vec.tolist() - (numpy.dot(mat.array, -ret) + ret)
231
231
  return gemmi.Vec3(*ret), gemmi.Vec3(*resid)
232
232
  # find_center_of_origin()
233
233
 
@@ -101,14 +101,14 @@ def determine_Sigma_and_aniso(hkldata):
101
101
  S = hkldata.binned_df.loc[i_bin, "S"]
102
102
  f0 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
103
103
  S * hkldata.df.epsilon.to_numpy()[idxes],
104
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
104
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
105
105
  shift = numpy.exp(ll_shift_bin_S(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
106
106
  S, hkldata.df.centric.to_numpy()[idxes]+1, hkldata.df.epsilon.to_numpy()[idxes]))
107
107
  for k in range(3):
108
108
  ss = shift**(1. / 2**k)
109
109
  f1 = numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
110
110
  S * ss * hkldata.df.epsilon.to_numpy()[idxes],
111
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
111
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
112
112
  #logger.writeln("bin {:3d} f0 = {:.3e} shift = {:.3e} df = {:.3e}".format(i_bin, f0, ss, f1 - f0))
113
113
  if f1 < f0:
114
114
  hkldata.binned_df.loc[i_bin, "S"] = S * ss
@@ -145,7 +145,7 @@ def ll_all_B(x, ssqmat, hkldata, adpdirs):
145
145
  for i_bin, idxes in hkldata.binned():
146
146
  ret += numpy.nansum(integr.ll_int(hkldata.df.I.to_numpy()[idxes], hkldata.df.SIGI.to_numpy()[idxes], k_ani[idxes],
147
147
  hkldata.binned_df.S[i_bin] * hkldata.df.epsilon.to_numpy()[idxes],
148
- 0, hkldata.df.centric.to_numpy()[idxes]+1))
148
+ numpy.zeros(len(idxes)), hkldata.df.centric.to_numpy()[idxes]+1))
149
149
  return ret
150
150
 
151
151
  def ll_shift_bin_S(Io, sigIo, k_ani, S, c, eps, exp_trans=True):
@@ -174,52 +174,57 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):
174
174
  g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
175
175
  return -numpy.dot(g, numpy.linalg.pinv(H))
176
176
 
177
+ def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
178
+ to = Io / sigo - sigo / c / k_ani**2 / S / eps
179
+ tf = numpy.zeros(Io.size)
180
+ sig1 = numpy.ones(Io.size)
181
+ k_num = numpy.where(c == 1, 0.5, 0.)
182
+ F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, tf, sig1, c,
183
+ integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
184
+ Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, tf, sig1, c,
185
+ integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
186
+ varF = Fsq - F**2
187
+ return F, numpy.sqrt(varF)
188
+
177
189
  def french_wilson(hkldata, B_aniso, labout=None):
178
190
  if labout is None: labout = ["F", "SIGF"]
179
- hkldata.df[labout[0]] = numpy.nan
180
- hkldata.df[labout[1]] = numpy.nan
181
- hkldata.df["to1"] = numpy.nan
182
191
  k_ani = hkldata.debye_waller_factors(b_cart=B_aniso)
183
-
192
+ has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
193
+ if has_ano:
194
+ ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
195
+ if len(labout) == 2:
196
+ labout += [f"{labout[0]}(+)", f"{labout[1]}(+)", f"{labout[0]}(-)", f"{labout[1]}(-)"]
197
+ hkldata.df[labout] = numpy.nan
184
198
  for i_bin, idxes in hkldata.binned():
185
199
  S = hkldata.binned_df.S[i_bin]
186
200
  c = hkldata.df.centric.to_numpy()[idxes] + 1 # 1 for acentric, 2 for centric
187
201
  Io = hkldata.df.I.to_numpy()[idxes]
188
202
  sigo = hkldata.df.SIGI.to_numpy()[idxes]
189
203
  eps = hkldata.df.epsilon.to_numpy()[idxes]
190
- to = Io / sigo - sigo / c / k_ani[idxes]**2 / S / eps
191
- k_num = numpy.where(c == 1, 0.5, 0.)
192
- F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
193
- integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
194
- Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
195
- integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
196
- varF = Fsq - F**2
204
+ F, sigF = expected_F_from_int(Io, sigo, k_ani[idxes], eps, c, S)
197
205
  hkldata.df.loc[idxes, labout[0]] = F
198
- hkldata.df.loc[idxes, labout[1]] = numpy.sqrt(varF)
199
- hkldata.df.loc[idxes, "to1"] = to
206
+ hkldata.df.loc[idxes, labout[1]] = sigF
207
+ if has_ano:
208
+ Fp, sigFp = expected_F_from_int(ano_data[idxes,0], ano_data[idxes,1], k_ani[idxes], eps, c, S)
209
+ Fm, sigFm = expected_F_from_int(ano_data[idxes,2], ano_data[idxes,3], k_ani[idxes], eps, c, S)
210
+ hkldata.df.loc[idxes, labout[2]] = Fp
211
+ hkldata.df.loc[idxes, labout[3]] = sigFp
212
+ hkldata.df.loc[idxes, labout[4]] = Fm
213
+ hkldata.df.loc[idxes, labout[5]] = sigFm
200
214
 
201
215
  def main(args):
202
216
  if not args.output_prefix:
203
217
  args.output_prefix = utils.fileio.splitext(os.path.basename(args.hklin))[0] + "_fw"
218
+ try:
219
+ mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
220
+ except RuntimeError as e:
221
+ raise SystemExit("Error: {}".format(e))
204
222
  if not args.labin:
205
- try:
206
- mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
207
- except RuntimeError as e:
208
- raise SystemExit("Error: {}".format(e))
209
- dlabs = utils.hkl.mtz_find_data_columns(mtz)
210
- if dlabs["J"]:
211
- labin = dlabs["J"][0]
212
- else:
213
- raise SystemExit("Intensity not found from mtz")
214
- flabs = utils.hkl.mtz_find_free_columns(mtz)
215
- if flabs:
216
- labin += [flabs[0]]
217
- logger.writeln("MTZ columns automatically selected: {}".format(labin))
223
+ labin = sigmaa.decide_mtz_labels(mtz, require=("K", "J"))
218
224
  else:
219
225
  labin = args.labin.split(",")
220
-
221
226
  try:
222
- hkldata, _, _, _, _ = sigmaa.process_input(hklin=args.hklin,
227
+ hkldata, _, _, _, _ = sigmaa.process_input(hklin=mtz,
223
228
  labin=labin,
224
229
  n_bins=args.nbins,
225
230
  free=None,
@@ -237,6 +242,9 @@ def main(args):
237
242
  mtz_out = args.output_prefix+".mtz"
238
243
  lab_out = ["F", "SIGF", "I", "SIGI"]
239
244
  labo_types = {"F":"F", "SIGF":"Q", "I":"J", "SIGI":"Q"}
245
+ if "I(+)" in hkldata.df and "I(-)" in hkldata.df:
246
+ lab_out += ["F(+)", "SIGF(+)", "F(-)", "SIGF(-)"]
247
+ labo_types.update({"F(+)":"G", "SIGF(+)":"L", "F(-)":"G", "SIGF(-)":"L"})
240
248
  if len(labin) == 3:
241
249
  lab_out.append("FREE")
242
250
  labo_types[lab_out[-1]] = "I"