servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.88__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/spa/fofc.py CHANGED
@@ -377,6 +377,17 @@ def write_coot_script(py_out, model_file, mtz_file, contour_fo=1.2, contour_fofc
377
377
  ofs.write("add_molecular_symmetry(imol, {})\n".format(",".join(str(x) for x in v)))
378
378
  # write_coot_script()
379
379
 
380
+ def write_chimerax_script(cxc_out, model_file, fo_mrc_file, fofc_mrc_file):
381
+ with open(cxc_out, "w") as ofs:
382
+ ofs.write('open {}\n'.format(model_file))
383
+ ofs.write('open {}\n'.format(fo_mrc_file))
384
+ ofs.write('open {}\n'.format(fofc_mrc_file))
385
+ ofs.write('volume #3 level 4 level -4 color #00FF00 color #FF0000 squaremesh false cap false style mesh meshlighting false\n')
386
+ ofs.write('isolde start\n')
387
+ ofs.write('clipper associate #2 toModel #1\n')
388
+ ofs.write('clipper associate #3 toModel #1\n')
389
+ # write_chimerax_script()
390
+
380
391
  def main(args):
381
392
  if not args.halfmaps and not args.map:
382
393
  raise SystemExit("Error: give --halfmaps or --map")
servalcat/spa/fsc.py CHANGED
@@ -36,6 +36,8 @@ def add_arguments(parser):
36
36
  parser.add_argument('--mask_soft_edge',
37
37
  type=float, default=0,
38
38
  help='Add soft edge to model mask.')
39
+ parser.add_argument('--mask_model', action='store_true',
40
+ help='Apply mask to model density')
39
41
  parser.add_argument("--b_before_mask", type=float,
40
42
  help="when model-based mask is used: sharpening B value for sharpen-mask-unsharpen procedure. By default it is determined automatically.")
41
43
  parser.add_argument('--no_sharpen_before_mask', action='store_true',
@@ -340,7 +342,7 @@ def main(args):
340
342
  labs_fc.append("FC")
341
343
  hkldata.df[labs_fc[-1]] = utils.model.calc_fc_fft(st_expanded, args.resolution - 1e-6, source="electron",
342
344
  miller_array=hkldata.miller_array())
343
- if mask is not None:
345
+ if args.mask_model and mask is not None:
344
346
  if args.b_before_mask is None:
345
347
  normalizer = 1.
346
348
  else:
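[editorial note: a per-file header was lost in extraction here. Hunk line numbers restart below the previous hunk (311 after 340/342), so the following hunks do not belong to servalcat/spa/fsc.py; presumably they are from servalcat/spa/run_refmac.py — to be confirmed against the package.]
@@ -311,6 +311,12 @@ def calc_fofc(st, st_expanded, maps, monlib, model_format, args, diffmap_prefix=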
311
311
  contour_fo=None if mask is None else 1.2,
312
312
  contour_fofc=None if mask is None else 3.0,
313
313
  ncs_ops=st.ncs)
314
+
315
+ # Create ChimeraX script
316
+ spa.fofc.write_chimerax_script(cxc_out="{}_chimerax.cxc".format(args.output_prefix),
317
+ model_file="{}.mmcif".format(args.output_prefix), # ChimeraX handles mmcif just fine
318
+ fo_mrc_file="{}_normalized_fo.mrc".format(diffmap_prefix),
319
+ fofc_mrc_file="{}_normalized_fofc.mrc".format(diffmap_prefix))
314
320
  # calc_fofc()
315
321
 
316
322
  def write_final_summary(st, refmac_summary, fscavg_text, output_prefix, is_mask_given):
@@ -356,6 +362,9 @@ Weight used: {final_weight}
356
362
  Open refined model and diffmap.mtz with COOT:
357
363
  coot --script {prefix}_coot.py
358
364
 
365
+ Open refined model, map and difference map with ChimeraX/ISOLDE:
366
+ chimerax {prefix}_chimerax.cxc
367
+
359
368
  {map_peaks_msg}
360
369
  =============================================================================
361
370
  """.format(rmsbond=refmac_summary["cycles"][-1].get("rms_bond", "???"),
@@ -681,8 +690,9 @@ def check_args(args):
681
690
  if args.keyword_file:
682
691
  args.keyword_file = sum(args.keyword_file, [])
683
692
  for f in args.keyword_file:
693
+ if not os.path.exists(f):
694
+ raise SystemExit(f"Error: keyword file was not found: {f}")
684
695
  logger.writeln("Keyword file: {}".format(f))
685
- assert os.path.exists(f)
686
696
  else:
687
697
  args.keyword_file = []
688
698
 
servalcat/utils/fileio.py CHANGED
@@ -86,6 +86,7 @@ def write_mmcif(st, cif_out, cif_ref=None):
86
86
  groups.entity_poly_seq = True
87
87
  groups.cis = True
88
88
  groups.conn = True
89
+ groups.software = True
89
90
  # FIXME is this all?
90
91
  try:
91
92
  doc = read_cif_safe(cif_ref)
@@ -306,7 +307,7 @@ def read_cif_safe(cif_in):
306
307
  return doc
307
308
  # read_cif_safe()
308
309
 
309
- def read_structure(xyz_in, assign_het_flags=True):
310
+ def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True):
310
311
  spext = splitext(xyz_in)
311
312
  st = None
312
313
  if spext[1].lower() in (".pdb", ".ent"):
@@ -354,6 +355,8 @@ def read_structure(xyz_in, assign_het_flags=True):
354
355
  logger.writeln("")
355
356
  if assign_het_flags:
356
357
  st.assign_het_flags()
358
+ if merge_chain_parts:
359
+ st.merge_chain_parts()
357
360
  return st
358
361
  # read_structure()
359
362
 
@@ -712,7 +715,7 @@ def read_small_molecule_files(files):
712
715
  logger.writeln("reflection data read from: {}".format(filename))
713
716
  elif b.find_loop("_refln_index_h"):
714
717
  mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
715
- except RuntimeError: # not a cif file
718
+ except ValueError: # not a cif file
716
719
  if ext == ".hkl":
717
720
  mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
718
721
  logger.writeln("reflection data read from: {}".format(filename))
servalcat/utils/hkl.py CHANGED
@@ -114,6 +114,12 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
114
114
  return HklData(mtz.cell, mtz.spacegroup, df)
115
115
  # hkldata_from_mtz()
116
116
 
117
+ def df_from_twin_data(twin_data, fc_labs):
118
+ df = pandas.DataFrame(data=twin_data.asu,
119
+ columns=["H","K","L"])
120
+ df[fc_labs] = twin_data.f_calc
121
+ return df
122
+
117
123
  def blur_mtz(mtz, B):
118
124
  # modify given mtz object
119
125
 
@@ -190,6 +196,17 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
190
196
  return n_bins
191
197
  # decide_n_bins()
192
198
 
199
+ def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
200
+ if data is not None:
201
+ data = data.astype(numpy.complex64) # we may want to keep complex128?
202
+ asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
203
+ if grid_size is None:
204
+ ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
205
+ else:
206
+ ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
207
+ return ma
208
+ # fft_map()
209
+
193
210
  class HklData:
194
211
  def __init__(self, cell, sg, df=None, binned_df=None):
195
212
  self.cell = cell
@@ -514,14 +531,9 @@ class HklData:
514
531
  # as_asu_data()
515
532
 
516
533
  def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
517
- if data is not None: data = data.astype(numpy.complex64) # we may want to keep complex128?
518
- asu = self.as_asu_data(label=label, data=data)
519
- if grid_size is None:
520
- ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
521
- else:
522
- ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
523
-
524
- return ma
534
+ if data is None:
535
+ data = self.df[label]
536
+ return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
525
537
  # fft_map()
526
538
 
527
539
  def d_eff(self, label):
servalcat/utils/model.py CHANGED
@@ -389,6 +389,19 @@ def cra_to_atomaddress(cra):
389
389
  return aa
390
390
  # cra_to_atomaddress()
391
391
 
392
+ def check_occupancies(st, raise_error=False):
393
+ bad = []
394
+ for cra in st[0].all():
395
+ if not 0 <= cra.atom.occ <= 1 + 1e-6:
396
+ bad.append(cra)
397
+ if bad:
398
+ logger.writeln("Bad occupancies:")
399
+ for cra in bad:
400
+ logger.writeln(f" {cra} occ= {cra.atom.occ:.4f}")
401
+ if raise_error:
402
+ raise RuntimeError("Please check your model and fix bad occupancies")
403
+ # check_occupancies()
404
+
392
405
  def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
393
406
  ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
394
407
  cs = gemmi.ContactSearch(special_pos_threshold * 2)
servalcat/utils/refmac.py CHANGED
@@ -148,6 +148,8 @@ class FixForRefmac:
148
148
  self.MAXNUM = 9999
149
149
  self.fixes = []
150
150
  self.resn_old_new = []
151
+ self.res_labels = []
152
+ self.entities = None
151
153
 
152
154
  def fix_before_topology(self, st, topo, fix_microheterogeneity=True, fix_resimax=True, fix_nonpolymer=True, add_gaps=False):
153
155
  self.chainids = set(chain.name for chain in st[0])
@@ -385,6 +387,14 @@ class FixForRefmac:
385
387
  st.shorten_ccd_codes()
386
388
  self.resn_old_new = [x for x in st.shortened_ccd_codes]
387
389
 
390
+ def store_res_labels(self, st):
391
+ self.res_labels = []
392
+ self.entities = gemmi.EntityList(st.entities)
393
+ for chain in st[0]:
394
+ self.res_labels.append([])
395
+ for res in chain:
396
+ self.res_labels[-1].append((res.subchain, res.entity_id, res.label_seq))
397
+
388
398
  def fix_model(self, st, changedict):
389
399
  chain_newid = set()
390
400
  for chain in st[0]:
@@ -411,6 +421,15 @@ class FixForRefmac:
411
421
  st.shortened_ccd_codes = self.resn_old_new
412
422
  st.restore_full_ccd_codes()
413
423
 
424
+ if self.res_labels:
425
+ st.entities = self.entities
426
+ #print(f"debug {len(self.res_labels)}")
427
+ #print(f"debug {[x.name for x in st[0]]}")
428
+ assert len(self.res_labels) == len(st[0])
429
+ for ic, chain in enumerate(st[0]):
430
+ assert len(self.res_labels[ic]) == len(chain)
431
+ for ir, res in enumerate(chain):
432
+ res.subchain, res.entity_id, res.label_seq = self.res_labels[ic][ir]
414
433
 
415
434
  class Refmac:
416
435
  def __init__(self, **kwargs):
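[editorial note: a per-file header was lost in extraction here. The old/new line offset resets to zero (521/521 after 411/421), so the following hunks do not belong to servalcat/utils/refmac.py; presumably they are from servalcat/utils/restraints.py — to be confirmed against the package.]
@@ -521,7 +521,7 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f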
521
521
  cra2.residue, cra2.atom.name, cra2.atom.altloc)
522
522
  if link:
523
523
  con.link_id = link.id
524
- elif find_metal_links and con.type == gemmi.ConnectionType.MetalC:
524
+ elif con.type == gemmi.ConnectionType.MetalC:
525
525
  logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
526
526
  if cra2.atom.element.is_metal:
527
527
  inv = True # make metal first
@@ -587,6 +587,8 @@ def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_f
587
587
  logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
588
588
  atoms_str,
589
589
  r.dist, max_ideal))
590
+ else:
591
+ continue
590
592
  n_found += 1
591
593
  if not will_be_added: continue
592
594
  con = gemmi.Connection()
@@ -653,12 +655,7 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
653
655
  gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid):
654
656
  polymers.setdefault(p_type, []).append((chain, rs))
655
657
 
656
- scoring = gemmi.AlignmentScoring()
657
- scoring.match = 0
658
- scoring.mismatch = -1
659
- scoring.gapo = 0
660
- scoring.gape = -1
661
-
658
+ scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
662
659
  al_res = []
663
660
  ncslist = ext.NcsList()
664
661
  for pt in polymers:
@@ -668,12 +665,25 @@ def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
668
665
  q = [x.name for x in pols[i][1]]
669
666
  for j in range(i+1, len(pols)):
670
667
  al = gemmi.align_sequence_to_polymer(q, pols[j][1], pt, scoring)
668
+ if 0: # debug
669
+ wrap_width = 100
670
+ logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
671
+ logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
672
+ logger.writeln(f"match_count: {al.match_count}")
673
+ s1 = gemmi.one_letter_code(q)
674
+ p_seq = gemmi.one_letter_code(pols[j][1].extract_sequence())
675
+ p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
676
+ for k in range(0, len(p1), wrap_width):
677
+ logger.writeln(" seq. {}".format(p1[k:k+wrap_width]))
678
+ logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
679
+ logger.writeln(" model {}\n".format(p2[k:k+wrap_width]))
671
680
  if al.match_count < min_nalign: continue
672
681
  su = gemmi.calculate_superposition(pols[i][1], pols[j][1], pt, gemmi.SupSelect.All)
673
682
  obj = ext.NcsList.Ncs(al, pols[i][1], pols[j][1])
674
683
  obj.calculate_local_rms(rms_loc_nlen)
675
- if len(obj.local_rms) == 0: continue
676
- ave_local_rms = numpy.mean(obj.local_rms)
684
+ if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
685
+ continue
686
+ ave_local_rms = numpy.nanmean(obj.local_rms)
677
687
  if ave_local_rms > max_rms_loc: continue
678
688
  ncslist.ncss.append(obj)
679
689
  al_res.append({"chain_1": "{} ({}..{})".format(pols[i][0].name, pols[i][1][0].seqid, pols[i][1][-1].seqid),
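[editorial note: a per-file header was lost in extraction here. Hunk line numbers restart below the previous hunk (174 after 668/665), so the following hunks belong to yet another file; presumably servalcat/xtal/french_wilson.py — to be confirmed against the package.]
@@ -174,52 +174,55 @@ def ll_shift_B(x, ssqmat, hkldata, adpdirs):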
174
174
  g, H = numpy.dot(g, adpdirs.T), numpy.dot(adpdirs, numpy.dot(H, adpdirs.T))
175
175
  return -numpy.dot(g, numpy.linalg.pinv(H))
176
176
 
177
+ def expected_F_from_int(Io, sigo, k_ani, eps, c, S):
178
+ to = Io / sigo - sigo / c / k_ani**2 / S / eps
179
+ k_num = numpy.where(c == 1, 0.5, 0.)
180
+ F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
181
+ integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
182
+ Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
183
+ integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
184
+ varF = Fsq - F**2
185
+ return F, numpy.sqrt(varF)
186
+
177
187
  def french_wilson(hkldata, B_aniso, labout=None):
178
188
  if labout is None: labout = ["F", "SIGF"]
179
- hkldata.df[labout[0]] = numpy.nan
180
- hkldata.df[labout[1]] = numpy.nan
181
- hkldata.df["to1"] = numpy.nan
182
189
  k_ani = hkldata.debye_waller_factors(b_cart=B_aniso)
183
-
190
+ has_ano = "I(+)" in hkldata.df and "I(-)" in hkldata.df
191
+ if has_ano:
192
+ ano_data = hkldata.df[["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"]].to_numpy()
193
+ if len(labout) == 2:
194
+ labout += [f"{labout[0]}(+)", f"{labout[1]}(+)", f"{labout[0]}(-)", f"{labout[1]}(-)"]
195
+ hkldata.df[labout] = numpy.nan
184
196
  for i_bin, idxes in hkldata.binned():
185
197
  S = hkldata.binned_df.S[i_bin]
186
198
  c = hkldata.df.centric.to_numpy()[idxes] + 1 # 1 for acentric, 2 for centric
187
199
  Io = hkldata.df.I.to_numpy()[idxes]
188
200
  sigo = hkldata.df.SIGI.to_numpy()[idxes]
189
201
  eps = hkldata.df.epsilon.to_numpy()[idxes]
190
- to = Io / sigo - sigo / c / k_ani[idxes]**2 / S / eps
191
- k_num = numpy.where(c == 1, 0.5, 0.)
192
- F = numpy.sqrt(sigo) * ext.integ_J_ratio(k_num, k_num - 0.5, False, to, 0., 1., c,
193
- integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
194
- Fsq = sigo * ext.integ_J_ratio(k_num + 0.5, k_num - 0.5, False, to, 0., 1., c,
195
- integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
196
- varF = Fsq - F**2
202
+ F, sigF = expected_F_from_int(Io, sigo, k_ani[idxes], eps, c, S)
197
203
  hkldata.df.loc[idxes, labout[0]] = F
198
- hkldata.df.loc[idxes, labout[1]] = numpy.sqrt(varF)
199
- hkldata.df.loc[idxes, "to1"] = to
204
+ hkldata.df.loc[idxes, labout[1]] = sigF
205
+ if has_ano:
206
+ Fp, sigFp = expected_F_from_int(ano_data[idxes,0], ano_data[idxes,1], k_ani[idxes], eps, c, S)
207
+ Fm, sigFm = expected_F_from_int(ano_data[idxes,2], ano_data[idxes,3], k_ani[idxes], eps, c, S)
208
+ hkldata.df.loc[idxes, labout[2]] = Fp
209
+ hkldata.df.loc[idxes, labout[3]] = sigFp
210
+ hkldata.df.loc[idxes, labout[4]] = Fm
211
+ hkldata.df.loc[idxes, labout[5]] = sigFm
200
212
 
201
213
  def main(args):
202
214
  if not args.output_prefix:
203
215
  args.output_prefix = utils.fileio.splitext(os.path.basename(args.hklin))[0] + "_fw"
216
+ try:
217
+ mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
218
+ except RuntimeError as e:
219
+ raise SystemExit("Error: {}".format(e))
204
220
  if not args.labin:
205
- try:
206
- mtz = utils.fileio.read_mmhkl(args.hklin, cif_index=args.hklin_index)
207
- except RuntimeError as e:
208
- raise SystemExit("Error: {}".format(e))
209
- dlabs = utils.hkl.mtz_find_data_columns(mtz)
210
- if dlabs["J"]:
211
- labin = dlabs["J"][0]
212
- else:
213
- raise SystemExit("Intensity not found from mtz")
214
- flabs = utils.hkl.mtz_find_free_columns(mtz)
215
- if flabs:
216
- labin += [flabs[0]]
217
- logger.writeln("MTZ columns automatically selected: {}".format(labin))
221
+ labin = sigmaa.decide_mtz_labels(mtz, require=("K", "J"))
218
222
  else:
219
223
  labin = args.labin.split(",")
220
-
221
224
  try:
222
- hkldata, _, _, _, _ = sigmaa.process_input(hklin=args.hklin,
225
+ hkldata, _, _, _, _ = sigmaa.process_input(hklin=mtz,
223
226
  labin=labin,
224
227
  n_bins=args.nbins,
225
228
  free=None,
@@ -237,6 +240,9 @@ def main(args):
237
240
  mtz_out = args.output_prefix+".mtz"
238
241
  lab_out = ["F", "SIGF", "I", "SIGI"]
239
242
  labo_types = {"F":"F", "SIGF":"Q", "I":"J", "SIGI":"Q"}
243
+ if "I(+)" in hkldata.df and "I(-)" in hkldata.df:
244
+ lab_out += ["F(+)", "SIGF(+)", "F(-)", "SIGF(-)"]
245
+ labo_types.update({"F(+)":"G", "SIGF(+)":"L", "F(-)":"G", "SIGF(-)":"L"})
240
246
  if len(labin) == 3:
241
247
  lab_out.append("FREE")
242
248
  labo_types[lab_out[-1]] = "I"