servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

@@ -185,6 +185,17 @@ def add_arguments(p):
185
185
  parser.add_argument('-o', '--output_prefix',
186
186
  help="default: taken from input file")
187
187
 
188
+ # conf
189
+ parser = subparsers.add_parser("conf", description = 'Compare conformations')
190
+ parser.add_argument('models', nargs="+")
191
+ parser.add_argument("--min_diff", type=float, default=60.)
192
+ parser.add_argument('--ligand', nargs="*", action="append")
193
+ parser.add_argument("--monlib",
194
+ help="Monomer library path. Default: $CLIBD_MON")
195
+ parser.add_argument("--same_chain", action='store_true', help="Only between same chains (more than one file)")
196
+ parser.add_argument('-o', '--output_prefix', default="conf",
197
+ help="")
198
+
188
199
  # adp
189
200
  parser = subparsers.add_parser("adp", description = 'ADP analysis')
190
201
  parser.add_argument('model')
@@ -281,6 +292,9 @@ def add_arguments(p):
281
292
  parser = subparsers.add_parser("seq", description = 'Print/align model sequence')
282
293
  parser.add_argument("--model", required=True)
283
294
  parser.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
295
+ parser.add_argument('--scoring', nargs=6, type=int, default=(1, 0, -1, -1, 0, -1),
296
+ metavar=("match", "mismatch", "gapo", "gape", "good_gapo", "bad_gapo"),
297
+ help="scoring function. default: %(default)s")
284
298
 
285
299
  # dnarna
286
300
  parser = subparsers.add_parser("dnarna", description = 'DNA to RNA or RNA to DNA model conversion')
@@ -919,6 +933,135 @@ def geometry(args):
919
933
  fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
920
934
  # geometry()
921
935
 
936
def compare_conf(args):
    """Compare torsion angles between NCS-related residues and report differences.

    All files in ``args.models`` are read and merged into one structure (chain
    names get an ``<index>_`` prefix when more than one file is given).
    Torsion angles of peptide and nucleic-acid residues are taken from the
    restraint topology, and for every residue pair listed by the NCS
    restraints the absolute angle difference per torsion label is computed.
    Differences larger than ``args.min_diff`` are logged as a table and written
    into a Coot script named ``<args.output_prefix>_coot.py``.

    Parameters
    ----------
    args : argparse.Namespace
        Uses ``models``, ``ligand``, ``monlib``, ``same_chain``, ``min_diff``
        and ``output_prefix``.

    Raises
    ------
    SystemExit
        If loading the monomer library or preparing the topology raises
        ``RuntimeError``.
    """
    def angle_abs_diff(a, b, full=360.):
        # from gemmi/math.hpp
        d = abs(a - b)
        if d > full:
            d -= numpy.floor(d / full) * full
        return min(d, full - d)
    # angle_abs_diff()

    # --ligand uses nargs="*" with action="append": flatten the list of lists
    if args.ligand: args.ligand = sum(args.ligand, [])
    st = None
    for i, f in enumerate(args.models):
        tmp = fileio.read_structure(f)
        if len(args.models) > 1:
            # make chain names unique across files
            for chain in tmp[0]:
                chain.name = f"{i+1}_{chain.name}"
        if i == 0:
            st = tmp
        else:
            for chain in tmp[0]:
                st[0].add_chain(chain)
    try:
        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
                                                 stop_for_unknowns=True)
    except RuntimeError as e:
        raise SystemExit(f"Error: {e}")

    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
    try:
        topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
                                              check_hydrogen=False)
    except RuntimeError as e:
        raise SystemExit(f"Error: {e}")
    ncslist = restraints.prepare_ncs_restraints(st)
    lookup = {x.atom: x for x in st[0].all()}
    ptypes = {x.name: x.polymer_type for x in st.entities}
    resn_lookup = {(chain.name, res.seqid): res.name for chain in st[0] for res in chain}
    # confs[chain name][seqid][torsion label] = angle in degrees
    confs = {}
    for t in topo.torsions:
        cra = lookup[t.atoms[0]]
        ptype = ptypes[cra.residue.entity_id]
        is_peptide = ptype in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD)
        is_peptide_tors = t.restr.label.startswith("chi") or t.restr.label in ("omega", "phi", "psi")
        is_na = ptype in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
        is_na_tors = t.restr.label in ("C2e-chi", "alpha", "beta", "gamma", "C2e-nyu0", "epsilon", "zeta")
        if (is_peptide and is_peptide_tors) or (is_na and is_na_tors):
            confs.setdefault(cra.chain.name, {}).setdefault(cra.residue.seqid, {})[t.restr.label] = numpy.rad2deg(t.calculate())
    # torsions compared with a period of 180 instead of 360 degrees
    fulls = {("ARG", "chi5"): 180., ("TYR", "chi2"): 180., ("PHE", "chi2"): 180., ("ASP", "chi2"): 180., ("GLU", "chi3"): 180.}
    ret = []
    for_coot = []
    for ncs in ncslist.ncss:
        c1, c2 = ncs.chains
        # with several files, chain names look like "<file index>_<name>";
        # --same_chain keeps only pairs whose part from "_" onwards is equal
        if args.same_chain and len(args.models) > 1 and c1[c1.index("_"):] != c2[c2.index("_"):]:
            continue
        for s1, s2 in ncs.seqids:
            if c1 in confs and s1 in confs[c1] and c2 in confs and s2 in confs[c2]:
                conf1, conf2 = confs[c1][s1], confs[c2][s2]
                resn = resn_lookup[(c1, s1)]
                for t in conf1:
                    if t in conf2:
                        d = angle_abs_diff(conf1[t], conf2[t], fulls.get((resn, t), 360.))
                        ret.append((c1, s1, c2, s2, resn, t, conf1[t], conf2[t], d))
                        if d > args.min_diff:
                            # for_coot is embedded in the generated script via repr().
                            # d is a numpy scalar; NumPy >= 2 prints it as
                            # "np.float64(...)", which the script cannot evaluate,
                            # so store a plain Python float.
                            for_coot.append((c1, s1.num, c2, s2.num, resn, t, float(d)))
    df = pandas.DataFrame(ret, columns=["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "conf_1", "conf_2", "diff"])
    df.sort_values("diff", ascending=False, inplace=True)
    logger.writeln(f"\nList of torsion angle differences (>{args.min_diff})")
    logger.writeln(df[df["diff"] > args.min_diff].to_string(index=False))

    # largest difference first
    for_coot.sort(key=lambda x:-x[-1])
    coot_out = args.output_prefix + "_coot.py"
    with open(coot_out, "w") as ofs:
        # https://python-gtk-3-tutorial.readthedocs.io/en/latest/treeview.html
        # The only "{}" in the template ("self.data = {}") is the str.format
        # placeholder that receives the for_coot list.
        ofs.write("""\
from __future__ import absolute_import, division, print_function
import re
import gtk
class coot_serval_conf_list:
    def __init__(self):
        window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        window.set_title("Different conformations (Servalcat)")
        window.set_default_size(600, 600)
        scrolled_win = gtk.ScrolledWindow()
        scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
        vbox = gtk.VBox(False, 2)
        self.liststore = gtk.ListStore(str, int, str, int, str, str, float)
        self.filter = self.liststore.filter_new()
        self.treeview = gtk.TreeView(model=self.filter)
        for i, column_title in enumerate(["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "diff"]):
            renderer = gtk.CellRendererText()
            column = gtk.TreeViewColumn(column_title, renderer, text=i)
            self.treeview.append_column(column)
        self.data = {}
        self.add_data()
        scrolled_win.add_with_viewport(self.treeview) # add?
        vbox.pack_start(scrolled_win, True, True, 0)
        window.add(vbox)
        window.show_all()
        self.treeview.connect("row-activated", self.on_row_activated)

    def on_row_activated(self, treeview, path, column):
        assert len(path) == 1
        col_idx = [i for i, c in enumerate(treeview.get_columns()) if column == c][0]
        row = self.liststore[path[0]]
        if col_idx < 2:
            chain, resi = row[0], row[1]
        elif col_idx < 4:
            chain, resi = row[2], row[3]
        else:
            return
        if re.search("^[0-9]+_[0-9A-Za-z]", chain):
            chain = chain[chain.index("_")+1:]
        imol = active_atom_spec()[1][0]
        for name in (" CA ", " C1'"):
            a = get_atom(imol, chain, resi, "", name)
            if a:
                set_rotation_center(*a[2])
                break

    def add_data(self):
        for i, d in enumerate(self.data):
            self.liststore.append(d)

gui = coot_serval_conf_list()
""".format(for_coot))
    logger.writeln("\nRun:")
    logger.writeln(f"coot --script {coot_out}")
# compare_conf()
1064
+
922
1065
  def adp_stats(args):
923
1066
  if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
924
1067
  st = fileio.read_structure(args.model)
@@ -1242,6 +1385,9 @@ def seq(args):
1242
1385
  for sf in args.seq:
1243
1386
  seqs.extend(fileio.read_sequence_file(sf))
1244
1387
 
1388
+ sc = gemmi.AlignmentScoring()
1389
+ sc.match, sc.mismatch, sc.gapo, sc.gape, sc.good_gapo, sc.bad_gapo = args.scoring
1390
+
1245
1391
  st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
1246
1392
  model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
1247
1393
  for chain in st[0]:
@@ -1257,17 +1403,20 @@ def seq(args):
1257
1403
  gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}.get(p_type, gemmi.ResidueKind.AA)
1258
1404
  s = [gemmi.expand_one_letter(x, kind) for x in seq]
1259
1405
  if None in s: continue
1260
- results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type), seq])
1406
+ #als = [gemmi.align_sequence_to_polymer(s, p, p_type, gemmi.AlignmentScoring(x)) for x in ("s", "p")]
1407
+ #results.append([name, max(als, key=lambda x: x.match_count), seq])
1408
+ results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type, sc), seq])
1261
1409
 
1262
1410
  if results:
1263
1411
  logger.writeln("Chain: {}".format(chain.name))
1264
1412
  logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
1265
- name, al, s1 = max(results, key=lambda x: x[1].score)
1413
+ name, al, s1 = max(results, key=lambda x: (x[1].match_count, x[1].score))
1266
1414
  logger.writeln(" match: {}".format(name))
1415
+ logger.writeln(" aligned: {}".format(al.match_count))
1267
1416
  logger.writeln(" score: {}".format(al.score))
1268
1417
  p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
1269
- unkseq = [x.start() for x in re.finditer("\-", p1)]
1270
- mismatches = [x.start() for x in re.finditer("\.", al.match_string)]
1418
+ unkseq = [x.start() for x in re.finditer(r"\-", p1)]
1419
+ mismatches = [x.start() for x in re.finditer(r"\.", al.match_string)]
1271
1420
  if mismatches or unkseq:
1272
1421
  idxes = {x.start(): i for i, x in enumerate(re.finditer("[^-]", p2))}
1273
1422
  seqnums = [str(x.seqid) for x in p]
@@ -1371,6 +1520,7 @@ def main(args):
1371
1520
  merge_models=merge_models,
1372
1521
  merge_dicts=merge_dicts,
1373
1522
  geom=geometry,
1523
+ conf=compare_conf,
1374
1524
  adp=adp_stats,
1375
1525
  power=show_power,
1376
1526
  fcalc=fcalc,
servalcat/utils/fileio.py CHANGED
@@ -17,7 +17,6 @@ import re
17
17
  import subprocess
18
18
  import gemmi
19
19
  import numpy
20
- import numpy.lib.recfunctions
21
20
  import gzip
22
21
 
23
22
  def splitext(path):
@@ -83,9 +82,12 @@ def write_mmcif(st, cif_out, cif_ref=None):
83
82
  groups.scale = True
84
83
  groups.assembly = True
85
84
  groups.entity = True
85
+ groups.entity_poly = True
86
86
  groups.entity_poly_seq = True
87
87
  groups.cis = True
88
88
  groups.conn = True
89
+ groups.software = True
90
+ groups.auth_all = True
89
91
  # FIXME is this all?
90
92
  try:
91
93
  doc = read_cif_safe(cif_ref)
@@ -106,23 +108,27 @@ def write_mmcif(st, cif_out, cif_ref=None):
106
108
  block.find_mmcif_category("_atom_sites.").erase()
107
109
  st_new.update_mmcif_block(block, groups)
108
110
  if "_entry.id" in st_new.info: st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
109
- doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
111
+ doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
110
112
  else:
111
113
  st_new.name = st_new.name[:78] # this will become _entry.id
112
114
  if "_entry.id" in st_new.info: st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
113
- groups = gemmi.MmcifOutputGroups(True)
115
+ groups = gemmi.MmcifOutputGroups(True, auth_all=True)
114
116
  doc = gemmi.cif.Document()
115
117
  block = doc.add_new_block("new")
116
118
  st_new.update_mmcif_block(block, groups)
117
- doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
119
+ doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
118
120
  # write_mmcif()
119
121
 
120
122
  def write_pdb(st, pdb_out):
121
123
  logger.writeln("Writing PDB file: {}".format(pdb_out))
124
+ st = st.clone()
122
125
  chain_id_lens = [len(x) for x in model.all_chain_ids(st)]
123
126
  if chain_id_lens and max(chain_id_lens) > 2:
124
- st = st.clone()
125
127
  st.shorten_chain_names()
128
+ st.shorten_ccd_codes()
129
+ if st.shortened_ccd_codes:
130
+ msg = " ".join("{}->{}".format(o,n) for o,n in st.shortened_ccd_codes)
131
+ logger.writeln(" Using shortened residue names in the output pdb file: " + msg)
126
132
  st.write_pdb(pdb_out, use_linkr=True)
127
133
  # write_pdb()
128
134
 
@@ -306,7 +312,7 @@ def read_cif_safe(cif_in):
306
312
  return doc
307
313
  # read_cif_safe()
308
314
 
309
- def read_structure(xyz_in, assign_het_flags=True):
315
+ def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True):
310
316
  spext = splitext(xyz_in)
311
317
  st = None
312
318
  if spext[1].lower() in (".pdb", ".ent"):
@@ -338,6 +344,8 @@ def read_structure(xyz_in, assign_het_flags=True):
338
344
  if st is None:
339
345
  logger.writeln("Reading chemical component file: {}".format(xyz_in))
340
346
  st = gemmi.make_structure_from_chemcomp_block(block)
347
+ for i in range(len(st)-1):
348
+ del st[1]
341
349
  elif spext[1].lower() in (".ins", ".res"):
342
350
  logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
343
351
  st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
@@ -354,6 +362,8 @@ def read_structure(xyz_in, assign_het_flags=True):
354
362
  logger.writeln("")
355
363
  if assign_het_flags:
356
364
  st.assign_het_flags()
365
+ if merge_chain_parts:
366
+ st.merge_chain_parts()
357
367
  return st
358
368
  # read_structure()
359
369
 
@@ -453,7 +463,7 @@ def merge_ligand_cif(cifs_in, cif_out):
453
463
  if b.name not in list_names:
454
464
  doc.add_copied_block(b)
455
465
 
456
- doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
466
+ doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
457
467
  # merge_ligand_cif()
458
468
 
459
469
  def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
@@ -568,7 +578,7 @@ def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO supp
568
578
  symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])
569
579
 
570
580
  ss.symops = [op.triplet() for op in set(symms)]
571
- ss.set_spacegroup("s")
581
+ ss.determine_and_set_spacegroup("s")
572
582
  # in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
573
583
  if ss.spacegroup is None:
574
584
  raise RuntimeError("Cannot construct space group from symbols: {}".format(ss.symops))
@@ -595,7 +605,7 @@ def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
595
605
  # wavelength = l[32:40]
596
606
 
597
607
  ints = gemmi.Intensities()
598
- ints.set_data(cell, sg, hkls, vals, sigs)
608
+ ints.set_data(cell, sg, numpy.asarray(hkls), numpy.asarray(vals), numpy.asarray(sigs))
599
609
  ints.merge_in_place(gemmi.DataType.Anomalous)
600
610
  if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
601
611
  logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
@@ -712,7 +722,7 @@ def read_small_molecule_files(files):
712
722
  logger.writeln("reflection data read from: {}".format(filename))
713
723
  elif b.find_loop("_refln_index_h"):
714
724
  mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
715
- except RuntimeError: # not a cif file
725
+ except ValueError: # not a cif file
716
726
  if ext == ".hkl":
717
727
  mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
718
728
  logger.writeln("reflection data read from: {}".format(filename))
servalcat/utils/hkl.py CHANGED
@@ -7,7 +7,6 @@ Mozilla Public License, version 2.0; see LICENSE.
7
7
  """
8
8
  from __future__ import absolute_import, division, print_function, generators
9
9
  import numpy
10
- import numpy.lib.recfunctions
11
10
  import scipy.optimize
12
11
  import pandas
13
12
  import gemmi
@@ -17,27 +16,28 @@ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex1
17
16
  to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
18
17
 
19
18
  def r_factor(fo, fc):
20
- if fo.size == 0:
19
+ denom = numpy.nansum(fo)
20
+ if denom == 0:
21
21
  return numpy.nan
22
- return numpy.nansum(numpy.abs(fo-fc)) / numpy.nansum(fo)
22
+ return numpy.nansum(numpy.abs(fo-fc)) / denom
23
23
  def correlation(obs, calc):
24
- if obs.size == 0:
25
- return numpy.nan
26
24
  sel = numpy.isfinite(obs)
25
+ if obs.size == 0 or numpy.all(~sel):
26
+ return numpy.nan
27
27
  return numpy.corrcoef(obs[sel], calc[sel])[0,1]
28
28
 
29
29
  def df_from_asu_data(asu_data, label):
30
- df = pandas.DataFrame(data=asu_data.miller_array,
30
+ df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
31
31
  columns=["H","K","L"])
32
- if asu_data.value_array.dtype.names == ('value', 'sigma'):
33
- df[label] = to64(asu_data.value_array["value"])
34
- df["SIG"+label] = to64(asu_data.value_array["sigma"])
32
+ if type(asu_data) is gemmi.ValueSigmaAsuData:
33
+ df[label] = to64(asu_data.value_array[:,0])
34
+ df["SIG"+label] = to64(asu_data.value_array[:,1])
35
35
  else:
36
36
  df[label] = to64(asu_data.value_array)
37
37
  return df
38
38
 
39
39
  def df_from_raw(miller_array, value_array, label):
40
- df = pandas.DataFrame(data=miller_array,
40
+ df = pandas.DataFrame(data=miller_array.astype(numpy.int32),
41
41
  columns=["H","K","L"])
42
42
  df[label] = to64(value_array)
43
43
  return df
@@ -93,7 +93,7 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
93
93
  if mismatches:
94
94
  raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
95
95
 
96
- df = pandas.DataFrame(data=numpy.array(mtz, copy=False), columns=mtz.column_labels())
96
+ df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
97
97
  df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
98
98
  df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
99
99
  for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
@@ -114,6 +114,12 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
114
114
  return HklData(mtz.cell, mtz.spacegroup, df)
115
115
  # hkldata_from_mtz()
116
116
 
117
def df_from_twin_data(twin_data, fc_labs):
    """Build a DataFrame with H, K, L columns plus the calculated-value columns.

    Parameters
    ----------
    twin_data
        Object providing ``asu`` (used as the H, K, L columns) and ``f_calc``
        (assigned to the ``fc_labs`` columns).
        NOTE(review): presumably ``asu`` is N x 3 and ``f_calc`` has one
        column per label -- confirm against the caller.
    fc_labs
        Column label(s) under which ``twin_data.f_calc`` is stored.

    Returns
    -------
    pandas.DataFrame
    """
    hkl_columns = ["H", "K", "L"]
    table = pandas.DataFrame(twin_data.asu, columns=hkl_columns)
    table[fc_labs] = twin_data.f_calc
    return table
122
+
117
123
  def blur_mtz(mtz, B):
118
124
  # modify given mtz object
119
125
 
@@ -170,7 +176,7 @@ def mtz_selected(mtz, columns):
170
176
  dataset_id=col_dict[col].dataset_id, expand_data=False)
171
177
 
172
178
  idxes = [col_idxes[col] for col in columns]
173
- data = numpy.array(mtz, copy=False)[:, idxes]
179
+ data = mtz.array[:, idxes]
174
180
  mtz2.set_data(data)
175
181
  return mtz2
176
182
  # mtz_selected()
@@ -190,6 +196,19 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
190
196
  return n_bins
191
197
  # decide_n_bins()
192
198
 
199
def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
    """Transform complex structure-factor data into a map with gemmi.

    Parameters
    ----------
    cell, sg, miller_array
        Passed unchanged to ``gemmi.ComplexAsuData``.
    data
        Values for each Miller index; converted to ``numpy.complex64`` when
        not None. A pandas Series is turned into a numpy array first.
    grid_size
        If None, the grid is chosen by ``transform_f_phi_to_map`` using
        ``sample_rate``; otherwise the data are placed on this grid.
    sample_rate
        Only used when ``grid_size`` is None.

    Returns
    -------
    The object returned by the gemmi transform call.
    """
    if data is not None:
        # we may want to keep complex128?
        data = data.astype(numpy.complex64)
    if type(data) is pandas.Series:
        data = data.to_numpy()
    asu_data = gemmi.ComplexAsuData(cell, sg, miller_array, data)
    if grid_size is not None:
        # half_l=False
        return gemmi.transform_f_phi_grid_to_map(asu_data.get_f_phi_on_grid(grid_size))
    # half_l=True
    return asu_data.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0))
# fft_map()
211
+
193
212
  class HklData:
194
213
  def __init__(self, cell, sg, df=None, binned_df=None):
195
214
  self.cell = cell
@@ -206,7 +225,7 @@ class HklData:
206
225
  def switch_to_asu(self):
207
226
  # Need to care phases
208
227
  assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
209
- hkl = self.miller_array().to_numpy()
228
+ hkl = self.miller_array()
210
229
  self.sg.switch_to_asu(hkl)
211
230
  self.df[["H","K","L"]] = hkl
212
231
  # in some environment type changes to int64 even though hkl's dtype is int32
@@ -248,11 +267,11 @@ class HklData:
248
267
  # merge_asu_data()
249
268
 
250
269
  def miller_array(self):
251
- return self.df[["H","K","L"]]
270
+ return self.df[["H","K","L"]].to_numpy()
252
271
 
253
272
  def s_array(self):
254
273
  hkl = self.miller_array()
255
- return numpy.dot(hkl, self.cell.fractionalization_matrix)
274
+ return numpy.dot(hkl, self.cell.frac.mat.array)
256
275
 
257
276
  def ssq_mat(self):
258
277
  # k_aniso = exp(-s^T B_aniso s / 4)
@@ -271,8 +290,8 @@ class HklData:
271
290
  s2 = 1 / self.d_spacings()**2
272
291
  return numpy.exp(-b_iso / 4 * s2)
273
292
  if b_cart is not None:
274
- b_star = b_cart.transformed_by(self.cell.fractionalization_matrix)
275
- return numpy.exp(-b_star.r_u_r(self.miller_array().to_numpy()) / 4)
293
+ b_star = b_cart.transformed_by(self.cell.frac.mat)
294
+ return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
276
295
 
277
296
  def calc_d(self):
278
297
  self.df["d"] = self.cell.calculate_d_array(self.miller_array())
@@ -297,8 +316,10 @@ class HklData:
297
316
  self.df.sort_values("d", ascending=ascending, inplace=True)
298
317
  # sort_by_resolution()
299
318
 
300
- def d_min_max(self):
319
+ def d_min_max(self, labs=None):
301
320
  d = self.d_spacings()
321
+ if labs:
322
+ d = d[~self.df[labs].isna().any(axis=1)]
302
323
  return numpy.min(d), numpy.max(d)
303
324
  # d_min_max()
304
325
 
@@ -494,9 +515,7 @@ class HklData:
494
515
  if label_sigma is not None:
495
516
  assert data is None
496
517
  assert not numpy.iscomplexobj(self.df[label])
497
- sigma = self.df[label_sigma]
498
- data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
499
- numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
518
+ data = self.df[[label,label_sigma]].to_numpy()
500
519
  elif data is None:
501
520
  data = self.df[label]
502
521
 
@@ -514,14 +533,9 @@ class HklData:
514
533
  # as_asu_data()
515
534
 
516
535
  def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
517
- if data is not None: data = data.astype(numpy.complex64) # we may want to keep complex128?
518
- asu = self.as_asu_data(label=label, data=data)
519
- if grid_size is None:
520
- ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
521
- else:
522
- ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
523
-
524
- return ma
536
+ if data is None:
537
+ data = self.df[label].to_numpy()
538
+ return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
525
539
  # fft_map()
526
540
 
527
541
  def d_eff(self, label):
servalcat/utils/logger.py CHANGED
@@ -18,12 +18,15 @@ class Logger(object):
18
18
  def __init__(self, file_out=None, append=True):
19
19
  self.ofs = None
20
20
  self.stopped = False
21
+ self.prefix = ""
21
22
  if file_out:
22
23
  self.set_file(file_out, append)
23
24
  # __init__()
24
25
  def stop_logging(self): self.stopped = True
25
26
  def start_logging(self): self.stopped = False
26
-
27
+ def set_prefix(self, p): self.prefix = p
28
+ def clear_prefix(self): self.prefix = ""
29
+
27
30
  def set_file(self, file_out, append=True):
28
31
  try:
29
32
  self.ofs = open(file_out, "a" if append else "w")
@@ -33,6 +36,8 @@ class Logger(object):
33
36
 
34
37
  def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
35
38
  if self.stopped: return
39
+ if self.prefix:
40
+ l = "\n".join(self.prefix + x for x in l.splitlines(keepends=True))
36
41
  print(l, end=end, file=print_fs, flush=flush)
37
42
  for f in (self.ofs, fs):
38
43
  if f is not None:
@@ -69,6 +74,25 @@ close = _logger.close
69
74
  flush = _logger.flush
70
75
  stop = _logger.stop_logging
71
76
  start = _logger.start_logging
77
+ set_prefix = _logger.set_prefix
78
+ clear_prefix = _logger.clear_prefix
79
+
80
def with_prefix(prefix):
    """Return a context manager that prefixes log lines with ``prefix``.

    On entry the module-level logger's prefix is set to ``prefix`` and the
    logger is returned. On exit the prefix that was active before entry is
    restored, so nested ``with_prefix`` blocks no longer wipe the prefix of the
    enclosing block. Without an enclosing prefix the result is the same as
    clearing it.
    """
    class WithPrefix(object):
        def __enter__(self):
            # remember what was active so __exit__ can put it back
            self._previous = _logger.prefix
            _logger.set_prefix(prefix)
            return _logger
        def __exit__(self, exc_type, exc_val, exc_tb):
            _logger.set_prefix(self._previous)
    return WithPrefix()
88
+
89
def silent():
    """Return a new object whose ``write`` and ``flush`` do nothing."""
    class Silent(object):
        """Accepts any ``write`` arguments and discards them."""
        def write(self, *args, **kwargs):
            return None
        def flush(self):
            return None
    return Silent()
72
96
 
73
97
  def dependency_versions():
74
98
  import gemmi
servalcat/utils/maps.py CHANGED
@@ -268,9 +268,9 @@ def optimize_peak(grid, ini_pos):
268
268
  logger.writeln("Finding peak using interpolation..")
269
269
  x = grid.unit_cell.fractionalize(ini_pos)
270
270
  logger.writeln(" x0: [{}, {}, {}]".format(*x.tolist()))
271
- logger.writeln(" f0: {}".format(-grid.tricubic_interpolation(x)))
271
+ logger.writeln(" f0: {}".format(-grid.interpolate_value(x, order=3)))
272
272
 
273
- res = scipy.optimize.minimize(fun=lambda x:-grid.tricubic_interpolation(gemmi.Fractional(*x)),
273
+ res = scipy.optimize.minimize(fun=lambda x:-grid.interpolate_value(gemmi.Fractional(*x), order=3),
274
274
  x0=x.tolist(),
275
275
  jac=lambda x:-numpy.array(grid.tricubic_interpolation_der(gemmi.Fractional(*x))[1:])
276
276
  )
servalcat/utils/model.py CHANGED
@@ -347,8 +347,8 @@ def translate_into_box(st, origin=None, apply_shift=True):
347
347
  if origin is None: origin = gemmi.Position(0,0,0)
348
348
 
349
349
  # apply unit cell translations to put model into a box (unit cell)
350
- omat = numpy.array(st.cell.orthogonalization_matrix)
351
- fmat = numpy.array(st.cell.fractionalization_matrix).transpose()
350
+ omat = st.cell.orth.mat.array
351
+ fmat = st.cell.frac.mat.array.transpose()
352
352
  com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
353
353
  shift = sum([omat[:,i]*numpy.floor(1-numpy.dot(com, fmat[:,i])) for i in range(3)])
354
354
  tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
@@ -389,6 +389,19 @@ def cra_to_atomaddress(cra):
389
389
  return aa
390
390
  # cra_to_atomaddress()
391
391
 
392
def check_occupancies(st, raise_error=False):
    """Log atoms of the first model whose occupancy is outside [0, 1].

    An occupancy is accepted when ``0 <= occ <= 1 + 1e-6``. Every other atom is
    listed in the log.

    Parameters
    ----------
    st
        Structure; only ``st[0]`` is inspected.
    raise_error : bool
        If True and at least one bad occupancy was found, raise after logging.

    Raises
    ------
    RuntimeError
        When ``raise_error`` is True and a bad occupancy exists.
    """
    upper_limit = 1 + 1e-6  # small tolerance above full occupancy
    offenders = [cra for cra in st[0].all()
                 if not 0 <= cra.atom.occ <= upper_limit]
    if not offenders:
        return
    logger.writeln("Bad occupancies:")
    for cra in offenders:
        logger.writeln(f" {cra} occ= {cra.atom.occ:.4f}")
    if raise_error:
        raise RuntimeError("Please check your model and fix bad occupancies")
# check_occupancies()
404
+
392
405
  def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
393
406
  ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
394
407
  cs = gemmi.ContactSearch(special_pos_threshold * 2)
@@ -430,7 +443,7 @@ def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=
430
443
  logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
431
444
  logger.writeln(" aniso_viol= {}".format(tostr(diff)))
432
445
 
433
- mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
446
+ mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
434
447
  mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
435
448
  mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
436
449
  mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
@@ -617,7 +630,7 @@ def to_dataframe(st):
617
630
  for cra in m.all():
618
631
  c,r,a = cra.chain, cra.residue, cra.atom
619
632
  # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
620
- app("model", m.name)
633
+ app("model", m.num)
621
634
  app("chain", c.name)
622
635
  app("resn", r.name)
623
636
  app("subchain", r.subchain)
@@ -652,8 +665,8 @@ def from_dataframe(df, st=None): # Slow!
652
665
  for i in range(len(st)):
653
666
  del st[0]
654
667
 
655
- for m_name, dm in df.groupby("model"):
656
- st.add_model(gemmi.Model(m_name))
668
+ for m_num, dm in df.groupby("model"):
669
+ st.add_model(gemmi.Model(m_num))
657
670
  m = st[-1]
658
671
  for c_name, dc in dm.groupby("chain"):
659
672
  m.add_chain(gemmi.Chain(c_name))
@@ -691,7 +704,7 @@ def from_dataframe(df, st=None): # Slow!
691
704
 
692
705
  def st_from_positions(positions, bs=None, qs=None):
693
706
  st = gemmi.Structure()
694
- st.add_model(gemmi.Model("1"))
707
+ st.add_model(gemmi.Model(1))
695
708
  st[0].add_chain(gemmi.Chain("A"))
696
709
  c = st[0][0]
697
710
  if bs is None: bs = (0. for _ in range(len(positions)))
@@ -714,7 +727,7 @@ def st_from_positions(positions, bs=None, qs=None):
714
727
 
715
728
  def invert_model(st):
716
729
  # invert x-axis
717
- A = numpy.array(st.cell.orthogonalization_matrix.tolist())
730
+ A = st.cell.orth.mat.array
718
731
  center = numpy.sum(A,axis=1) / 2
719
732
  center = gemmi.Vec3(*center)
720
733
  mat = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
@@ -729,14 +742,14 @@ def cx_to_mx(ss): #SmallStructure to Structure
729
742
  st = gemmi.Structure()
730
743
  st.spacegroup_hm = ss.spacegroup.xhm()
731
744
  st.cell = ss.cell
732
- st.add_model(gemmi.Model("1"))
745
+ st.add_model(gemmi.Model(1))
733
746
  st[-1].add_chain(gemmi.Chain("A"))
734
747
  st[-1][-1].add_residue(gemmi.Residue())
735
748
  st[-1][-1][-1].seqid.num = 1
736
749
  st[-1][-1][-1].name = "00"
737
750
 
738
751
  ruc = ss.cell.reciprocal()
739
- cif2cart = ss.cell.orthogonalization_matrix.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
752
+ cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
740
753
  as_smat33f = lambda x: gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)
741
754
 
742
755
  for site in ss.sites: