servalcat 0.4.72__cp312-cp312-macosx_11_0_arm64.whl → 0.4.99__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cpython-312-darwin.so +0 -0
- servalcat/refine/refine.py +152 -67
- servalcat/refine/refine_geom.py +32 -13
- servalcat/refine/refine_spa.py +70 -40
- servalcat/refine/refine_xtal.py +45 -13
- servalcat/refine/spa.py +15 -4
- servalcat/refine/xtal.py +147 -98
- servalcat/refmac/exte.py +7 -5
- servalcat/refmac/refmac_keywords.py +11 -9
- servalcat/refmac/refmac_wrapper.py +87 -60
- servalcat/spa/fofc.py +20 -3
- servalcat/spa/fsc.py +11 -11
- servalcat/spa/run_refmac.py +27 -12
- servalcat/spa/translate.py +2 -2
- servalcat/utils/commands.py +154 -4
- servalcat/utils/fileio.py +20 -10
- servalcat/utils/hkl.py +43 -29
- servalcat/utils/logger.py +25 -1
- servalcat/utils/maps.py +2 -2
- servalcat/utils/model.py +23 -10
- servalcat/utils/refmac.py +20 -1
- servalcat/utils/restraints.py +34 -25
- servalcat/utils/symmetry.py +5 -5
- servalcat/xtal/french_wilson.py +39 -31
- servalcat/xtal/sigmaa.py +382 -152
- servalcat/xtal/twin.py +121 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/METADATA +4 -4
- servalcat-0.4.99.dist-info/RECORD +45 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/WHEEL +1 -1
- servalcat-0.4.72.dist-info/RECORD +0 -44
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.72.dist-info → servalcat-0.4.99.dist-info}/licenses/LICENSE +0 -0
servalcat/utils/commands.py
CHANGED
|
@@ -185,6 +185,17 @@ def add_arguments(p):
|
|
|
185
185
|
parser.add_argument('-o', '--output_prefix',
|
|
186
186
|
help="default: taken from input file")
|
|
187
187
|
|
|
188
|
+
# conf
|
|
189
|
+
parser = subparsers.add_parser("conf", description = 'Compare conformations')
|
|
190
|
+
parser.add_argument('models', nargs="+")
|
|
191
|
+
parser.add_argument("--min_diff", type=float, default=60.)
|
|
192
|
+
parser.add_argument('--ligand', nargs="*", action="append")
|
|
193
|
+
parser.add_argument("--monlib",
|
|
194
|
+
help="Monomer library path. Default: $CLIBD_MON")
|
|
195
|
+
parser.add_argument("--same_chain", action='store_true', help="Only between same chains (more than one file)")
|
|
196
|
+
parser.add_argument('-o', '--output_prefix', default="conf",
|
|
197
|
+
help="")
|
|
198
|
+
|
|
188
199
|
# adp
|
|
189
200
|
parser = subparsers.add_parser("adp", description = 'ADP analysis')
|
|
190
201
|
parser.add_argument('model')
|
|
@@ -281,6 +292,9 @@ def add_arguments(p):
|
|
|
281
292
|
parser = subparsers.add_parser("seq", description = 'Print/align model sequence')
|
|
282
293
|
parser.add_argument("--model", required=True)
|
|
283
294
|
parser.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
|
|
295
|
+
parser.add_argument('--scoring', nargs=6, type=int, default=(1, 0, -1, -1, 0, -1),
|
|
296
|
+
metavar=("match", "mismatch", "gapo", "gape", "good_gapo", "bad_gapo"),
|
|
297
|
+
help="scoring function. default: %(default)s")
|
|
284
298
|
|
|
285
299
|
# dnarna
|
|
286
300
|
parser = subparsers.add_parser("dnarna", description = 'DNA to RNA or RNA to DNA model conversion')
|
|
@@ -919,6 +933,135 @@ def geometry(args):
|
|
|
919
933
|
fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
|
|
920
934
|
# geometry()
|
|
921
935
|
|
|
936
|
+
def compare_conf(args):
|
|
937
|
+
def angle_abs_diff(a, b, full=360.):
|
|
938
|
+
# from gemmi/math.hpp
|
|
939
|
+
d = abs(a - b)
|
|
940
|
+
if d > full:
|
|
941
|
+
d -= numpy.floor(d / full) * full
|
|
942
|
+
return min(d, full - d)
|
|
943
|
+
# angle_abs_diff()
|
|
944
|
+
|
|
945
|
+
if args.ligand: args.ligand = sum(args.ligand, [])
|
|
946
|
+
st = None
|
|
947
|
+
for i, f in enumerate(args.models):
|
|
948
|
+
tmp = fileio.read_structure(f)
|
|
949
|
+
if len(args.models) > 1:
|
|
950
|
+
for chain in tmp[0]:
|
|
951
|
+
chain.name = f"{i+1}_{chain.name}"
|
|
952
|
+
if i == 0:
|
|
953
|
+
st = tmp
|
|
954
|
+
else:
|
|
955
|
+
for chain in tmp[0]:
|
|
956
|
+
st[0].add_chain(chain)
|
|
957
|
+
try:
|
|
958
|
+
monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
|
|
959
|
+
stop_for_unknowns=True)
|
|
960
|
+
except RuntimeError as e:
|
|
961
|
+
raise SystemExit(f"Error: {e}")
|
|
962
|
+
|
|
963
|
+
model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
|
|
964
|
+
try:
|
|
965
|
+
topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
|
|
966
|
+
check_hydrogen=False)
|
|
967
|
+
except RuntimeError as e:
|
|
968
|
+
raise SystemExit(f"Error: {e}")
|
|
969
|
+
ncslist = restraints.prepare_ncs_restraints(st)
|
|
970
|
+
lookup = {x.atom: x for x in st[0].all()}
|
|
971
|
+
ptypes = {x.name: x.polymer_type for x in st.entities}
|
|
972
|
+
resn_lookup = {(chain.name, res.seqid): res.name for chain in st[0] for res in chain}
|
|
973
|
+
confs = {}
|
|
974
|
+
for t in topo.torsions:
|
|
975
|
+
cra = lookup[t.atoms[0]]
|
|
976
|
+
ptype = ptypes[cra.residue.entity_id]
|
|
977
|
+
is_peptide = ptype in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD)
|
|
978
|
+
is_peptide_tors = t.restr.label.startswith("chi") or t.restr.label in ("omega", "phi", "psi")
|
|
979
|
+
is_na = ptype in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
|
|
980
|
+
is_na_tors = t.restr.label in ("C2e-chi", "alpha", "beta", "gamma", "C2e-nyu0", "epsilon", "zeta")
|
|
981
|
+
if (is_peptide and is_peptide_tors) or (is_na and is_na_tors):
|
|
982
|
+
confs.setdefault(cra.chain.name, {}).setdefault(cra.residue.seqid, {})[t.restr.label] = numpy.rad2deg(t.calculate())
|
|
983
|
+
fulls = {("ARG", "chi5"): 180., ("TYR", "chi2"): 180., ("PHE", "chi2"): 180., ("ASP", "chi2"): 180., ("GLU", "chi3"): 180.}
|
|
984
|
+
ret = []
|
|
985
|
+
for_coot = []
|
|
986
|
+
for ncs in ncslist.ncss:
|
|
987
|
+
c1, c2 = ncs.chains
|
|
988
|
+
if args.same_chain and len(args.models) > 1 and c1[c1.index("_"):] != c2[c2.index("_"):]:
|
|
989
|
+
continue
|
|
990
|
+
for s1, s2 in ncs.seqids:
|
|
991
|
+
if c1 in confs and s1 in confs[c1] and c2 in confs and s2 in confs[c2]:
|
|
992
|
+
conf1, conf2 = confs[c1][s1], confs[c2][s2]
|
|
993
|
+
resn = resn_lookup[(c1, s1)]
|
|
994
|
+
for t in conf1:
|
|
995
|
+
if t in conf2:
|
|
996
|
+
d = angle_abs_diff(conf1[t], conf2[t], fulls.get((resn, t), 360.))
|
|
997
|
+
ret.append((c1, s1, c2, s2, resn, t, conf1[t], conf2[t], d))
|
|
998
|
+
if d > args.min_diff:
|
|
999
|
+
for_coot.append((c1, s1.num, c2, s2.num, resn, t, d))
|
|
1000
|
+
df = pandas.DataFrame(ret, columns=["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "conf_1", "conf_2", "diff"])
|
|
1001
|
+
df.sort_values("diff", ascending=False, inplace=True)
|
|
1002
|
+
logger.writeln(f"\nList of torsion angle differences (>{args.min_diff})")
|
|
1003
|
+
logger.writeln(df[df["diff"] > args.min_diff].to_string(index=False))
|
|
1004
|
+
|
|
1005
|
+
for_coot.sort(key=lambda x:-x[-1])
|
|
1006
|
+
coot_out = args.output_prefix + "_coot.py"
|
|
1007
|
+
with open(coot_out, "w") as ofs:
|
|
1008
|
+
# https://python-gtk-3-tutorial.readthedocs.io/en/latest/treeview.html
|
|
1009
|
+
ofs.write("""\
|
|
1010
|
+
from __future__ import absolute_import, division, print_function
|
|
1011
|
+
import re
|
|
1012
|
+
import gtk
|
|
1013
|
+
class coot_serval_conf_list:
|
|
1014
|
+
def __init__(self):
|
|
1015
|
+
window = gtk.Window(gtk.WINDOW_TOPLEVEL)
|
|
1016
|
+
window.set_title("Different conformations (Servalcat)")
|
|
1017
|
+
window.set_default_size(600, 600)
|
|
1018
|
+
scrolled_win = gtk.ScrolledWindow()
|
|
1019
|
+
scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
|
|
1020
|
+
vbox = gtk.VBox(False, 2)
|
|
1021
|
+
self.liststore = gtk.ListStore(str, int, str, int, str, str, float)
|
|
1022
|
+
self.filter = self.liststore.filter_new()
|
|
1023
|
+
self.treeview = gtk.TreeView(model=self.filter)
|
|
1024
|
+
for i, column_title in enumerate(["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "diff"]):
|
|
1025
|
+
renderer = gtk.CellRendererText()
|
|
1026
|
+
column = gtk.TreeViewColumn(column_title, renderer, text=i)
|
|
1027
|
+
self.treeview.append_column(column)
|
|
1028
|
+
self.data = {}
|
|
1029
|
+
self.add_data()
|
|
1030
|
+
scrolled_win.add_with_viewport(self.treeview) # add?
|
|
1031
|
+
vbox.pack_start(scrolled_win, True, True, 0)
|
|
1032
|
+
window.add(vbox)
|
|
1033
|
+
window.show_all()
|
|
1034
|
+
self.treeview.connect("row-activated", self.on_row_activated)
|
|
1035
|
+
|
|
1036
|
+
def on_row_activated(self, treeview, path, column):
|
|
1037
|
+
assert len(path) == 1
|
|
1038
|
+
col_idx = [i for i, c in enumerate(treeview.get_columns()) if column == c][0]
|
|
1039
|
+
row = self.liststore[path[0]]
|
|
1040
|
+
if col_idx < 2:
|
|
1041
|
+
chain, resi = row[0], row[1]
|
|
1042
|
+
elif col_idx < 4:
|
|
1043
|
+
chain, resi = row[2], row[3]
|
|
1044
|
+
else:
|
|
1045
|
+
return
|
|
1046
|
+
if re.search("^[0-9]+_[0-9A-Za-z]", chain):
|
|
1047
|
+
chain = chain[chain.index("_")+1:]
|
|
1048
|
+
imol = active_atom_spec()[1][0]
|
|
1049
|
+
for name in (" CA ", " C1'"):
|
|
1050
|
+
a = get_atom(imol, chain, resi, "", name)
|
|
1051
|
+
if a:
|
|
1052
|
+
set_rotation_center(*a[2])
|
|
1053
|
+
break
|
|
1054
|
+
|
|
1055
|
+
def add_data(self):
|
|
1056
|
+
for i, d in enumerate(self.data):
|
|
1057
|
+
self.liststore.append(d)
|
|
1058
|
+
|
|
1059
|
+
gui = coot_serval_conf_list()
|
|
1060
|
+
""".format(for_coot))
|
|
1061
|
+
logger.writeln("\nRun:")
|
|
1062
|
+
logger.writeln(f"coot --script {coot_out}")
|
|
1063
|
+
# compare_conf()
|
|
1064
|
+
|
|
922
1065
|
def adp_stats(args):
|
|
923
1066
|
if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
|
|
924
1067
|
st = fileio.read_structure(args.model)
|
|
@@ -1242,6 +1385,9 @@ def seq(args):
|
|
|
1242
1385
|
for sf in args.seq:
|
|
1243
1386
|
seqs.extend(fileio.read_sequence_file(sf))
|
|
1244
1387
|
|
|
1388
|
+
sc = gemmi.AlignmentScoring()
|
|
1389
|
+
sc.match, sc.mismatch, sc.gapo, sc.gape, sc.good_gapo, sc.bad_gapo = args.scoring
|
|
1390
|
+
|
|
1245
1391
|
st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
|
|
1246
1392
|
model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
|
|
1247
1393
|
for chain in st[0]:
|
|
@@ -1257,17 +1403,20 @@ def seq(args):
|
|
|
1257
1403
|
gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}.get(p_type, gemmi.ResidueKind.AA)
|
|
1258
1404
|
s = [gemmi.expand_one_letter(x, kind) for x in seq]
|
|
1259
1405
|
if None in s: continue
|
|
1260
|
-
|
|
1406
|
+
#als = [gemmi.align_sequence_to_polymer(s, p, p_type, gemmi.AlignmentScoring(x)) for x in ("s", "p")]
|
|
1407
|
+
#results.append([name, max(als, key=lambda x: x.match_count), seq])
|
|
1408
|
+
results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type, sc), seq])
|
|
1261
1409
|
|
|
1262
1410
|
if results:
|
|
1263
1411
|
logger.writeln("Chain: {}".format(chain.name))
|
|
1264
1412
|
logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
|
|
1265
|
-
name, al, s1 = max(results, key=lambda x: x[1].score)
|
|
1413
|
+
name, al, s1 = max(results, key=lambda x: (x[1].match_count, x[1].score))
|
|
1266
1414
|
logger.writeln(" match: {}".format(name))
|
|
1415
|
+
logger.writeln(" aligned: {}".format(al.match_count))
|
|
1267
1416
|
logger.writeln(" score: {}".format(al.score))
|
|
1268
1417
|
p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
|
|
1269
|
-
unkseq = [x.start() for x in re.finditer("\-", p1)]
|
|
1270
|
-
mismatches = [x.start() for x in re.finditer("\.", al.match_string)]
|
|
1418
|
+
unkseq = [x.start() for x in re.finditer(r"\-", p1)]
|
|
1419
|
+
mismatches = [x.start() for x in re.finditer(r"\.", al.match_string)]
|
|
1271
1420
|
if mismatches or unkseq:
|
|
1272
1421
|
idxes = {x.start(): i for i, x in enumerate(re.finditer("[^-]", p2))}
|
|
1273
1422
|
seqnums = [str(x.seqid) for x in p]
|
|
@@ -1371,6 +1520,7 @@ def main(args):
|
|
|
1371
1520
|
merge_models=merge_models,
|
|
1372
1521
|
merge_dicts=merge_dicts,
|
|
1373
1522
|
geom=geometry,
|
|
1523
|
+
conf=compare_conf,
|
|
1374
1524
|
adp=adp_stats,
|
|
1375
1525
|
power=show_power,
|
|
1376
1526
|
fcalc=fcalc,
|
servalcat/utils/fileio.py
CHANGED
|
@@ -17,7 +17,6 @@ import re
|
|
|
17
17
|
import subprocess
|
|
18
18
|
import gemmi
|
|
19
19
|
import numpy
|
|
20
|
-
import numpy.lib.recfunctions
|
|
21
20
|
import gzip
|
|
22
21
|
|
|
23
22
|
def splitext(path):
|
|
@@ -83,9 +82,12 @@ def write_mmcif(st, cif_out, cif_ref=None):
|
|
|
83
82
|
groups.scale = True
|
|
84
83
|
groups.assembly = True
|
|
85
84
|
groups.entity = True
|
|
85
|
+
groups.entity_poly = True
|
|
86
86
|
groups.entity_poly_seq = True
|
|
87
87
|
groups.cis = True
|
|
88
88
|
groups.conn = True
|
|
89
|
+
groups.software = True
|
|
90
|
+
groups.auth_all = True
|
|
89
91
|
# FIXME is this all?
|
|
90
92
|
try:
|
|
91
93
|
doc = read_cif_safe(cif_ref)
|
|
@@ -106,23 +108,27 @@ def write_mmcif(st, cif_out, cif_ref=None):
|
|
|
106
108
|
block.find_mmcif_category("_atom_sites.").erase()
|
|
107
109
|
st_new.update_mmcif_block(block, groups)
|
|
108
110
|
if "_entry.id" in st_new.info: st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
|
|
109
|
-
doc.write_file(cif_out,
|
|
111
|
+
doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
|
|
110
112
|
else:
|
|
111
113
|
st_new.name = st_new.name[:78] # this will become _entry.id
|
|
112
114
|
if "_entry.id" in st_new.info: st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
|
|
113
|
-
groups = gemmi.MmcifOutputGroups(True)
|
|
115
|
+
groups = gemmi.MmcifOutputGroups(True, auth_all=True)
|
|
114
116
|
doc = gemmi.cif.Document()
|
|
115
117
|
block = doc.add_new_block("new")
|
|
116
118
|
st_new.update_mmcif_block(block, groups)
|
|
117
|
-
doc.write_file(cif_out,
|
|
119
|
+
doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
|
|
118
120
|
# write_mmcif()
|
|
119
121
|
|
|
120
122
|
def write_pdb(st, pdb_out):
|
|
121
123
|
logger.writeln("Writing PDB file: {}".format(pdb_out))
|
|
124
|
+
st = st.clone()
|
|
122
125
|
chain_id_lens = [len(x) for x in model.all_chain_ids(st)]
|
|
123
126
|
if chain_id_lens and max(chain_id_lens) > 2:
|
|
124
|
-
st = st.clone()
|
|
125
127
|
st.shorten_chain_names()
|
|
128
|
+
st.shorten_ccd_codes()
|
|
129
|
+
if st.shortened_ccd_codes:
|
|
130
|
+
msg = " ".join("{}->{}".format(o,n) for o,n in st.shortened_ccd_codes)
|
|
131
|
+
logger.writeln(" Using shortened residue names in the output pdb file: " + msg)
|
|
126
132
|
st.write_pdb(pdb_out, use_linkr=True)
|
|
127
133
|
# write_pdb()
|
|
128
134
|
|
|
@@ -306,7 +312,7 @@ def read_cif_safe(cif_in):
|
|
|
306
312
|
return doc
|
|
307
313
|
# read_cif_safe()
|
|
308
314
|
|
|
309
|
-
def read_structure(xyz_in, assign_het_flags=True):
|
|
315
|
+
def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True):
|
|
310
316
|
spext = splitext(xyz_in)
|
|
311
317
|
st = None
|
|
312
318
|
if spext[1].lower() in (".pdb", ".ent"):
|
|
@@ -338,6 +344,8 @@ def read_structure(xyz_in, assign_het_flags=True):
|
|
|
338
344
|
if st is None:
|
|
339
345
|
logger.writeln("Reading chemical component file: {}".format(xyz_in))
|
|
340
346
|
st = gemmi.make_structure_from_chemcomp_block(block)
|
|
347
|
+
for i in range(len(st)-1):
|
|
348
|
+
del st[1]
|
|
341
349
|
elif spext[1].lower() in (".ins", ".res"):
|
|
342
350
|
logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
|
|
343
351
|
st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
|
|
@@ -354,6 +362,8 @@ def read_structure(xyz_in, assign_het_flags=True):
|
|
|
354
362
|
logger.writeln("")
|
|
355
363
|
if assign_het_flags:
|
|
356
364
|
st.assign_het_flags()
|
|
365
|
+
if merge_chain_parts:
|
|
366
|
+
st.merge_chain_parts()
|
|
357
367
|
return st
|
|
358
368
|
# read_structure()
|
|
359
369
|
|
|
@@ -453,7 +463,7 @@ def merge_ligand_cif(cifs_in, cif_out):
|
|
|
453
463
|
if b.name not in list_names:
|
|
454
464
|
doc.add_copied_block(b)
|
|
455
465
|
|
|
456
|
-
doc.write_file(cif_out,
|
|
466
|
+
doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
|
|
457
467
|
# merge_ligand_cif()
|
|
458
468
|
|
|
459
469
|
def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
|
|
@@ -568,7 +578,7 @@ def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO supp
|
|
|
568
578
|
symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])
|
|
569
579
|
|
|
570
580
|
ss.symops = [op.triplet() for op in set(symms)]
|
|
571
|
-
ss.
|
|
581
|
+
ss.determine_and_set_spacegroup("s")
|
|
572
582
|
# in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
|
|
573
583
|
if ss.spacegroup is None:
|
|
574
584
|
raise RuntimeError("Cannot construct space group from symbols: {}".format(ss.symops))
|
|
@@ -595,7 +605,7 @@ def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
|
|
|
595
605
|
# wavelength = l[32:40]
|
|
596
606
|
|
|
597
607
|
ints = gemmi.Intensities()
|
|
598
|
-
ints.set_data(cell, sg, hkls, vals, sigs)
|
|
608
|
+
ints.set_data(cell, sg, numpy.asarray(hkls), numpy.asarray(vals), numpy.asarray(sigs))
|
|
599
609
|
ints.merge_in_place(gemmi.DataType.Anomalous)
|
|
600
610
|
if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
|
|
601
611
|
logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
|
|
@@ -712,7 +722,7 @@ def read_small_molecule_files(files):
|
|
|
712
722
|
logger.writeln("reflection data read from: {}".format(filename))
|
|
713
723
|
elif b.find_loop("_refln_index_h"):
|
|
714
724
|
mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
|
|
715
|
-
except
|
|
725
|
+
except ValueError: # not a cif file
|
|
716
726
|
if ext == ".hkl":
|
|
717
727
|
mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
|
|
718
728
|
logger.writeln("reflection data read from: {}".format(filename))
|
servalcat/utils/hkl.py
CHANGED
|
@@ -7,7 +7,6 @@ Mozilla Public License, version 2.0; see LICENSE.
|
|
|
7
7
|
"""
|
|
8
8
|
from __future__ import absolute_import, division, print_function, generators
|
|
9
9
|
import numpy
|
|
10
|
-
import numpy.lib.recfunctions
|
|
11
10
|
import scipy.optimize
|
|
12
11
|
import pandas
|
|
13
12
|
import gemmi
|
|
@@ -17,27 +16,28 @@ dtypes64 = dict(i=numpy.int64, u=numpy.uint64, f=numpy.float64, c=numpy.complex1
|
|
|
17
16
|
to64 = lambda x: x.astype(dtypes64.get(x.dtype.kind, x.dtype))
|
|
18
17
|
|
|
19
18
|
def r_factor(fo, fc):
|
|
20
|
-
|
|
19
|
+
denom = numpy.nansum(fo)
|
|
20
|
+
if denom == 0:
|
|
21
21
|
return numpy.nan
|
|
22
|
-
return numpy.nansum(numpy.abs(fo-fc)) /
|
|
22
|
+
return numpy.nansum(numpy.abs(fo-fc)) / denom
|
|
23
23
|
def correlation(obs, calc):
|
|
24
|
-
if obs.size == 0:
|
|
25
|
-
return numpy.nan
|
|
26
24
|
sel = numpy.isfinite(obs)
|
|
25
|
+
if obs.size == 0 or numpy.all(~sel):
|
|
26
|
+
return numpy.nan
|
|
27
27
|
return numpy.corrcoef(obs[sel], calc[sel])[0,1]
|
|
28
28
|
|
|
29
29
|
def df_from_asu_data(asu_data, label):
|
|
30
|
-
df = pandas.DataFrame(data=asu_data.miller_array,
|
|
30
|
+
df = pandas.DataFrame(data=asu_data.miller_array.astype(numpy.int32),
|
|
31
31
|
columns=["H","K","L"])
|
|
32
|
-
if asu_data
|
|
33
|
-
df[label] = to64(asu_data.value_array[
|
|
34
|
-
df["SIG"+label] = to64(asu_data.value_array[
|
|
32
|
+
if type(asu_data) is gemmi.ValueSigmaAsuData:
|
|
33
|
+
df[label] = to64(asu_data.value_array[:,0])
|
|
34
|
+
df["SIG"+label] = to64(asu_data.value_array[:,1])
|
|
35
35
|
else:
|
|
36
36
|
df[label] = to64(asu_data.value_array)
|
|
37
37
|
return df
|
|
38
38
|
|
|
39
39
|
def df_from_raw(miller_array, value_array, label):
|
|
40
|
-
df = pandas.DataFrame(data=miller_array,
|
|
40
|
+
df = pandas.DataFrame(data=miller_array.astype(numpy.int32),
|
|
41
41
|
columns=["H","K","L"])
|
|
42
42
|
df[label] = to64(value_array)
|
|
43
43
|
return df
|
|
@@ -93,7 +93,7 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
|
|
|
93
93
|
if mismatches:
|
|
94
94
|
raise RuntimeError("MTZ column types mismatch: {}".format(" ".join(mismatches)))
|
|
95
95
|
|
|
96
|
-
df = pandas.DataFrame(data=
|
|
96
|
+
df = pandas.DataFrame(data=mtz.array, columns=mtz.column_labels())
|
|
97
97
|
df = df.astype({col: 'int32' for col in col_types if col_types[col] == "H"})
|
|
98
98
|
df = df.astype({col: 'Int64' for col in col_types if col_types[col] in ("B", "Y", "I")}) # pandas's nullable int
|
|
99
99
|
for lab in set(mtz.column_labels()).difference(labels+["H","K","L"]):
|
|
@@ -114,6 +114,12 @@ def hkldata_from_mtz(mtz, labels, newlabels=None, require_types=None):
|
|
|
114
114
|
return HklData(mtz.cell, mtz.spacegroup, df)
|
|
115
115
|
# hkldata_from_mtz()
|
|
116
116
|
|
|
117
|
+
def df_from_twin_data(twin_data, fc_labs):
|
|
118
|
+
df = pandas.DataFrame(data=twin_data.asu,
|
|
119
|
+
columns=["H","K","L"])
|
|
120
|
+
df[fc_labs] = twin_data.f_calc
|
|
121
|
+
return df
|
|
122
|
+
|
|
117
123
|
def blur_mtz(mtz, B):
|
|
118
124
|
# modify given mtz object
|
|
119
125
|
|
|
@@ -170,7 +176,7 @@ def mtz_selected(mtz, columns):
|
|
|
170
176
|
dataset_id=col_dict[col].dataset_id, expand_data=False)
|
|
171
177
|
|
|
172
178
|
idxes = [col_idxes[col] for col in columns]
|
|
173
|
-
data =
|
|
179
|
+
data = mtz.array[:, idxes]
|
|
174
180
|
mtz2.set_data(data)
|
|
175
181
|
return mtz2
|
|
176
182
|
# mtz_selected()
|
|
@@ -190,6 +196,19 @@ def decide_n_bins(n_per_bin, s_array, power=2, min_bins=1, max_bins=50):
|
|
|
190
196
|
return n_bins
|
|
191
197
|
# decide_n_bins()
|
|
192
198
|
|
|
199
|
+
def fft_map(cell, sg, miller_array, data, grid_size=None, sample_rate=3):
|
|
200
|
+
if data is not None:
|
|
201
|
+
data = data.astype(numpy.complex64) # we may want to keep complex128?
|
|
202
|
+
if type(data) is pandas.core.series.Series:
|
|
203
|
+
data = data.to_numpy()
|
|
204
|
+
asu = gemmi.ComplexAsuData(cell, sg, miller_array, data)
|
|
205
|
+
if grid_size is None:
|
|
206
|
+
ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
|
|
207
|
+
else:
|
|
208
|
+
ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
|
|
209
|
+
return ma
|
|
210
|
+
# fft_map()
|
|
211
|
+
|
|
193
212
|
class HklData:
|
|
194
213
|
def __init__(self, cell, sg, df=None, binned_df=None):
|
|
195
214
|
self.cell = cell
|
|
@@ -206,7 +225,7 @@ class HklData:
|
|
|
206
225
|
def switch_to_asu(self):
|
|
207
226
|
# Need to care phases
|
|
208
227
|
assert not any(numpy.iscomplexobj(self.df[x]) for x in self.df)
|
|
209
|
-
hkl = self.miller_array()
|
|
228
|
+
hkl = self.miller_array()
|
|
210
229
|
self.sg.switch_to_asu(hkl)
|
|
211
230
|
self.df[["H","K","L"]] = hkl
|
|
212
231
|
# in some environment type changes to int64 even though hkl's dtype is int32
|
|
@@ -248,11 +267,11 @@ class HklData:
|
|
|
248
267
|
# merge_asu_data()
|
|
249
268
|
|
|
250
269
|
def miller_array(self):
|
|
251
|
-
return self.df[["H","K","L"]]
|
|
270
|
+
return self.df[["H","K","L"]].to_numpy()
|
|
252
271
|
|
|
253
272
|
def s_array(self):
|
|
254
273
|
hkl = self.miller_array()
|
|
255
|
-
return numpy.dot(hkl, self.cell.
|
|
274
|
+
return numpy.dot(hkl, self.cell.frac.mat.array)
|
|
256
275
|
|
|
257
276
|
def ssq_mat(self):
|
|
258
277
|
# k_aniso = exp(-s^T B_aniso s / 4)
|
|
@@ -271,8 +290,8 @@ class HklData:
|
|
|
271
290
|
s2 = 1 / self.d_spacings()**2
|
|
272
291
|
return numpy.exp(-b_iso / 4 * s2)
|
|
273
292
|
if b_cart is not None:
|
|
274
|
-
b_star = b_cart.transformed_by(self.cell.
|
|
275
|
-
return numpy.exp(-b_star.r_u_r(self.miller_array()
|
|
293
|
+
b_star = b_cart.transformed_by(self.cell.frac.mat)
|
|
294
|
+
return numpy.exp(-b_star.r_u_r(self.miller_array()) / 4)
|
|
276
295
|
|
|
277
296
|
def calc_d(self):
|
|
278
297
|
self.df["d"] = self.cell.calculate_d_array(self.miller_array())
|
|
@@ -297,8 +316,10 @@ class HklData:
|
|
|
297
316
|
self.df.sort_values("d", ascending=ascending, inplace=True)
|
|
298
317
|
# sort_by_resolution()
|
|
299
318
|
|
|
300
|
-
def d_min_max(self):
|
|
319
|
+
def d_min_max(self, labs=None):
|
|
301
320
|
d = self.d_spacings()
|
|
321
|
+
if labs:
|
|
322
|
+
d = d[~self.df[labs].isna().any(axis=1)]
|
|
302
323
|
return numpy.min(d), numpy.max(d)
|
|
303
324
|
# d_min_max()
|
|
304
325
|
|
|
@@ -494,9 +515,7 @@ class HklData:
|
|
|
494
515
|
if label_sigma is not None:
|
|
495
516
|
assert data is None
|
|
496
517
|
assert not numpy.iscomplexobj(self.df[label])
|
|
497
|
-
|
|
498
|
-
data = numpy.lib.recfunctions.unstructured_to_structured(self.df[[label,label_sigma]].to_numpy(),
|
|
499
|
-
numpy.dtype([("value", numpy.float32), ("sigma", numpy.float32)]))
|
|
518
|
+
data = self.df[[label,label_sigma]].to_numpy()
|
|
500
519
|
elif data is None:
|
|
501
520
|
data = self.df[label]
|
|
502
521
|
|
|
@@ -514,14 +533,9 @@ class HklData:
|
|
|
514
533
|
# as_asu_data()
|
|
515
534
|
|
|
516
535
|
def fft_map(self, label=None, data=None, grid_size=None, sample_rate=3):
|
|
517
|
-
if data is
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
ma = asu.transform_f_phi_to_map(sample_rate=sample_rate, exact_size=(0, 0, 0)) # half_l=True
|
|
521
|
-
else:
|
|
522
|
-
ma = gemmi.transform_f_phi_grid_to_map(asu.get_f_phi_on_grid(grid_size)) # half_l=False
|
|
523
|
-
|
|
524
|
-
return ma
|
|
536
|
+
if data is None:
|
|
537
|
+
data = self.df[label].to_numpy()
|
|
538
|
+
return fft_map(self.cell, self.sg, self.miller_array(), data, grid_size, sample_rate)
|
|
525
539
|
# fft_map()
|
|
526
540
|
|
|
527
541
|
def d_eff(self, label):
|
servalcat/utils/logger.py
CHANGED
|
@@ -18,12 +18,15 @@ class Logger(object):
|
|
|
18
18
|
def __init__(self, file_out=None, append=True):
|
|
19
19
|
self.ofs = None
|
|
20
20
|
self.stopped = False
|
|
21
|
+
self.prefix = ""
|
|
21
22
|
if file_out:
|
|
22
23
|
self.set_file(file_out, append)
|
|
23
24
|
# __init__()
|
|
24
25
|
def stop_logging(self): self.stopped = True
|
|
25
26
|
def start_logging(self): self.stopped = False
|
|
26
|
-
|
|
27
|
+
def set_prefix(self, p): self.prefix = p
|
|
28
|
+
def clear_prefix(self): self.prefix = ""
|
|
29
|
+
|
|
27
30
|
def set_file(self, file_out, append=True):
|
|
28
31
|
try:
|
|
29
32
|
self.ofs = open(file_out, "a" if append else "w")
|
|
@@ -33,6 +36,8 @@ class Logger(object):
|
|
|
33
36
|
|
|
34
37
|
def write(self, l, end="", flush=True, fs=None, print_fs=sys.stdout):
|
|
35
38
|
if self.stopped: return
|
|
39
|
+
if self.prefix:
|
|
40
|
+
l = "\n".join(self.prefix + x for x in l.splitlines(keepends=True))
|
|
36
41
|
print(l, end=end, file=print_fs, flush=flush)
|
|
37
42
|
for f in (self.ofs, fs):
|
|
38
43
|
if f is not None:
|
|
@@ -69,6 +74,25 @@ close = _logger.close
|
|
|
69
74
|
flush = _logger.flush
|
|
70
75
|
stop = _logger.stop_logging
|
|
71
76
|
start = _logger.start_logging
|
|
77
|
+
set_prefix = _logger.set_prefix
|
|
78
|
+
clear_prefix = _logger.clear_prefix
|
|
79
|
+
|
|
80
|
+
def with_prefix(prefix):
|
|
81
|
+
class WithPrefix(object): # should keep original prefix and restore?
|
|
82
|
+
def __enter__(self):
|
|
83
|
+
_logger.set_prefix(prefix)
|
|
84
|
+
return _logger
|
|
85
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
86
|
+
_logger.clear_prefix()
|
|
87
|
+
return WithPrefix()
|
|
88
|
+
|
|
89
|
+
def silent():
|
|
90
|
+
class Silent(object):
|
|
91
|
+
def write(self, *args, **kwargs):
|
|
92
|
+
pass
|
|
93
|
+
def flush(self):
|
|
94
|
+
pass
|
|
95
|
+
return Silent()
|
|
72
96
|
|
|
73
97
|
def dependency_versions():
|
|
74
98
|
import gemmi
|
servalcat/utils/maps.py
CHANGED
|
@@ -268,9 +268,9 @@ def optimize_peak(grid, ini_pos):
|
|
|
268
268
|
logger.writeln("Finding peak using interpolation..")
|
|
269
269
|
x = grid.unit_cell.fractionalize(ini_pos)
|
|
270
270
|
logger.writeln(" x0: [{}, {}, {}]".format(*x.tolist()))
|
|
271
|
-
logger.writeln(" f0: {}".format(-grid.
|
|
271
|
+
logger.writeln(" f0: {}".format(-grid.interpolate_value(x, order=3)))
|
|
272
272
|
|
|
273
|
-
res = scipy.optimize.minimize(fun=lambda x:-grid.
|
|
273
|
+
res = scipy.optimize.minimize(fun=lambda x:-grid.interpolate_value(gemmi.Fractional(*x), order=3),
|
|
274
274
|
x0=x.tolist(),
|
|
275
275
|
jac=lambda x:-numpy.array(grid.tricubic_interpolation_der(gemmi.Fractional(*x))[1:])
|
|
276
276
|
)
|
servalcat/utils/model.py
CHANGED
|
@@ -347,8 +347,8 @@ def translate_into_box(st, origin=None, apply_shift=True):
|
|
|
347
347
|
if origin is None: origin = gemmi.Position(0,0,0)
|
|
348
348
|
|
|
349
349
|
# apply unit cell translations to put model into a box (unit cell)
|
|
350
|
-
omat =
|
|
351
|
-
fmat =
|
|
350
|
+
omat = st.cell.orth.mat.array
|
|
351
|
+
fmat = st.cell.frac.mat.array.transpose()
|
|
352
352
|
com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
|
|
353
353
|
shift = sum([omat[:,i]*numpy.floor(1-numpy.dot(com, fmat[:,i])) for i in range(3)])
|
|
354
354
|
tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
|
|
@@ -389,6 +389,19 @@ def cra_to_atomaddress(cra):
|
|
|
389
389
|
return aa
|
|
390
390
|
# cra_to_atomaddress()
|
|
391
391
|
|
|
392
|
+
def check_occupancies(st, raise_error=False):
|
|
393
|
+
bad = []
|
|
394
|
+
for cra in st[0].all():
|
|
395
|
+
if not 0 <= cra.atom.occ <= 1 + 1e-6:
|
|
396
|
+
bad.append(cra)
|
|
397
|
+
if bad:
|
|
398
|
+
logger.writeln("Bad occupancies:")
|
|
399
|
+
for cra in bad:
|
|
400
|
+
logger.writeln(f" {cra} occ= {cra.atom.occ:.4f}")
|
|
401
|
+
if raise_error:
|
|
402
|
+
raise RuntimeError("Please check your model and fix bad occupancies")
|
|
403
|
+
# check_occupancies()
|
|
404
|
+
|
|
392
405
|
def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
|
|
393
406
|
ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
|
|
394
407
|
cs = gemmi.ContactSearch(special_pos_threshold * 2)
|
|
@@ -430,7 +443,7 @@ def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=
|
|
|
430
443
|
logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
|
|
431
444
|
logger.writeln(" aniso_viol= {}".format(tostr(diff)))
|
|
432
445
|
|
|
433
|
-
mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
|
|
446
|
+
mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
|
|
434
447
|
mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
|
|
435
448
|
mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
|
|
436
449
|
mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
|
|
@@ -617,7 +630,7 @@ def to_dataframe(st):
|
|
|
617
630
|
for cra in m.all():
|
|
618
631
|
c,r,a = cra.chain, cra.residue, cra.atom
|
|
619
632
|
# TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
|
|
620
|
-
app("model", m.
|
|
633
|
+
app("model", m.num)
|
|
621
634
|
app("chain", c.name)
|
|
622
635
|
app("resn", r.name)
|
|
623
636
|
app("subchain", r.subchain)
|
|
@@ -652,8 +665,8 @@ def from_dataframe(df, st=None): # Slow!
|
|
|
652
665
|
for i in range(len(st)):
|
|
653
666
|
del st[0]
|
|
654
667
|
|
|
655
|
-
for
|
|
656
|
-
st.add_model(gemmi.Model(
|
|
668
|
+
for m_num, dm in df.groupby("model"):
|
|
669
|
+
st.add_model(gemmi.Model(m_num))
|
|
657
670
|
m = st[-1]
|
|
658
671
|
for c_name, dc in dm.groupby("chain"):
|
|
659
672
|
m.add_chain(gemmi.Chain(c_name))
|
|
@@ -691,7 +704,7 @@ def from_dataframe(df, st=None): # Slow!
|
|
|
691
704
|
|
|
692
705
|
def st_from_positions(positions, bs=None, qs=None):
|
|
693
706
|
st = gemmi.Structure()
|
|
694
|
-
st.add_model(gemmi.Model(
|
|
707
|
+
st.add_model(gemmi.Model(1))
|
|
695
708
|
st[0].add_chain(gemmi.Chain("A"))
|
|
696
709
|
c = st[0][0]
|
|
697
710
|
if bs is None: bs = (0. for _ in range(len(positions)))
|
|
@@ -714,7 +727,7 @@ def st_from_positions(positions, bs=None, qs=None):
|
|
|
714
727
|
|
|
715
728
|
def invert_model(st):
|
|
716
729
|
# invert x-axis
|
|
717
|
-
A =
|
|
730
|
+
A = st.cell.orth.mat.array
|
|
718
731
|
center = numpy.sum(A,axis=1) / 2
|
|
719
732
|
center = gemmi.Vec3(*center)
|
|
720
733
|
mat = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
|
|
@@ -729,14 +742,14 @@ def cx_to_mx(ss): #SmallStructure to Structure
|
|
|
729
742
|
st = gemmi.Structure()
|
|
730
743
|
st.spacegroup_hm = ss.spacegroup.xhm()
|
|
731
744
|
st.cell = ss.cell
|
|
732
|
-
st.add_model(gemmi.Model(
|
|
745
|
+
st.add_model(gemmi.Model(1))
|
|
733
746
|
st[-1].add_chain(gemmi.Chain("A"))
|
|
734
747
|
st[-1][-1].add_residue(gemmi.Residue())
|
|
735
748
|
st[-1][-1][-1].seqid.num = 1
|
|
736
749
|
st[-1][-1][-1].name = "00"
|
|
737
750
|
|
|
738
751
|
ruc = ss.cell.reciprocal()
|
|
739
|
-
cif2cart = ss.cell.
|
|
752
|
+
cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))
|
|
740
753
|
as_smat33f = lambda x: gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)
|
|
741
754
|
|
|
742
755
|
for site in ss.sites:
|