servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +1162 -0
  7. servalcat/refine/refine_geom.py +245 -0
  8. servalcat/refine/refine_spa.py +400 -0
  9. servalcat/refine/refine_xtal.py +339 -0
  10. servalcat/refine/spa.py +151 -0
  11. servalcat/refine/xtal.py +312 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +191 -0
  14. servalcat/refmac/refmac_keywords.py +660 -0
  15. servalcat/refmac/refmac_wrapper.py +423 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +488 -0
  18. servalcat/spa/fsc.py +391 -0
  19. servalcat/spa/localcc.py +197 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +979 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1629 -0
  27. servalcat/utils/fileio.py +836 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +811 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +933 -0
  33. servalcat/utils/refmac.py +759 -0
  34. servalcat/utils/restraints.py +888 -0
  35. servalcat/utils/symmetry.py +298 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +262 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1954 -0
  40. servalcat/xtal/twin.py +316 -0
  41. servalcat-0.4.131.dist-info/METADATA +60 -0
  42. servalcat-0.4.131.dist-info/RECORD +45 -0
  43. servalcat-0.4.131.dist-info/WHEEL +6 -0
  44. servalcat-0.4.131.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,836 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import model
11
+ from servalcat.utils import hkl
12
+ from servalcat.utils import restraints
13
+ import os
14
+ import shutil
15
+ import glob
16
+ import re
17
+ import subprocess
18
+ import gemmi
19
+ import numpy
20
+ import gzip
21
+ import traceback
22
+
23
def splitext(path):
    """Split *path* into (root, ext), looking past a trailing compression suffix.

    A final ".gz" or ".bz2" is dropped before os.path.splitext() is applied,
    so "model.pdb.gz" gives ("model", ".pdb").
    """
    stem = path
    if path.endswith((".bz2", ".gz")):
        # keep everything before the last dot, i.e. remove the compression suffix
        stem = path.rpartition(".")[0]
    return os.path.splitext(stem)
# splitext()
29
+
30
def rotate_file(filename, copy=False):
    """Shift numbered backups of *filename* up by one and put it at "<filename>.1".

    Existing "<filename>.N" files (N written without leading zeros) are renamed
    to "<filename>.N+1", then filename itself is renamed (or copied) to
    "<filename>.1".

    Parameters:
        filename: path of the file to rotate; nothing happens if it does not exist.
        copy: when true, filename is copied to "<filename>.1" and kept in place;
              otherwise it is renamed.

    Returns:
        "<filename>.1", or None when filename does not exist.
    """
    if not os.path.exists(filename): return

    # make list [ [filename, number], ... ]
    prefix = filename + "."
    old_list = []
    # glob.escape() prevents characters such as "[" or "*" in filename from
    # being interpreted as wildcards (which would hide existing backups).
    for f in glob.glob(glob.escape(filename) + ".*"):
        # Only the text after "<filename>." is the suffix. str.replace() would
        # also strip further occurrences of the prefix inside the name.
        suffix = f[len(prefix):]
        try:
            i = int(suffix)
        except ValueError:
            continue
        if str(i) == suffix: # ignore if suffix was such as 003...
            old_list.append([f, i])

    old_list.sort(key=lambda x: x[1])

    # rotate files, starting from the largest number
    for f, i in reversed(old_list):
        logger.writeln("Rotating file: {}".format(f))
        os.rename(f, "%s.%d" % (f[:f.rfind(".")], i+1))

    if copy:
        shutil.copyfile(filename, filename + ".1")
    else:
        os.rename(filename, filename + ".1")

    return filename + ".1"
# rotate_file()
59
+
60
def check_model_format(xyzin):
    """Guess the model format of *xyzin* from its file name only.

    Returns ".mmcif" when the extension reported by splitext() ends with "cif",
    and ".pdb" for anything else.
    """
    # TODO check format actually
    # TODO mmjson is possible?
    _, ext = splitext(xyzin)
    return ".mmcif" if ext.endswith("cif") else ".pdb"
# check_model_format()
69
+
70
def write_mmcif(st, cif_out, cif_ref=None, cif_ref_doc=None):
    """
    Write a copy of st to cif_out as an mmCIF file.

    When cif_ref (path of a reference file) or cif_ref_doc (an already parsed
    document) is given, the first reference block having an _atom_site.id loop
    is updated with a selected set of categories from st, and the whole
    reference document is written. If the reference cannot be read or has no
    such block, the function falls back to writing from st alone.

    Refmac fails if _entry.id is longer than 80 chars including quotations,
    hence the cut to 78 characters.
    """
    st_new = st.clone()
    logger.writeln("Writing mmCIF file: {}".format(cif_out))

    if not (cif_ref or cif_ref_doc):
        # no reference: write every category from the structure itself
        st_new.name = st_new.name[:78] # this will become _entry.id
        if "_entry.id" in st_new.info:
            st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
        groups = gemmi.MmcifOutputGroups(True, auth_all=True)
        doc = gemmi.cif.Document()
        st_new.update_mmcif_block(doc.add_new_block("new"), groups)
        doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
        return

    if cif_ref:
        logger.writeln(" using mmCIF metadata from: {}".format(cif_ref))

    # start with everything disabled, then enable the categories taken from st
    groups = gemmi.MmcifOutputGroups(False)
    for attr in ("group_pdb", "ncs", "atoms", "cell", "scale", "assembly",
                 "entity", "entity_poly", "entity_poly_seq", "cis", "conn",
                 "software", "auth_all"):
        setattr(groups, attr, True)
    # FIXME is this all?

    if cif_ref:
        try:
            cif_ref_doc = read_cif_safe(cif_ref)
        except Exception as e:
            # Sometimes refmac writes a broken mmcif file..
            logger.error("Error in mmCIF reading: {}".format(e))
            logger.error(" Give up using cif reference.")
            return write_mmcif(st, cif_out)

    block = next((b for b in cif_ref_doc if b.find_loop("_atom_site.id")), None)
    if block is None:
        logger.writeln("No _atom_site found in reference")
        logger.writeln(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    # to remove fract_transf_matrix. maybe we should keep some (like _atom_sites.solution_hydrogens)?
    # we do not want this because cell may be updated
    block.find_mmcif_category("_atom_sites.").erase()
    st_new.update_mmcif_block(block, groups)
    if "_entry.id" in st_new.info:
        st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
    cif_ref_doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
# write_mmcif()
124
+
125
def write_pdb(st, pdb_out):
    """Write a copy of st to pdb_out in PDB format.

    On the copy, chain names are shortened when any chain ID has more than two
    characters, and shorten_ccd_codes() is always called; the resulting
    renames, if any, are logged.
    """
    logger.writeln("Writing PDB file: {}".format(pdb_out))
    out_st = st.clone()
    if any(len(cid) > 2 for cid in model.all_chain_ids(out_st)):
        out_st.shorten_chain_names()
    out_st.shorten_ccd_codes()
    renamed = out_st.shortened_ccd_codes
    if renamed:
        pairs = ["{}->{}".format(old, new) for old, new in renamed]
        logger.writeln(" Using shortened residue names in the output pdb file: " + " ".join(pairs))
    out_st.write_pdb(pdb_out, use_linkr=True)
# write_pdb()
137
+
138
def write_model(st, prefix=None, file_name=None, pdb=False, cif=False, cif_ref=None, hout=True):
    """Write st as mmCIF and/or PDB.

    Parameters:
        st: structure to write.
        prefix: output name without extension; used when file_name is not given.
        file_name: explicit output name; the format follows its ending
                   ("cif" -> mmCIF, ".pdb" -> PDB).
        pdb, cif: with prefix, select which of prefix+".pdb" / prefix+".mmcif" to write.
        cif_ref: passed on to write_mmcif() as the metadata reference.
        hout: when false, hydrogens are removed from a copy before writing.

    Raises:
        Exception: if file_name ends with neither "cif" nor ".pdb".
    """
    if not hout and st[0].has_hydrogen():
        st = st.clone()
        st.remove_hydrogens()

    if not file_name:
        if cif:
            write_mmcif(st, prefix+".mmcif", cif_ref)
        if pdb:
            write_pdb(st, prefix+".pdb")
        return

    if file_name.endswith("cif"):
        write_mmcif(st, file_name, cif_ref)
    elif file_name.endswith(".pdb"):
        write_pdb(st, file_name)
    else:
        raise Exception("Cannot determine file format from file name: {}".format(file_name))
# write_model()
155
+
156
def read_shifts_txt(shifts_txt):
    """Parse a shifts text file into a dict.

    The file is read as whitespace-separated tokens. Each "pdbin"/"pdbout"
    token followed by "cell" or "shifts" yields an entry keyed
    "<pdbin|pdbout> <cell|shifts>" holding the next 6 (cell) or 3 (shifts)
    tokens converted to float.
    """
    with open(shifts_txt) as f:
        tokens = f.read().split()

    n_values = {"cell": 6, "shifts": 3}
    ret = {}
    # pair each token with its successor; the last three tokens never start a record
    for i, (who, what) in enumerate(zip(tokens[:-3], tokens[1:])):
        if who in ("pdbin", "pdbout") and what in n_values:
            first = i + 2
            ret["{} {}".format(who, what)] = [float(x) for x in tokens[first:first+n_values[what]]]

    return ret
# read_shifts_txt()
168
+
169
def read_ccp4_map(filename, header_only=False, setup=True, default_value=0., pixel_size=None, ignore_origin=True):
    """Read a CCP4/MRC map (or only its header) and log a summary of the header.

    Parameters:
        filename: path of the map file.
        header_only: read the header only; a new gemmi.FloatGrid with the
            header's dimensions, cell and space group is returned instead of
            the map data.
        setup: when true and the data were read, m.setup(default_value) is
            called; grid_start is also reordered using m.axis_positions().
        default_value: value passed to m.setup(); None is replaced by NaN.
        pixel_size: optional number or 3-item sequence; if given, the grid's
            cell lengths are rescaled by pixel_size / voxel size from the header.
        ignore_origin: when the ORIGIN header values (words 50-52) are not all
            zero, reset them to zero (a warning is logged in either case).

    Returns:
        [grid, grid_start, grid_shape]
    """
    if header_only:
        m = gemmi.read_ccp4_header(filename)
    else:
        m = gemmi.read_ccp4_map(filename)

    # header words, as labelled in the log output below
    grid_cell = [m.header_i32(x) for x in (8,9,10)]
    grid_start = [m.header_i32(x) for x in (5,6,7)]
    grid_shape = [m.header_i32(x) for x in (1,2,3)]
    axis_pos = m.axis_positions()
    axis_letters = ["","",""]
    for i, l in zip(axis_pos, "XYZ"): axis_letters[i] = l
    cell = gemmi.UnitCell(*(m.header_float(x) for x in range(11,17)))
    spacings = [1./cell.reciprocal().parameters[i]/grid_cell[i] for i in (0,1,2)]
    voxel_size = [cell.parameters[i]/grid_cell[i] for i in (0,1,2)]
    origin = [m.header_float(x) for x in (50,51,52)]
    label = m.header_str(57, 80)
    # Keep the text before the first NUL. Slicing with str.find() would drop
    # the last character when there is no NUL at all (find() returns -1).
    label = label.split("\0", 1)[0]
    logger.writeln("Reading CCP4/MRC map file {}".format(filename))
    logger.writeln(" Cell Grid: {:4d} {:4d} {:4d}".format(*grid_cell))
    logger.writeln(" Map mode: {}".format(m.header_i32(4)))
    logger.writeln(" Start: {:4d} {:4d} {:4d}".format(*grid_start))
    logger.writeln(" Shape: {:4d} {:4d} {:4d}".format(*grid_shape))
    logger.writeln(" Cell: {} {} {} {} {} {}".format(*cell.parameters))
    logger.writeln(" Axis order: {}".format(" ".join(axis_letters)))
    logger.writeln(" Space group: {}".format(m.header_i32(23)))
    logger.writeln(" Spacing: {:.6f} {:.6f} {:.6f}".format(*spacings))
    logger.writeln(" Voxel size: {:.6f} {:.6f} {:.6f}".format(*voxel_size))
    logger.writeln(" Origin: {:.6e} {:.6e} {:.6e}".format(*origin))
    if not numpy.all(numpy.asarray(origin) == 0.):
        logger.writeln(" ! WARNING: ORIGIN header is not supported.")
        if ignore_origin:
            logger.writeln(" ! WARNING: removing ORIGIN values. This might cause a misalignment between map and model.")
            for i in (50,51,52): m.set_header_float(i, 0.)
    logger.writeln(" Label: {}".format(label))
    logger.writeln("")

    if header_only:
        grid = gemmi.FloatGrid(*grid_cell if setup else grid_shape) # waste of memory, but unavoidable for now
        grid.set_unit_cell(cell)
        grid.spacegroup = gemmi.find_spacegroup_by_number(m.header_i32(23))
    else:
        grid = m.grid

    if setup:
        if not header_only:
            if default_value is None: default_value = float("nan")
            m.setup(default_value)
        grid_start = [grid_start[i] for i in axis_pos]

    if pixel_size is not None:
        # accept a single number as well as a 3-item sequence
        try:
            len(pixel_size)
        except TypeError:
            pixel_size = [pixel_size, pixel_size, pixel_size]

        logger.writeln("Overriding pixel size with {:.6f} {:.6f} {:.6f}".format(*pixel_size))
        orgc = grid.unit_cell.parameters
        new_abc = [orgc[i]*pixel_size[i]/voxel_size[i] for i in (0,1,2)]
        new_cell = gemmi.UnitCell(new_abc[0], new_abc[1], new_abc[2], orgc[3], orgc[4], orgc[5])
        grid.set_unit_cell(new_cell)
        logger.writeln(" New cell= {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}".format(*grid.unit_cell.parameters))

    return [grid, grid_start, grid_shape]
# read_ccp4_map()
233
+
234
def read_halfmaps(files, pixel_size=None, fail=True):
    """Read half maps with read_ccp4_map() and check that the first two are compatible.

    Parameters:
        files: map file paths.
        pixel_size: forwarded to read_ccp4_map().
        fail: when true, exit unless exactly two files are given.

    Returns:
        list of read_ccp4_map() results, one per file.

    Raises:
        SystemExit: wrong number of files (with fail), or the two maps hold identical values.
        AssertionError: shape, unit cell or grid start differ between the two maps.
    """
    if fail and len(files) != 2:
        raise SystemExit("Error: Give exactly two files for half maps")

    maps = []
    for path in files:
        maps.append(read_ccp4_map(path, pixel_size=pixel_size))

    (grid1, start1, _), (grid2, start2, _) = maps[0], maps[1]
    if numpy.array_equal(grid1.array, grid2.array):
        raise SystemExit("Error: Half maps have exactly the same values. Check your input.")

    assert grid1.shape == grid2.shape
    assert grid1.unit_cell == grid2.unit_cell
    assert start1 == start2

    return maps
# read_halfmaps()
247
+
248
def read_mmhkl(hklin, cif_index=0): # mtz or mmcif
    """Read reflection data from an MTZ or mmCIF file and return a gemmi Mtz.

    Parameters:
        hklin: file path; ".mtz", ".cif" or ".ent" (case-insensitive).
        cif_index: index of the reflection block to convert when reading mmCIF.

    Raises:
        RuntimeError: unsupported extension, or no space group in the data.
    """
    ext = splitext(hklin)[1]
    ext_lower = ext.lower()
    if ext_lower == ".mtz":
        logger.writeln("Reading MTZ file: {}".format(hklin))
        mtz = gemmi.read_mtz_file(hklin)
    elif ext_lower in (".cif", ".ent"):
        logger.writeln("Reading mmCIF file (hkl data): {} at index {}".format(hklin, cif_index+1))
        refln_blocks = gemmi.as_refln_blocks(gemmi.cif.read(hklin))
        converter = gemmi.CifToMtz()
        mtz = converter.convert_block_to_mtz(refln_blocks[cif_index])
    else:
        raise RuntimeError("Unsupported file type: {}".format(ext))

    if mtz.spacegroup is None:
        raise RuntimeError("Missing space group information")

    summary = (
        " Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*mtz.cell.parameters),
        " Space group: {}".format(mtz.spacegroup.xhm()),
        " Columns: {}".format(" ".join(mtz.column_labels())),
        "",
    )
    for line in summary:
        logger.writeln(line)
    return mtz
# read_mmhkl()
269
+
270
def is_mmhkl_file(hklin):
    """Tell whether hklin looks like a macromolecular reflection file.

    Returns True for .mtz, False for .hkl. For .cif/.ent the blocks are scanned
    in order: True at the first block with _refln.index_h values, False at the
    first block with _refln_index_h values. Returns None when undecided.
    """
    ext = splitext(hklin)[1].lower()
    by_extension = {".mtz": True,
                    ".hkl": False} # macromolecule files should not have .hkl extension
    if ext in by_extension:
        return by_extension[ext]
    if ext in (".cif", ".ent"):
        for block in gemmi.cif.read(hklin):
            if block.find_values("_refln.index_h"):
                return True
            if block.find_values("_refln_index_h"):
                return False
    # otherwise cannot decide
    return None
# is_mmhkl_file()
284
+
285
def software_items_from_mtz(hklin):
    """Return software items extracted from the history of an MTZ file.

    Parameters:
        hklin: a gemmi.Mtz object, or a file path. Paths whose extension is
               not ".mtz" give an empty list.

    Returns:
        the result of gemmi.get_software_from_mtz_history(), or [] when the
        input is not an MTZ file or reading fails (the traceback is logged).
    """
    try:
        if isinstance(hklin, gemmi.Mtz):
            mtz = hklin
        elif splitext(hklin)[1].lower() != ".mtz":
            return []
        else:
            # history only; reflection data are not needed
            mtz = gemmi.read_mtz_file(hklin, with_data=False)
        return gemmi.get_software_from_mtz_history(mtz.history)
    except Exception:
        # Best effort: report and carry on. Exception (rather than a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        logger.writeln(f"Failed to read software info from {hklin}")
        logger.writeln(traceback.format_exc())
        return []
# software_items_from_mtz()
299
+
300
def read_map_from_mtz(mtz_in, cols, grid_size=None, sample_rate=3):
    """Compute a map from amplitude/phase columns of a reflection file.

    Parameters:
        mtz_in: file read with read_mmhkl().
        cols: column labels; cols[0] and cols[1] are passed to get_f_phi_on_grid().
        grid_size: grid dimensions; if None they are obtained from
                   mtz.get_size_for_hkl(sample_rate=sample_rate).
        sample_rate: used only when grid_size is None.

    Returns:
        (d_min, map) where d_min is mtz.resolution_high().
    """
    mtz = read_mmhkl(mtz_in)
    d_min = mtz.resolution_high() # TODO get resolution for column?
    size = grid_size
    if size is None:
        size = mtz.get_size_for_hkl(sample_rate=sample_rate)
    f_label, phi_label = cols[0], cols[1]
    fphi_grid = mtz.get_f_phi_on_grid(f_label, phi_label, size)
    return d_min, gemmi.transform_f_phi_grid_to_map(fphi_grid)
# read_map_from_mtz()
309
+
310
def read_asu_data_from_mtz(mtz_in, cols):
    """Read one or two columns of a reflection file into a gemmi AsuData object.

    With two labels (type "F" and type "P" columns) a ComplexAsuData of
    F*exp(i*phi) is returned, phi being converted from degrees. With one label,
    IntAsuData or FloatAsuData is returned depending on column.is_integer().
    The cell is the one of the first column's dataset.
    """
    assert 0 < len(cols) < 3
    mtz = read_mmhkl(mtz_in)
    sg = mtz.spacegroup
    miller = mtz.make_miller_array()
    f = mtz.column_with_label(cols[0])
    cell = mtz.get_cell(f.dataset_id)

    if len(cols) != 2:
        # single column: pick the container from the column's value type
        asu_type = gemmi.IntAsuData if f.is_integer() else gemmi.FloatAsuData
        return asu_type(cell, sg, miller, f)

    phi = mtz.column_with_label(cols[1])
    assert f.type == "F"
    assert phi.type == "P"
    phi_rad = numpy.deg2rad(phi)
    f_comp = f * (numpy.cos(phi_rad) + 1j * numpy.sin(phi_rad))
    return gemmi.ComplexAsuData(cell, sg, miller, f_comp) # ensure asu?
# read_asu_data_from_mtz()
334
+
335
def read_cif_safe(cif_in):
    """Read a (possibly gzipped) CIF file, tolerating null characters.

    The text is loaded first; any "\\0" is replaced with "." (with a warning)
    before the string is parsed by gemmi. Returns the parsed document.
    """
    if cif_in.endswith(".gz"):
        ifs = gzip.open(cif_in, "rt")
    else:
        ifs = open(cif_in)
    with ifs:
        text = ifs.read()

    if "\0" in text: # Refmac occasionally writes \0 in some fields..
        logger.writeln(" WARNING: null character detected. Replacing with '.'")
        text = text.replace("\0", ".")
    return gemmi.cif.read_string(text)
# read_cif_safe()
344
+
345
def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True, ignore_ter=True):
    """Read a model from a PDB, mmCIF, small-molecule CIF, chemical component or SHELX file.

    The reader is chosen from the extension returned by splitext(), compared
    in lower case: .pdb/.ent, .cif/.mmcif, .ins/.res.

    Parameters:
        xyz_in: path of the model file.
        assign_het_flags: call st.assign_het_flags() on the result.
        merge_chain_parts: call st.merge_chain_parts() on the result.
        ignore_ter: passed to gemmi.read_pdb().

    Returns:
        the structure, or None when a cif file has no block recognised below.

    Raises:
        RuntimeError: unsupported extension, or a small-molecule cif without sites.
    """
    spext = splitext(xyz_in)
    st = None
    if spext[1].lower() in (".pdb", ".ent"):
        logger.writeln("Reading PDB file: {}".format(xyz_in))
        st = gemmi.read_pdb(xyz_in, ignore_ter=ignore_ter)
    elif spext[1].lower() in (".cif", ".mmcif"):
        doc = read_cif_safe(xyz_in)
        # The block type is recognised from the loops it contains; only the
        # first block that gives a structure is used.
        for block in doc:
            if block.find_loop("_atom_site.id"):
                if st is None:
                    logger.writeln("Reading mmCIF file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_block(block)
                else:
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            elif block.find_loop("_atom_site_label"):
                if st is None:
                    logger.writeln("Reading smCIF file: {}".format(xyz_in))
                    # NOTE: the file is read again from its path here, not from doc
                    ss = gemmi.read_small_structure(xyz_in)
                    if not ss.sites:
                        raise RuntimeError("No atoms found in cif file.")
                    st = model.cx_to_mx(ss)
                else:
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            elif (block.find_loop("_chem_comp_atom.x") or
                  block.find_loop("_chem_comp_atom.model_Cartn_x") or
                  block.find_loop("_chem_comp_atom.pdbx_model_Cartn_x_ideal")):
                if st is None:
                    logger.writeln("Reading chemical component file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_chemcomp_block(block)
                    # keep only the first model: delete index 1 until one is left
                    for i in range(len(st)-1):
                        del st[1]
    elif spext[1].lower() in (".ins", ".res"):
        logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
        st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
        st.setup_cell_images()
    else:
        raise RuntimeError("Unsupported file type: {}".format(spext[1]))
    if st is not None:
        # log a short summary, then apply the optional post-processing
        if st.cell.is_crystal():
            logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*st.cell.parameters))
            logger.writeln(" Space group: {}".format(st.spacegroup_hm))
        if st.ncs:
            n_given = sum(1 for x in st.ncs if x.given)
            logger.writeln(" No. strict NCS: {} ({} already applied)".format(len(st.ncs), n_given))
        logger.writeln("")
        if assign_het_flags:
            st.assign_het_flags()
        if merge_chain_parts:
            st.merge_chain_parts()
    return st
# read_structure()
399
+
400
def read_structure_from_pdb_and_mmcif(xyz_in):
    """Read a model and pick up companion metadata from a sibling PDB/mmCIF file.

    For a .pdb/.ent input, "<root>.mmcif" (if it exists) becomes the mmCIF
    metadata reference. For a .cif/.mmcif input, the input itself is the
    reference and raw remarks are copied from "<root>.pdb" if it exists.
    The extension is compared in lower case, as read_structure() does.

    Parameters:
        xyz_in: path of the model file.

    Returns:
        (st, cif_ref): the structure from read_structure() and the path of the
        mmCIF file to use as metadata reference (or None).
    """
    st = read_structure(xyz_in)
    cif_ref = None
    root, ext = splitext(xyz_in)
    ext = ext.lower()
    if ext in (".pdb", ".ent"):
        cif_in = root + ".mmcif"
        if os.path.isfile(cif_in):
            logger.writeln(" Will use mmcif metadata from {}".format(cif_in))
            cif_ref = cif_in
    elif ext in (".cif", ".mmcif"):
        cif_ref = xyz_in
        pdb_in = root + ".pdb"
        if os.path.isfile(pdb_in):
            logger.writeln(" Reading PDB REMARKS from {}".format(pdb_in))
            tmp = gemmi.read_structure(pdb_in)
            st.raw_remarks = tmp.raw_remarks

    # covers names ending with "cif" that were not handled above
    if cif_ref is None and xyz_in.endswith("cif"):
        cif_ref = xyz_in

    return st, cif_ref
# read_structure_from_pdb_and_mmcif()
422
+
423
def merge_ligand_cif(cifs_in, cif_out):
    """Merge several restraint dictionary cif files into one file.

    The comp_list / link_list / mod_list blocks of all inputs are combined into
    single list blocks, and every other block is copied. When a block name
    occurs more than once (list blocks and "mod_*" blocks excepted), only the
    last occurrence is kept.

    Parameters:
        cifs_in: paths of the input cif files.
        cif_out: path of the merged output file.
    """
    docs = [gemmi.cif.read(x) for x in cifs_in]
    # first tag of each list loop; further tags found in the inputs are appended below
    tags = dict(comp=["_chem_comp.id"],
                link=["_chem_link.id"],
                mod=["_chem_mod.id"])
    list_names = [k+"_list" for k in tags]

    # Check duplicated block names
    # names: block name -> [(document index, block index), ...]
    names = {}
    for i, doc in enumerate(docs):
        for j, b in enumerate(doc):
            if b.name not in list_names and not b.name.startswith("mod_"):
                names.setdefault(b.name, []).append((i,j))

    # Keep only last one if duplicated
    todel = []
    for k in names:
        if len(names[k]) > 1:
            for i,j in reversed(names[k][:-1]):
                logger.writeln("WARNING: removing duplicated {} from {}".format(k, cifs_in[i]))
                todel.append((i,j))
                for t in "comp", "link":
                    if k.startswith("{}_".format(t)):
                        # also remove the matching row from that document's list
                        # block; k[5:] is the name without "comp_" / "link_"
                        comp_list = docs[i].find_block("{}_list".format(t))
                        table = comp_list.find("_chem_{}.".format(t), ["id"])
                        for l in reversed([l for l, row in enumerate(table) if row.str(0) == k[5:]]):
                            table.remove_row(l)

    # delete from the highest indices first so the remaining indices stay valid
    for i,j in sorted(todel, reverse=True):
        del docs[i][j]

    # Accumulate list
    # found counts the documents having each list; tags[k] collects all loop tags seen
    found = dict(comp=0, link=0, mod=0)
    for d in docs:
        for k in tags:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            found[k] += 1
            l = b.find_loop(tags[k][0]).get_loop()
            for t in l.tags:
                if t not in tags[k]: tags[k].append(t)

    # Check duplicated modifications
    known_mods = [] # need to check monomer library?
    for d in docs:
        restraints.rename_cif_modification_if_necessary(d, known_mods)
        mod_list = d.find_block("mod_list")
        if mod_list:
            for row in mod_list.find("_chem_mod.", ["id"]):
                known_mods.append(row.str(0))

    doc = gemmi.cif.Document()
    # Add lists
    for k in tags:
        if not found[k]: continue
        lst = doc.add_new_block("{}_list".format(k))
        loop = lst.init_loop("", tags[k])
        # every tag but the first gets a "?" prefix; has() is checked for each
        # column below (presumably "?" marks a tag as optional in gemmi - confirm)
        tags_for_find = [tags[k][0]] + ["?"+x for x in tags[k][1:]]

        for d in docs:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            vals = b.find(tags_for_find)
            for v in vals:
                # columns absent from this document are written as "."
                rl = [v.get(x) if v.has(x) else "." for x in range(len(tags[k]))]
                loop.add_row(rl)

    # Add other items
    for d in docs:
        for b in d:
            if b.name not in list_names:
                doc.add_copied_block(b)

    doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
# merge_ligand_cif()
498
+
499
def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
    """Parse a SHELX ins/res file into a gemmi.SmallStructure.

    Exactly one of ins_in and lines_in must be given.

    Parameters:
        ins_in: path of the ins/res file.
        lines_in: the lines of such a file, as an iterable of strings.
        ignore_q_peaks: skip atom lines whose label starts with "Q".

    Returns:
        (ss, info): the small structure, and a dict holding "hklf" (the HKLF
        number, 0 if no HKLF instruction was found).

    Raises:
        RuntimeError: when no space group can be determined from the operators.
    """
    assert (ins_in, lines_in).count(None) == 1
    ss = gemmi.SmallStructure()

    # Instruction names; a line whose first word matches none of them is
    # treated as an atom line below.
    keywords = """
TITL CELL ZERR LATT SYMM SFAC NEUT DISP UNIT LAUE REM MORE END HKLF OMIT SHEL BASF TWIN TWST EXTI SWAT
ABIN ANSC ANSR MERG LIST SPEC RESI MOVE ANIS AFIX HFIX FRAG FEND EXYZ EADP EQIV CONN PART BIND FREE DFIX
DANG BUMP SAME SADI CHIV FLAT DELU SIMU RIGU PRIG DEFS ISOR XNPD NCSY SUMP L.S. CGLS BLOC DAMP STIR WGHT
FVAR WIGL BOND CONF MPLA RTAB HTAB ACTA SIZE TEMP WPDB FMAP GRID PLAN TIME HOPE MOLE
""".split()
    re_kwd = re.compile("^({})_?".format("|".join(keywords)))

    # remove comments/blanks and concatenate lines
    lines = []
    concat_flag = False # True while the previous line ended with "=" (continuation)
    if ins_in:
        with open(ins_in) as f:
            lines_in = f.readlines()
    for l in lines_in:
        l = l.rstrip()
        if l.startswith("REM"): continue
        if l.startswith(";"): continue
        if not l.strip(): continue

        if l.endswith("="):
            l = l[:l.rindex("=")]
            if concat_flag:
                lines[-1] += l
            else:
                lines.append(l)
            concat_flag = True
        elif concat_flag:
            lines[-1] += l
            concat_flag = False
        else:
            lines.append(l)

    # parse lines
    sfacs = [] # elements in SFAC order; atom lines refer to them by 1-based index
    latt, symms = 1, []
    fvar = []
    prev_free_u_iso = -1 # last positive (free) u_iso seen; used for negative u_iso codes
    info = dict(hklf=0)
    cif2cart = None
    for l in lines:
        sp = l.split()
        ins = sp[0].upper()
        if ins == "TITL":
            pass
        elif l.startswith(" "): # title continued? instructions after space is allowed??
            pass
        elif ins == "CELL":
            #ss.wavelength = float(sp[1]) # next gemmi ver.
            ss.cell.set(*map(float, sp[2:]))
            cif2cart = model.cif2cart_matrix(ss.cell)
        elif ins == "LATT":
            latt = int(sp[1])
        elif ins == "SYMM":
            symms.append(gemmi.Op("".join(sp[1:])).wrap())
        elif ins == "SFAC": # TODO check numbers?
            if len(sp) < 2: continue
            sfacs.append(gemmi.Element(sp[1]))
            if len(sp) > 2:
                # if the second item is not a number, the rest are taken as element names
                try: float(sp[2])
                except ValueError:
                    sfacs.extend([gemmi.Element(x) for x in sp[2:]])
        elif ins == "HKLF":
            info["hklf"] = int(sp[1])
        elif ins == "FVAR":
            fvar.extend(map(float, sp[1:]))
        elif not re_kwd.search(ins):
            # atom line: label, SFAC index, x, y, z, occupancy code, U value(s)
            if not 4 < len(sp) < 13:
                logger.writeln("cannot parse this line: {}".format(l))
                continue
            site = gemmi.SmallStructure.Site()
            site.label = sp[0]
            try:
                site.element = sfacs[int(sp[1])-1]
            except:
                logger.error("failed to parse: {}".format(l))
                continue

            if site.label.startswith("Q") and ignore_q_peaks:
                logger.writeln("skip Q peak: {}".format(l))
                continue

            site.fract.fromlist(list(map(float, sp[2:5])))
            if len(sp) > 5:
                q_code = float(sp[5])
                # decompose q_code = 10 * m + p, where -5 < p <= 5 and m is an integer.
                m, p = divmod(q_code, 10.0)
                m = int(m)
                if p > 5.0:
                    p -= 10.0
                    m += 1
                if abs(m) > 1: # reference to an FVAR
                    # NOTE(review): only logged; the fvar lookup below is still attempted
                    if abs(m) > len(fvar):
                        logger.error("this line references an undefined FVAR: {}".format(l))
                    if m < 0:
                        # Here the SHELXL manual contradicts itself.
                        # It says -20.25 is m = -2, p = -0.25 but interprets it as 0.25 * (1 - fv2).
                        occ = (1 - fvar[-m - 1]) * -p
                    else:
                        occ = fvar[m - 1] * p
                else:
                    occ = p

                site.occ = occ

            if len(sp) > 11:
                # six U values; note the order in which they are handed to SMat33d
                u = list(map(float, sp[6:12]))
                site.aniso = gemmi.SMat33d(u[0], u[1], u[2], u[5], u[4], u[3])
                if cif2cart is None:
                    logger.writeln("WARNING: cannot calculate u_eq")
                    site.u_iso = sum(u[:3]) / 3.
                else:
                    site.u_iso = site.aniso.transformed_by(cif2cart).trace() / 3

                prev_free_u_iso = site.u_iso
                logger.writeln(f"updated prev_free_u_iso to {site.u_iso} at {site.label}")
            else:
                u_iso_code = float(sp[6])
                if -5 < u_iso_code and u_iso_code < -0.5:
                    # a code in this range is used as a (negated) multiplier of prev_free_u_iso
                    if prev_free_u_iso > 0:
                        site.u_iso = -u_iso_code * prev_free_u_iso
                        # print(f"{prev_free_u_iso} * {-u_iso_code} = {site.u_iso} at {site.label}")
                    else:
                        logger.writeln(f"WARNING: parent atom not found for {site.label}")
                elif u_iso_code > 0:
                    site.u_iso = u_iso_code
                    prev_free_u_iso = site.u_iso
                    # print(f"updated prev_free_u_iso at {site.label}")
                else:
                    logger.writeln(f"WARNING: negative Ueq outside the (-0.5, -5) range for {site.label}")

            ss.add_site(site)

    # Determine space group
    if gemmi.Op() not in symms: # identity operator may not be present in ins file
        symms.append(gemmi.Op())

    # centring translations for each |LATT| value
    lops = {1: [], # P
            2: [gemmi.Op("x+1/2,y+1/2,z+1/2")], # I
            3: [gemmi.Op("x+2/3,y+1/3,z+1/3"), # R
                gemmi.Op("x+1/3,y+2/3,z+2/3")],
            4: [gemmi.Op("x,y+1/2,z+1/2"), # F
                gemmi.Op("x+1/2,y,z+1/2"),
                gemmi.Op("x+1/2,y+1/2,z")],
            5: [gemmi.Op("x,y+1/2,z+1/2")], # A
            6: [gemmi.Op("x+1/2,y,z+1/2")], # B
            7: [gemmi.Op("x+1/2,y+1/2,z")], # C
            }
    for op in lops[abs(latt)]:
        symms.extend([x*op for x in symms])
    # for positive LATT the products with the inversion are added as well
    if latt > 0:
        symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])

    # set() removes duplicated operators
    ss.symops = [op.triplet() for op in set(symms)]
    ss.determine_and_set_spacegroup("s")
    # in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
    if ss.spacegroup is None:
        raise RuntimeError("Cannot construct space group from symbols: {}".format(ss.symops))
    return ss, info
# read_shelx_ins()
663
+
664
def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
    """Read fixed-width SHELX hkl data and return a merged gemmi Mtz.

    Exactly one of file_in and lines_in must be given. Each record holds
    h, k, l in three 4-character fields, followed by the value (columns 12-20)
    and its sigma (columns 20-28). Reading stops at the 0 0 0 record or at the
    first record whose indices cannot be parsed.

    Parameters:
        cell, sg: unit cell and space group given to gemmi.Intensities.
        hklf: HKLF number; for 3 the intensity columns are relabelled as amplitudes.
        file_in: path of the hkl file.
        lines_in: the lines of such a file.
    """
    assert (file_in, lines_in).count(None) == 1
    if file_in:
        with open(file_in) as f:
            lines_in = f.readlines()

    indices, values, sigmas = [], [], []
    for line in lines_in:
        # skip ";" lines, blank lines and lines shorter than 25 characters
        if line.startswith(";") or not line.strip() or len(line) < 25:
            continue
        try:
            idx = tuple(int(line[start:start+4]) for start in (0, 4, 8))
        except ValueError:
            logger.writeln("Error while parsing HKL part: {}".format(line))
            break

        if idx == (0,0,0):
            break
        indices.append(idx)
        values.append(float(line[12:20]))
        sigmas.append(float(line[20:28]))
        # batch = l[28:32]
        # wavelength = l[32:40]

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, numpy.asarray(indices), numpy.asarray(values), numpy.asarray(sigmas))
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # no negative isign after merging -> treat the data as mean intensities
    if not (ints.isign_array < 0).any():
        ints.type = gemmi.DataType.Mean
    nobs = ints.nobs_array
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(nobs),
                                                                         numpy.mean(nobs),
                                                                         numpy.min(nobs)))
    mtz = ints.prepare_merged_mtz(with_nobs=False)
    if hklf != 3:
        return mtz

    # HKLF 3: the values are amplitudes, so rename the columns and set their types
    conv = {"IMEAN": ("FP", "F"),
            "SIGIMEAN": ("SIGFP", "Q"),
            "I(+)": ("F(+)", "G"),
            "SIGI(+)": ("SIGF(+)", "L"),
            "I(-)": ("F(-)", "G"),
            "SIGI(-)": ("SIGF(-)", "L"),
            }
    for col in mtz.columns:
        new_label_type = conv.get(col.label)
        if new_label_type is not None:
            col.label, col.type = new_label_type
    return mtz
# read_shelx_hkl()
707
+
708
def read_smcif_hkl(cif_in, cell_if_absent=None, sg_if_absent=None):
    """Read intensities from a small-molecule cif file and return a merged gemmi Mtz.

    Parameters:
        cif_in: path of a cif file with a single block.
        cell_if_absent: unit cell used when the file has no complete, numeric
            set of _cell_length_* / _cell_angle_* values.
        sg_if_absent: space group used when none can be found from the
            symmetry operator loops of the file.

    Raises:
        RuntimeError: when neither the file nor the fallbacks give both a cell
            and a space group.
    """
    # Very crude support for smcif - just because I do not know other varieties.
    # TODO other possible data types? (amplitudes?)
    # TODO check _refln_observed_status?
    logger.writeln("Reading hkl data from smcif: {}".format(cif_in))
    b = gemmi.cif.read(cif_in).sole_block()

    cell_tags = ["_cell_length_{}".format(x) for x in ("a", "b", "c")]
    cell_tags += ["_cell_angle_{}".format(x) for x in ("alpha", "beta", "gamma")]
    raw_cell = [b.find_value(t) for t in cell_tags]
    cell = None
    if all(v is not None for v in raw_cell):
        # as_number() understands values with a standard uncertainty such as
        # "10.123(4)", which float() rejects; it gives NaN for non-numbers.
        cell_par = [gemmi.cif.as_number(v) for v in raw_cell]
        if not numpy.isnan(cell_par).any():
            cell = gemmi.UnitCell(*cell_par)
    if cell is None:
        logger.writeln(" WARNING: no unit cell in this file")
        cell = cell_if_absent
    else:
        logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*cell.parameters))

    # try both tag names for the operator loop; fall back if neither gives a group
    for optag in ("_space_group_symop_operation_xyz", "_symmetry_equiv_pos_as_xyz"):
        ops = [gemmi.Op(gemmi.cif.as_string(x)) for x in b.find_loop(optag)]
        sg = gemmi.find_spacegroup_by_ops(gemmi.GroupOps(ops))
        if sg:
            logger.writeln(" Space group: {}".format(sg.xhm()))
            break
    else:
        sg = sg_if_absent

    if cell is None or sg is None:
        raise RuntimeError("Cell and/or symmetry operations not found in {}".format(cif_in))

    # locate the index / intensity / sigma columns, preferring the _refln_ loop
    l = b.find_values("_refln_index_h").get_loop()
    if l:
        i_hkl = [l.tags.index("_refln_index_{}".format(h)) for h in "hkl"]
        i_int = l.tags.index("_refln_F_squared_meas")
        i_sig = l.tags.index("_refln_F_squared_sigma")
    else:
        l = b.find_values("_diffrn_refln_index_h").get_loop()
        i_hkl = [l.tags.index("_diffrn_refln_index_{}".format(h)) for h in "hkl"]
        i_int = l.tags.index("_diffrn_refln_intensity_net") # this may not always exist?
        i_sig = l.tags.index("_diffrn_refln_intensity_u")
    hkls, vals, sigs = [], [], []
    for i in range(l.length()):
        hkl = [gemmi.cif.as_int(l[i, j]) for j in i_hkl]
        hkls.append(hkl)
        vals.append(gemmi.cif.as_number(l[i, i_int]))
        sigs.append(gemmi.cif.as_number(l[i, i_sig]))

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, numpy.asarray(hkls), numpy.asarray(vals), numpy.asarray(sigs))
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # no negative isign after merging -> treat the data as mean intensities
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                         numpy.mean(ints.nobs_array),
                                                                         numpy.min(ints.nobs_array)))
    logger.writeln("")
    return ints.prepare_merged_mtz(with_nobs=False)
# read_smcif_hkl()
762
+
763
def read_smcif_shelx(cif_in):
    """Read the embedded SHELX res and hkl texts of a small-molecule cif file.

    Returns:
        (mtz, ss, info): reflection data from _shelx_hkl_file, and the
        structure and info dict parsed from _shelx_res_file.

    Raises:
        RuntimeError: when either item is missing from the file.
    """
    logger.writeln("Reading small molecule cif: {}".format(cif_in))
    block = gemmi.cif.read(cif_in).sole_block()
    res_text = block.find_value("_shelx_res_file")
    hkl_text = block.find_value("_shelx_hkl_file")
    if not res_text:
        raise RuntimeError("_shelx_res_file not found in {}".format(cif_in))
    if not hkl_text:
        raise RuntimeError("_shelx_hkl_file not found in {}".format(cif_in))

    ss, info = read_shelx_ins(lines_in=res_text.splitlines())
    hklf = info.get("hklf")
    mtz = read_shelx_hkl(ss.cell, ss.spacegroup, hklf, lines_in=hkl_text.splitlines())
    return mtz, ss, info
# read_smcif_shelx()
775
+
776
def read_small_molecule_files(files):
    """Find coordinates and reflection data among small-molecule input files.

    The files are scanned twice: first for a structure (and the HKLF number of
    any SHELX res/ins text), then for reflection data. If several files give a
    structure or data, the last one is used.

    Parameters:
        files: paths of cif / res / ins / pdb / ent / mmcif / hkl files.

    Returns:
        (st, mtz); mtz is None when no reflection data were found.
        (None, None) when no coordinates were found.
    """
    st, mtz, hklf = None, None, None
    # first pass - find structure
    for filename in files:
        ext = splitext(filename)[1]
        if ext in (".cif", ".res", ".ins", ".pdb", ".ent", ".mmcif"):
            try:
                st_read = read_structure(filename)
            except Exception:
                continue
            # read_structure() gives None for a cif without coordinates (e.g.
            # reflection data only); that must not replace a structure that
            # was already found in another file.
            if st_read is not None:
                st = st_read
                logger.writeln("Coordinates read from: {}".format(filename))
            if ext in (".cif", ".res", ".ins"):
                if ext == ".cif":
                    b = gemmi.cif.read(filename).sole_block()
                    res_str = b.find_value("_shelx_res_file")
                else:
                    with open(filename) as f:
                        res_str = f.read()
                if res_str:
                    _, info = read_shelx_ins(lines_in=res_str.splitlines())
                    hklf = info["hklf"]
    if st is None:
        logger.writeln("ERROR: coordinates not found.")
        return None, None

    # second pass - find hkl
    for filename in files:
        ext = splitext(filename)[1]
        try:
            b = gemmi.cif.read(filename).sole_block()
            hkl_str = b.find_value("_shelx_hkl_file")
            if hkl_str:
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, lines_in=hkl_str.splitlines())
                logger.writeln("reflection data read from: {}".format(filename))
            elif b.find_loop("_refln_index_h") or b.find_loop("_diffrn_refln_index_h"):
                mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
        except ValueError: # not a cif file
            if ext == ".hkl":
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
                logger.writeln("reflection data read from: {}".format(filename))

    return st, mtz
818
+
819
def read_sequence_file(f):
    """Read a FASTA-like sequence file.

    Lines starting with ">" open a new entry named by the rest of the line.
    Other non-blank lines are appended to the current entry after removing
    "*" and "-" and converting to upper case; sequence lines before any ">"
    line go to an entry with an empty name.

    Returns:
        a list of [name, sequence] items.

    Raises:
        RuntimeError: when a sequence line has a character other than A-Z.
    """
    # TODO needs improvement
    entries = []
    with open(f) as ifs:
        for idx, raw in enumerate(ifs):
            line = raw.strip()
            if not line:
                continue
            if line.startswith(">"):
                entries.append([line[1:].strip(), ""])
                continue
            if not entries:
                entries.append(["", ""])
            residues = line.replace("*", "").replace("-", "").upper()
            if re.fullmatch("[A-Z]*", residues) is None:
                raise RuntimeError(f"Invalid character in the sequence file: {f}:{idx+1}")
            entries[-1][1] += residues
    return entries