servalcat 0.4.60__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (44) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,745 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import model
11
+ from servalcat.utils import hkl
12
+ from servalcat.utils import restraints
13
+ import os
14
+ import shutil
15
+ import glob
16
+ import re
17
+ import subprocess
18
+ import gemmi
19
+ import numpy
20
+ import numpy.lib.recfunctions
21
+ import gzip
22
+
23
def splitext(path):
    """Split *path* into (root, ext), looking through a compression suffix.

    A trailing ".bz2" or ".gz" is removed first, so that the returned
    extension is the one of the underlying file ("a.pdb.gz" -> ".pdb").
    The result is what os.path.splitext() returns for the remaining name.
    """
    if path.endswith((".bz2", ".gz")):
        # drop everything from the last dot, i.e. the compression suffix
        path = path[:path.rindex(".")]
    return os.path.splitext(path)
# splitext()
29
+
30
def rotate_file(filename, copy=False):
    """Rotate *filename* to "filename.1", shifting existing numbered copies up.

    Existing files named "filename.N" (N written as a canonical integer, so
    "003" is ignored) are renamed to "filename.N+1", highest number first.
    Then *filename* itself is renamed (or copied when *copy* is true) to
    "filename.1".

    Returns the new name "filename.1", or None when *filename* does not exist.
    """
    if not os.path.exists(filename):
        return None

    prefix = filename + "."
    # make list [ [filename, number], ... ]
    old_list = []
    # glob.escape() keeps characters like "[" or "*" in the name literal;
    # without it such names would never match their rotated siblings.
    for f in glob.glob(glob.escape(filename) + ".*"):
        # take only what follows "filename." (str.replace would also strip
        # any later occurrence of the same text)
        suffix = f[len(prefix):]
        try:
            i = int(suffix)
        except ValueError:
            continue
        if str(i) == suffix: # ignore if suffix was such as 003...
            old_list.append([f, i])

    old_list.sort(key=lambda x: x[1])

    # rotate files, starting from the largest number so nothing is overwritten
    for f, i in reversed(old_list):
        logger.writeln("Rotating file: {}".format(f))
        os.rename(f, "%s.%d" % (f[:f.rfind(".")], i+1))

    if copy:
        shutil.copyfile(filename, filename + ".1")
    else:
        os.rename(filename, filename + ".1")

    return filename + ".1"
# rotate_file()
59
+
60
def check_model_format(xyzin):
    """Guess the model format from the file name only.

    Returns ".mmcif" when the extension (after removing .gz/.bz2) ends with
    "cif", and ".pdb" for anything else. The file content is not inspected.
    """
    # TODO check format actually
    # TODO mmjson is possible?
    extension = splitext(xyzin)[1]
    return ".mmcif" if extension.endswith("cif") else ".pdb"
# check_model_format()
69
+
70
def write_mmcif(st, cif_out, cif_ref=None):
    """
    Write structure st to cif_out as mmCIF, working on a clone of st.

    Without cif_ref, a new document with all output groups is written.
    With cif_ref, the first block of that file having _atom_site.id is
    updated with a selected set of groups and the whole reference document
    is written; if the reference cannot be read or has no such block, the
    function falls back to writing without reference.

    Refmac fails if _entry.id is longer than 80 chars including quotations
    """
    st_new = st.clone()
    logger.writeln("Writing mmCIF file: {}".format(cif_out))

    if not cif_ref:
        st_new.name = st_new.name[:78] # this will become _entry.id
        if "_entry.id" in st_new.info:
            st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
        all_groups = gemmi.MmcifOutputGroups(True)
        new_doc = gemmi.cif.Document()
        new_block = new_doc.add_new_block("new")
        st_new.update_mmcif_block(new_block, all_groups)
        new_doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
        return

    logger.writeln(" using mmCIF metadata from: {}".format(cif_ref))
    # start from nothing and enable only the groups to be overwritten
    groups = gemmi.MmcifOutputGroups(False)
    for group_name in ("group_pdb", "ncs", "atoms", "cell", "scale",
                       "assembly", "entity", "entity_poly_seq", "cis", "conn"):
        setattr(groups, group_name, True)
    # FIXME is this all?
    try:
        doc = read_cif_safe(cif_ref)
    except Exception as e:
        # Sometimes refmac writes a broken mmcif file..
        logger.error("Error in mmCIF reading: {}".format(e))
        logger.error(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    blocks = [b for b in doc if b.find_loop("_atom_site.id")]
    if not blocks:
        logger.writeln("No _atom_site found in {}".format(cif_ref))
        logger.writeln(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    block = blocks[0]
    # to remove fract_transf_matrix. maybe we should keep some (like _atom_sites.solution_hydrogens)?
    # we do not want this because cell may be updated
    block.find_mmcif_category("_atom_sites.").erase()
    st_new.update_mmcif_block(block, groups)
    if "_entry.id" in st_new.info:
        st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
    doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
# write_mmcif()
119
+
120
def write_pdb(st, pdb_out):
    """Write structure st to pdb_out in PDB format.

    If any chain ID (as listed by model.all_chain_ids) is longer than two
    characters, a clone with shortened chain names is written instead, so
    the caller's structure is left unchanged.
    """
    logger.writeln("Writing PDB file: {}".format(pdb_out))
    has_long_id = any(len(chain_id) > 2 for chain_id in model.all_chain_ids(st))
    if has_long_id:
        st = st.clone()
        st.shorten_chain_names()
    st.write_pdb(pdb_out, use_linkr=True)
# write_pdb()
128
+
129
def write_model(st, prefix=None, file_name=None, pdb=False, cif=False, cif_ref=None):
    """Write structure st as mmCIF and/or PDB.

    When file_name is given, the format is chosen from its ending ("cif" ->
    mmCIF, ".pdb" -> PDB) and prefix/pdb/cif are ignored; any other ending
    raises Exception. Otherwise prefix + ".mmcif" is written if cif is true
    and prefix + ".pdb" if pdb is true.
    """
    if not file_name:
        if cif:
            write_mmcif(st, prefix+".mmcif", cif_ref)
        if pdb:
            write_pdb(st, prefix+".pdb")
        return

    if file_name.endswith("cif"):
        write_mmcif(st, file_name, cif_ref)
        return
    if file_name.endswith(".pdb"):
        write_pdb(st, file_name)
        return
    raise Exception("Cannot determine file format from file name: {}".format(file_name))
# write_model()
143
+
144
def read_shifts_txt(shifts_txt):
    """Read "pdbin"/"pdbout" cell and shift values from a text file.

    The file is treated as a stream of whitespace-separated tokens. Whenever
    "pdbin" or "pdbout" is followed by "cell" (6 numbers) or "shifts"
    (3 numbers), the numbers are stored as floats under the key
    "<pdbin|pdbout> <cell|shifts>".

    Returns a dict mapping those keys to lists of floats.
    """
    ret = {}
    # the context manager makes sure the file is closed; split() already
    # treats newlines as separators
    with open(shifts_txt) as ifs:
        s = ifs.read().split()

    for i in range(len(s)-3):
        if s[i] in ("pdbin", "pdbout") and s[i+1] in ("cell", "shifts"):
            n = 6 if s[i+1] == "cell" else 3
            ret["{} {}".format(s[i], s[i+1])] = [float(x) for x in s[i+2:i+2+n]]

    return ret
# read_shifts_txt()
155
+
156
def read_ccp4_map(filename, setup=True, default_value=0., pixel_size=None, ignore_origin=True):
    """Read a CCP4/MRC map with gemmi, log its header and return the grid.

    Parameters:
      filename: map file to read.
      setup: if true, m.setup(default_value) is called and grid_start is
        reordered according to m.axis_positions().
      default_value: value passed to m.setup(); None is replaced by NaN.
      pixel_size: a single number or a sequence of three; when given, the
        a, b, c cell lengths are rescaled by pixel_size/voxel_size.
      ignore_origin: if true and the ORIGIN header words (50-52) are not all
        zero, they are reset to zero (with a warning).

    Returns [grid, grid_start, grid_shape].
    """
    m = gemmi.read_ccp4_map(filename)
    g = m.grid
    grid_cell = [m.header_i32(x) for x in (8,9,10)]
    grid_start = [m.header_i32(x) for x in (5,6,7)]
    grid_shape = [m.header_i32(x) for x in (1,2,3)]
    axis_pos = m.axis_positions()
    axis_letters = ["","",""]
    for i, l in zip(axis_pos, "XYZ"): axis_letters[i] = l
    spacings = [1./g.unit_cell.reciprocal().parameters[i]/grid_cell[i] for i in (0,1,2)]
    voxel_size = [g.unit_cell.parameters[i]/grid_cell[i] for i in (0,1,2)]
    origin = [m.header_float(x) for x in (50,51,52)]
    label = m.header_str(57, 80)
    # Keep the text before the first NUL. str.find() returns -1 when there is
    # no NUL, which would silently chop the last character of the label.
    label = label.split("\0", 1)[0]
    logger.writeln("Reading CCP4/MRC map file {}".format(filename))
    logger.writeln(" Cell Grid: {:4d} {:4d} {:4d}".format(*grid_cell))
    logger.writeln(" Map mode: {}".format(m.header_i32(4)))
    logger.writeln(" Start: {:4d} {:4d} {:4d}".format(*grid_start))
    logger.writeln(" Shape: {:4d} {:4d} {:4d}".format(*grid_shape))
    logger.writeln(" Cell: {} {} {} {} {} {}".format(*g.unit_cell.parameters))
    logger.writeln(" Axis order: {}".format(" ".join(axis_letters)))
    logger.writeln(" Space group: {}".format(m.header_i32(23)))
    logger.writeln(" Spacing: {:.6f} {:.6f} {:.6f}".format(*spacings))
    logger.writeln(" Voxel size: {:.6f} {:.6f} {:.6f}".format(*voxel_size))
    logger.writeln(" Origin: {:.6e} {:.6e} {:.6e}".format(*origin))
    if not numpy.all(numpy.asarray(origin) == 0.):
        logger.writeln(" ! WARNING: ORIGIN header is not supported.")
        if ignore_origin:
            logger.writeln(" ! WARNING: removing ORIGIN values. This might cause a misalignment between map and model.")
            for i in (50,51,52): m.set_header_float(i, 0.)
    logger.writeln(" Label: {}".format(label))
    logger.writeln("")

    if setup:
        if default_value is None: default_value = float("nan")
        m.setup(default_value)
        # express the start indices in the axis order given by axis_pos
        grid_start = [grid_start[i] for i in axis_pos]

    if pixel_size is not None:
        # accept a scalar as well as a 3-sequence
        try:
            len(pixel_size)
        except TypeError:
            pixel_size = [pixel_size, pixel_size, pixel_size]

        logger.writeln("Overriding pixel size with {:.6f} {:.6f} {:.6f}".format(*pixel_size))
        orgc = m.grid.unit_cell.parameters
        new_abc = [orgc[i]*pixel_size[i]/voxel_size[i] for i in (0,1,2)]
        m.grid.unit_cell = gemmi.UnitCell(new_abc[0], new_abc[1], new_abc[2],
                                          orgc[3], orgc[4], orgc[5])
        logger.writeln(" New cell= {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}".format(*m.grid.unit_cell.parameters))

    return [m.grid, grid_start, grid_shape]
# read_ccp4_map()
209
+
210
def read_halfmaps(files, pixel_size=None, fail=True):
    """Read half maps with read_ccp4_map() and check they are compatible.

    When fail is true, anything other than exactly two files ends the program
    via SystemExit. Identical map values also raise SystemExit. Shape, unit
    cell and grid start of the first two maps are checked with assertions.

    Returns the list of read_ccp4_map() results, one per file.
    """
    if fail and len(files) != 2:
        raise SystemExit("Error: Give exactly two files for half maps")

    maps = [read_ccp4_map(path, pixel_size=pixel_size) for path in files]
    first, second = maps[0], maps[1]
    grid1, grid2 = first[0], second[0]
    if numpy.array_equal(grid1.array, grid2.array):
        raise SystemExit("Error: Half maps have exactly the same values. Check your input.")

    assert grid1.shape == grid2.shape
    assert grid1.unit_cell == grid2.unit_cell
    assert first[1] == second[1]

    return maps
# read_halfmaps()
223
+
224
def read_mmhkl(hklin, cif_index=0): # mtz or mmcif
    """Read reflection data from an MTZ or mmCIF file and return an Mtz object.

    The type is chosen from the extension (case-insensitive, after removing
    .gz/.bz2): ".mtz" is read directly; ".cif"/".ent" are converted from the
    reflection block at position cif_index. Other extensions, or a result
    without space group, raise RuntimeError.
    """
    raw_ext = splitext(hklin)[1]
    ext = raw_ext.lower()
    if ext == ".mtz":
        logger.writeln("Reading MTZ file: {}".format(hklin))
        mtz = gemmi.read_mtz_file(hklin)
    elif ext in (".cif", ".ent"):
        # the index is shown 1-based in the log
        logger.writeln("Reading mmCIF file (hkl data): {} at index {}".format(hklin, cif_index+1))
        refln_blocks = gemmi.as_refln_blocks(gemmi.cif.read(hklin))
        converter = gemmi.CifToMtz()
        mtz = converter.convert_block_to_mtz(refln_blocks[cif_index])
    else:
        raise RuntimeError("Unsupported file type: {}".format(raw_ext))

    if mtz.spacegroup is None:
        raise RuntimeError("Missing space group information")

    logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*mtz.cell.parameters))
    logger.writeln(" Space group: {}".format(mtz.spacegroup.xhm()))
    logger.writeln(" Columns: {}".format(" ".join(mtz.column_labels())))
    logger.writeln("")
    return mtz
# read_mmhkl()
245
+
246
def is_mmhkl_file(hklin):
    """Tell whether hklin looks like a macromolecular reflection file.

    Returns True for ".mtz" and for ".cif"/".ent" files in which a block with
    _refln.index_h is met first, False for ".hkl" and for cif files in which
    a block with _refln_index_h is met first, and None when undecidable.
    """
    ext = splitext(hklin)[1].lower()
    if ext == ".mtz":
        return True
    if ext == ".hkl": # macromolecule files should not have .hkl extension
        return False
    if ext not in (".cif", ".ent"):
        return None # cannot decide

    for block in gemmi.cif.read(hklin):
        if block.find_values("_refln.index_h"):
            return True
        if block.find_values("_refln_index_h"):
            return False
    # otherwise cannot decide
    return None
# is_mmhkl_file()
260
+
261
def read_map_from_mtz(mtz_in, cols, grid_size=None, sample_rate=3):
    """Compute a map from amplitude/phase columns of a reflection file.

    cols[0] and cols[1] are the labels passed to get_f_phi_on_grid(). When
    grid_size is None it is obtained from get_size_for_hkl() with the given
    sample_rate.

    Returns (d_min, map) where d_min is mtz.resolution_high().
    """
    mtz = read_mmhkl(mtz_in)
    d_min = mtz.resolution_high() # TODO get resolution for column?
    size = grid_size
    if size is None:
        size = mtz.get_size_for_hkl(sample_rate=sample_rate)
    f_label, phi_label = cols[0], cols[1]
    coef_grid = mtz.get_f_phi_on_grid(f_label, phi_label, size)
    return d_min, gemmi.transform_f_phi_grid_to_map(coef_grid)
# read_map_from_mtz()
270
+
271
def read_asu_data_from_mtz(mtz_in, cols):
    """Read one or two columns of a reflection file as gemmi AsuData.

    With one label, IntAsuData or FloatAsuData is returned depending on
    column.is_integer(). With two labels (types "F" and "P" are asserted),
    the phase is converted from degrees and ComplexAsuData is returned.
    """
    assert 0 < len(cols) < 3
    mtz = read_mmhkl(mtz_in)
    sg = mtz.spacegroup
    miller = mtz.make_miller_array()
    f = mtz.column_with_label(cols[0])
    cell = mtz.get_cell(f.dataset_id)

    if len(cols) != 2:
        # single column: choose the container from the column's value type
        asu_type = gemmi.IntAsuData if f.is_integer() else gemmi.FloatAsuData
        return asu_type(cell, sg, miller, f)

    phi = mtz.column_with_label(cols[1])
    assert f.type == "F"
    assert phi.type == "P"
    phi = numpy.deg2rad(phi)
    f_comp = f * (numpy.cos(phi) + 1j * numpy.sin(phi))
    return gemmi.ComplexAsuData(cell, sg, miller, f_comp) # ensure asu?
# read_asu_data_from_mtz()
295
+
296
def read_cif_safe(cif_in):
    """Read a CIF file (optionally .gz) into a gemmi.cif.Document.

    The whole text is read first so that null characters, which would
    otherwise break parsing, can be replaced with '.' before the string is
    handed to gemmi.cif.read_string().
    """
    opener = gzip.open if cif_in.endswith(".gz") else open
    # the context manager closes the (possibly gzip) handle after reading
    with opener(cif_in, "rt") as ifs:
        s = ifs.read()
    if "\0" in s: # Refmac occasionally writes \0 in some fields..
        logger.writeln(" WARNING: null character detected. Replacing with '.'")
        s = s.replace("\0", ".")
    doc = gemmi.cif.read_string(s)
    return doc
# read_cif_safe()
305
+
306
def read_structure(xyz_in):
    """Read a model file and return the structure, or None if nothing was found.

    The reader is chosen from the lower-cased extension (after removing
    .gz/.bz2):
      .pdb/.ent   -> gemmi.read_pdb()
      .cif/.mmcif -> blocks are scanned in order; see comments below
      .ins/.res   -> read_shelx_ins(), converted with model.cx_to_mx()
    Any other extension raises RuntimeError. A cif file without a usable
    block yields None.
    """
    spext = splitext(xyz_in)
    st = None
    if spext[1].lower() in (".pdb", ".ent"):
        logger.writeln("Reading PDB file: {}".format(xyz_in))
        st = gemmi.read_pdb(xyz_in)
    elif spext[1].lower() in (".cif", ".mmcif"):
        doc = read_cif_safe(xyz_in)
        for block in doc:
            # macromolecular coordinates (_atom_site.id)
            if block.find_loop("_atom_site.id"):
                if st is None:
                    logger.writeln("Reading mmCIF file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_block(block)
                else:
                    # a structure was already taken from an earlier block
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            # small-molecule coordinates (_atom_site_label)
            elif block.find_loop("_atom_site_label"):
                if st is None:
                    logger.writeln("Reading smCIF file: {}".format(xyz_in))
                    # note: the file is re-read from disk here, not from doc
                    ss = gemmi.read_small_structure(xyz_in)
                    if not ss.sites:
                        raise RuntimeError("No atoms found in cif file.")
                    st = model.cx_to_mx(ss)
                else:
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            # chemical component (monomer) coordinates; no warning/break if
            # a structure was already found
            elif (block.find_loop("_chem_comp_atom.x") or
                  block.find_loop("_chem_comp_atom.model_Cartn_x") or
                  block.find_loop("_chem_comp_atom.pdbx_model_Cartn_x_ideal")):
                if st is None:
                    logger.writeln("Reading chemical component file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_chemcomp_block(block)
    elif spext[1].lower() in (".ins", ".res"):
        logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
        # read_shelx_ins() returns (small structure, info); only the former is used
        st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
        st.setup_cell_images()
    else:
        raise RuntimeError("Unsupported file type: {}".format(spext[1]))
    # log a short summary of what was read
    if st is not None:
        if st.cell.is_crystal():
            logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*st.cell.parameters))
            logger.writeln(" Space group: {}".format(st.spacegroup_hm))
        if st.ncs:
            # count operators flagged as "given"
            n_given = sum(1 for x in st.ncs if x.given)
            logger.writeln(" No. strict NCS: {} ({} already applied)".format(len(st.ncs), n_given))
        logger.writeln("")
    return st
# read_structure()
354
+
355
def read_structure_from_pdb_and_mmcif(xyz_in):
    """Read a model and locate a companion file sharing the same root name.

    For a PDB input (.pdb/.ent), "<root>.mmcif" is used as mmCIF metadata
    reference if it exists. For an mmCIF input (.cif/.mmcif), the input
    itself is the reference, and raw REMARK records are copied from
    "<root>.pdb" if it exists.

    Returns (st, cif_ref); cif_ref is None when no reference is available.
    """
    st = read_structure(xyz_in)
    cif_ref = None
    spext = splitext(xyz_in)
    # compare case-insensitively, as read_structure() does, so that e.g.
    # "MODEL.PDB" also gets its companion file looked up
    ext = spext[1].lower()
    if ext in (".pdb", ".ent"):
        cif_in = spext[0] + ".mmcif"
        if os.path.isfile(cif_in):
            print(" Will use mmcif metadata from {}".format(cif_in))
            cif_ref = cif_in
    elif ext in (".cif", ".mmcif"):
        cif_ref = xyz_in
        pdb_in = spext[0] + ".pdb"
        if os.path.isfile(pdb_in):
            print(" Reading PDB REMARKS from {}".format(pdb_in))
            tmp = gemmi.read_structure(pdb_in)
            st.raw_remarks = tmp.raw_remarks

    if cif_ref is None and xyz_in.endswith("cif"):
        cif_ref = xyz_in

    return st, cif_ref
# read_structure_from_pdb_and_mmcif()
377
+
378
def merge_ligand_cif(cifs_in, cif_out):
    """Merge several restraint dictionary cif files into one file cif_out.

    The comp_list/link_list/mod_list blocks of all inputs are combined into
    one list block each; all other blocks are copied. When a block name
    occurs more than once, only the last occurrence is kept.
    """
    docs = [gemmi.cif.read(x) for x in cifs_in]
    # list kind -> tags of its loop; the id tag comes first and further tags
    # are appended below as they are met in the inputs
    tags = dict(comp=["_chem_comp.id"],
                link=["_chem_link.id"],
                mod=["_chem_mod.id"])
    list_names = [k+"_list" for k in tags]

    # Check duplicated block names
    # names: block name -> [(document index, block index), ...]
    names = {}
    for i, doc in enumerate(docs):
        for j, b in enumerate(doc):
            if b.name not in list_names and not b.name.startswith("mod_"):
                names.setdefault(b.name, []).append((i,j))

    # Keep only last one if duplicated
    todel = []
    for k in names:
        if len(names[k]) > 1:
            for i,j in reversed(names[k][:-1]):
                logger.writeln("WARNING: removing duplicated {} from {}".format(k, cifs_in[i]))
                todel.append((i,j))
                # also drop the matching row from that document's list block;
                # k[5:] is the name without its 5-character "comp_"/"link_" prefix
                for t in "comp", "link":
                    if k.startswith("{}_".format(t)):
                        comp_list = docs[i].find_block("{}_list".format(t))
                        table = comp_list.find("_chem_{}.".format(t), ["id"])
                        # remove from the end so remaining row indices stay valid
                        for l in reversed([l for l, row in enumerate(table) if row.str(0) == k[5:]]):
                            table.remove_row(l)

    # delete in descending (doc, block) order so earlier indices are unaffected
    for i,j in sorted(todel, reverse=True):
        del docs[i][j]

    # Accumulate list
    found = dict(comp=0, link=0, mod=0)
    for d in docs:
        for k in tags:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            found[k] += 1
            l = b.find_loop(tags[k][0]).get_loop()
            for t in l.tags:
                if t not in tags[k]: tags[k].append(t)

    # Check duplicated modifications
    known_mods = [] # need to check monomer library?
    for d in docs:
        restraints.rename_cif_modification_if_necessary(d, known_mods)
        mod_list = d.find_block("mod_list")
        if mod_list:
            for row in mod_list.find("_chem_mod.", ["id"]):
                known_mods.append(row.str(0))

    doc = gemmi.cif.Document()
    # Add lists
    for k in tags:
        if not found[k]: continue
        lst = doc.add_new_block("{}_list".format(k))
        loop = lst.init_loop("", tags[k])
        # presumably a leading "?" marks a tag as optional for gemmi's find();
        # absent ones are filled with "." below - confirm against gemmi docs
        tags_for_find = [tags[k][0]] + ["?"+x for x in tags[k][1:]]

        for d in docs:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            vals = b.find(tags_for_find)
            for v in vals:
                rl = [v.get(x) if v.has(x) else "." for x in range(len(tags[k]))]
                loop.add_row(rl)

    # Add other items
    for d in docs:
        for b in d:
            if b.name not in list_names:
                doc.add_copied_block(b)

    doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
# merge_ligand_cif()
453
+
454
def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
    """Parse SHELX ins/res instructions into a gemmi.SmallStructure.

    Exactly one of ins_in (file name) and lines_in (iterable of lines) must
    be given. CELL, LATT, SYMM, SFAC and HKLF are interpreted; lines whose
    first word is not a known keyword are read as atom sites. Sites whose
    label starts with "Q" are skipped when ignore_q_peaks is true.

    Returns (ss, info) where info is a dict with key "hklf" (0 if no HKLF
    instruction was read). The space group is set on ss only when one can
    be found from the collected operators.
    """
    assert (ins_in, lines_in).count(None) == 1
    ss = gemmi.SmallStructure()

    keywords = """
    TITL CELL ZERR LATT SYMM SFAC NEUT DISP UNIT LAUE REM MORE END HKLF OMIT SHEL BASF TWIN TWST EXTI SWAT
    ABIN ANSC ANSR MERG LIST SPEC RESI MOVE ANIS AFIX HFIX FRAG FEND EXYZ EADP EQIV CONN PART BIND FREE DFIX
    DANG BUMP SAME SADI CHIV FLAT DELU SIMU RIGU PRIG DEFS ISOR XNPD NCSY SUMP L.S. CGLS BLOC DAMP STIR WGHT
    FVAR WIGL BOND CONF MPLA RTAB HTAB ACTA SIZE TEMP WPDB FMAP GRID PLAN TIME HOPE MOLE
    """.split()
    # Escape the keywords: "L.S." contains dots that would otherwise match any
    # character, so atom labels such as "L1S1" would be taken for a keyword.
    re_kwd = re.compile("^({})_?".format("|".join(re.escape(k) for k in keywords)))

    if ins_in:
        # read everything at once so the file is closed before parsing
        with open(ins_in) as ifs:
            raw_lines = ifs.readlines()
    else:
        raw_lines = lines_in

    # remove comments/blanks and concatenate lines
    lines = []
    concat_flag = False
    for l in raw_lines:
        l = l.rstrip()
        if l.startswith("REM"): continue
        if l.startswith(";"): continue
        if not l.strip(): continue

        if l.endswith("="):
            # "=" at the end means the instruction continues on the next line
            l = l[:l.rindex("=")]
            if concat_flag:
                lines[-1] += l
            else:
                lines.append(l)
            concat_flag = True
        elif concat_flag:
            lines[-1] += l
            concat_flag = False
        else:
            lines.append(l)

    # parse lines
    sfacs = []
    latt, symms = 1, []
    info = dict(hklf=0)
    for l in lines:
        sp = l.split()
        if not sp: continue # e.g. a line that consisted of "=" only
        ins = sp[0].upper()
        if ins == "TITL":
            pass
        elif l.startswith(" "): # title continued? instructions after space is allowed??
            pass
        elif ins == "CELL":
            #ss.wavelength = float(sp[1]) # next gemmi ver.
            ss.cell.set(*map(float, sp[2:]))
        elif ins == "LATT":
            latt = int(sp[1])
        elif ins == "SYMM":
            # turn decimal translations into fractions before parsing
            trp = re.sub(r"0*\.50*", "1/2", "".join(sp[1:]))
            trp = re.sub(r"0*\.250*", "1/4", trp)
            trp = re.sub(r"0*\.750*", "3/4", trp)
            trp = re.sub(r"0*\.33*", "1/3", trp)
            trp = re.sub(r"0*\.6[67]*", "2/3", trp)
            trp = re.sub(r"0*\.16[67]*", "1/6", trp) # never seen?
            trp = re.sub(r"0*\.833*", "5/6", trp) # never seen?
            symms.append(gemmi.Op(trp).wrap())
        elif ins == "SFAC": # TODO check numbers?
            if len(sp) < 2: continue
            sfacs.append(gemmi.Element(sp[1]))
            if len(sp) > 2:
                # if the next word is not a number, the rest are element names too
                try: float(sp[2])
                except ValueError:
                    sfacs.extend([gemmi.Element(x) for x in sp[2:]])
        elif ins == "HKLF":
            info["hklf"] = int(sp[1])
        elif not re_kwd.search(ins):
            # not a known instruction: treat as an atom line
            if not 4 < len(sp) < 13:
                logger.writeln("cannot parse this line: {}".format(l))
                continue
            site = gemmi.SmallStructure.Site()
            site.label = sp[0]
            try:
                # second word is the 1-based index into the SFAC list
                site.element = sfacs[int(sp[1])-1]
            except (ValueError, IndexError):
                logger.error("failed to parse: {}".format(l))
                continue

            if site.label.startswith("Q") and ignore_q_peaks:
                logger.writeln("skip Q peak: {}".format(l))
                continue

            site.fract.fromlist(list(map(float, sp[2:5])))
            if len(sp) > 5:
                q = abs(float(sp[5]))
                if q > 10: q = q % 10 # FIXME proper handling
                site.occ = q
            if len(sp) > 11:
                u = list(map(float, sp[6:12]))
                site.aniso = gemmi.SMat33d(u[0], u[1], u[2], u[5], u[4], u[3])
                #TODO site.u_iso needs to be set?
            elif len(sp) > 6:
                # guard: short atom lines have no U value to read
                site.u_iso = float(sp[6])

            ss.add_site(site)

    # Determine space group
    if gemmi.Op() not in symms: # identity operator may not be present in ins file
        symms.append(gemmi.Op())

    # centring translations for each |LATT| value
    lops = {1: [], # P
            2: [gemmi.Op("x+1/2,y+1/2,z+1/2")], # I
            3: [gemmi.Op("x+2/3,y+1/3,z+1/3"), # R
                gemmi.Op("x+1/3,y+2/3,z+2/3")],
            4: [gemmi.Op("x,y+1/2,z+1/2"), # F
                gemmi.Op("x+1/2,y,z+1/2"),
                gemmi.Op("x+1/2,y+1/2,z")],
            5: [gemmi.Op("x,y+1/2,z+1/2")], # A
            6: [gemmi.Op("x+1/2,y,z+1/2")], # B
            7: [gemmi.Op("x+1/2,y+1/2,z")], # C
            }
    for op in lops[abs(latt)]:
        symms.extend([x*op for x in symms])
    if latt > 0:
        # positive LATT: add the inversion-related operators
        symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])

    symms = list(set(symms))
    sg = gemmi.find_spacegroup_by_ops(gemmi.GroupOps(symms))
    # in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
    if sg is None:
        logger.error("Cannot construct space group from symbols: {}".format([x.triplet() for x in symms]))
    else:
        ss.spacegroup_hm = sg.xhm()

    if sg is not None: # debug
        sgops = set(gemmi.SpaceGroup(ss.spacegroup_hm).operations())
        opdiffs = sgops.symmetric_difference(symms)
        if opdiffs:
            logger.writeln("ops= {}".format(" ".join([x.triplet() for x in symms])))

    return ss, info
# read_shelx_ins()
588
+
589
def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
    """Read fixed-column SHELX hkl records and return a merged MTZ object.

    Exactly one of file_in (file name) and lines_in (iterable of lines) must
    be given. Each record is read as h, k, l (columns 0-12, four characters
    each), value (12-20) and sigma (20-28). Reading stops at the 0 0 0 record
    or at the first record whose indices cannot be parsed. Data are merged
    with gemmi.Intensities; when hklf == 3 the intensity columns of the
    resulting MTZ are relabelled as amplitude columns.
    """
    assert (file_in, lines_in).count(None) == 1
    if file_in:
        # read everything at once so the file is closed before parsing
        with open(file_in) as ifs:
            lines = ifs.readlines()
    else:
        lines = lines_in

    hkls, vals, sigs = [], [], []
    for l in lines:
        if l.startswith(";"): continue
        if not l.strip() or len(l) < 25: continue
        try:
            hkl = int(l[:4]), int(l[4:8]), int(l[8:12])
        except ValueError:
            logger.writeln("Error while parsing HKL part: {}".format(l))
            break

        if hkl == (0,0,0): break
        hkls.append(hkl)
        vals.append(float(l[12:20]))
        sigs.append(float(l[20:28]))
        # batch = l[28:32]
        # wavelength = l[32:40]

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, hkls, vals, sigs)
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # no negative isign after merging -> treat as mean data
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                      numpy.mean(ints.nobs_array),
                                                                      numpy.min(ints.nobs_array)))
    mtz = ints.prepare_merged_mtz(with_nobs=False)
    if hklf == 3:
        # old label -> (new label, new column type)
        conv = {"IMEAN": ("FP", "F"),
                "SIGIMEAN": ("SIGFP", "Q"),
                "I(+)": ("F(+)", "G"),
                "SIGI(+)": ("SIGF(+)", "L"),
                "I(-)": ("F(-)", "G"),
                "SIGI(-)": ("SIGF(-)", "L"),
                }
        for col in mtz.columns:
            if col.label in conv:
                col.label, col.type = conv[col.label]
    return mtz
# read_shelx_hkl()
629
+
630
def read_smcif_hkl(cif_in, cell_if_absent=None, sg_if_absent=None):
    """Read F-squared reflection data from a small-molecule cif as merged MTZ.

    The unit cell is taken from the _cell_length_*/_cell_angle_* items and the
    space group from the symmetry operator loop; cell_if_absent and
    sg_if_absent are used when these cannot be obtained from the file.
    Raises RuntimeError if either is still missing.
    """
    # Very crude support for smcif - just because I do not know other varieties.
    # TODO other possible data types? (amplitudes?)
    # TODO check _refln_observed_status?
    logger.writeln("Reading hkl data from smcif: {}".format(cif_in))
    b = gemmi.cif.read(cif_in).sole_block()
    try:
        cell_par = [float(b.find_value("_cell_length_{}".format(x))) for x in ("a", "b", "c")]
        cell_par += [float(b.find_value("_cell_angle_{}".format(x))) for x in ("alpha", "beta", "gamma")]
        cell = gemmi.UnitCell(*cell_par)
        logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*cell.parameters))
    except Exception:
        # best effort: missing or unparsable values fall back to the caller's
        # cell, but KeyboardInterrupt/SystemExit are no longer swallowed
        logger.writeln(" WARNING: no unit cell in this file")
        cell = cell_if_absent

    # try both tag names used for symmetry operators
    for optag in ("_space_group_symop_operation_xyz", "_symmetry_equiv_pos_as_xyz"):
        ops = [gemmi.Op(gemmi.cif.as_string(x)) for x in b.find_loop(optag)]
        sg = gemmi.find_spacegroup_by_ops(gemmi.GroupOps(ops))
        if sg:
            logger.writeln(" Space group: {}".format(sg.xhm()))
            break
    else:
        # neither tag gave a space group
        sg = sg_if_absent

    if cell is None or sg is None:
        raise RuntimeError("Cell and/or symmetry operations not found in {}".format(cif_in))

    l = b.find_values("_refln_index_h").get_loop()
    i_hkl = [l.tags.index("_refln_index_{}".format(h)) for h in "hkl"]
    i_int = l.tags.index("_refln_F_squared_meas")
    i_sig = l.tags.index("_refln_F_squared_sigma")
    hkls, vals, sigs = [], [], []
    for i in range(l.length()):
        hkl = [gemmi.cif.as_int(l[i, j]) for j in i_hkl]
        hkls.append(hkl)
        vals.append(gemmi.cif.as_number(l[i, i_int]))
        sigs.append(gemmi.cif.as_number(l[i, i_sig]))

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, hkls, vals, sigs)
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # no negative isign after merging -> treat as mean data
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                      numpy.mean(ints.nobs_array),
                                                                      numpy.min(ints.nobs_array)))
    logger.writeln("")
    return ints.prepare_merged_mtz(with_nobs=False)
# read_smcif_hkl()
678
+
679
def read_smcif_shelx(cif_in):
    """Read model and data embedded in a small-molecule cif written by SHELX.

    The _shelx_res_file and _shelx_hkl_file items of the sole block are
    parsed with read_shelx_ins() and read_shelx_hkl(). RuntimeError is raised
    when either item is missing.

    Returns (mtz, ss, info).
    """
    logger.writeln("Reading small molecule cif: {}".format(cif_in))
    block = gemmi.cif.read(cif_in).sole_block()
    res_text = block.find_value("_shelx_res_file")
    hkl_text = block.find_value("_shelx_hkl_file")
    if not res_text:
        raise RuntimeError("_shelx_res_file not found in {}".format(cif_in))
    if not hkl_text:
        raise RuntimeError("_shelx_hkl_file not found in {}".format(cif_in))

    ss, info = read_shelx_ins(lines_in=res_text.splitlines())
    hklf = info.get("hklf")
    mtz = read_shelx_hkl(ss.cell, ss.find_spacegroup(), hklf,
                         lines_in=hkl_text.splitlines())
    return mtz, ss, info
# read_smcif_shelx()
691
+
692
def read_small_molecule_files(files):
    """Find a model and reflection data among small-molecule files.

    First pass: every .cif/.res/.ins file is tried with read_structure();
    files that fail to read are skipped. The HKLF code is taken from the
    SHELX instructions (the file itself, or _shelx_res_file in a cif).
    Second pass: each file is tried as cif holding _shelx_hkl_file or a
    _refln_index_h loop; files that are not cif are read as SHELX hkl when
    the extension is ".hkl".

    Returns (st, mtz); (None, None) if no coordinates were found. mtz is
    None if no reflection data were found.
    """
    st, mtz, hklf = None, None, None
    # first pass - find structure
    for filename in files:
        ext = splitext(filename)[1]
        if ext in (".cif", ".res", ".ins"):
            try:
                st = read_structure(filename)
            except Exception:
                # best effort: an unreadable file is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed
                continue
            logger.writeln("Coordinates read from: {}".format(filename))
            if ext == ".cif":
                b = gemmi.cif.read(filename).sole_block()
                res_str = b.find_value("_shelx_res_file")
            else:
                with open(filename) as ifs:
                    res_str = ifs.read()
            if res_str:
                _, info = read_shelx_ins(lines_in=res_str.splitlines())
                hklf = info["hklf"]
    if st is None:
        logger.writeln("ERROR: coordinates not found.")
        return None, None

    # second pass - find hkl
    for filename in files:
        ext = splitext(filename)[1]
        try:
            b = gemmi.cif.read(filename).sole_block()
            hkl_str = b.find_value("_shelx_hkl_file")
            if hkl_str:
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, lines_in=hkl_str.splitlines())
                logger.writeln("reflection data read from: {}".format(filename))
            elif b.find_loop("_refln_index_h"):
                mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
        except ValueError: # not a cif file
            if ext == ".hkl":
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
                logger.writeln("reflection data read from: {}".format(filename))

    return st, mtz
732
+
733
def read_sequence_file(f):
    """Read a FASTA-like sequence file.

    Lines starting with ">" begin a new entry whose name is the rest of the
    line. Other non-blank lines are appended to the current entry after
    removing "*" and "-" and converting to upper case; if they come before
    any header, an entry with an empty name is created.

    Returns a list of [name, sequence].
    """
    # TODO needs improvement
    # return a list of [name, sequence]
    ret = []
    # the context manager makes sure the file is closed
    with open(f) as ifs:
        for l in ifs:
            l = l.strip()
            if l.startswith(">"):
                name = l[1:].strip()
                ret.append([name, ""])
            elif l:
                if not ret: ret.append(["", ""])
                ret[-1][1] += l.replace("*", "").replace("-", "").upper()
    return ret