servalcat 0.4.88__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp313-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +823 -0
  7. servalcat/refine/refine_geom.py +220 -0
  8. servalcat/refine/refine_spa.py +345 -0
  9. servalcat/refine/refine_xtal.py +268 -0
  10. servalcat/refine/spa.py +136 -0
  11. servalcat/refine/xtal.py +273 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +403 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +473 -0
  18. servalcat/spa/fsc.py +387 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +972 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1397 -0
  27. servalcat/utils/fileio.py +737 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +712 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +782 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +256 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1622 -0
  40. servalcat/xtal/twin.py +115 -0
  41. servalcat-0.4.88.dist-info/METADATA +55 -0
  42. servalcat-0.4.88.dist-info/RECORD +45 -0
  43. servalcat-0.4.88.dist-info/WHEEL +5 -0
  44. servalcat-0.4.88.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.88.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,737 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import model
11
+ from servalcat.utils import hkl
12
+ from servalcat.utils import restraints
13
+ import os
14
+ import shutil
15
+ import glob
16
+ import re
17
+ import subprocess
18
+ import gemmi
19
+ import numpy
20
+ import numpy.lib.recfunctions
21
+ import gzip
22
+
23
def splitext(path):
    """
    Split path into (root, ext) like os.path.splitext, but drop a trailing
    compression suffix (.bz2 or .gz) first so that "a.pdb.gz" gives ("a", ".pdb").
    """
    compressed = path.endswith((".bz2", ".gz"))
    target = path[:path.rindex(".")] if compressed else path
    return os.path.splitext(target)
# splitext()
29
+
30
def rotate_file(filename, copy=False):
    """
    Rotate filename to filename.1, shifting existing filename.N to filename.N+1.

    Returns None when filename does not exist, otherwise the name of the
    rotated copy (filename + ".1"). When copy is true the original file is
    copied instead of renamed, so it stays in place.
    """
    if not os.path.exists(filename):
        return

    # make list [ (path, number), ... ]
    # glob.escape() keeps characters such as "[" or "*" in filename literal;
    # without it older copies of such files are never found.
    prefix = filename + "."
    old_list = []
    for path in glob.glob(glob.escape(filename) + ".*"):
        # take what follows "filename." (str.replace would strip every occurrence)
        suffix = path[len(prefix):]
        try:
            number = int(suffix)
        except ValueError:
            continue
        if str(number) == suffix:  # ignore if suffix was such as 003...
            old_list.append((path, number))

    old_list.sort(key=lambda x: x[1])

    # rotate files, highest number first so nothing is overwritten
    for path, number in reversed(old_list):
        logger.writeln("Rotating file: {}".format(path))
        os.rename(path, "{}.{}".format(filename, number + 1))

    if copy:
        shutil.copyfile(filename, filename + ".1")
    else:
        os.rename(filename, filename + ".1")

    return filename + ".1"
# rotate_file()
59
+
60
def check_model_format(xyzin):
    """
    Guess the model format from the file extension only.
    Returns ".mmcif" if the extension ends with "cif", otherwise ".pdb".
    """
    # TODO check format actually
    # TODO mmjson is possible?
    extension = splitext(xyzin)[1]
    return ".mmcif" if extension.endswith("cif") else ".pdb"
# check_model_format()
69
+
70
def write_mmcif(st, cif_out, cif_ref=None):
    """
    Write structure st to cif_out as mmCIF.

    If cif_ref is given, its first block having _atom_site is updated with
    the model and written, so that other metadata is kept. If cif_ref cannot
    be read or has no _atom_site, a plain mmCIF is written instead.

    Refmac fails if _entry.id is longer than 80 chars including quotations
    """
    st_new = st.clone()
    logger.writeln("Writing mmCIF file: {}".format(cif_out))

    if not cif_ref:
        # no reference: write everything from the structure itself
        st_new.name = st_new.name[:78] # this will become _entry.id
        if "_entry.id" in st_new.info:
            st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
        groups = gemmi.MmcifOutputGroups(True)
        doc = gemmi.cif.Document()
        block = doc.add_new_block("new")
        st_new.update_mmcif_block(block, groups)
        doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
        return

    logger.writeln(" using mmCIF metadata from: {}".format(cif_ref))
    # only these groups are overwritten in the reference block
    groups = gemmi.MmcifOutputGroups(False)
    groups.group_pdb = True
    groups.ncs = True
    groups.atoms = True
    groups.cell = True
    groups.scale = True
    groups.assembly = True
    groups.entity = True
    groups.entity_poly_seq = True
    groups.cis = True
    groups.conn = True
    groups.software = True
    # FIXME is this all?
    try:
        doc = read_cif_safe(cif_ref)
    except Exception as e:
        # Sometimes refmac writes a broken mmcif file..
        logger.error("Error in mmCIF reading: {}".format(e))
        logger.error(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    candidates = [b for b in doc if b.find_loop("_atom_site.id")]
    if not candidates:
        logger.writeln("No _atom_site found in {}".format(cif_ref))
        logger.writeln(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    block = candidates[0]
    # to remove fract_transf_matrix. maybe we should keep some (like _atom_sites.solution_hydrogens)?
    # we do not want this because cell may be updated
    block.find_mmcif_category("_atom_sites.").erase()
    st_new.update_mmcif_block(block, groups)
    # NOTE(review): this truncation runs after the block was updated - confirm it affects the output
    if "_entry.id" in st_new.info:
        st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
    doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
# write_mmcif()
120
+
121
def write_pdb(st, pdb_out):
    """
    Write structure st to pdb_out in PDB format.
    Chain names are shortened on a copy if any chain ID is longer than 2 chars.
    """
    logger.writeln("Writing PDB file: {}".format(pdb_out))
    longest_id = max((len(cid) for cid in model.all_chain_ids(st)), default=0)
    if longest_id > 2:
        # work on a clone so that the caller's structure is untouched
        st = st.clone()
        st.shorten_chain_names()
    st.write_pdb(pdb_out, use_linkr=True)
# write_pdb()
129
+
130
def write_model(st, prefix=None, file_name=None, pdb=False, cif=False, cif_ref=None, hout=True):
    """
    Write structure st as mmCIF and/or PDB.

    If file_name is given, the format is decided from its ending ("cif" or
    ".pdb") and an Exception is raised for anything else. Otherwise
    prefix + ".mmcif" and/or prefix + ".pdb" are written depending on the
    cif and pdb flags. With hout=False hydrogen atoms are removed on a copy.
    """
    if not hout and st[0].has_hydrogen():
        st = st.clone()
        st.remove_hydrogens()

    if not file_name:
        if cif:
            write_mmcif(st, prefix + ".mmcif", cif_ref)
        if pdb:
            write_pdb(st, prefix + ".pdb")
        return

    if file_name.endswith("cif"):
        write_mmcif(st, file_name, cif_ref)
    elif file_name.endswith(".pdb"):
        write_pdb(st, file_name)
    else:
        raise Exception("Cannot determine file format from file name: {}".format(file_name))
# write_model()
147
+
148
def read_shifts_txt(shifts_txt):
    """
    Parse a shifts text file into a dict.

    Looks for "pdbin"/"pdbout" followed by "cell" (6 numbers) or "shifts"
    (3 numbers) and returns e.g. {"pdbin cell": [...], "pdbout shifts": [...]}
    with the numbers converted to float.
    """
    # close the file explicitly; split() already treats newlines as separators
    with open(shifts_txt) as ifs:
        tokens = ifs.read().split()

    ret = {}
    for i in range(len(tokens) - 3):
        if tokens[i] in ("pdbin", "pdbout") and tokens[i+1] in ("cell", "shifts"):
            n = 6 if tokens[i+1] == "cell" else 3
            key = "{} {}".format(tokens[i], tokens[i+1])
            ret[key] = [float(x) for x in tokens[i+2:i+2+n]]

    return ret
# read_shifts_txt()
159
+
160
def read_ccp4_map(filename, setup=True, default_value=0., pixel_size=None, ignore_origin=True):
    """
    Read a CCP4/MRC map file and log its header.

    setup: if true, m.setup(default_value) is called (NaN is used when
           default_value is None) and grid_start is reordered with the
           axis positions.
    pixel_size: a number or a sequence of three numbers; if given, the
                a, b, c cell lengths are rescaled by pixel_size/voxel_size.
    ignore_origin: if true, non-zero ORIGIN header values are reset to zero.

    Returns [grid, grid_start, grid_shape].
    """
    m = gemmi.read_ccp4_map(filename)
    g = m.grid
    grid_cell = [m.header_i32(x) for x in (8,9,10)]
    grid_start = [m.header_i32(x) for x in (5,6,7)]
    grid_shape = [m.header_i32(x) for x in (1,2,3)]
    axis_pos = m.axis_positions()
    axis_letters = ["","",""]
    for i, l in zip(axis_pos, "XYZ"): axis_letters[i] = l
    spacings = [1./g.unit_cell.reciprocal().parameters[i]/grid_cell[i] for i in (0,1,2)]
    voxel_size = [g.unit_cell.parameters[i]/grid_cell[i] for i in (0,1,2)]
    origin = [m.header_float(x) for x in (50,51,52)]
    label = m.header_str(57, 80)
    # cut at the first NUL if any; slicing with find() would chop the last
    # character when there is no NUL in the label (find() returns -1)
    label = label.split("\0", 1)[0]
    logger.writeln("Reading CCP4/MRC map file {}".format(filename))
    logger.writeln(" Cell Grid: {:4d} {:4d} {:4d}".format(*grid_cell))
    logger.writeln(" Map mode: {}".format(m.header_i32(4)))
    logger.writeln(" Start: {:4d} {:4d} {:4d}".format(*grid_start))
    logger.writeln(" Shape: {:4d} {:4d} {:4d}".format(*grid_shape))
    logger.writeln(" Cell: {} {} {} {} {} {}".format(*g.unit_cell.parameters))
    logger.writeln(" Axis order: {}".format(" ".join(axis_letters)))
    logger.writeln(" Space group: {}".format(m.header_i32(23)))
    logger.writeln(" Spacing: {:.6f} {:.6f} {:.6f}".format(*spacings))
    logger.writeln(" Voxel size: {:.6f} {:.6f} {:.6f}".format(*voxel_size))
    logger.writeln(" Origin: {:.6e} {:.6e} {:.6e}".format(*origin))
    if not numpy.all(numpy.asarray(origin) == 0.):
        logger.writeln(" ! WARNING: ORIGIN header is not supported.")
        if ignore_origin:
            logger.writeln(" ! WARNING: removing ORIGIN values. This might cause a misalignment between map and model.")
            for i in (50,51,52): m.set_header_float(i, 0.)
    logger.writeln(" Label: {}".format(label))
    logger.writeln("")

    if setup:
        if default_value is None: default_value = float("nan")
        m.setup(default_value)
        grid_start = [grid_start[i] for i in axis_pos]

    if pixel_size is not None:
        # accept a scalar as well as a sequence
        try:
            len(pixel_size)
        except TypeError:
            pixel_size = [pixel_size, pixel_size, pixel_size]

        logger.writeln("Overriding pixel size with {:.6f} {:.6f} {:.6f}".format(*pixel_size))
        orgc = m.grid.unit_cell.parameters
        new_abc = [orgc[i]*pixel_size[i]/voxel_size[i] for i in (0,1,2)]
        m.grid.unit_cell = gemmi.UnitCell(new_abc[0], new_abc[1], new_abc[2],
                                          orgc[3], orgc[4], orgc[5])
        logger.writeln(" New cell= {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}".format(*m.grid.unit_cell.parameters))

    return [m.grid, grid_start, grid_shape]
# read_ccp4_map()
213
+
214
def read_halfmaps(files, pixel_size=None, fail=True):
    """
    Read half maps with read_ccp4_map() and check that they are consistent.

    Exits with SystemExit if fail is true and the number of files is not two,
    or if both maps have exactly the same values. Shape, unit cell and grid
    start of the two maps are asserted to be equal.
    Returns the list of read_ccp4_map() results.
    """
    if fail and len(files) != 2:
        raise SystemExit("Error: Give exactly two files for half maps")

    maps = []
    for map_file in files:
        maps.append(read_ccp4_map(map_file, pixel_size=pixel_size))

    grid_1, grid_2 = maps[0][0], maps[1][0]
    if numpy.array_equal(grid_1.array, grid_2.array):
        raise SystemExit("Error: Half maps have exactly the same values. Check your input.")

    assert grid_1.shape == grid_2.shape
    assert grid_1.unit_cell == grid_2.unit_cell
    assert maps[0][1] == maps[1][1]

    return maps
# read_halfmaps()
227
+
228
def read_mmhkl(hklin, cif_index=0): # mtz or mmcif
    """
    Read macromolecular reflection data from an MTZ or mmCIF file.

    For mmCIF (.cif/.ent) the reflection block at cif_index is converted
    to MTZ. Raises RuntimeError for other extensions or when the space
    group is missing. Returns the gemmi Mtz object.
    """
    ext = splitext(hklin)[1]
    ext_lower = ext.lower()
    if ext_lower == ".mtz":
        logger.writeln("Reading MTZ file: {}".format(hklin))
        mtz = gemmi.read_mtz_file(hklin)
    elif ext_lower in (".cif", ".ent"):
        logger.writeln("Reading mmCIF file (hkl data): {} at index {}".format(hklin, cif_index+1))
        refln_blocks = gemmi.as_refln_blocks(gemmi.cif.read(hklin))
        converter = gemmi.CifToMtz()
        mtz = converter.convert_block_to_mtz(refln_blocks[cif_index])
    else:
        raise RuntimeError("Unsupported file type: {}".format(ext))

    if mtz.spacegroup is None:
        raise RuntimeError("Missing space group information")

    cell_line = " Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*mtz.cell.parameters)
    logger.writeln(cell_line)
    logger.writeln(" Space group: {}".format(mtz.spacegroup.xhm()))
    logger.writeln(" Columns: {}".format(" ".join(mtz.column_labels())))
    logger.writeln("")
    return mtz
# read_mmhkl()
249
+
250
def is_mmhkl_file(hklin):
    """
    Tell whether hklin is a macromolecular reflection file.

    Returns True for .mtz and for .cif/.ent having _refln.index_h,
    False for .hkl and for .cif/.ent having _refln_index_h,
    and None when it cannot be decided.
    """
    ext = splitext(hklin)[1].lower()
    if ext == ".mtz":
        return True
    if ext == ".hkl": # macromolecule files should not have .hkl extension
        return False
    if ext in (".cif", ".ent"):
        for block in gemmi.cif.read(hklin):
            if block.find_values("_refln.index_h"):
                return True
            if block.find_values("_refln_index_h"):
                return False
    # otherwise cannot decide
    return None
# is_mmhkl_file()
264
+
265
def read_map_from_mtz(mtz_in, cols, grid_size=None, sample_rate=3):
    """
    Calculate a map from amplitude and phase columns (cols[0], cols[1]).

    If grid_size is None it is decided from sample_rate.
    Returns (d_min, map) where d_min is the high resolution limit of the file.
    """
    mtz = read_mmhkl(mtz_in)
    d_min = mtz.resolution_high() # TODO get resolution for column?
    size = grid_size
    if size is None:
        size = mtz.get_size_for_hkl(sample_rate=sample_rate)
    f_phi_grid = mtz.get_f_phi_on_grid(cols[0], cols[1], size)
    return d_min, gemmi.transform_f_phi_grid_to_map(f_phi_grid)
# read_map_from_mtz()
274
+
275
def read_asu_data_from_mtz(mtz_in, cols):
    """
    Read one or two columns from a reflection file as gemmi AsuData.

    With two columns (amplitude of type F and phase of type P) a
    ComplexAsuData is returned; with one column an IntAsuData or
    FloatAsuData depending on f.is_integer().
    """
    assert 0 < len(cols) < 3
    mtz = read_mmhkl(mtz_in)
    sg = mtz.spacegroup
    miller = mtz.make_miller_array()
    f = mtz.column_with_label(cols[0])
    cell = mtz.get_cell(f.dataset_id)

    if len(cols) != 2:
        # single column: choose the container from the column type
        asu_type = gemmi.IntAsuData if f.is_integer() else gemmi.FloatAsuData
        return asu_type(cell, sg, miller, f)

    phi = mtz.column_with_label(cols[1])
    assert f.type == "F"
    assert phi.type == "P"
    phi = numpy.deg2rad(phi)
    f_comp = f * (numpy.cos(phi) + 1j * numpy.sin(phi))
    return gemmi.ComplexAsuData(cell, sg, miller, f_comp) # ensure asu?
# read_asu_data_from_mtz()
299
+
300
def read_cif_safe(cif_in):
    """
    Read a (possibly gzipped) cif file, replacing null characters with "."
    before parsing. Returns a gemmi.cif.Document.
    """
    opener = gzip.open if cif_in.endswith(".gz") else open
    # use a context manager so that the file handle is always closed
    with opener(cif_in, "rt") as ifs:
        s = ifs.read()
    if "\0" in s: # Refmac occasionally writes \0 in some fields..
        logger.writeln(" WARNING: null character detected. Replacing with '.'")
        s = s.replace("\0", ".")
    doc = gemmi.cif.read_string(s)
    return doc
# read_cif_safe()
309
+
310
def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True):
    """
    Read a model file and return a gemmi.Structure.

    The reader is chosen from the extension (compression suffix ignored):
    .pdb/.ent as PDB, .cif/.mmcif as mmCIF, small molecule CIF or chemical
    component CIF depending on the tags found, .ins/.res as SHELX.
    RuntimeError is raised for other extensions.
    """
    spext = splitext(xyz_in)
    st = None
    if spext[1].lower() in (".pdb", ".ent"):
        logger.writeln("Reading PDB file: {}".format(xyz_in))
        st = gemmi.read_pdb(xyz_in)
    elif spext[1].lower() in (".cif", ".mmcif"):
        doc = read_cif_safe(xyz_in)
        # the first block having a structure is used; a second one only triggers a warning
        for block in doc:
            if block.find_loop("_atom_site.id"):
                if st is None:
                    logger.writeln("Reading mmCIF file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_block(block)
                else:
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            elif block.find_loop("_atom_site_label"):
                if st is None:
                    logger.writeln("Reading smCIF file: {}".format(xyz_in))
                    # note: the file is read again here, not through read_cif_safe()
                    ss = gemmi.read_small_structure(xyz_in)
                    if not ss.sites:
                        raise RuntimeError("No atoms found in cif file.")
                    st = model.cx_to_mx(ss)
                else:
                    logger.writeln(" WARNING: more than one block having structure found. Will use first one.")
                    break
            elif (block.find_loop("_chem_comp_atom.x") or
                  block.find_loop("_chem_comp_atom.model_Cartn_x") or
                  block.find_loop("_chem_comp_atom.pdbx_model_Cartn_x_ideal")):
                if st is None:
                    logger.writeln("Reading chemical component file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_chemcomp_block(block)
    elif spext[1].lower() in (".ins", ".res"):
        logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
        st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
        st.setup_cell_images()
    else:
        raise RuntimeError("Unsupported file type: {}".format(spext[1]))
    # st stays None if a cif file had no block with a structure
    if st is not None:
        if st.cell.is_crystal():
            logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*st.cell.parameters))
            logger.writeln(" Space group: {}".format(st.spacegroup_hm))
        if st.ncs:
            n_given = sum(1 for x in st.ncs if x.given)
            logger.writeln(" No. strict NCS: {} ({} already applied)".format(len(st.ncs), n_given))
        logger.writeln("")
        # NOTE(review): nesting of the two calls below was reconstructed - confirm against the original file
        if assign_het_flags:
            st.assign_het_flags()
        if merge_chain_parts:
            st.merge_chain_parts()
    return st
# read_structure()
362
+
363
def read_structure_from_pdb_and_mmcif(xyz_in):
    """
    Read a model and look for its counterpart file with the same root name.

    For a PDB input, root + ".mmcif" is used as mmCIF reference if it exists.
    For an mmCIF input, the input itself is the reference, and raw REMARK
    lines are taken from root + ".pdb" if it exists.
    Returns (structure, cif_ref) where cif_ref may be None.
    """
    st = read_structure(xyz_in)
    cif_ref = None
    spext = splitext(xyz_in)
    # compare case-insensitively, in the same way as read_structure() does
    ext = spext[1].lower()
    if ext in (".pdb", ".ent"):
        cif_in = spext[0] + ".mmcif"
        if os.path.isfile(cif_in):
            logger.writeln(" Will use mmcif metadata from {}".format(cif_in))
            cif_ref = cif_in
    elif ext in (".cif", ".mmcif"):
        cif_ref = xyz_in
        pdb_in = spext[0] + ".pdb"
        if os.path.isfile(pdb_in):
            logger.writeln(" Reading PDB REMARKS from {}".format(pdb_in))
            tmp = gemmi.read_structure(pdb_in)
            st.raw_remarks = tmp.raw_remarks

    if cif_ref is None and xyz_in.endswith("cif"):
        cif_ref = xyz_in

    return st, cif_ref
# read_structure_from_pdb_and_mmcif()
385
+
386
def merge_ligand_cif(cifs_in, cif_out):
    """
    Merge restraint dictionary cif files (cifs_in) into one file (cif_out).

    The comp_list/link_list/mod_list blocks are merged into single lists and
    the other blocks are copied. If a block name appears more than once
    (except for list blocks and those starting with "mod_"), only the last
    one is kept.
    """
    docs = [gemmi.cif.read(x) for x in cifs_in]
    # tags of each list; starts with the id tag and is extended below
    tags = dict(comp=["_chem_comp.id"],
                link=["_chem_link.id"],
                mod=["_chem_mod.id"])
    list_names = [k+"_list" for k in tags]

    # Check duplicated block names
    names = {} # block name -> [(doc index, block index), ...]
    for i, doc in enumerate(docs):
        for j, b in enumerate(doc):
            if b.name not in list_names and not b.name.startswith("mod_"):
                names.setdefault(b.name, []).append((i,j))

    # Keep only last one if duplicated
    todel = []
    for k in names:
        if len(names[k]) > 1:
            for i,j in reversed(names[k][:-1]):
                logger.writeln("WARNING: removing duplicated {} from {}".format(k, cifs_in[i]))
                todel.append((i,j))
                # also remove the corresponding row from comp_list/link_list of the same file
                for t in "comp", "link":
                    if k.startswith("{}_".format(t)):
                        comp_list = docs[i].find_block("{}_list".format(t))
                        table = comp_list.find("_chem_{}.".format(t), ["id"])
                        # k[5:] is the name without "comp_" or "link_"
                        for l in reversed([l for l, row in enumerate(table) if row.str(0) == k[5:]]):
                            table.remove_row(l)

    # delete from the end so that remaining indices stay valid
    for i,j in sorted(todel, reverse=True):
        del docs[i][j]

    # Accumulate list
    found = dict(comp=0, link=0, mod=0) # number of files having each list
    for d in docs:
        for k in tags:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            found[k] += 1
            l = b.find_loop(tags[k][0]).get_loop()
            for t in l.tags:
                if t not in tags[k]: tags[k].append(t)

    # Check duplicated modifications
    known_mods = [] # need to check monomer library?
    for d in docs:
        restraints.rename_cif_modification_if_necessary(d, known_mods)
        mod_list = d.find_block("mod_list")
        if mod_list:
            for row in mod_list.find("_chem_mod.", ["id"]):
                known_mods.append(row.str(0))

    doc = gemmi.cif.Document()
    # Add lists
    for k in tags:
        if not found[k]: continue
        lst = doc.add_new_block("{}_list".format(k))
        loop = lst.init_loop("", tags[k])
        # tags other than the first one get "?" prefix for the find() call
        tags_for_find = [tags[k][0]] + ["?"+x for x in tags[k][1:]]

        for d in docs:
            b = d.find_block("{}_list".format(k))
            if not b: continue
            vals = b.find(tags_for_find)
            for v in vals:
                # "." is written for columns absent in this file
                rl = [v.get(x) if v.has(x) else "." for x in range(len(tags[k]))]
                loop.add_row(rl)

    # Add other items
    for d in docs:
        for b in d:
            if b.name not in list_names:
                doc.add_copied_block(b)

    doc.write_file(cif_out, style=gemmi.cif.Style.Aligned)
# merge_ligand_cif()
461
+
462
def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
    """
    Read a SHELX ins/res file into a gemmi.SmallStructure.

    Give either ins_in (file name) or lines_in (iterable of lines), not both.
    Atoms with labels starting with "Q" are skipped if ignore_q_peaks is true.
    Returns (ss, info) where info is a dict having "hklf" (0 if not given).
    Raises RuntimeError if the space group cannot be constructed.
    """
    assert (ins_in, lines_in).count(None) == 1
    ss = gemmi.SmallStructure()

    keywords = """
    TITL CELL ZERR LATT SYMM SFAC NEUT DISP UNIT LAUE REM MORE END HKLF OMIT SHEL BASF TWIN TWST EXTI SWAT
    ABIN ANSC ANSR MERG LIST SPEC RESI MOVE ANIS AFIX HFIX FRAG FEND EXYZ EADP EQIV CONN PART BIND FREE DFIX
    DANG BUMP SAME SADI CHIV FLAT DELU SIMU RIGU PRIG DEFS ISOR XNPD NCSY SUMP L.S. CGLS BLOC DAMP STIR WGHT
    FVAR WIGL BOND CONF MPLA RTAB HTAB ACTA SIZE TEMP WPDB FMAP GRID PLAN TIME HOPE MOLE
    """.split()
    # escape keywords; otherwise "." of L.S. would match any character
    re_kwd = re.compile("^({})_?".format("|".join(re.escape(k) for k in keywords)))

    if ins_in:
        # read everything at once so that the file is closed right away
        with open(ins_in) as ifs:
            raw_lines = ifs.readlines()
    else:
        raw_lines = lines_in

    # remove comments/blanks and concatenate lines
    lines = []
    concat_flag = False # true if the previous line ended with "="
    for l in raw_lines:
        l = l.rstrip()
        if l.startswith("REM"): continue
        if l.startswith(";"): continue
        if not l.strip(): continue

        if l.endswith("="):
            l = l[:l.rindex("=")]
            if concat_flag:
                lines[-1] += l
            else:
                lines.append(l)
            concat_flag = True
        elif concat_flag:
            lines[-1] += l
            concat_flag = False
        else:
            lines.append(l)

    # parse lines
    sfacs = []
    latt, symms = 1, []
    info = dict(hklf=0)
    for l in lines:
        sp = l.split()
        ins = sp[0].upper()
        if ins == "TITL":
            pass
        elif l.startswith(" "): # title continued? instructions after space is allowed??
            pass
        elif ins == "CELL":
            #ss.wavelength = float(sp[1]) # next gemmi ver.
            ss.cell.set(*map(float, sp[2:]))
        elif ins == "LATT":
            latt = int(sp[1])
        elif ins == "SYMM":
            symms.append(gemmi.Op("".join(sp[1:])).wrap())
        elif ins == "SFAC": # TODO check numbers?
            if len(sp) < 2: continue
            sfacs.append(gemmi.Element(sp[1]))
            if len(sp) > 2:
                # if the third item is not a number, this is a list of elements
                try: float(sp[2])
                except ValueError:
                    sfacs.extend([gemmi.Element(x) for x in sp[2:]])
        elif ins == "HKLF":
            info["hklf"] = int(sp[1])
        elif not re_kwd.search(ins):
            # anything that is not a known instruction is taken as an atom
            if not 4 < len(sp) < 13:
                logger.writeln("cannot parse this line: {}".format(l))
                continue
            site = gemmi.SmallStructure.Site()
            site.label = sp[0]
            try:
                site.element = sfacs[int(sp[1])-1]
            except (ValueError, IndexError):
                # not a number, or no such SFAC entry
                logger.error("failed to parse: {}".format(l))
                continue

            if site.label.startswith("Q") and ignore_q_peaks:
                logger.writeln("skip Q peak: {}".format(l))
                continue

            site.fract.fromlist(list(map(float, sp[2:5])))
            if len(sp) > 5:
                q = abs(float(sp[5]))
                if q > 10: q = q % 10 # FIXME proper handling
                site.occ = q
            if len(sp) > 11:
                u = list(map(float, sp[6:12]))
                site.aniso = gemmi.SMat33d(u[0], u[1], u[2], u[5], u[4], u[3])
                #TODO site.u_iso needs to be set?
            elif len(sp) > 6:
                # U may be absent; sp[6] would raise IndexError in that case
                site.u_iso = float(sp[6])

            ss.add_site(site)

    # Determine space group
    if gemmi.Op() not in symms: # identity operator may not be present in ins file
        symms.append(gemmi.Op())

    lops = {1: [], # P
            2: [gemmi.Op("x+1/2,y+1/2,z+1/2")], # I
            3: [gemmi.Op("x+2/3,y+1/3,z+1/3"), # R
                gemmi.Op("x+1/3,y+2/3,z+2/3")],
            4: [gemmi.Op("x,y+1/2,z+1/2"), # F
                gemmi.Op("x+1/2,y,z+1/2"),
                gemmi.Op("x+1/2,y+1/2,z")],
            5: [gemmi.Op("x,y+1/2,z+1/2")], # A
            6: [gemmi.Op("x+1/2,y,z+1/2")], # B
            7: [gemmi.Op("x+1/2,y+1/2,z")], # C
            }
    for op in lops[abs(latt)]:
        symms.extend([x*op for x in symms])
    if latt > 0:
        # positive LATT: inversion is added
        symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])

    ss.symops = [op.triplet() for op in set(symms)]
    ss.set_spacegroup("s")
    # in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
    if ss.spacegroup is None:
        raise RuntimeError("Cannot construct space group from symbols: {}".format(ss.symops))
    return ss, info
# read_shelx_ins()
580
+
581
def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
    """
    Read a SHELX hkl file (fixed format) and return a merged gemmi Mtz.

    Give either file_in (file name) or lines_in (iterable of lines), not both.
    Reading stops at the 0 0 0 reflection or at the first line whose indices
    cannot be parsed. If hklf is 3, intensity columns are relabelled as
    amplitudes.
    """
    assert (file_in, lines_in).count(None) == 1
    if file_in:
        # read everything at once so that the file is closed right away
        with open(file_in) as ifs:
            raw_lines = ifs.readlines()
    else:
        raw_lines = lines_in

    hkls, vals, sigs = [], [], []
    for l in raw_lines:
        if l.startswith(";"): continue
        if not l.strip() or len(l) < 25: continue
        try:
            idx = int(l[:4]), int(l[4:8]), int(l[8:12])
        except ValueError:
            logger.writeln("Error while parsing HKL part: {}".format(l))
            break

        if idx == (0,0,0): break
        hkls.append(idx)
        vals.append(float(l[12:20]))
        sigs.append(float(l[20:28]))
        # batch = l[28:32]
        # wavelength = l[32:40]

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, hkls, vals, sigs)
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # set the type to Mean if no reflection has negative isign
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                        numpy.mean(ints.nobs_array),
                                                                        numpy.min(ints.nobs_array)))
    mtz = ints.prepare_merged_mtz(with_nobs=False)
    if hklf == 3:
        # old label -> (new label, new column type)
        conv = {"IMEAN": ("FP", "F"),
                "SIGIMEAN": ("SIGFP", "Q"),
                "I(+)": ("F(+)", "G"),
                "SIGI(+)": ("SIGF(+)", "L"),
                "I(-)": ("F(-)", "G"),
                "SIGI(-)": ("SIGF(-)", "L"),
                }
        for col in mtz.columns:
            if col.label in conv:
                col.label, col.type = conv[col.label]
    return mtz
# read_shelx_hkl()
621
+
622
def read_smcif_hkl(cif_in, cell_if_absent=None, sg_if_absent=None):
    """
    Read intensities (_refln_F_squared_meas/_sigma) from a small molecule cif
    and return a merged gemmi Mtz.

    cell_if_absent and sg_if_absent are used when unit cell or symmetry
    operators are not available in the file. RuntimeError is raised if
    either is still missing.
    """
    # Very crude support for smcif - just because I do not know other varieties.
    # TODO other possible data types? (amplitudes?)
    # TODO check _refln_observed_status?
    logger.writeln("Reading hkl data from smcif: {}".format(cif_in))
    b = gemmi.cif.read(cif_in).sole_block()

    cell_tags = ["_cell_length_{}".format(x) for x in ("a", "b", "c")]
    cell_tags += ["_cell_angle_{}".format(x) for x in ("alpha", "beta", "gamma")]
    cell_raw = [b.find_value(t) for t in cell_tags]
    # as_number() accepts values with s.u. such as 10.123(4), which float() rejects
    cell_par = [gemmi.cif.as_number(v) if v is not None else float("nan") for v in cell_raw]
    if numpy.all(numpy.isfinite(cell_par)):
        cell = gemmi.UnitCell(*cell_par)
        logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*cell.parameters))
    else:
        logger.writeln(" WARNING: no unit cell in this file")
        cell = cell_if_absent

    for optag in ("_space_group_symop_operation_xyz", "_symmetry_equiv_pos_as_xyz"):
        ops = [gemmi.Op(gemmi.cif.as_string(x)) for x in b.find_loop(optag)]
        sg = gemmi.find_spacegroup_by_ops(gemmi.GroupOps(ops))
        if sg:
            logger.writeln(" Space group: {}".format(sg.xhm()))
            break
    else:
        # neither tag gave a space group
        sg = sg_if_absent

    if cell is None or sg is None:
        raise RuntimeError("Cell and/or symmetry operations not found in {}".format(cif_in))

    loop = b.find_values("_refln_index_h").get_loop()
    i_hkl = [loop.tags.index("_refln_index_{}".format(h)) for h in "hkl"]
    i_int = loop.tags.index("_refln_F_squared_meas")
    i_sig = loop.tags.index("_refln_F_squared_sigma")
    hkls, vals, sigs = [], [], []
    for i in range(loop.length()):
        hkls.append([gemmi.cif.as_int(loop[i, j]) for j in i_hkl])
        vals.append(gemmi.cif.as_number(loop[i, i_int]))
        sigs.append(gemmi.cif.as_number(loop[i, i_sig]))

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, hkls, vals, sigs)
    ints.merge_in_place(gemmi.DataType.Anomalous)
    # set the type to Mean if no reflection has negative isign
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                        numpy.mean(ints.nobs_array),
                                                                        numpy.min(ints.nobs_array)))
    logger.writeln("")
    return ints.prepare_merged_mtz(with_nobs=False)
# read_smcif_hkl()
670
+
671
def read_smcif_shelx(cif_in):
    """
    Read SHELX res and hkl data embedded in a small molecule cif
    (_shelx_res_file and _shelx_hkl_file).
    Raises RuntimeError if either item is missing.
    Returns (mtz, ss, info).
    """
    logger.writeln("Reading small molecule cif: {}".format(cif_in))
    block = gemmi.cif.read(cif_in).sole_block()
    res_str = block.find_value("_shelx_res_file")
    hkl_str = block.find_value("_shelx_hkl_file")
    if not res_str:
        raise RuntimeError("_shelx_res_file not found in {}".format(cif_in))
    if not hkl_str:
        raise RuntimeError("_shelx_hkl_file not found in {}".format(cif_in))

    ss, info = read_shelx_ins(lines_in=res_str.splitlines())
    hkl_lines = hkl_str.splitlines()
    mtz = read_shelx_hkl(ss.cell, ss.spacegroup, info.get("hklf"), lines_in=hkl_lines)
    return mtz, ss, info
# read_smcif_shelx()
683
+
684
def read_small_molecule_files(files):
    """
    Find coordinates and reflection data in the given small molecule files.

    Coordinates are taken from .cif/.res/.ins files, and reflection data
    from cif files (_shelx_hkl_file or _refln_index_h loop) or .hkl files.
    Returns (st, mtz); (None, None) if no coordinates were found, and mtz is
    None if no reflection data were found.
    """
    st, mtz, hklf = None, None, None
    # first pass - find structure
    for filename in files:
        ext = splitext(filename)[1]
        if ext not in (".cif", ".res", ".ins"):
            continue
        try:
            st_read = read_structure(filename)
        except Exception:
            # best effort: this file may have no coordinates (e.g. hkl only cif)
            continue
        if st_read is None:
            # do not discard a structure found in a previous file
            continue
        st = st_read
        logger.writeln("Coordinates read from: {}".format(filename))
        if ext == ".cif":
            b = gemmi.cif.read(filename).sole_block()
            res_str = b.find_value("_shelx_res_file")
        else:
            with open(filename) as ifs:
                res_str = ifs.read()
        if res_str:
            # only the HKLF number is needed from the instructions
            _, info = read_shelx_ins(lines_in=res_str.splitlines())
            hklf = info["hklf"]
    if st is None:
        logger.writeln("ERROR: coordinates not found.")
        return None, None

    # second pass - find hkl
    for filename in files:
        ext = splitext(filename)[1]
        try:
            b = gemmi.cif.read(filename).sole_block()
            hkl_str = b.find_value("_shelx_hkl_file")
            if hkl_str:
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, lines_in=hkl_str.splitlines())
                logger.writeln("reflection data read from: {}".format(filename))
            elif b.find_loop("_refln_index_h"):
                mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
        except ValueError: # not a cif file
            if ext == ".hkl":
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
                logger.writeln("reflection data read from: {}".format(filename))

    return st, mtz
# read_small_molecule_files()
724
+
725
def read_sequence_file(f):
    """
    Read a FASTA-like sequence file and return a list of [name, sequence].

    A line starting with ">" begins a new entry. "*" and "-" are removed from
    sequence lines and letters are converted to upper case. Sequence lines
    before any header go to an entry with an empty name.
    """
    # TODO needs improvement
    ret = []
    # use a context manager so that the file handle is always closed
    with open(f) as ifs:
        for l in ifs:
            l = l.strip()
            if l.startswith(">"):
                name = l[1:].strip()
                ret.append([name, ""])
            elif l:
                if not ret: ret.append(["", ""])
                ret[-1][1] += l.replace("*", "").replace("-", "").upper()
    return ret
# read_sequence_file()