servalcat 0.4.99__cp39-cp39-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-39-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,744 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import model
11
+ from servalcat.utils import hkl
12
+ from servalcat.utils import restraints
13
+ import os
14
+ import shutil
15
+ import glob
16
+ import re
17
+ import subprocess
18
+ import gemmi
19
+ import numpy
20
+ import gzip
21
+
22
def splitext(path):
    """
    Split path into (root, ext) like os.path.splitext(), but look through
    a trailing ".gz" or ".bz2": "a.pdb.gz" gives ("a", ".pdb").
    """
    if not path.endswith((".bz2", ".gz")):
        return os.path.splitext(path)
    # drop the compression suffix, then split what is left
    uncompressed_name = path[:path.rindex(".")]
    return os.path.splitext(uncompressed_name)
# splitext()
28
+
29
def rotate_file(filename, copy=False):
    """
    Keep numbered backups of filename (filename.1, filename.2, ...).

    Existing backups filename.N are renamed to filename.N+1, highest number
    first so that nothing is overwritten. Then filename itself is renamed
    (or copied, if copy is true) to filename.1.

    Returns the name of the new backup (filename + ".1"), or None if
    filename does not exist.
    """
    if not os.path.exists(filename):
        return None

    prefix = filename + "."

    # make list [ [filename, number], ... ]
    old_list = []
    # glob.escape() makes characters such as "[" or "*" in filename match literally
    for f in glob.glob(glob.escape(filename) + ".*"):
        if not f.startswith(prefix):
            continue
        # Take only what follows "<filename>.". str.replace() would also remove
        # repeats of that text later in the name ("a.a.1" would look like "1" for "a").
        suffix = f[len(prefix):]
        try:
            i = int(suffix)
        except ValueError:
            continue
        if str(i) == suffix: # ignore if suffix was such as 003...
            old_list.append([f, i])

    old_list.sort(key=lambda x: x[1])

    # rotate files
    for f, i in reversed(old_list):
        logger.writeln("Rotating file: {}".format(f))
        os.rename(f, "%s.%d" % (filename, i+1))

    if copy:
        shutil.copyfile(filename, filename + ".1")
    else:
        os.rename(filename, filename + ".1")

    return filename + ".1"
# rotate_file()
58
+
59
def check_model_format(xyzin):
    """
    Guess the model format from the file name only.
    Returns ".mmcif" if the extension given by splitext() ends with "cif",
    and ".pdb" in any other case.
    """
    # TODO check format actually
    # TODO mmjson is possible?
    extension = splitext(xyzin)[1]
    return ".mmcif" if extension.endswith("cif") else ".pdb"
# check_model_format()
68
+
69
def write_mmcif(st, cif_out, cif_ref=None):
    """
    Write a copy of st to cif_out as mmCIF.

    If cif_ref is given, that file is read, its first block having
    _atom_site.id is updated with the model, and the whole reference document
    is written to cif_out. If the reference cannot be read or has no such
    block, the file is written as if cif_ref was not given.

    Refmac fails if _entry.id is longer than 80 chars including quotations
    """
    st_new = st.clone()
    logger.writeln("Writing mmCIF file: {}".format(cif_out))

    if not cif_ref:
        # no reference: build a new document with all output groups enabled
        st_new.name = st_new.name[:78] # this will become _entry.id
        if "_entry.id" in st_new.info:
            st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
        all_groups = gemmi.MmcifOutputGroups(True, auth_all=True)
        new_doc = gemmi.cif.Document()
        new_block = new_doc.add_new_block("new")
        st_new.update_mmcif_block(new_block, all_groups)
        new_doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
        return

    logger.writeln(" using mmCIF metadata from: {}".format(cif_ref))
    # only these groups are taken from the model; the rest stays as in the reference
    groups = gemmi.MmcifOutputGroups(False)
    for group_name in ("group_pdb", "ncs", "atoms", "cell", "scale", "assembly",
                       "entity", "entity_poly", "entity_poly_seq", "cis", "conn",
                       "software", "auth_all"):
        setattr(groups, group_name, True)
    # FIXME is this all?
    try:
        ref_doc = read_cif_safe(cif_ref)
    except Exception as e:
        # Sometimes refmac writes a broken mmcif file..
        logger.error("Error in mmCIF reading: {}".format(e))
        logger.error(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    model_blocks = [b for b in ref_doc if b.find_loop("_atom_site.id")]
    if not model_blocks:
        logger.writeln("No _atom_site found in {}".format(cif_ref))
        logger.writeln(" Give up using cif reference.")
        return write_mmcif(st, cif_out)

    target = model_blocks[0]
    # to remove fract_transf_matrix. maybe we should keep some (like _atom_sites.solution_hydrogens)?
    # we do not want this because cell may be updated
    target.find_mmcif_category("_atom_sites.").erase()
    st_new.update_mmcif_block(target, groups)
    if "_entry.id" in st_new.info:
        st_new.info["_entry.id"] = st_new.info["_entry.id"][:78]
    ref_doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
# write_mmcif()
121
+
122
def write_pdb(st, pdb_out):
    """
    Write a copy of st to pdb_out in PDB format.
    Chain names are shortened if any chain ID is longer than 2 characters,
    and shorten_ccd_codes() is always called; renamed residues are logged.
    """
    logger.writeln("Writing PDB file: {}".format(pdb_out))
    st = st.clone() # the caller's structure is not modified
    if any(len(chain_id) > 2 for chain_id in model.all_chain_ids(st)):
        st.shorten_chain_names()
    st.shorten_ccd_codes()
    renamed = st.shortened_ccd_codes
    if renamed:
        pairs = ["{}->{}".format(old, new) for old, new in renamed]
        logger.writeln(" Using shortened residue names in the output pdb file: " + " ".join(pairs))
    st.write_pdb(pdb_out, use_linkr=True)
# write_pdb()
134
+
135
def write_model(st, prefix=None, file_name=None, pdb=False, cif=False, cif_ref=None, hout=True):
    """
    Write a model as mmCIF and/or PDB.

    If file_name is given, the format is decided from its ending ("cif" or
    ".pdb"); other names raise Exception. Otherwise prefix+".mmcif" is written
    when cif is true and prefix+".pdb" when pdb is true.
    If hout is false, hydrogen atoms are removed from a copy before writing.
    """
    if not hout and st[0].has_hydrogen():
        st = st.clone()
        st.remove_hydrogens()

    if not file_name:
        # prefix mode; both, one or none of the files may be written
        if cif:
            write_mmcif(st, prefix+".mmcif", cif_ref)
        if pdb:
            write_pdb(st, prefix+".pdb")
        return

    if file_name.endswith("cif"):
        write_mmcif(st, file_name, cif_ref)
        return
    if file_name.endswith(".pdb"):
        write_pdb(st, file_name)
        return
    raise Exception("Cannot determine file format from file name: {}".format(file_name))
# write_model()
152
+
153
def read_shifts_txt(shifts_txt):
    """
    Read cell and shift values from a whitespace-separated text file.

    The file is scanned for "pdbin"/"pdbout" followed by "cell" (6 numbers)
    or "shifts" (3 numbers). Returns a dict such as
    {"pdbin cell": [a, b, c, alpha, beta, gamma], "pdbout shifts": [x, y, z]}.
    """
    # the file is closed as soon as it has been read
    with open(shifts_txt) as ifs:
        s = ifs.read().split() # split() already treats newlines as separators

    ret = {}
    for i in range(len(s)-3):
        if s[i] in ("pdbin", "pdbout") and s[i+1] in ("cell", "shifts"):
            n = 6 if s[i+1] == "cell" else 3
            ret["{} {}".format(s[i], s[i+1])] = [float(x) for x in s[i+2:i+2+n]]

    return ret
# read_shifts_txt()
164
+
165
def read_ccp4_map(filename, setup=True, default_value=0., pixel_size=None, ignore_origin=True):
    """
    Read a CCP4/MRC map with gemmi and log a summary of its header.

    Parameters:
      filename: map file to read
      setup: if true, m.setup(default_value) is called and the returned grid
             start is reordered using m.axis_positions()
      default_value: value given to m.setup(); None means NaN
      pixel_size: a number, or a sequence of three numbers. If given, the cell
                  lengths are rescaled by pixel_size/voxel_size.
      ignore_origin: if true, non-zero ORIGIN values (header words 50-52) are
                     set to zero

    Returns a list [grid, grid_start, grid_shape].
    """
    m = gemmi.read_ccp4_map(filename)
    g = m.grid
    grid_cell = [m.header_i32(x) for x in (8,9,10)]
    grid_start = [m.header_i32(x) for x in (5,6,7)]
    grid_shape = [m.header_i32(x) for x in (1,2,3)]
    axis_pos = m.axis_positions()
    axis_letters = ["","",""]
    for i, l in zip(axis_pos, "XYZ"): axis_letters[i] = l
    spacings = [1./g.unit_cell.reciprocal().parameters[i]/grid_cell[i] for i in (0,1,2)]
    voxel_size = [g.unit_cell.parameters[i]/grid_cell[i] for i in (0,1,2)]
    origin = [m.header_float(x) for x in (50,51,52)]
    label = m.header_str(57, 80)
    # Cut at the first NUL if there is one. label[:label.find("\0")] would
    # drop the last character when no NUL is present (find() returns -1).
    label = label.split("\0", 1)[0]
    logger.writeln("Reading CCP4/MRC map file {}".format(filename))
    logger.writeln(" Cell Grid: {:4d} {:4d} {:4d}".format(*grid_cell))
    logger.writeln(" Map mode: {}".format(m.header_i32(4)))
    logger.writeln(" Start: {:4d} {:4d} {:4d}".format(*grid_start))
    logger.writeln(" Shape: {:4d} {:4d} {:4d}".format(*grid_shape))
    logger.writeln(" Cell: {} {} {} {} {} {}".format(*g.unit_cell.parameters))
    logger.writeln(" Axis order: {}".format(" ".join(axis_letters)))
    logger.writeln(" Space group: {}".format(m.header_i32(23)))
    logger.writeln(" Spacing: {:.6f} {:.6f} {:.6f}".format(*spacings))
    logger.writeln(" Voxel size: {:.6f} {:.6f} {:.6f}".format(*voxel_size))
    logger.writeln(" Origin: {:.6e} {:.6e} {:.6e}".format(*origin))
    if not numpy.all(numpy.asarray(origin) == 0.):
        logger.writeln(" ! WARNING: ORIGIN header is not supported.")
        if ignore_origin:
            logger.writeln(" ! WARNING: removing ORIGIN values. This might cause a misalignment between map and model.")
            for i in (50,51,52): m.set_header_float(i, 0.)
    logger.writeln(" Label: {}".format(label))
    logger.writeln("")

    if setup:
        if default_value is None: default_value = float("nan")
        m.setup(default_value)
        # report the start in the same axis order as the grid after setup()
        grid_start = [grid_start[i] for i in axis_pos]

    if pixel_size is not None:
        try:
            len(pixel_size)
        except TypeError:
            # a single number was given; use it for all three axes
            pixel_size = [pixel_size, pixel_size, pixel_size]

        logger.writeln("Overriding pixel size with {:.6f} {:.6f} {:.6f}".format(*pixel_size))
        orgc = m.grid.unit_cell.parameters
        new_abc = [orgc[i]*pixel_size[i]/voxel_size[i] for i in (0,1,2)]
        m.grid.unit_cell = gemmi.UnitCell(new_abc[0], new_abc[1], new_abc[2],
                                          orgc[3], orgc[4], orgc[5])
        logger.writeln(" New cell= {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}".format(*m.grid.unit_cell.parameters))

    return [m.grid, grid_start, grid_shape]
# read_ccp4_map()
218
+
219
def read_halfmaps(files, pixel_size=None, fail=True):
    """
    Read half maps with read_ccp4_map() and check that the first two agree
    in shape, unit cell and grid start but not in values.
    Exits (SystemExit) if fail is true and the number of files is not two,
    or if the two maps have exactly the same values.
    Returns the list of read_ccp4_map() results.
    """
    if fail and len(files) != 2:
        raise SystemExit("Error: Give exactly two files for half maps")

    maps = []
    for map_file in files:
        maps.append(read_ccp4_map(map_file, pixel_size=pixel_size))

    first, second = maps[0], maps[1]
    if numpy.array_equal(first[0].array, second[0].array):
        raise SystemExit("Error: Half maps have exactly the same values. Check your input.")

    assert first[0].shape == second[0].shape
    assert first[0].unit_cell == second[0].unit_cell
    assert first[1] == second[1]

    return maps
# read_halfmaps()
232
+
233
def read_mmhkl(hklin, cif_index=0): # mtz or mmcif
    """
    Read reflection data from an MTZ file, or from an mmCIF file (.cif/.ent)
    converting the refln block at cif_index to MTZ.
    Raises RuntimeError for other extensions or if space group is missing.
    Returns the Mtz object.
    """
    ext = splitext(hklin)[1]
    ext_lower = ext.lower()
    if ext_lower == ".mtz":
        logger.writeln("Reading MTZ file: {}".format(hklin))
        mtz = gemmi.read_mtz_file(hklin)
    elif ext_lower in (".cif", ".ent"):
        logger.writeln("Reading mmCIF file (hkl data): {} at index {}".format(hklin, cif_index+1))
        refln_blocks = gemmi.as_refln_blocks(gemmi.cif.read(hklin))
        converter = gemmi.CifToMtz()
        mtz = converter.convert_block_to_mtz(refln_blocks[cif_index])
    else:
        raise RuntimeError("Unsupported file type: {}".format(ext))

    if mtz.spacegroup is None:
        raise RuntimeError("Missing space group information")

    summary = (" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*mtz.cell.parameters),
               " Space group: {}".format(mtz.spacegroup.xhm()),
               " Columns: {}".format(" ".join(mtz.column_labels())),
               "")
    for line in summary:
        logger.writeln(line)
    return mtz
# read_mmhkl()
254
+
255
def is_mmhkl_file(hklin):
    """
    Tell if hklin is a macromolecular reflection file.
    Returns True for .mtz and for cif having _refln.index_h,
    False for .hkl and for cif having _refln_index_h,
    and None if it cannot be decided.
    """
    ext = splitext(hklin)[1].lower()
    if ext == ".mtz":
        return True
    if ext == ".hkl": # macromolecule files should not have .hkl extension
        return False
    if ext not in (".cif", ".ent"):
        return None
    for block in gemmi.cif.read(hklin):
        if block.find_values("_refln.index_h"):
            return True
        if block.find_values("_refln_index_h"):
            return False
    # otherwise cannot decide
    return None
# is_mmhkl_file()
269
+
270
def read_map_from_mtz(mtz_in, cols, grid_size=None, sample_rate=3):
    """
    Calculate a map from the amplitude (cols[0]) and phase (cols[1]) columns
    of a reflection file read by read_mmhkl().
    If grid_size is None, it is decided by mtz.get_size_for_hkl() with sample_rate.
    Returns (d_min, map) where d_min is mtz.resolution_high().
    """
    mtz = read_mmhkl(mtz_in)
    d_min = mtz.resolution_high() # TODO get resolution for column?
    if grid_size is None:
        grid_size = mtz.get_size_for_hkl(sample_rate=sample_rate)
    f_phi_grid = mtz.get_f_phi_on_grid(cols[0], cols[1], grid_size)
    return d_min, gemmi.transform_f_phi_grid_to_map(f_phi_grid)
# read_map_from_mtz()
279
+
280
def read_asu_data_from_mtz(mtz_in, cols):
    """
    Make gemmi AsuData from one or two columns of a reflection file.
    One column gives IntAsuData or FloatAsuData depending on f.is_integer();
    two columns (types F and P, phase in degrees) give ComplexAsuData.
    """
    assert 0 < len(cols) < 3
    mtz = read_mmhkl(mtz_in)
    sg = mtz.spacegroup
    miller = mtz.make_miller_array()
    f = mtz.column_with_label(cols[0])
    cell = mtz.get_cell(f.dataset_id)

    if len(cols) != 2:
        # single column
        asu_type = gemmi.IntAsuData if f.is_integer() else gemmi.FloatAsuData
        return asu_type(cell, sg, miller, f)

    phi = mtz.column_with_label(cols[1])
    assert f.type == "F"
    assert phi.type == "P"
    phi_rad = numpy.deg2rad(phi)
    f_comp = f * (numpy.cos(phi_rad) + 1j * numpy.sin(phi_rad))
    return gemmi.ComplexAsuData(cell, sg, miller, f_comp) # ensure asu?
# read_asu_data_from_mtz()
304
+
305
def read_cif_safe(cif_in):
    """
    Read a cif file (gzipped if the name ends with ".gz") and return
    gemmi.cif.Document. Null characters in the text are replaced with "."
    before parsing.
    """
    opener = gzip.open if cif_in.endswith(".gz") else open
    # the file is closed before parsing starts
    with opener(cif_in, "rt") as ifs:
        s = ifs.read()
    if "\0" in s: # Refmac occasionally writes \0 in some fields..
        logger.writeln(" WARNING: null character detected. Replacing with '.'")
        s = s.replace("\0", ".")
    doc = gemmi.cif.read_string(s)
    return doc
# read_cif_safe()
314
+
315
def read_structure(xyz_in, assign_het_flags=True, merge_chain_parts=True):
    """
    Read a model and return it as a structure object.

    The reader is chosen from the extension given by splitext() (lower-cased):
      .pdb/.ent   PDB file
      .cif/.mmcif mmCIF, small molecule cif or chemical component cif,
                  decided by the tags found; the first block having a
                  structure is used
      .ins/.res   SHELX file, read by read_shelx_ins()
    Other extensions raise RuntimeError.
    Returns None if a cif file has no block with a structure.
    """
    ext = splitext(xyz_in)[1]
    kind = ext.lower()
    multi_block_warning = " WARNING: more than one block having structure found. Will use first one."
    st = None

    if kind in (".pdb", ".ent"):
        logger.writeln("Reading PDB file: {}".format(xyz_in))
        st = gemmi.read_pdb(xyz_in)
    elif kind in (".cif", ".mmcif"):
        for block in read_cif_safe(xyz_in):
            if block.find_loop("_atom_site.id"):
                if st is not None:
                    logger.writeln(multi_block_warning)
                    break
                logger.writeln("Reading mmCIF file: {}".format(xyz_in))
                st = gemmi.make_structure_from_block(block)
            elif block.find_loop("_atom_site_label"):
                if st is not None:
                    logger.writeln(multi_block_warning)
                    break
                logger.writeln("Reading smCIF file: {}".format(xyz_in))
                ss = gemmi.read_small_structure(xyz_in)
                if not ss.sites:
                    raise RuntimeError("No atoms found in cif file.")
                st = model.cx_to_mx(ss)
            elif (block.find_loop("_chem_comp_atom.x") or
                  block.find_loop("_chem_comp_atom.model_Cartn_x") or
                  block.find_loop("_chem_comp_atom.pdbx_model_Cartn_x_ideal")):
                if st is None:
                    logger.writeln("Reading chemical component file: {}".format(xyz_in))
                    st = gemmi.make_structure_from_chemcomp_block(block)
                    # keep the first model only
                    while len(st) > 1:
                        del st[1]
    elif kind in (".ins", ".res"):
        logger.writeln("Reading SHELX ins/res file: {}".format(xyz_in))
        st = model.cx_to_mx(read_shelx_ins(ins_in=xyz_in)[0])
        st.setup_cell_images()
    else:
        raise RuntimeError("Unsupported file type: {}".format(ext))

    if st is None:
        return st

    if st.cell.is_crystal():
        logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*st.cell.parameters))
        logger.writeln(" Space group: {}".format(st.spacegroup_hm))
    if st.ncs:
        n_given = sum(1 for op in st.ncs if op.given)
        logger.writeln(" No. strict NCS: {} ({} already applied)".format(len(st.ncs), n_given))
    logger.writeln("")
    if assign_het_flags:
        st.assign_het_flags()
    if merge_chain_parts:
        st.merge_chain_parts()
    return st
# read_structure()
369
+
370
def read_structure_from_pdb_and_mmcif(xyz_in):
    """
    Read a model with read_structure() and decide which mmCIF file is used
    as a metadata reference when writing.

    - PDB input (.pdb/.ent): <root>.mmcif is the reference if it exists.
    - mmCIF input (.cif/.mmcif): the input itself is the reference, and if
      <root>.pdb exists, its raw remarks are copied to the returned structure.

    The extension is compared case-insensitively, as read_structure() does.
    Returns (st, cif_ref); cif_ref is None if no reference was found.
    """
    st = read_structure(xyz_in)
    cif_ref = None
    root, ext = splitext(xyz_in)
    ext = ext.lower()
    if ext in (".pdb", ".ent"):
        cif_in = root + ".mmcif"
        if os.path.isfile(cif_in):
            # use the logger, like the rest of this module, instead of print()
            logger.writeln(" Will use mmcif metadata from {}".format(cif_in))
            cif_ref = cif_in
    elif ext in (".cif", ".mmcif"):
        cif_ref = xyz_in
        pdb_in = root + ".pdb"
        if os.path.isfile(pdb_in):
            logger.writeln(" Reading PDB REMARKS from {}".format(pdb_in))
            tmp = gemmi.read_structure(pdb_in)
            st.raw_remarks = tmp.raw_remarks

    # any other name ending with "cif" is also used as its own reference
    if cif_ref is None and xyz_in.endswith("cif"):
        cif_ref = xyz_in

    return st, cif_ref
# read_structure_from_pdb_and_mmcif()
392
+
393
def merge_ligand_cif(cifs_in, cif_out):
    """
    Merge restraint dictionary cif files (cifs_in) into one file (cif_out).

    - If the same block name is found more than once (list blocks and
      "mod_" blocks are not checked here), only the last one is kept.
    - comp_list, link_list and mod_list of all files are combined into one
      list each, with the union of their tags.
    - Modifications are renamed if necessary by
      restraints.rename_cif_modification_if_necessary().
    """
    docs = [gemmi.cif.read(x) for x in cifs_in]
    tags = dict(comp=["_chem_comp.id"],
                link=["_chem_link.id"],
                mod=["_chem_mod.id"])
    list_names = [k+"_list" for k in tags]

    # Check duplicated block names
    positions = {} # block name -> [(index of doc, index of block), ...]
    for i_doc, src_doc in enumerate(docs):
        for i_block, block in enumerate(src_doc):
            if block.name in list_names or block.name.startswith("mod_"):
                continue
            positions.setdefault(block.name, []).append((i_doc, i_block))

    # Keep only last one if duplicated
    todel = []
    for name, where in positions.items():
        if len(where) < 2:
            continue
        for i_doc, i_block in reversed(where[:-1]):
            logger.writeln("WARNING: removing duplicated {} from {}".format(name, cifs_in[i_doc]))
            todel.append((i_doc, i_block))
            # remove it from comp_list/link_list of that file as well
            for kind in ("comp", "link"):
                if not name.startswith("{}_".format(kind)):
                    continue
                kind_list = docs[i_doc].find_block("{}_list".format(kind))
                table = kind_list.find("_chem_{}.".format(kind), ["id"])
                matched = [n for n, row in enumerate(table) if row.str(0) == name[5:]]
                for n in reversed(matched):
                    table.remove_row(n)

    # delete from the end so that the indices stay valid
    for i_doc, i_block in sorted(todel, reverse=True):
        del docs[i_doc][i_block]

    # Accumulate list
    found = dict(comp=0, link=0, mod=0)
    for src_doc in docs:
        for k in tags:
            list_block = src_doc.find_block("{}_list".format(k))
            if not list_block: continue
            found[k] += 1
            loop = list_block.find_loop(tags[k][0]).get_loop()
            for tag in loop.tags:
                if tag not in tags[k]: tags[k].append(tag)

    # Check duplicated modifications
    known_mods = [] # need to check monomer library?
    for src_doc in docs:
        restraints.rename_cif_modification_if_necessary(src_doc, known_mods)
        mod_list = src_doc.find_block("mod_list")
        if mod_list:
            for row in mod_list.find("_chem_mod.", ["id"]):
                known_mods.append(row.str(0))

    doc = gemmi.cif.Document()
    # Add lists
    for k in tags:
        if not found[k]: continue
        merged_list = doc.add_new_block("{}_list".format(k))
        merged_loop = merged_list.init_loop("", tags[k])
        # only the first tag is mandatory; "?" marks the others as optional
        tags_for_find = [tags[k][0]] + ["?"+x for x in tags[k][1:]]
        n_tags = len(tags[k])

        for src_doc in docs:
            list_block = src_doc.find_block("{}_list".format(k))
            if not list_block: continue
            for row in list_block.find(tags_for_find):
                merged_loop.add_row([row.get(x) if row.has(x) else "." for x in range(n_tags)])

    # Add other items
    for src_doc in docs:
        for block in src_doc:
            if block.name not in list_names:
                doc.add_copied_block(block)

    doc.write_file(cif_out, options=gemmi.cif.Style.Aligned)
# merge_ligand_cif()
468
+
469
def read_shelx_ins(ins_in=None, lines_in=None, ignore_q_peaks=True): # TODO support gz?
    """
    Read a SHELX ins/res file.

    Parameters:
      ins_in: file name; exactly one of ins_in and lines_in must be given
      lines_in: iterable of lines
      ignore_q_peaks: if true, sites whose label starts with "Q" are skipped

    Only TITL, CELL, LATT, SYMM, SFAC, HKLF and atom lines are used.
    Returns (ss, info): ss is gemmi.SmallStructure and info is a dict
    with key "hklf" (0 if HKLF was not found).
    Raises RuntimeError if the space group cannot be determined.
    """
    assert (ins_in, lines_in).count(None) == 1
    ss = gemmi.SmallStructure()

    keywords = """
    TITL CELL ZERR LATT SYMM SFAC NEUT DISP UNIT LAUE REM MORE END HKLF OMIT SHEL BASF TWIN TWST EXTI SWAT
    ABIN ANSC ANSR MERG LIST SPEC RESI MOVE ANIS AFIX HFIX FRAG FEND EXYZ EADP EQIV CONN PART BIND FREE DFIX
    DANG BUMP SAME SADI CHIV FLAT DELU SIMU RIGU PRIG DEFS ISOR XNPD NCSY SUMP L.S. CGLS BLOC DAMP STIR WGHT
    FVAR WIGL BOND CONF MPLA RTAB HTAB ACTA SIZE TEMP WPDB FMAP GRID PLAN TIME HOPE MOLE
    """.split()
    # escape keywords so that the dots of "L.S." match only a dot
    re_kwd = re.compile("^({})_?".format("|".join(re.escape(k) for k in keywords)))

    # remove comments/blanks and concatenate lines
    if ins_in:
        with open(ins_in) as ifs: # closed as soon as the lines are read
            lines = _join_shelx_ins_lines(ifs)
    else:
        lines = _join_shelx_ins_lines(lines_in)

    # parse lines
    sfacs = []
    latt, symms = 1, []
    info = dict(hklf=0)
    for l in lines:
        sp = l.split()
        ins = sp[0].upper()
        if ins == "TITL":
            pass
        elif l.startswith(" "): # title continued? instructions after space is allowed??
            pass
        elif ins == "CELL":
            #ss.wavelength = float(sp[1]) # next gemmi ver.
            ss.cell.set(*map(float, sp[2:]))
        elif ins == "LATT":
            latt = int(sp[1])
        elif ins == "SYMM":
            symms.append(gemmi.Op("".join(sp[1:])).wrap())
        elif ins == "SFAC": # TODO check numbers?
            if len(sp) < 2: continue
            sfacs.append(gemmi.Element(sp[1]))
            if len(sp) > 2:
                # if the 2nd item is not a number, the line is a list of elements
                try: float(sp[2])
                except ValueError:
                    sfacs.extend([gemmi.Element(x) for x in sp[2:]])
        elif ins == "HKLF":
            info["hklf"] = int(sp[1])
        elif not re_kwd.search(ins):
            # not a known instruction; should be an atom line
            if not 4 < len(sp) < 13:
                logger.writeln("cannot parse this line: {}".format(l))
                continue
            site = gemmi.SmallStructure.Site()
            site.label = sp[0]
            try:
                # the 2nd item is a 1-based index to the SFAC list
                site.element = sfacs[int(sp[1])-1]
            except (ValueError, IndexError):
                logger.error("failed to parse: {}".format(l))
                continue

            if site.label.startswith("Q") and ignore_q_peaks:
                logger.writeln("skip Q peak: {}".format(l))
                continue

            site.fract.fromlist(list(map(float, sp[2:5])))
            if len(sp) > 5:
                q = abs(float(sp[5]))
                if q > 10: q = q % 10 # FIXME proper handling
                site.occ = q
            if len(sp) > 11:
                u = list(map(float, sp[6:12]))
                site.aniso = gemmi.SMat33d(u[0], u[1], u[2], u[5], u[4], u[3])
                #TODO site.u_iso needs to be set?
            elif len(sp) > 6:
                # lines with 5 or 6 items have no U value; sp[6] would raise IndexError
                site.u_iso = float(sp[6])

            ss.add_site(site)

    # Determine space group
    if gemmi.Op() not in symms: # identity operator may not be present in ins file
        symms.append(gemmi.Op())

    lops = {1: [], # P
            2: [gemmi.Op("x+1/2,y+1/2,z+1/2")], # I
            3: [gemmi.Op("x+2/3,y+1/3,z+1/3"), # R
                gemmi.Op("x+1/3,y+2/3,z+2/3")],
            4: [gemmi.Op("x,y+1/2,z+1/2"), # F
                gemmi.Op("x+1/2,y,z+1/2"),
                gemmi.Op("x+1/2,y+1/2,z")],
            5: [gemmi.Op("x,y+1/2,z+1/2")], # A
            6: [gemmi.Op("x+1/2,y,z+1/2")], # B
            7: [gemmi.Op("x+1/2,y+1/2,z")], # C
            }
    for op in lops[abs(latt)]:
        symms.extend([x*op for x in symms])
    if latt > 0:
        # positive LATT: inversion is added to the operators
        symms.extend([x*gemmi.Op("-x,-y,-z") for x in symms])

    ss.symops = [op.triplet() for op in set(symms)]
    ss.determine_and_set_spacegroup("s")
    # in case of non-regular setting, gemmi.SpaceGroup cannot be constructed anyway.
    if ss.spacegroup is None:
        raise RuntimeError("Cannot construct space group from symbols: {}".format(ss.symops))
    return ss, info
# read_shelx_ins()

def _join_shelx_ins_lines(raw_lines):
    """
    Return the lines of a SHELX ins/res file without REM/";" lines and
    blank lines, with lines continued by a trailing "=" joined into one.
    """
    lines = []
    concat_flag = False # true if the previous line ended with "="
    for l in raw_lines:
        l = l.rstrip()
        if l.startswith(("REM", ";")): continue
        if not l.strip(): continue

        if l.endswith("="):
            l = l[:l.rindex("=")]
            if concat_flag:
                lines[-1] += l
            else:
                lines.append(l)
            concat_flag = True
        elif concat_flag:
            lines[-1] += l
            concat_flag = False
        else:
            lines.append(l)
    return lines
# _join_shelx_ins_lines()
587
+
588
def read_shelx_hkl(cell, sg, hklf, file_in=None, lines_in=None):
    """
    Read a SHELX hkl file (fixed-width columns) and return merged data as Mtz.

    Parameters:
      cell, sg: unit cell and space group given to gemmi.Intensities
      hklf: HKLF code; if 3, the intensity columns are relabelled as amplitudes
      file_in: file name; exactly one of file_in and lines_in must be given
      lines_in: iterable of lines
    """
    assert (file_in, lines_in).count(None) == 1
    if file_in:
        with open(file_in) as ifs: # closed as soon as parsing is done
            hkls, vals, sigs = _parse_shelx_hkl_lines(ifs)
    else:
        hkls, vals, sigs = _parse_shelx_hkl_lines(lines_in)

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, numpy.asarray(hkls), numpy.asarray(vals), numpy.asarray(sigs))
    ints.merge_in_place(gemmi.DataType.Anomalous)
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                        numpy.mean(ints.nobs_array),
                                                                        numpy.min(ints.nobs_array)))
    mtz = ints.prepare_merged_mtz(with_nobs=False)
    if hklf == 3:
        # old label -> (new label, new column type)
        conv = {"IMEAN": ("FP", "F"),
                "SIGIMEAN": ("SIGFP", "Q"),
                "I(+)": ("F(+)", "G"),
                "SIGI(+)": ("SIGF(+)", "L"),
                "I(-)": ("F(-)", "G"),
                "SIGI(-)": ("SIGF(-)", "L"),
                }
        for col in mtz.columns:
            if col.label in conv:
                col.label, col.type = conv[col.label]
    return mtz
# read_shelx_hkl()

def _parse_shelx_hkl_lines(lines):
    """
    Read h,k,l (3 x 4 columns), value and sigma (2 x 8 columns) from lines.
    Reading stops at h=k=l=0 or at a line whose indices cannot be parsed.
    Returns (hkls, vals, sigs).
    """
    hkls, vals, sigs = [], [], []
    for l in lines:
        if l.startswith(";"): continue
        if not l.strip() or len(l) < 25: continue
        try:
            hkl = int(l[:4]), int(l[4:8]), int(l[8:12])
        except ValueError:
            logger.writeln("Error while parsing HKL part: {}".format(l))
            break

        if hkl == (0,0,0): break
        hkls.append(hkl)
        vals.append(float(l[12:20]))
        sigs.append(float(l[20:28]))
        # batch = l[28:32]
        # wavelength = l[32:40]
    return hkls, vals, sigs
# _parse_shelx_hkl_lines()
628
+
629
def read_smcif_hkl(cif_in, cell_if_absent=None, sg_if_absent=None):
    """
    Read intensities (_refln_F_squared_meas/sigma) from a small molecule cif
    and return merged data as Mtz.

    Parameters:
      cif_in: cif file having a single block
      cell_if_absent: unit cell used if the file has no usable _cell_* values
      sg_if_absent: space group used if it cannot be found from the
                    symmetry operators in the file

    Raises RuntimeError if unit cell or space group is still unknown.
    """
    # Very crude support for smcif - just because I do not know other varieties.
    # TODO other possible data types? (amplitudes?)
    # TODO check _refln_observed_status?
    logger.writeln("Reading hkl data from smcif: {}".format(cif_in))
    b = gemmi.cif.read(cif_in).sole_block()

    def cell_value(tag):
        raw = b.find_value(tag)
        if raw is None:
            raise ValueError("{} not found".format(tag))
        # as_number() is used instead of float() because values are often
        # written with s.u. such as 10.234(5)
        val = gemmi.cif.as_number(raw)
        if numpy.isnan(val):
            raise ValueError("bad value for {}: {}".format(tag, raw))
        return val

    try:
        cell_par = [cell_value("_cell_length_{}".format(x)) for x in ("a", "b", "c")]
        cell_par += [cell_value("_cell_angle_{}".format(x)) for x in ("alpha", "beta", "gamma")]
    except (TypeError, ValueError):
        logger.writeln(" WARNING: no unit cell in this file")
        cell = cell_if_absent
    else:
        cell = gemmi.UnitCell(*cell_par)
        logger.writeln(" Unit cell: {:.4f} {:.4f} {:.4f} {:.3f} {:.3f} {:.3f}".format(*cell.parameters))

    for optag in ("_space_group_symop_operation_xyz", "_symmetry_equiv_pos_as_xyz"):
        ops = [gemmi.Op(gemmi.cif.as_string(x)) for x in b.find_loop(optag)]
        sg = gemmi.find_spacegroup_by_ops(gemmi.GroupOps(ops))
        if sg:
            logger.writeln(" Space group: {}".format(sg.xhm()))
            break
    else:
        # no space group found with either tag
        sg = sg_if_absent

    if cell is None or sg is None:
        raise RuntimeError("Cell and/or symmetry operations not found in {}".format(cif_in))

    l = b.find_values("_refln_index_h").get_loop()
    i_hkl = [l.tags.index("_refln_index_{}".format(h)) for h in "hkl"]
    i_int = l.tags.index("_refln_F_squared_meas")
    i_sig = l.tags.index("_refln_F_squared_sigma")
    hkls, vals, sigs = [], [], []
    for i in range(l.length()):
        hkl = [gemmi.cif.as_int(l[i, j]) for j in i_hkl]
        hkls.append(hkl)
        vals.append(gemmi.cif.as_number(l[i, i_int]))
        sigs.append(gemmi.cif.as_number(l[i, i_sig]))

    ints = gemmi.Intensities()
    ints.set_data(cell, sg, hkls, vals, sigs)
    ints.merge_in_place(gemmi.DataType.Anomalous)
    if not (ints.isign_array < 0).any(): ints.type = gemmi.DataType.Mean
    logger.writeln(" Multiplicity: max= {} mean= {:.1f} min= {}".format(numpy.max(ints.nobs_array),
                                                                        numpy.mean(ints.nobs_array),
                                                                        numpy.min(ints.nobs_array)))
    logger.writeln("")
    return ints.prepare_merged_mtz(with_nobs=False)
# read_smcif_hkl()
677
+
678
def read_smcif_shelx(cif_in):
    """
    Read the SHELX res and hkl files embedded in a small molecule cif
    (_shelx_res_file and _shelx_hkl_file).
    Raises RuntimeError if one of them is not found.
    Returns (mtz, ss, info); ss and info are what read_shelx_ins() returns.
    """
    logger.writeln("Reading small molecule cif: {}".format(cif_in))
    block = gemmi.cif.read(cif_in).sole_block()
    res_str = block.find_value("_shelx_res_file")
    hkl_str = block.find_value("_shelx_hkl_file")
    if not res_str:
        raise RuntimeError("_shelx_res_file not found in {}".format(cif_in))
    if not hkl_str:
        raise RuntimeError("_shelx_hkl_file not found in {}".format(cif_in))

    ss, info = read_shelx_ins(lines_in=res_str.splitlines())
    # cell and space group for the reflections are taken from the res file
    mtz = read_shelx_hkl(ss.cell, ss.spacegroup, info.get("hklf"),
                         lines_in=hkl_str.splitlines())
    return mtz, ss, info
# read_smcif_shelx()
690
+
691
def read_small_molecule_files(files):
    """
    Find a model and reflection data in small molecule files
    (cif, SHELX res/ins and hkl).

    The first pass reads coordinates from .cif/.res/.ins files and takes the
    HKLF code from the SHELX instructions if available. The second pass reads
    reflection data from cif (embedded SHELX hkl or _refln loop) or .hkl files,
    using cell and space group of the model.

    Returns (st, mtz). st and mtz are None if no coordinates were found;
    mtz is None if no reflection data were found.
    """
    st, mtz, hklf = None, None, None
    # first pass - find structure
    for filename in files:
        ext = splitext(filename)[1]
        if ext in (".cif", ".res", ".ins"):
            try:
                st_read = read_structure(filename)
            except Exception:
                # not readable as a model; try the next file
                continue
            # read_structure() gives None for a cif without coordinates (e.g. hkl
            # only); it must not replace a model already read from another file
            if st_read is not None:
                st = st_read
                logger.writeln("Coordinates read from: {}".format(filename))
            if ext == ".cif":
                b = gemmi.cif.read(filename).sole_block()
                res_str = b.find_value("_shelx_res_file")
            else:
                with open(filename) as ifs:
                    res_str = ifs.read()
            if res_str:
                _, info = read_shelx_ins(lines_in=res_str.splitlines())
                hklf = info["hklf"]
    if st is None:
        logger.writeln("ERROR: coordinates not found.")
        return None, None

    # second pass - find hkl
    for filename in files:
        ext = splitext(filename)[1]
        try:
            b = gemmi.cif.read(filename).sole_block()
            hkl_str = b.find_value("_shelx_hkl_file")
            if hkl_str:
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, lines_in=hkl_str.splitlines())
                logger.writeln("reflection data read from: {}".format(filename))
            elif b.find_loop("_refln_index_h"):
                mtz = read_smcif_hkl(filename, st.cell, st.find_spacegroup())
        except ValueError: # not a cif file
            if ext == ".hkl":
                mtz = read_shelx_hkl(st.cell, st.find_spacegroup(), hklf, file_in=filename)
                logger.writeln("reflection data read from: {}".format(filename))

    return st, mtz
731
+
732
def read_sequence_file(f):
    """
    Read a FASTA-like sequence file.

    A line starting with ">" begins a new entry and gives its name. The other
    non-blank lines are appended to the current entry in upper case with "*"
    and "-" removed. Lines before the first ">" go to an entry with an empty
    name.

    Returns a list of [name, sequence].
    """
    # TODO needs improvement
    ret = []
    with open(f) as ifs: # closed when reading is done
        for l in ifs:
            l = l.strip()
            if l.startswith(">"):
                name = l[1:].strip()
                ret.append([name, ""])
            elif l:
                if not ret: ret.append(["", ""])
                ret[-1][1] += l.replace("*", "").replace("-", "").upper()
    return ret