servalcat-0.4.131-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +1162 -0
  7. servalcat/refine/refine_geom.py +245 -0
  8. servalcat/refine/refine_spa.py +400 -0
  9. servalcat/refine/refine_xtal.py +339 -0
  10. servalcat/refine/spa.py +151 -0
  11. servalcat/refine/xtal.py +312 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +191 -0
  14. servalcat/refmac/refmac_keywords.py +660 -0
  15. servalcat/refmac/refmac_wrapper.py +423 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +488 -0
  18. servalcat/spa/fsc.py +391 -0
  19. servalcat/spa/localcc.py +197 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +979 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1629 -0
  27. servalcat/utils/fileio.py +836 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +811 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +933 -0
  33. servalcat/utils/refmac.py +759 -0
  34. servalcat/utils/restraints.py +888 -0
  35. servalcat/utils/symmetry.py +298 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +262 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1954 -0
  40. servalcat/xtal/twin.py +316 -0
  41. servalcat-0.4.131.dist-info/METADATA +60 -0
  42. servalcat-0.4.131.dist-info/RECORD +45 -0
  43. servalcat-0.4.131.dist-info/WHEEL +6 -0
  44. servalcat-0.4.131.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
servalcat/utils/commands.py
@@ -0,0 +1,1629 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import fileio
11
+ from servalcat.utils import symmetry
12
+ from servalcat.utils import model
13
+ from servalcat.utils import hkl
14
+ from servalcat.utils import restraints
15
+ from servalcat.utils import maps
16
+ from servalcat.refmac import refmac_keywords
17
+ from servalcat.refine.refine import Geom, RefineParams
18
+ from servalcat import ext
19
+ import os
20
+ import gemmi
21
+ import numpy
22
+ import scipy.spatial
23
+ import pandas
24
+ import json
25
+ import re
26
+ import argparse
27
+
28
+ def add_arguments(p):
29
+ subparsers = p.add_subparsers(dest="subcommand")
30
+
31
+ # show
32
+ parser = subparsers.add_parser("show", description = 'Show file info supported by the program')
33
+ parser.add_argument('files', nargs='+')
34
+
35
+ # json2csv
36
+ parser = subparsers.add_parser("json2csv", description = 'Convert json to csv for plotting')
37
+ parser.add_argument('json')
38
+ parser.add_argument('-o', '--output_prefix')
39
+
40
+ # symmodel
41
+ parser = subparsers.add_parser("symmodel", description="Add symmetry annotation to model")
42
+ parser.add_argument('--model', required=True)
43
+ group = parser.add_mutually_exclusive_group()
44
+ group.add_argument('--map', help="Take box size from the map")
45
+ group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
46
+ help="Box size")
47
+ sym_group = parser.add_argument_group("symmetry")
48
+ symmetry.add_symmetry_args(sym_group, require_pg=True)
49
+ parser.add_argument('--contacting_only', action="store_true", help="Filter out non-contacting NCS")
50
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
51
+ parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
52
+ help="How to decide new chain IDs in expanded model (default: short); "
53
+ "dup: use original chain IDs (with different segment IDs), "
54
+ "short: use unique new IDs, "
55
+ "number: add number to original chain ID")
56
+ parser.add_argument('--biomt', action="store_true", help="Add BIOMT also")
57
+ parser.add_argument('-o', '--output_prfix')
58
+ parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
59
+ parser.add_argument('--cif', action="store_true", help="Write a cif file")
60
+
61
+ # helical_biomt
62
+ parser = subparsers.add_parser("helical_biomt", description="generate BIOMT of helical reconstruction for PDB deposition")
63
+ parser.add_argument('--model', required=True)
64
+ group = parser.add_mutually_exclusive_group()
65
+ group.add_argument('--map', help="Take box size from the map")
66
+ group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
67
+ help="Box size")
68
+ sym_group = parser.add_argument_group("symmetry")
69
+ symmetry.add_symmetry_args(sym_group, require_pg=True)
70
+ parser.add_argument('--start', type=int)
71
+ parser.add_argument('--end', type=int)
72
+ parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
73
+ help="How to decide new chain IDs in expanded model (default: short); "
74
+ "dup: use original chain IDs (with different segment IDs), "
75
+ "short: use unique new IDs, "
76
+ "number: add number to original chain ID")
77
+ parser.add_argument('-o', '--output_prfix')
78
+
79
+ # expand
80
+ parser = subparsers.add_parser("expand", description="Expand symmetry")
81
+ parser.add_argument('--model', required=True)
82
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
83
+ group = parser.add_mutually_exclusive_group()
84
+ group.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
85
+ help="How to decide new chain IDs in expanded model (default: short); "
86
+ "dup: use original chain IDs (with different segment IDs), "
87
+ "short: use unique new IDs, "
88
+ "number: add number to original chain ID")
89
+ group.add_argument("--split", action="store_true", help="split file for each operator")
90
+ parser.add_argument('-o', '--output_prfix')
91
+ parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
92
+ parser.add_argument('--cif', action="store_true", help="Write a cif file")
93
+
94
+ # h_add
95
+ parser = subparsers.add_parser("h_add", description = 'Add hydrogen in riding position')
96
+ parser.add_argument('model')
97
+ parser.add_argument('--ligand', nargs="*", action="append")
98
+ parser.add_argument("--monlib",
99
+ help="Monomer library path. Default: $CLIBD_MON")
100
+ parser.add_argument('-o','--output')
101
+ parser.add_argument("--pos", choices=["elec", "nucl"], default="elec")
102
+
103
+ # add_op3
104
+ parser = subparsers.add_parser("add_op3", description = "Add OP3 atoms to 5' ends")
105
+ parser.add_argument('model')
106
+ parser.add_argument('--chains', nargs="*", action="append", help="For selected chains only")
107
+ parser.add_argument('--ligand', nargs="*", action="append")
108
+ parser.add_argument("--monlib",
109
+ help="Monomer library path. Default: $CLIBD_MON")
110
+ parser.add_argument('-o','--output')
111
+
112
+ # map_peaks
113
+ parser = subparsers.add_parser("map_peaks", description = 'List density peaks and write a coot script')
114
+ parser.add_argument('--model', required=True, help="Model")
115
+ group = parser.add_mutually_exclusive_group(required=True)
116
+ group.add_argument('--map', help="Map file")
117
+ group.add_argument('--mtz', help="MTZ for map file")
118
+ parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
119
+ parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
120
+ group = parser.add_mutually_exclusive_group(required=True)
121
+ group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
122
+ group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
123
+ parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
124
+ help="default: %(default)s")
125
+ parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
126
+ parser.add_argument('--max_volume', type=float, help="maximum blob volume (default: none)")
127
+ parser.add_argument('-o','--output_prefix', default="peaks")
128
+
129
+ # h_density
130
+ parser = subparsers.add_parser("h_density", description = 'Hydrogen density analysis')
131
+ parser.add_argument('--model', required=True, help="Model with hydrogen atoms")
132
+ group = parser.add_mutually_exclusive_group(required=True)
133
+ group.add_argument('--map', help="Fo-Fc map file")
134
+ group.add_argument('--mtz', help="MTZ for Fo-Fc map file")
135
+ parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
136
+ parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
137
+ #parser.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
138
+ group = parser.add_mutually_exclusive_group(required=True)
139
+ group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
140
+ group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
141
+ parser.add_argument('--max_dist', type=float, default=0.5, help="max distance between peak and hydrogen position in the model (default: %(default).1f)")
142
+ parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
143
+ help="default: %(default)s")
144
+ parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
145
+ parser.add_argument('--max_volume', type=float, default=3, help="maximum blob volume (default: %(default).1f)")
146
+ parser.add_argument('-o','--output_prefix')
147
+
148
+ # fix_link
149
+ parser = subparsers.add_parser("fix_link", description = 'Fix LINKR/_struct_conn records in the model')
150
+ parser.add_argument('model')
151
+ parser.add_argument('--ligand', nargs="*", action="append")
152
+ parser.add_argument("--monlib",
153
+ help="Monomer library path. Default: $CLIBD_MON")
154
+ parser.add_argument('--bond_margin', type=float, default=1.3, help='(default: %(default).1f)')
155
+ parser.add_argument('--metal_margin', type=float, default=1.1, help='(default: %(default).1f)')
156
+ parser.add_argument('-o','--output', help="Default: input_fixlink.{pdb|mmcif}")
157
+
158
+ # merge_models
159
+ parser = subparsers.add_parser("merge_models", description = 'Merge multiple model files')
160
+ parser.add_argument('models', nargs="+")
161
+ parser.add_argument('-o','--output', required=True)
162
+
163
+ # merge_dicts
164
+ parser = subparsers.add_parser("merge_dicts", description = 'Merge restraint dictionary cif files')
165
+ parser.add_argument('cifs', nargs="+")
166
+ parser.add_argument('-o','--output', default="merged.cif", help="Output cif file (default: %(default)s)")
167
+
168
+ # geom
169
+ parser = subparsers.add_parser("geom", description = 'Calculate geometry and show outliers')
170
+ parser.add_argument('model')
171
+ parser.add_argument('--ligand', nargs="*", action="append")
172
+ parser.add_argument("--monlib",
173
+ help="Monomer library path. Default: $CLIBD_MON")
174
+ parser.add_argument('--keywords', nargs='+', action="append",
175
+ help="refmac keyword(s)")
176
+ parser.add_argument('--keyword_file', nargs='+', action="append",
177
+ help="refmac keyword file(s)")
178
+ parser.add_argument('--sigma', type=float, default=5,
179
+ help="sigma cutoff to print outliers (default: %(default).1f)")
180
+ parser.add_argument('--per_atom_score_as_b', action='store_true',
181
+ help="write model file with per-atom score as B values")
182
+ parser.add_argument("--check_skew", action='store_true', help="(experimental) check bond skew to test magnification")
183
+ parser.add_argument('-n', '--nucleus', action="store_true", help="Use nucleus distances (for neutron)")
184
+ parser.add_argument("--ignore_h", action='store_true', help="ignore hydrogen")
185
+ parser.add_argument("--selection", help="evaluate part of the model")
186
+ parser.add_argument('--dump_all', action="store_true", help=argparse.SUPPRESS)
187
+ parser.add_argument('-o', '--output_prefix',
188
+ help="default: taken from input file")
189
+
190
+ # conf
191
+ parser = subparsers.add_parser("conf", description = 'Compare conformations')
192
+ parser.add_argument('models', nargs="+")
193
+ parser.add_argument("--min_diff", type=float, default=60.)
194
+ parser.add_argument('--ligand', nargs="*", action="append")
195
+ parser.add_argument("--monlib",
196
+ help="Monomer library path. Default: $CLIBD_MON")
197
+ parser.add_argument("--same_chain", action='store_true', help="Only between same chains (more than one file)")
198
+ parser.add_argument('-o', '--output_prefix', default="conf",
199
+ help="")
200
+
201
+ # adp
202
+ parser = subparsers.add_parser("adp", description = 'ADP analysis')
203
+ parser.add_argument('model')
204
+ parser.add_argument('-o', '--output_prefix',
205
+ help="default: taken from input file")
206
+
207
+ # power
208
+ parser = subparsers.add_parser("power", description = 'Show power spectrum')
209
+ parser.add_argument("--map", nargs="*", action="append")
210
+ parser.add_argument("--halfmaps", nargs="*", action="append")
211
+ parser.add_argument('--mask', help='Mask file')
212
+ parser.add_argument('-d', '--resolution', type=float)
213
+ parser.add_argument('-o', '--output_prefix', default="power")
214
+
215
+ # fcalc
216
+ parser = subparsers.add_parser("fcalc", description = 'Structure factor from model')
217
+ parser.add_argument('--model', required=True)
218
+ parser.add_argument("--no_expand_ncs", action='store_true', help="Do not expand strict NCS in MTRIX or _struct_ncs_oper")
219
+ parser.add_argument("--method", choices=["fft", "direct"], default="fft")
220
+ parser.add_argument("--source", choices=["electron", "xray", "neutron", "custom"], default="electron")
221
+ parser.add_argument('--ligand', nargs="*", action="append")
222
+ parser.add_argument("--monlib",
223
+ help="Monomer library path. Default: $CLIBD_MON")
224
+ parser.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
225
+ help="Override unit cell")
226
+ parser.add_argument('--auto_box_with_padding', type=float, help="Determine box size from model with specified padding")
227
+ parser.add_argument('--cutoff', type=float, default=1e-5)
228
+ parser.add_argument('--rate', type=float, default=1.5)
229
+ parser.add_argument('--add_dummy_sigma', action='store_true', help="write dummy SIGF")
230
+ parser.add_argument('--as_intensity', action='store_true', help="if you want |F|^2")
231
+ parser.add_argument('--keep_charges', action='store_true',
232
+ help="Use scattering factor for charged atoms. Use it with care.")
233
+ parser.add_argument('-d', '--resolution', type=float, required=True)
234
+ parser.add_argument('-o', '--output_prefix')
235
+
236
+ # nemap
237
+ parser = subparsers.add_parser("nemap", description = 'Normalized expected map calculation from half maps')
238
+ parser.add_argument("--halfmaps", required=True, nargs=2)
239
+ parser.add_argument('--pixel_size', type=float, help='Override pixel size (A)')
240
+ parser.add_argument("--half1_only", action='store_true', help="Only use half 1 for map calculation (use half 2 only for noise estimation)")
241
+ parser.add_argument('-B', type=float, help="local B value")
242
+ parser.add_argument("--no_fsc_weights", action='store_true',
243
+ help="Just for debugging purpose: turn off FSC-based weighting")
244
+ parser.add_argument("--sharpening_b", type=float,
245
+ help="Use B value (negative value for sharpening) instead of standard deviation of the signal")
246
+ parser.add_argument("-d", '--resolution', type=float)
247
+ parser.add_argument('-m', '--mask', help="mask file")
248
+ parser.add_argument('-o', '--output_prefix', default='nemap')
249
+ parser.add_argument("--trim", action='store_true', help="Write trimmed maps")
250
+ parser.add_argument("--trim_mtz", action='store_true', help="Write trimmed mtz")
251
+ parser.add_argument("--local_fourier_weighting_with", type=float, default=0,
252
+ help="Experimental: give kernel size in A^-1 unit to use local Fourier weighting instead of resolution-dependent weights")
253
+
254
+ # blur
255
+ parser = subparsers.add_parser("blur", description = 'Blur data by specified B value')
256
+ parser.add_argument('--hklin', required=True, help="input MTZ file")
257
+ parser.add_argument('-B', type=float, required=True, help="B value for blurring (negative value for sharpening)")
258
+ parser.add_argument('-o', '--output_prefix')
259
+
260
+ # mask_from_model
261
+ parser = subparsers.add_parser("mask_from_model", description = 'Make a mask from model')
262
+ parser.add_argument("--map", required=True, help="For unit cell and pixel size reference")
263
+ parser.add_argument("--model", required=True)
264
+ parser.add_argument("--selection")
265
+ parser.add_argument('--radius', type=float, required=True,
266
+ help='Radius in angstrom')
267
+ parser.add_argument('--soft_edge', type=float, default=0,
268
+ help='Soft edge (default: %(default).1f)')
269
+ parser.add_argument('-o', '--output', default="mask_from_model.mrc")
270
+
271
+ # applymask (and normalize within mask)
272
+ parser = subparsers.add_parser("applymask", description = 'Apply mask and optionally normalize map within mask')
273
+ parser.add_argument("--map", required=True)
274
+ parser.add_argument('--mask', required=True, help='Mask file')
275
+ parser.add_argument("--normalize", action='store_true',
276
+ help="Normalize map values using mean and sd within the mask")
277
+ parser.add_argument("--trim", action='store_true', help="Write trimmed map")
278
+ parser.add_argument('--mask_cutoff', type=float, default=0.5,
279
+ help="cutoff value for normalization and trimming (default: %(default)s)")
280
+ parser.add_argument('-o', '--output_prefix')
281
+
282
+ # map2mtz
283
+ parser = subparsers.add_parser("map2mtz", description = 'FFT map and write an mtz')
284
+ parser.add_argument("--map", required=True)
285
+ parser.add_argument("-d", '--resolution', type=float)
286
+ parser.add_argument('-o', '--output')
287
+
288
+ # sm2mm
289
+ parser = subparsers.add_parser("sm2mm", description = 'Small molecule files (cif/hkl/res/ins) to macromolecules (pdb/mmcif/mtz)')
290
+ parser.add_argument('files', nargs='+', help='Cif/ins/res/hkl files')
291
+ parser.add_argument('-o', '--output_prefix')
292
+
293
+ # mm2ins
294
+ parser = subparsers.add_parser("mm2ins", description = 'convert pdb/mmcif to ins for shelxl/olex2')
295
+ parser.add_argument('model')
296
+ parser.add_argument('--hklin')
297
+ parser.add_argument('-o', '--output')
298
+
299
+ # seq
300
+ parser = subparsers.add_parser("seq", description = 'Print/align model sequence')
301
+ parser.add_argument("--model", required=True)
302
+ parser.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
303
+ parser.add_argument('--scoring', nargs=6, type=int, default=(1, 0, -1, -1, 0, -1),
304
+ metavar=("match", "mismatch", "gapo", "gape", "good_gapo", "bad_gapo"),
305
+ help="scoring function. default: %(default)s")
306
+
307
+ # dnarna
308
+ parser = subparsers.add_parser("dnarna", description = 'DNA to RNA or RNA to DNA model conversion')
309
+ parser.add_argument("model")
310
+ group = parser.add_mutually_exclusive_group(required=True)
311
+ group.add_argument('--to_dna', action='store_true', help="To DNA")
312
+ group.add_argument('--to_rna', action='store_true', help="To RNA")
313
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to convert")
314
+ parser.add_argument('-o', '--output')
315
+
316
+ # add_arguments()
317
+
318
+ def parse_args(arg_list):
319
+ parser = argparse.ArgumentParser()
320
+ add_arguments(parser)
321
+ return parser.parse_args(arg_list)
322
+ # parse_args()
323
+
324
+ def symmodel(args):
325
+ if args.chains: args.chains = sum(args.chains, [])
326
+ model_format = fileio.check_model_format(args.model)
327
+
328
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
329
+ short=gemmi.HowToNameCopiedChain.Short,
330
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
331
+
332
+ if (args.twist, args.rise).count(None) == 1:
333
+ raise SystemExit("ERROR: give both helical parameters --twist and --rise")
334
+
335
+ is_helical = args.twist is not None
336
+ st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
337
+ st.spacegroup_hm = "P 1"
338
+ map_and_start = None
339
+ if args.map:
340
+ logger.writeln("Reading cell from map")
341
+ map_and_start = fileio.read_ccp4_map(args.map, header_only=True)
342
+ st.cell = map_and_start[0].unit_cell
343
+ elif args.cell:
344
+ st.cell = gemmi.UnitCell(*args.cell)
345
+ elif not st.cell.is_crystal():
346
+ raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
347
+
348
+ if args.chains:
349
+ logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
350
+ chains = set(args.chains)
351
+ for m in st:
352
+ to_del = [c.name for c in m if c.name not in chains]
353
+ for c in to_del: m.remove_chain(c)
354
+ if st[0].count_atom_sites() == 0:
355
+ raise SystemExit("ERROR: no atoms left. Check --chains option.")
356
+
357
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
358
+
359
+ symmetry.update_ncs_from_args(args, st, map_and_start=map_and_start, filter_contacting=args.contacting_only)
360
+
361
+ if args.biomt:
362
+ st.assemblies.clear()
363
+ st.raw_remarks = []
364
+ a = model.prepare_assembly("1", all_chains, st.ncs, is_helical=is_helical)
365
+ st.assemblies.append(a)
366
+
367
+ if not args.output_prfix:
368
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_asu"
369
+
370
+ if args.pdb or args.cif:
371
+ fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif, cif_ref=cif_ref)
372
+ else:
373
+ fileio.write_model(st, file_name=args.output_prfix+model_format, cif_ref=cif_ref)
374
+
375
+ # Sym expand
376
+ model.expand_ncs(st, howtoname=howtoname)
377
+ st.assemblies.clear()
378
+ args.output_prfix += "_expanded"
379
+ if args.pdb or args.cif:
380
+ fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif)
381
+ else:
382
+ fileio.write_model(st, file_name=args.output_prfix+model_format)
383
+ # symmodel()
384
+
385
+ def helical_biomt(args):
386
+ if (args.twist, args.rise).count(None) > 0:
387
+ raise SystemExit("ERROR: give helical parameters --twist and --rise")
388
+
389
+ model_format = fileio.check_model_format(args.model)
390
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
391
+ short=gemmi.HowToNameCopiedChain.Short,
392
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
393
+
394
+ st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
395
+ st.spacegroup_hm = "P 1"
396
+ map_and_start = None
397
+ if args.map:
398
+ logger.writeln("Reading cell from map")
399
+ map_and_start = fileio.read_ccp4_map(args.map, header_only=True)
400
+ st.cell = map_and_start[0].unit_cell
401
+ elif args.cell:
402
+ st.cell = gemmi.UnitCell(*args.cell)
403
+ elif not st.cell.is_crystal():
404
+ raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
405
+
406
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
407
+
408
+ ncsops = symmetry.ncsops_from_args(args, st.cell, map_and_start=map_and_start, st=st,
409
+ helical_min_n=args.start, helical_max_n=args.end)
410
+ #ncsops = [x for x in ncsops if not x.tr.is_identity()] # remove identity
411
+
412
+ logger.writeln("")
413
+ logger.writeln("-------------------------------------------------------------")
414
+ logger.writeln("You may need to write following matrices in OneDep interface:")
415
+ for idx, op in enumerate(ncsops):
416
+ logger.writeln("")
417
+ logger.writeln("operator {}".format(idx+1))
418
+ mat = op.tr.mat.tolist()
419
+ vec = op.tr.vec.tolist()
420
+ for i in range(3):
421
+ mstr = ["{:10.6f}".format(mat[i][j]) for j in range(3)]
422
+ logger.writeln("{} {:14.5f}".format(" ".join(mstr), vec[i]))
423
+ logger.writeln("-------------------------------------------------------------")
424
+ logger.writeln("")
425
+
426
+ # BIOMT
427
+ st.assemblies.clear()
428
+ st.raw_remarks = []
429
+ a = model.prepare_assembly("1", all_chains, ncsops, is_helical=True)
430
+ st.assemblies.append(a)
431
+
432
+ if not args.output_prfix:
433
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_biomt"
434
+
435
+ fileio.write_model(st, args.output_prfix, pdb=(model_format == ".pdb"), cif=True, cif_ref=cif_ref)
436
+ logger.writeln("")
437
+ logger.writeln("These {}.* files may be used for deposition (once OneDep implemented reading BIOMT from file..)".format(args.output_prfix))
438
+ logger.writeln("")
439
+ # BIOMT expand
440
+ st.transform_to_assembly("1", howtoname)
441
+ args.output_prfix += "_expanded"
442
+ fileio.write_model(st, file_name=args.output_prfix+model_format)
443
+ logger.writeln(" note that this expanded model file is just for visual inspection, *not* for deposition!")
444
+ # helical_biomt()
445
+
446
+ def symexpand(args):
447
+ if args.chains: args.chains = sum(args.chains, [])
448
+ model_format = fileio.check_model_format(args.model)
449
+ if not args.split:
450
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
451
+ short=gemmi.HowToNameCopiedChain.Short,
452
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
453
+
454
+ st = fileio.read_structure(args.model)
455
+
456
+ if args.chains:
457
+ logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
458
+ chains = set(args.chains)
459
+ for m in st:
460
+ to_del = [c.name for c in m if c.name not in chains]
461
+ for c in to_del: m.remove_chain(c)
462
+
463
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
464
+
465
+ if not args.output_prfix:
466
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0]
467
+
468
+ if len(st.ncs) > 0:
469
+ symmetry.show_ncs_operators_axis_angle(st.ncs)
470
+ non_given = [op for op in st.ncs if not op.given]
471
+ if len(non_given) > 0:
472
+ if args.split:
473
+ for i, op in enumerate(st.ncs):
474
+ if op.given: continue
475
+ st_tmp = st.clone()
476
+ for m in st_tmp: m.transform_pos_and_adp(op.tr)
477
+ output_prfix = args.output_prfix + "_ncs_{:02d}".format(i+1)
478
+ if args.pdb or args.cif:
479
+ fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
480
+ else:
481
+ fileio.write_model(st_tmp, file_name=output_prfix+model_format)
482
+ else:
483
+ st_tmp = st.clone()
484
+ model.expand_ncs(st_tmp, howtoname=howtoname)
485
+ output_prfix = args.output_prfix + "_ncs_expanded"
486
+ if args.pdb or args.cif:
487
+ fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
488
+ else:
489
+ fileio.write_model(st_tmp, file_name=output_prfix+model_format)
490
+ else:
491
+ logger.writeln("All operators are already expanded (marked as given). Exiting.")
492
+ else:
493
+ logger.writeln("No NCS operators found. Exiting.")
494
+
495
+ if len(st.assemblies) > 0: # should we support BIOMT?
496
+ pass
497
+ # symexpand()
498
+
499
+ def h_add(args):
500
+ st = fileio.read_structure(args.model)
501
+ model_format = fileio.check_model_format(args.model)
502
+
503
+ if not args.output:
504
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
505
+ args.output = tmp + "_h" + model_format
506
+ logger.writeln("Output file: {}".format(args.output))
507
+
508
+ args.ligand = sum(args.ligand, []) if args.ligand else []
509
+ monlib = restraints.load_monomer_library(st,
510
+ monomer_dir=args.monlib,
511
+ cif_files=args.ligand)
512
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
513
+ restraints.find_and_fix_links(st, monlib, find_metal_links=False, add_found=False)
514
+ try:
515
+ restraints.add_hydrogens(st, monlib, args.pos)
516
+ except RuntimeError as e:
517
+ raise SystemExit("Error: {}".format(e))
518
+
519
+ fileio.write_model(st, file_name=args.output)
520
+ # h_add()
521
+
522
+ def add_op3(args):
523
+ if args.chains: args.chains = sum(args.chains, [])
524
+ st = fileio.read_structure(args.model)
525
+ model_format = fileio.check_model_format(args.model)
526
+
527
+ if not args.output:
528
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
529
+ args.output = tmp + "_op3" + model_format
530
+ logger.writeln("Output file: {}".format(args.output))
531
+
532
+ args.ligand = sum(args.ligand, []) if args.ligand else []
533
+ monlib = restraints.load_monomer_library(st,
534
+ monomer_dir=args.monlib,
535
+ cif_files=args.ligand)
536
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
537
+
538
+ for chain in st[0]:
539
+ if args.chains and chain.name not in args.chains: continue
540
+ p = chain.get_polymer()
541
+ if not p: continue
542
+ p_type = p.check_polymer_type()
543
+ if p_type not in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna): continue
544
+ r0 = p[0]
545
+ # TODO: alias
546
+ # TODO: altlocs
547
+ alt = "*"
548
+ if r0.find_atom("OP3", alt): continue
549
+ a_op1 = r0.find_atom("OP1", alt)
550
+ a_op2 = r0.find_atom("OP2", alt)
551
+ a_o5p = r0.find_atom("O5'", alt)
552
+ a_p = r0.find_atom("P", alt)
553
+ if None in (a_op1, a_op2, a_o5p, a_p):
554
+ logger.writeln(f"Error: atoms not found. skipping {chain.name}/{r0}")
555
+ continue
556
+ logger.writeln(f"Adding OP3 to {chain.name}/{r0}")
557
+ a_op3 = r0.add_atom(a_p) # inherit ADP and occupancy
558
+ a_op3.name = "OP3"
559
+ a_op3.element = gemmi.Element("O")
560
+ v1 = a_p.pos - a_op1.pos
561
+ v2 = a_p.pos - a_op2.pos
562
+ v3 = a_p.pos - a_o5p.pos
563
+ v = v1 + v2 + v3
564
+ a_op3.pos = a_p.pos + v / v.length() * 1.517
565
+
566
+ fileio.write_model(st, file_name=args.output)
567
+ # add_op3()
568
+
569
+ def read_map_and_oversample(map_in=None, mtz_in=None, mtz_labs=None, oversample_pixel=None):
570
+ if mtz_in is not None:
571
+ mtz = fileio.read_mmhkl(mtz_in)
572
+ lab_f, lab_phi = mtz_labs.split(",")
573
+ asu = mtz.get_f_phi(lab_f, lab_phi)
574
+ if oversample_pixel is not None:
575
+ d_min = numpy.min(asu.make_d_array())
576
+ sample_rate = d_min / oversample_pixel
577
+ else:
578
+ sample_rate = 3
579
+ gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
580
+ elif map_in is not None:
581
+ gr = fileio.read_ccp4_map(map_in)[0]
582
+ if oversample_pixel is not None:
583
+ asu = gemmi.transform_map_to_f_phi(gr).prepare_asu_data()
584
+ d_min = numpy.min(asu.make_d_array())
585
+ sample_rate = d_min / oversample_pixel
586
+ gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
587
+ else:
588
+ raise SystemExit("Invalid input")
589
+
590
+ if oversample_pixel is not None:
591
+ logger.writeln("--oversample_pixel= {} is requested.".format(oversample_pixel))
592
+ logger.writeln(" recalculated grid:")
593
+ logger.writeln(" {:4d} {:4d} {:4d}".format(*gr.shape))
594
+ logger.writeln(" spacings:")
595
+ logger.writeln(" {:.6f} {:.6f} {:.6f}".format(*gr.spacing))
596
+ #maps.write_ccp4_map("{}_oversampled.mrc".format(output_prefix), gr)
597
+
598
+ return gr
599
+ # read_map_and_oversample()
600
+
601
+ def map_peaks(args):
602
+ st = fileio.read_structure(args.model)
603
+ gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
604
+ oversample_pixel=args.oversample_pixel)
605
+ gr_sigma = numpy.std(gr)
606
+ if args.abs_level is not None:
607
+ cutoff = args.abs_level
608
+ else:
609
+ cutoff = args.sigma_level * gr_sigma # assuming mean(gr) = 0
610
+
611
+ blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
612
+ min_volume=args.min_volume, min_score=0)
613
+ blobs.extend(gemmi.find_blobs_by_flood_fill(gr, cutoff, negate=True,
614
+ min_volume=args.min_volume, min_score=0))
615
+ getpos = dict(peak=lambda x: x.peak_pos,
616
+ centroid=lambda x: x.centroid)[args.blob_pos]
617
+ st_peaks = model.st_from_positions([getpos(b) for b in blobs])
618
+ st_peaks.cell = st.cell
619
+ st_peaks.ncs = st.ncs
620
+ st_peaks.setup_cell_images()
621
+ logger.writeln("{} peaks detected".format(len(blobs)))
622
+ #st_peaks.write_pdb("peaks.pdb")
623
+
624
+ # Filter symmetry related
625
+ ns = gemmi.NeighborSearch(st_peaks[0], st_peaks.cell, 5.).populate()
626
+ cs = gemmi.ContactSearch(1.)
627
+ cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
628
+ results = cs.find_contacts(ns)
629
+ del_idxes = set()
630
+ for r in results:
631
+ if r.partner1.residue.seqid.num not in del_idxes:
632
+ del_idxes.add(r.partner2.residue.seqid.num)
633
+ for i in reversed(sorted(del_idxes)):
634
+ del st_peaks[0][0][i]
635
+ del blobs[i]
636
+ #st_peaks.write_pdb("peaks_asu.pdb")
637
+ logger.writeln("{} peaks after removing symmetry equivalents".format(len(blobs)))
638
+
639
+ # Assign to nearest atom
640
+ ns = gemmi.NeighborSearch(st[0], st.cell, 10.).populate() # blob is rejected if > 10 A. ok?
641
+ peaks = []
642
+ for b in blobs:
643
+ bpos = getpos(b)
644
+ map_val = gr.interpolate_value(bpos)
645
+ if (args.max_volume is not None and b.volume > args.max_volume) or abs(map_val) < cutoff: continue
646
+ x = ns.find_nearest_atom(bpos)
647
+ if x is None: # this should not happen
648
+ logger.writeln("no nearest atom: value={:.2e} volume= {:.2f} pos= {}".format(map_val, b.volume, bpos))
649
+ continue
650
+ chain = st[0][x.chain_idx]
651
+ res = chain[x.residue_idx]
652
+ atom = res[x.atom_idx]
653
+ im = st.cell.find_nearest_image(atom.pos, bpos, gemmi.Asu.Any)
654
+ if st.cell.is_crystal():
655
+ bpos = st.cell.find_nearest_pbc_position(atom.pos, bpos, im.sym_idx)
656
+ elif im.sym_idx > 0:
657
+ bpos = st.cell.orthogonalize(st.cell.images[im.sym_idx - 1].apply(st.cell.fractionalize(bpos)))
658
+ dist = atom.pos.dist(bpos)
659
+ peaks.append((map_val, b.volume, bpos, dist, chain, res, atom))
660
+
661
+ if len(peaks) == 0:
662
+ logger.writeln("No peaks found. Change parameter(s).")
663
+ return
664
+
665
+ # Print and write coot script
666
+ peaks.sort(reverse=True, key=lambda x:(abs(x[0]), x[1]))
667
+ for_coot = []
668
+ for_df = []
669
+ for i, p in enumerate(peaks):
670
+ map_val, volume, mpos, dist, chain, res, atom = p
671
+ mpos_str = "({: 7.2f},{: 7.2f},{: 7.2f})".format(mpos.x, mpos.y, mpos.z)
672
+ atom_name = atom.name + ("." + atom.altloc if atom.altloc != "\0" else "")
673
+ atom_str = "{}/{}/{}".format(chain.name, res.seqid, atom_name)
674
+ if args.abs_level is None:
675
+ map_val /= gr_sigma
676
+ lab_str = "Peak {:4d} value= {: .2e} volume= {:5.1f} pos= {} closest= {:10s} dist= {:.2f}".format(i+1, map_val, volume, mpos_str, atom_str, dist)
677
+ for_coot.append((lab_str, (mpos.x, mpos.y, mpos.z)))
678
+ for_df.append((map_val, volume, mpos.x, mpos.y, mpos.z, chain.name, str(res.seqid), res.name, atom_name, dist))
679
+ df = pandas.DataFrame(for_df, columns=["map_value" if args.abs_level is not None else "sigma_level",
680
+ "volume", "x", "y", "z", "chain", "seqid", "residue", "atom", "dist"])
681
+ logger.writeln(df.to_string())
682
+ with open(args.output_prefix + ".json", "w") as ofs:
683
+ df.to_json(ofs, orient="records", indent=2)
684
+ logger.writeln("saved: {}".format(ofs.name))
685
+ coot_out = args.output_prefix + "_coot.py"
686
+ with open(coot_out, "w") as ofs:
687
+ ofs.write("""\
688
+ from __future__ import absolute_import, division, print_function
689
+ import gtk
690
+ class coot_serval_map_peak_list:
691
+ def __init__(self):
692
+ window = gtk.Window(gtk.WINDOW_TOPLEVEL)
693
+ window.set_title("Map peaks (Servalcat)")
694
+ window.set_default_size(600, 600)
695
+ scrolled_win = gtk.ScrolledWindow()
696
+ scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
697
+ vbox = gtk.VBox(False, 2)
698
+ frame_vbox = gtk.VBox(False, 0)
699
+ frame_vbox.set_border_width(3)
700
+ self.btns = []
701
+ self.data = {}
702
+ self.add_data(frame_vbox)
703
+ scrolled_win.add_with_viewport(frame_vbox)
704
+ vbox.pack_start(scrolled_win, True, True, 0)
705
+ window.add(vbox)
706
+ window.show_all()
707
+ self.toggled(self.btns[0], 0)
708
+
709
+ def toggled(self, btn, i):
710
+ if btn.get_active():
711
+ set_rotation_centre(*self.data[i][1])
712
+ add_status_bar_text(self.data[i][0])
713
+
714
+ def add_data(self, vbox):
715
+ for i, d in enumerate(self.data):
716
+ self.btns.append(gtk.RadioButton(None if i == 0 else self.btns[0], d[0]))
717
+ vbox.pack_start(self.btns[-1], False, False, 0)
718
+ self.btns[-1].connect('toggled', self.toggled, i)
719
+
720
+ gui = coot_serval_map_peak_list()
721
+ """.format(for_coot))
722
+ logger.writeln("\nRun:")
723
+ logger.writeln("coot --script {}".format(coot_out))
724
+ # map_peaks()
725
+
726
+ def h_density_analysis(args):
727
+ #if args.source != "electron":
728
+ # raise SystemExit("Only electron source is supported.")
729
+ model_format = fileio.check_model_format(args.model)
730
+ st = fileio.read_structure(args.model)
731
+ if not st[0].has_hydrogen():
732
+ raise SystemExit("No hydrogen in model.")
733
+
734
+ if args.output_prefix is None:
735
+ args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_hana"
736
+
737
+ gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
738
+ oversample_pixel=args.oversample_pixel)
739
+
740
+ if args.abs_level is not None:
741
+ cutoff = args.abs_level
742
+ else:
743
+ cutoff = args.sigma_level * numpy.std(gr) # assuming mean(gr) = 0
744
+
745
+ blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
746
+ min_volume=args.min_volume, min_score=0)
747
+ getpos = dict(peak=lambda x: x.peak_pos,
748
+ centroid=lambda x: x.centroid)[args.blob_pos]
749
+
750
+ peaks = [getpos(b).tolist() for b in blobs]
751
+ kdtree = scipy.spatial.cKDTree(peaks)
752
+ found = []
753
+ n_hydr = 0
754
+ h_assigned = [0 for _ in range(len(blobs))]
755
+ st2 = st.clone()
756
+ for ic, chain in enumerate(st[0]):
757
+ for ir, res in enumerate(chain):
758
+ for ia, atom in reversed(list(enumerate(res))):
759
+ if not atom.is_hydrogen(): continue
760
+ n_hydr += 1
761
+ dist, idx = kdtree.query(atom.pos.tolist(), k=1, p=2)
762
+ map_val = gr.interpolate_value(getpos(blobs[idx]))
763
+ if dist < args.max_dist and blobs[idx].volume < args.max_volume and map_val > cutoff:
764
+ found.append((getpos(blobs[idx]), map_val, dist, blobs[idx].volume,
765
+ chain.name, str(res.seqid), res.name,
766
+ atom.name, atom.altloc.replace("\0","")))
767
+ h_assigned[idx] = 1
768
+ else:
769
+ del st2[0][ic][ir][ia]
770
+
771
+ found.sort(key=lambda x: x[1], reverse=True)
772
+ logger.writeln("")
773
+ logger.writeln("Found hydrogen peaks:")
774
+ logger.writeln("dist map vol atom")
775
+ for _, map_val, dist, volume, chain, resi, resn, atom, alt in found:
776
+ logger.writeln("{:.2f} {:.2f} {:.2f} {}/{} {}/{}{}".format(dist, map_val, volume,
777
+ chain, resn, resi,
778
+ atom, "."+alt if alt else ""))
779
+
780
+ logger.writeln("")
781
+ logger.writeln("Result:")
782
+ logger.writeln(" number of hydrogen in the model : {}".format(n_hydr))
783
+ logger.writeln(" number of peaks close to hydrogen: {} ({:.1%})".format(len(found), len(found)/n_hydr))
784
+ logger.writeln("")
785
+
786
+ st_peaks = model.st_from_positions([getpos(b) for b in blobs],
787
+ bs=[gr.interpolate_value(getpos(b)) for b in blobs],
788
+ qs=h_assigned)
789
+ fileio.write_model(st_peaks, file_name="{}_peaks.mmcif".format(args.output_prefix))
790
+ logger.writeln(" this file includes peak positions")
791
+ logger.writeln(" occ=1: hydrogen assigned, occ=0: unassigned.")
792
+ logger.writeln(" B: density value at {}".format(args.blob_pos))
793
+ logger.writeln("")
794
+
795
+ fileio.write_model(st2, file_name="{}_h_with_peak{}".format(args.output_prefix, model_format))
796
+ logger.writeln(" this file is a copy of input model, where hydrogen atoms without peaks are removed.")
797
+ # h_density_analysis()
798
+
799
+ def fix_link(args):
800
+ st = fileio.read_structure(args.model)
801
+ model_format = fileio.check_model_format(args.model)
802
+
803
+ if not args.output:
804
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
805
+ args.output = tmp + "_fixlink" + model_format
806
+ logger.writeln("Output file: {}".format(args.output))
807
+
808
+ args.ligand = sum(args.ligand, []) if args.ligand else []
809
+ monlib = restraints.load_monomer_library(st,
810
+ monomer_dir=args.monlib,
811
+ cif_files=args.ligand)
812
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
813
+ restraints.find_and_fix_links(st, monlib, bond_margin=args.bond_margin,
814
+ metal_margin=args.metal_margin)
815
+ fileio.write_model(st, file_name=args.output)
816
+ # fix_link()
817
+
818
+ def merge_models(args):
819
+ logger.writeln("Reading file 1: {}".format(args.models[0]))
820
+ st = fileio.read_structure(args.models[0])
821
+ logger.writeln(" chains {}".format(" ".join([c.name for c in st[0]])))
822
+
823
+ for i, f in enumerate(args.models[1:]):
824
+ logger.writeln("Reading file {:3d}: {}".format(i+2, f))
825
+ st2 = fileio.read_structure(f)
826
+ for c in st2[0]:
827
+ org_id = c.name
828
+ c2 = st[0].add_chain(c, unique_name=True)
829
+ if c.name != c2.name:
830
+ logger.writeln(" chain {} merged (ID changed to {})".format(c.name, c2.name))
831
+ else:
832
+ logger.writeln(" chain {} merged".format(c.name))
833
+
834
+ fileio.write_model(st, file_name=args.output)
835
+ # merge_models()
836
+
837
+ def merge_dicts(args):
838
+ fileio.merge_ligand_cif(args.cifs, args.output)
839
+ # merge_dicts()
840
+
841
+ def geometry(args):
842
+ if args.ligand: args.ligand = sum(args.ligand, [])
843
+ if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_geom"
844
+ keywords = []
845
+ if args.keywords or args.keyword_file:
846
+ if args.keywords: keywords = sum(args.keywords, [])
847
+ if args.keyword_file: keywords.extend(l for f in sum(args.keyword_file, []) for l in open(f))
848
+ params = refmac_keywords.parse_keywords(keywords)
849
+ st = fileio.read_structure(args.model)
850
+ if args.ignore_h:
851
+ st.remove_hydrogens()
852
+ try:
853
+ monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
854
+ stop_for_unknowns=True, params=params)
855
+ except RuntimeError as e:
856
+ raise SystemExit("Error: {}".format(e))
857
+
858
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
859
+ restraints.find_and_fix_links(st, monlib)
860
+ try:
861
+ topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
862
+ check_hydrogen=True, params=params)
863
+ except RuntimeError as e:
864
+ raise SystemExit("Error: {}".format(e))
865
+
866
+ refine_params = RefineParams(st, refine_xyz=True)
867
+ if args.selection:
868
+ sel = gemmi.Selection(args.selection)
869
+ geom_w = [0. for _ in range(st[0].count_atom_sites())]
870
+ n = 0
871
+ for chain in sel.chains(st[0]):
872
+ for res in sel.residues(chain):
873
+ for atom in sel.atoms(res):
874
+ geom_w[atom.serial-1] = 1.
875
+ n += 1
876
+ logger.writeln("Using selection '{}': {} atoms out of {}".format(args.selection, n, len(geom_w)))
877
+ refine_params.geom_weights[:] = geom_w
878
+
879
+ geom = Geom(st, topo, monlib, refine_params,
880
+ params=params, use_nucleus=args.nucleus)
881
+ for k in geom.outlier_sigmas: geom.outlier_sigmas[k] = args.sigma
882
+ geom.setup_nonbonded()
883
+ ret = geom.show_model_stats()
884
+
885
+ with open(args.output_prefix + "_summary.json", "w") as ofs:
886
+ ret["summary"].to_json(ofs, indent=2)
887
+ logger.writeln("saved: {}".format(ofs.name))
888
+ with open(args.output_prefix + "_outliers.json", "w") as ofs:
889
+ for k in ret["outliers"]:
890
+ ret["outliers"][k] = ret["outliers"][k].to_dict(orient="records")
891
+ json.dump(ret["outliers"], ofs, indent=2)
892
+ logger.writeln("saved: {}".format(ofs.name))
893
+
894
+ if args.dump_all: # for debug, unfinished
895
+ dd = {"bonds": [], "vdw": []}
896
+ for t in geom.geom.bonds:
897
+ dd["bonds"].append({"atoms":[str(geom.lookup[x]) for x in t.atoms],
898
+ "ideals": [{"ideal":x.value, "sigma":x.sigma} for x in t.values],
899
+ "alpha": t.alpha, "type": t.type,
900
+ "sym_idx": t.sym_idx, "pbc_shift": t.pbc_shift})
901
+ for t in geom.geom.vdws:
902
+ dd["vdw"].append({"atoms":[str(geom.lookup[x]) for x in t.atoms],
903
+ "ideal": t.value, "sigma": t.sigma,
904
+ "type": t.type, "sym_idx": t.sym_idx, "pbc_shift": t.pbc_shift})
905
+ with open(args.output_prefix + "_restraints.json", "w") as ofs:
906
+ json.dump(dd, ofs)
907
+ logger.writeln("saved: {}".format(ofs.name))
908
+ if args.check_skew:
909
+ logger.writeln("\nChecking skewness of bond length deviation")
910
+ # better to ignore hydrogen
911
+ tab = geom.geom.reporting.get_bond_outliers(use_nucleus=geom.use_nucleus, min_z=0)
912
+ for i in range(2):
913
+ tab[f"atom{i+1}"] = [str(geom.lookup[r.atoms[i]]) for r in tab["restr"]]
914
+ del tab["restr"]
915
+ df = pandas.DataFrame(tab)
916
+ df["dev"] = df["value"] - df["ideal"]
917
+ df = df.reindex(df.dev.abs().sort_values(ascending=False).index)
918
+ logger.writeln("Bond length deviations:")
919
+ logger.writeln(df.to_string(max_rows=20))
920
+ q1, q2, q3 = numpy.percentile(df["dev"], [25, 50, 75])
921
+ sk2 = (q1 + q3 - 2 * q2) / (q3 - q1)
922
+ logger.writeln("bond_dev_median= {:.6f}".format(q2))
923
+ logger.writeln("bond_dev_skew= {:.4f}".format(df["dev"].skew()))
924
+ logger.writeln("bond_dev_sk2= {:.4f}".format(sk2))
925
+ with open(args.output_prefix + "_bond_dev.html", "w") as ofs:
926
+ ofs.write("""\
927
+ <html>
928
+ <head>
929
+ <meta charset="utf-8" />
930
+ <script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
931
+ </head>
932
+ <body>
933
+ <div id="hist"></div>
934
+ <script>
935
+ var trace = {
936
+ x: %s,
937
+ type: 'histogram'
938
+ };
939
+ var layout = {
940
+ title: "median: %.4f, sk2: %.4f",
941
+ xaxis: {title: "bond distance - ideal"},
942
+ yaxis: {title: "count"},
943
+ shapes: [{
944
+ type: 'line',
945
+ yref: 'paper',
946
+ x0: 0, y0: 0,
947
+ x1: 0, y1: 1}]
948
+ };
949
+ target = document.getElementById('hist');
950
+ Plotly.newPlot(target, [trace], layout);
951
+ </script>
952
+ </body>
953
+ </html>
954
+ """ % (str(list(df.dev)), q2, sk2))
955
+ logger.writeln("check histogram: {}".format(ofs.name))
956
+
957
+ # Note that this modifies st
958
+ if args.per_atom_score_as_b:
959
+ model_format = fileio.check_model_format(args.model)
960
+ peratom = geom.geom.reporting.per_atom_score(len(geom.atoms), geom.use_nucleus, "mean")
961
+ for i, score in enumerate(peratom["total"]):
962
+ geom.atoms[i].b_iso = score
963
+ fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
964
+ # geometry()
965
+
966
+ def compare_conf(args):
967
+ def angle_abs_diff(a, b, full=360.):
968
+ # from gemmi/math.hpp
969
+ d = abs(a - b)
970
+ if d > full:
971
+ d -= numpy.floor(d / full) * full
972
+ return min(d, full - d)
973
+ # angle_abs_diff()
974
+
975
+ if args.ligand: args.ligand = sum(args.ligand, [])
976
+ st = None
977
+ for i, f in enumerate(args.models):
978
+ tmp = fileio.read_structure(f)
979
+ if len(args.models) > 1:
980
+ for chain in tmp[0]:
981
+ chain.name = f"{i+1}_{chain.name}"
982
+ if i == 0:
983
+ st = tmp
984
+ else:
985
+ for chain in tmp[0]:
986
+ st[0].add_chain(chain)
987
+ try:
988
+ monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
989
+ stop_for_unknowns=True)
990
+ except RuntimeError as e:
991
+ raise SystemExit(f"Error: {e}")
992
+
993
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
994
+ try:
995
+ topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
996
+ check_hydrogen=False)
997
+ except RuntimeError as e:
998
+ raise SystemExit(f"Error: {e}")
999
+ ncslist = restraints.prepare_ncs_restraints(st)
1000
+ lookup = {x.atom: x for x in st[0].all()}
1001
+ ptypes = {x.name: x.polymer_type for x in st.entities}
1002
+ resn_lookup = {(chain.name, res.seqid): res.name for chain in st[0] for res in chain}
1003
+ confs = {}
1004
+ for t in topo.torsions:
1005
+ cra = lookup[t.atoms[0]]
1006
+ ptype = ptypes[cra.residue.entity_id]
1007
+ is_peptide = ptype in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD)
1008
+ is_peptide_tors = t.restr.label.startswith("chi") or t.restr.label in ("omega", "phi", "psi")
1009
+ is_na = ptype in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
1010
+ is_na_tors = t.restr.label in ("C2e-chi", "alpha", "beta", "gamma", "C2e-nyu0", "epsilon", "zeta")
1011
+ if (is_peptide and is_peptide_tors) or (is_na and is_na_tors):
1012
+ confs.setdefault(cra.chain.name, {}).setdefault(cra.residue.seqid, {})[t.restr.label] = numpy.rad2deg(t.calculate())
1013
+ fulls = {("ARG", "chi5"): 180., ("TYR", "chi2"): 180., ("PHE", "chi2"): 180., ("ASP", "chi2"): 180., ("GLU", "chi3"): 180.}
1014
+ ret = []
1015
+ for_coot = []
1016
+ for ncs in ncslist.ncss:
1017
+ c1, c2 = ncs.chains
1018
+ if args.same_chain and len(args.models) > 1 and c1[c1.index("_"):] != c2[c2.index("_"):]:
1019
+ continue
1020
+ for s1, s2 in ncs.seqids:
1021
+ if c1 in confs and s1 in confs[c1] and c2 in confs and s2 in confs[c2]:
1022
+ conf1, conf2 = confs[c1][s1], confs[c2][s2]
1023
+ resn = resn_lookup[(c1, s1)]
1024
+ for t in conf1:
1025
+ if t in conf2:
1026
+ d = float(angle_abs_diff(conf1[t], conf2[t], fulls.get((resn, t), 360.)))
1027
+ ret.append((c1, s1, c2, s2, resn, t, conf1[t], conf2[t], d))
1028
+ if d > args.min_diff:
1029
+ for_coot.append((c1, s1.num, c2, s2.num, resn, t, d))
1030
+ df = pandas.DataFrame(ret, columns=["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "conf_1", "conf_2", "diff"])
1031
+ df.sort_values("diff", ascending=False, inplace=True)
1032
+ logger.writeln(f"\nList of torsion angle differences (>{args.min_diff})")
1033
+ logger.writeln(df[df["diff"] > args.min_diff].to_string(index=False))
1034
+
1035
+ for_coot.sort(key=lambda x:-x[-1])
1036
+ coot_out = args.output_prefix + "_coot.py"
1037
+ with open(coot_out, "w") as ofs:
1038
+ # https://python-gtk-3-tutorial.readthedocs.io/en/latest/treeview.html
1039
+ ofs.write("""\
1040
+ from __future__ import absolute_import, division, print_function
1041
+ import re
1042
+ import gtk
1043
+ class coot_serval_conf_list:
1044
+ def __init__(self):
1045
+ window = gtk.Window(gtk.WINDOW_TOPLEVEL)
1046
+ window.set_title("Different conformations (Servalcat)")
1047
+ window.set_default_size(600, 600)
1048
+ scrolled_win = gtk.ScrolledWindow()
1049
+ scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
1050
+ vbox = gtk.VBox(False, 2)
1051
+ self.liststore = gtk.ListStore(str, int, str, int, str, str, float)
1052
+ self.filter = self.liststore.filter_new()
1053
+ self.treeview = gtk.TreeView(model=self.filter)
1054
+ for i, column_title in enumerate(["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "diff"]):
1055
+ renderer = gtk.CellRendererText()
1056
+ column = gtk.TreeViewColumn(column_title, renderer, text=i)
1057
+ self.treeview.append_column(column)
1058
+ self.data = {}
1059
+ self.add_data()
1060
+ scrolled_win.add_with_viewport(self.treeview) # add?
1061
+ vbox.pack_start(scrolled_win, True, True, 0)
1062
+ window.add(vbox)
1063
+ window.show_all()
1064
+ self.treeview.connect("row-activated", self.on_row_activated)
1065
+
1066
+ def on_row_activated(self, treeview, path, column):
1067
+ assert len(path) == 1
1068
+ col_idx = [i for i, c in enumerate(treeview.get_columns()) if column == c][0]
1069
+ row = self.liststore[path[0]]
1070
+ if col_idx < 2:
1071
+ chain, resi = row[0], row[1]
1072
+ elif col_idx < 4:
1073
+ chain, resi = row[2], row[3]
1074
+ else:
1075
+ return
1076
+ if re.search("^[0-9]+_[0-9A-Za-z]", chain):
1077
+ chain = chain[chain.index("_")+1:]
1078
+ imol = active_atom_spec()[1][0]
1079
+ for name in (" CA ", " C1'"):
1080
+ a = get_atom(imol, chain, resi, "", name)
1081
+ if a:
1082
+ set_rotation_center(*a[2])
1083
+ break
1084
+
1085
+ def add_data(self):
1086
+ for i, d in enumerate(self.data):
1087
+ self.liststore.append(d)
1088
+
1089
+ gui = coot_serval_conf_list()
1090
+ """.format(for_coot))
1091
+ logger.writeln("\nRun:")
1092
+ logger.writeln(f"coot --script {coot_out}")
1093
+ # compare_conf()
1094
+
1095
+ def adp_stats(args):
1096
+ if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
1097
+ st = fileio.read_structure(args.model)
1098
+ model.adp_analysis(st)
1099
+ b_all = [cra.atom.b_iso for cra in st[0].all() if cra.atom.occ > 0]
1100
+
1101
+ # bin width from Freedman–Diaconis rule
1102
+ qs = numpy.quantile(b_all, [0, 0.25, 0.75, 1])
1103
+ bin_h = 2 * (qs[2] - qs[1]) / len(b_all)**(1/3.)
1104
+
1105
+ # for plotly
1106
+ traces = []
1107
+ traces.append("x: [%s], type: 'histogram', name: 'All', xbins: {size: %f}"
1108
+ % (",".join("%.2f"%x for x in b_all), bin_h))
1109
+ if len(st[0]) > 1:
1110
+ b_chain = {}
1111
+ for c in st[0]:
1112
+ b_chain.setdefault(c.name, []).extend(a.b_iso for r in c for a in r if a.occ > 0)
1113
+ for c in b_chain:
1114
+ bs = ",".join("%.2f" % x for x in b_chain[c])
1115
+ traces.append("x: [%s], type: 'histogram', name: 'Chain %s'" % (bs, c))
1116
+ with open(args.output_prefix + "_hist.html", "w") as ofs:
1117
+ ofs.write("""\
1118
+ <html>
1119
+ <head>
1120
+ <meta charset="utf-8" />
1121
+ <script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
1122
+ </head>
1123
+ <body>
1124
+ <div id="hist"></div>
1125
+ <script>
1126
+ """)
1127
+ for i, t in enumerate(traces):
1128
+ ofs.write("var trace%d = {%s};\n" % (i+1, t))
1129
+ ofs.write("""\
1130
+ var layout = {
1131
+ title: "isotropic B histogram",
1132
+ xaxis: {title: "B"},
1133
+ yaxis: {title: "count"},
1134
+ barmode: "stack"
1135
+ };
1136
+ target = document.getElementById('hist');
1137
+ Plotly.newPlot(target, [%s], layout);
1138
+ </script>
1139
+ </body>
1140
+ </html>
1141
+ """ % (",".join("trace%d" % (i+1) for i in range(len(traces)))))
1142
+ logger.writeln("check histogram: {}".format(ofs.name))
1143
+ # adp_stats()
1144
+
1145
+ def show_power(args):
1146
+ maps_in = []
1147
+ if args.map:
1148
+ print(args.map)
1149
+ print(sum(args.map, []))
1150
+ maps_in = [(f,) for f in sum(args.map, [])]
1151
+
1152
+ if args.halfmaps:
1153
+ args.halfmaps = sum(args.halfmaps, [])
1154
+ if len(args.halfmaps)%2 != 0:
1155
+ raise RuntimeError("Number of half maps is not even.")
1156
+ maps_in.extend([(args.halfmaps[2*i],args.halfmaps[2*i+1]) for i in range(len(args.halfmaps)//2)])
1157
+
1158
+ if args.mask:
1159
+ mask = fileio.read_ccp4_map(args.mask)[0]
1160
+ else:
1161
+ mask = None
1162
+
1163
+ hkldata = None
1164
+ labs = []
1165
+ for mapin in maps_in: # TODO rewrite in faster way
1166
+ ms = [fileio.read_ccp4_map(f) for f in mapin]
1167
+ d_min = args.resolution
1168
+ if d_min is None:
1169
+ d_min = maps.nyquist_resolution(ms[0][0])
1170
+ logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(d_min))
1171
+ tmp = maps.mask_and_fft_maps(ms, d_min, mask)
1172
+ labs.append("F{:02d}".format(len(labs)+1))
1173
+ tmp.df.rename(columns=dict(FP=labs[-1]), inplace=True)
1174
+ if hkldata is None:
1175
+ hkldata = tmp
1176
+ else:
1177
+ if hkldata.cell.parameters != tmp.cell.parameters: raise RuntimeError("Different unit cell!")
1178
+ hkldata.merge(tmp.df[["H","K","L",labs[-1]]])
1179
+
1180
+ if not labs:
1181
+ raise SystemExit("No map files given. Exiting.")
1182
+
1183
+ hkldata.setup_relion_binning("stat")
1184
+
1185
+ ofs = open(args.output_prefix+".log", "w")
1186
+ ofs.write("Input:\n")
1187
+ for i in range(len(maps_in)):
1188
+ ofs.write("{} from {}\n".format(labs[i], " ".join(maps_in[i])))
1189
+ ofs.write("\n")
1190
+
1191
+ ofs.write("""$TABLE: Power spectrum :
1192
+ $GRAPHS
1193
+ : log10(Mn(|F|^2)) :A:1,{}:
1194
+ $$
1195
+ 1/resol^2 n d_max d_min {}
1196
+ $$
1197
+ $$
1198
+ """.format(",".join([str(i+5) for i in range(len(labs))]), " ".join(labs)))
1199
+ abssqr = dict((lab, numpy.abs(hkldata.df[lab].to_numpy())**2) for lab in labs)
1201
+ for i_bin, idxes in hkldata.binned("stat"):
1202
+ bin_d_min = hkldata.binned_df["stat"].d_min[i_bin]
1203
+ bin_d_max = hkldata.binned_df["stat"].d_max[i_bin]
1204
+ ofs.write("{:.4f} {:7d} {:7.3f} {:7.3f}".format(1/bin_d_min**2, len(idxes), bin_d_max, bin_d_min,))
1205
+ for lab in labs:
1206
+ pwr = numpy.log10(numpy.average(abssqr[lab][idxes]))
1207
+ ofs.write(" {:.4e}".format(pwr))
1208
+ ofs.write("\n")
1209
+ ofs.write("$$\n")
1210
+ ofs.close()
1211
+ # show_power()
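The table written above is a per-bin power spectrum: the mean of |F|^2 in each resolution bin, reported as log10. A hedged numpy-only sketch of that reduction using equal-width bins in 1/d^2 (`f_values` and `inv_d2` are hypothetical arrays standing in for the hkldata columns; the code above uses RELION-style binning instead):

    import numpy

    def binned_log_power(f_values, inv_d2, n_bins=20):
        # mean |F|^2 per bin in 1/d^2, reported as log10 (empty bins would give nan)
        edges = numpy.linspace(inv_d2.min(), inv_d2.max(), n_bins + 1)
        idx = numpy.clip(numpy.digitize(inv_d2, edges) - 1, 0, n_bins - 1)
        power = numpy.abs(f_values) ** 2
        return numpy.array([numpy.log10(power[idx == i].mean()) for i in range(n_bins)])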
1212
+
1213
+ def fcalc(args):
1214
+ if (args.auto_box_with_padding, args.cell).count(None) == 0:
1215
+ raise SystemExit("Error: you cannot specify both --auto_box_with_padding and --cell")
1216
+
1217
+ if args.ligand: args.ligand = sum(args.ligand, [])
1218
+ if not args.output_prefix: args.output_prefix = "{}_fcalc_{}".format(fileio.splitext(os.path.basename(args.model))[0], args.source)
1219
+
1220
+ st = fileio.read_structure(args.model)
1221
+ ccu = model.CustomCoefUtil()
1222
+ if not args.keep_charges:
1223
+ model.remove_charge([st])
1224
+ if args.source == "custom":
1225
+ ccu.read_from_cif(st, args.model)
1226
+ ccu.show_info()
1227
+ ccu.set_coeffs(st)
1228
+ else:
1229
+ model.check_atomsf([st], args.source)
1230
+ if not args.no_expand_ncs:
1231
+ model.expand_ncs(st)
1232
+
1233
+ if args.cell is not None:
1234
+ st.cell = gemmi.UnitCell(*args.cell)
1235
+ elif args.auto_box_with_padding is not None:
1236
+ st.cell = model.box_from_model(st[0], args.auto_box_with_padding)
1237
+ st.spacegroup_hm = "P 1"
1238
+ logger.writeln("Box size from the model with padding of {}: {}".format(args.auto_box_with_padding, st.cell.parameters))
1239
+
1240
+ if not st.cell.is_crystal():
1241
+ raise SystemExit("ERROR: No unit cell information. Give --cell or --auto_box_with_padding.")
1242
+
1243
+ if args.source=="electron" and st[0].has_hydrogen():
1244
+ monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
1245
+ stop_for_unknowns=False)
1246
+ else:
1247
+ monlib = None
1248
+
1249
+ if args.method == "fft":
1250
+ fc_asu = model.calc_fc_fft(st, args.resolution, cutoff=args.cutoff, rate=args.rate,
1251
+ mott_bethe=args.source=="electron",
1252
+ monlib=monlib, source=args.source)
1253
+ else:
1254
+ fc_asu = model.calc_fc_direct(st, args.resolution, source=args.source,
1255
+ mott_bethe=args.source=="electron", monlib=monlib)
1256
+
1257
+ hkldata = hkl.hkldata_from_asu_data(fc_asu, "FC")
1258
+ if args.as_intensity:
1259
+ hkldata.df["IC"] = numpy.abs(hkldata.df.FC)**2
1260
+ labout = ["IC"]
1261
+ if args.add_dummy_sigma:
1262
+ hkldata.df["SIGIC"] = 1.
1263
+ labout.append("SIGIC")
1264
+ else:
1265
+ labout = ["FC"]
1266
+ if args.add_dummy_sigma:
1267
+ hkldata.df["SIGFC"] = 1.
1268
+ labout.append("SIGFC")
1269
+
1270
+ hkldata.write_mtz(args.output_prefix+".mtz", labout, types=dict(IC="J", SIGIC="Q", SIGFC="Q"))
1271
+ # fcalc()
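With --source electron the X-ray form factors are converted via the Mott-Bethe relation. The snippet below is only a schematic version of that formula; the constant is the textbook 1/(8*pi^2*a0) value, not taken from this package, and fx stands for the X-ray form factor at the same s = sin(theta)/lambda in 1/A:

    def mott_bethe(z, fx, s):
        # electron form factor from the X-ray one:
        # f_e(s) = C * (Z - f_x(s)) / s^2, with C = 1 / (8 * pi^2 * a0) ~= 0.023934 A
        C = 0.023934
        return C * (z - fx) / s ** 2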
1272
+
1273
+ def nemap(args):
1274
+ from servalcat.spa import fofc
1275
+
1276
+ if (args.trim or args.trim_mtz) and args.mask is None:
1277
+ raise SystemExit("\nError: --mask is required when --trim or --trim_mtz is requested.\n")
1278
+
1279
+ if args.mask:
1280
+ mask = fileio.read_ccp4_map(args.mask)[0]
1281
+ else:
1282
+ mask = None
1283
+
1284
+ halfmaps = fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
1285
+ if args.resolution is None:
1286
+ args.resolution = maps.nyquist_resolution(halfmaps[0][0])
1287
+ logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
1288
+
1289
+ d_min = args.resolution
1290
+ if args.local_fourier_weighting_with > 0:
1291
+ d_min = 1 / (args.local_fourier_weighting_with + 1 / d_min)
1292
+ logger.writeln("adjusting d_min= {:.2f} for local correlation".format(d_min))
1293
+ hkldata = maps.mask_and_fft_maps(halfmaps, d_min, mask)
1294
+
1295
+ if args.local_fourier_weighting_with > 0:
1296
+ asu1 = hkldata.as_asu_data("F_map1")
1297
+ asu2 = hkldata.as_asu_data("F_map2")
1298
+ size = asu1.get_size_for_hkl(sample_rate=3)
1299
+ logger.writeln("using grid {}".format(size))
1300
+ gr1 = asu1.get_f_phi_on_grid(size)
1301
+ gr2 = asu2.get_f_phi_on_grid(size)
1302
+ kernel = ext.hard_sphere_kernel_recgrid(size, asu1.unit_cell, args.local_fourier_weighting_with)
1303
+ cc = maps.local_cc(gr1, gr2, kernel.array.real, method="simple")
1304
+ cc.array[cc.array < 0] = 0 # negative cc cannot be used anyway
1305
+ cc.array[:] = 2 * cc.array.real / (1 + cc.array.real) # to full map cc
1306
+ hkldata.df["cc"] = numpy.real(cc.get_value_by_hkl(hkldata.miller_array()))
1307
+ grf = type(gr1)((gr1.array + gr2.array) / 2, gr1.unit_cell, gr1.spacegroup)
1308
+ var_f = maps.local_var(grf, kernel.array.real, method="simple")
1309
+ hkldata.df["var_f"] = numpy.real(var_f.get_value_by_hkl(hkldata.miller_array()))
1310
+ if args.B is not None:
1311
+ k2_l = numpy.exp(-args.B / hkldata.d_spacings()**2 / 2)
1312
+ hkldata.df.cc = k2_l * hkldata.df.cc / (1 + (k2_l - 1) * hkldata.df.cc)
1313
+ hkldata.df["FWT"] = hkldata.df.FP * numpy.sqrt(hkldata.df.cc / hkldata.df.var_f)
1314
+ hkldata.df["kernel"] = numpy.real(kernel.get_value_by_hkl(hkldata.miller_array()))
1315
+ hkldata.write_mtz(args.output_prefix+"_cc.mtz", ["cc", "kernel"])
1316
+ hkldata = hkldata.copy(d_min=args.resolution)
1317
+ map_labs = ["FWT"]
1318
+ else:
1319
+ hkldata.setup_relion_binning("ml")
1320
+ maps.calc_noise_var_from_halfmaps(hkldata)
1321
+ map_labs = fofc.calc_maps(hkldata, B=args.B, has_halfmaps=True, half1_only=args.half1_only,
1322
+ no_fsc_weights=args.no_fsc_weights, sharpening_b=args.sharpening_b)
1323
+ fofc.write_files(hkldata, map_labs, grid_start=halfmaps[0][1], stats_str=None,
1324
+ mask=mask, output_prefix=args.output_prefix,
1325
+ trim_map=args.trim, trim_mtz=args.trim_mtz)
1326
+ # nemap()
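The local-weighting branch converts the half-map correlation to a full-map correlation with 2c/(1+c) and can further modulate it by an overall B value, as in the lines above. A small sketch of those two relations with hypothetical scalar inputs:

    import numpy

    def fullmap_cc(cc_half):
        # standard half-map to full-map correlation relation
        return 2 * cc_half / (1 + cc_half)

    def b_modulated_cc(cc, b_value, d):
        # same form as above: k = exp(-B / (2 d^2)); cc' = k*cc / (1 + (k - 1)*cc)
        k = numpy.exp(-b_value / (2 * d ** 2))
        return k * cc / (1 + (k - 1) * cc)

    print(fullmap_cc(0.5), b_modulated_cc(0.8, b_value=50.0, d=3.0))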
1327
+
1328
+ def blur(args):
1329
+ if args.output_prefix is None:
1330
+ args.output_prefix = fileio.splitext(os.path.basename(args.hklin))[0]
1331
+
1332
+ if fileio.is_mmhkl_file(args.hklin):
1333
+ mtz = fileio.read_mmhkl(args.hklin)
1334
+ hkl.blur_mtz(mtz, args.B)
1335
+ suffix = ("_blur" if args.B > 0 else "_sharpen") + "_{:.2f}.mtz".format(abs(args.B))
1336
+ mtz.write_to_file(args.output_prefix+suffix)
1337
+ logger.writeln("Written: {}".format(args.output_prefix+suffix))
1338
+ else:
1339
+ raise SystemExit("ERROR: Unsupported file type: {}".format(args.hklin))
1340
+ # blur()
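Blurring or sharpening by an overall B rescales the amplitudes with the Debye-Waller factor; a negative B sharpens. A numpy-only sketch under that assumption (how hkl.blur_mtz treats the individual MTZ column types is not reproduced here):

    import numpy

    def blur_amplitudes(f_amp, d_spacings, b_value):
        # F' = F * exp(-B / (4 d^2)); intensities would be scaled by exp(-B / (2 d^2))
        return numpy.asarray(f_amp) * numpy.exp(-b_value / (4 * numpy.asarray(d_spacings) ** 2))

    print(blur_amplitudes([100.0, 50.0], [4.0, 2.0], b_value=20.0))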
1341
+
1342
+ def mask_from_model(args):
1343
+ st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
1344
+ if args.selection:
1345
+ gemmi.Selection(args.selection).remove_not_selected(st)
1346
+ gr, grid_start, _ = fileio.read_ccp4_map(args.map, header_only=True)
1347
+ mask = maps.mask_from_model(st, args.radius, soft_edge=args.soft_edge, grid=gr)
1348
+ maps.write_ccp4_map(args.output, mask, grid_start=grid_start)
1349
+ # mask_from_model()
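A model-based mask is typically 1 within a chosen radius of any atom and falls to 0 over a soft edge. The helper below is only an illustrative raised-cosine falloff for a single distance value, not the routine used by maps.mask_from_model:

    import math

    def soft_mask_value(dist, radius, soft_edge):
        # 1 inside `radius`, raised-cosine falloff over `soft_edge`, 0 beyond
        if dist <= radius:
            return 1.0
        if soft_edge <= 0 or dist >= radius + soft_edge:
            return 0.0
        return 0.5 * (1.0 + math.cos(math.pi * (dist - radius) / soft_edge))

    print([round(soft_mask_value(d, 3.0, 2.0), 3) for d in (2.0, 3.5, 4.5, 6.0)])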
1350
+
1351
+ def applymask(args):
1352
+ if args.output_prefix is None:
1353
+ args.output_prefix = fileio.splitext(os.path.basename(args.map))[0] + "_masked"
1354
+
1355
+ grid, grid_start, _ = fileio.read_ccp4_map(args.map)
1356
+ mask = fileio.read_ccp4_map(args.mask)[0]
1357
+ logger.writeln("Applying mask")
1358
+ logger.writeln(" mask min: {:.3f} max: {:.3f}".format(numpy.min(mask), numpy.max(mask)))
1359
+ grid.array[:] *= mask.array
1360
+
1361
+ if args.normalize:
1362
+ masked = grid.array[mask.array>args.mask_cutoff]
1363
+ masked_mean = numpy.average(masked)
1364
+ masked_std = numpy.std(masked)
1365
+ logger.writeln("Normalizing map values within mask")
1366
+ logger.writeln(" masked volume: {} mean: {:.3e} sd: {:.3e}".format(len(masked), masked_mean, masked_std))
1367
+ grid.array[:] = (grid.array - masked_mean) / masked_std
1368
+
1369
+ maps.write_ccp4_map(args.output_prefix+".mrc", grid,
1370
+ grid_start=grid_start,
1371
+ mask_for_extent=mask.array if args.trim else None,
1372
+ mask_threshold=args.mask_cutoff)
1373
+ # applymask()
1374
+
1375
+ def map2mtz(args):
1376
+ if args.output is None:
1377
+ args.output = fileio.splitext(os.path.basename(args.map))[0] + "_fft.mtz"
1378
+ grid, grid_start, grid_shape = fileio.read_ccp4_map(args.map)
1379
+ if args.resolution is None:
1380
+ args.resolution = maps.nyquist_resolution(grid)
1381
+ logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
1382
+
1383
+ if grid_start != (0,0,0) or grid.shape != tuple(grid_shape):
1384
+ # If the map covers only a subregion of the full grid, the unit cell needs to be redefined.
1385
+ if grid.shape != tuple(grid_shape):
1386
+ new_abc = [grid.unit_cell.parameters[i] * grid_shape[i] / grid.shape[i] for i in range(3)]
1387
+ cell = gemmi.UnitCell(*new_abc, *grid.unit_cell.parameters[3:])
1388
+ logger.writeln("Changing unit cell to {}".format(cell.parameters))
1389
+ else:
1390
+ cell = grid.unit_cell
1391
+ grid = gemmi.FloatGrid(grid.get_subarray(grid_start, grid_shape),
1392
+ cell, grid.spacegroup)
1393
+
1394
+ f_grid = gemmi.transform_map_to_f_phi(grid)
1395
+ asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
1396
+ hkldata = hkl.hkldata_from_asu_data(asudata, "F")
1397
+ if grid_start != (0,0,0):
1398
+ shifts = grid.get_position(*grid_start)
1399
+ hkldata.translate("F", shifts)
1400
+ logger.writeln("Applying phase shift with translation {}".format(shifts.tolist()))
1401
+ hkldata.write_mtz(args.output, ["F"])
1402
+ # map2mtz()
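When the map does not start at the grid origin, each structure factor picks up a phase factor. With the convention rho'(x) = rho(x - t), F'(h) = F(h) * exp(2*pi*i*h.t) for a fractional shift t; the exact sign convention used by hkldata.translate is not verified here. A sketch:

    import numpy

    def apply_origin_shift(f, hkl, t_frac):
        # multiply each F(h) by exp(2*pi*i * h.t), t in fractional coordinates
        hkl = numpy.asarray(hkl, dtype=float)
        return f * numpy.exp(2j * numpy.pi * hkl.dot(t_frac))

    # shift of a quarter cell along a: (1,0,0) gets a 90 degree phase shift
    print(apply_origin_shift(numpy.array([1 + 0j]), [[1, 0, 0]], [0.25, 0.0, 0.0]))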
1403
+
1404
+ def sm2mm(args):
1405
+ if args.output_prefix is None:
1406
+ args.output_prefix = os.path.basename(fileio.splitext(args.files[0])[0])
1407
+ st, mtz = fileio.read_small_molecule_files(args.files)
1408
+ if st is not None:
1409
+ fileio.write_model(st, prefix=args.output_prefix, pdb=True, cif=True)
1410
+ if mtz is not None:
1411
+ mtz_out = args.output_prefix + ".mtz"
1412
+ logger.writeln("Writing MTZ file: {}".format(mtz_out))
1413
+ mtz.write_to_file(mtz_out)
1414
+ # sm2mm()
1415
+
1416
+ def mm2ins(args):
1417
+ if args.output is None:
1418
+ args.output = os.path.basename(fileio.splitext(args.model)[0]) + ".ins"
1419
+ st = fileio.read_structure(args.model)
1420
+ sg = st.find_spacegroup()
1421
+ elems = [cra.atom.element.name for cra in st[0].all()]
1422
+ counts = {x:elems.count(x) for x in set(elems)}
1423
+ elems = sorted(counts)
1424
+ cell = st.cell
1425
+ if args.hklin:
1426
+ mtz = fileio.read_mmhkl(args.hklin)
1427
+ cell = mtz.cell
1428
+ wavelength = next((x.wavelength for x in mtz.datasets if x==x and x.wavelength > 0), None)
1429
+ else:
1430
+ mtz = None
1431
+ wavelength = None
1432
+
1433
+ latt = dict(P=1, I=2, R=3, F=4, A=5, B=6, C=7).get(sg.centring_type())
1434
+ if not sg.is_centrosymmetric():
1435
+ latt *= -1
1436
+
1437
+ with open(args.output, "w") as ofs:
1438
+ ofs.write(f"TITL {os.path.basename(args.model)} in {sg.xhm()}\n")
1439
+ ofs.write(f"CELL {wavelength if wavelength else '????'} ")
1440
+ ofs.write(" ".join(str(x) for x in cell.parameters) + "\n")
1441
+ ofs.write("ZERR 1 0 0 0 0 0 0\n")
1442
+ ofs.write(f"LATT {latt}\n")
1443
+ for op in sg.operations().sym_ops[1:]: # the first is identity
1444
+ ofs.write(f"SYMM {op.triplet('X')}\n")
1445
+ ofs.write(f"SFAC {' '.join(elems)}\n")
1446
+ ofs.write(f"UNIT {' '.join(str(int(counts[x])) for x in elems)}\n\n")
1447
+ ofs.write("""\
1448
+ L.S. 10
1449
+ ACTA
1450
+ LIST 6
1451
+ MORE -1\n\n""")
1452
+ for cra in st[0].all():
1453
+ frac = st.cell.fractionalize(cra.atom.pos)
1454
+ u_iso = model.b_to_u * cra.atom.b_iso
1455
+ if cra.atom.is_hydrogen():
1456
+ u_iso = -1.2
1457
+ ofs.write(f"{cra.atom.name} {elems.index(cra.atom.element.name)+1} {frac.x:.6f} {frac.y:.6f} {frac.z:.6f} {10+cra.atom.occ} {u_iso:.5f}\n")
1458
+ logger.writeln(f"Written: {args.output}")
1459
+ # mm2ins()
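The U values written to the .ins file come from B via model.b_to_u; the underlying relation is U_iso = B_iso / (8*pi^2). A one-line check:

    import math

    def b_to_u(b_iso):
        # isotropic displacement parameter: U_iso = B_iso / (8 * pi^2)
        return b_iso / (8 * math.pi ** 2)

    print(round(b_to_u(20.0), 4))  # ~0.2533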
1460
+
1461
+ def seq(args):
1462
+ wrap_width = 100
1463
+ seqs = []
1464
+ if args.seq:
1465
+ args.seq = sum(args.seq, [])
1466
+ for sf in args.seq:
1467
+ seqs.extend(fileio.read_sequence_file(sf))
1468
+
1469
+ sc = gemmi.AlignmentScoring()
1470
+ sc.match, sc.mismatch, sc.gapo, sc.gape, sc.good_gapo, sc.bad_gapo = args.scoring
1471
+
1472
+ st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
1473
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
1474
+ for chain in st[0]:
1475
+ p = chain.get_polymer()
1476
+ if not p: continue
1477
+ p_type = p.check_polymer_type()
1478
+ if p_type in (gemmi.PolymerType.SaccharideD, gemmi.PolymerType.SaccharideL): continue
1479
+ p_seq = gemmi.one_letter_code(p.extract_sequence())
1480
+ results = []
1481
+ for name, seq in seqs:
1482
+ # what if DnaRnaHybrid?
1483
+ kind = {gemmi.PolymerType.Dna: gemmi.ResidueKind.DNA,
1484
+ gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}.get(p_type, gemmi.ResidueKind.AA)
1485
+ s = [gemmi.expand_one_letter(x, kind) for x in seq]
1486
+ if None in s: continue
1487
+ #als = [gemmi.align_sequence_to_polymer(s, p, p_type, gemmi.AlignmentScoring(x)) for x in ("s", "p")]
1488
+ #results.append([name, max(als, key=lambda x: x.match_count), seq])
1489
+ results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type, sc), seq])
1490
+
1491
+ if results:
1492
+ logger.writeln("Chain: {}".format(chain.name))
1493
+ logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
1494
+ name, al, s1 = max(results, key=lambda x: (x[1].match_count, x[1].score))
1495
+ logger.writeln(" match: {}".format(name))
1496
+ logger.writeln(" aligned: {}".format(al.match_count))
1497
+ logger.writeln(" score: {}".format(al.score))
1498
+ p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
1499
+ unkseq = [x.start() for x in re.finditer(r"\-", p1)]
1500
+ mismatches = [x.start() for x in re.finditer(r"\.", al.match_string)]
1501
+ if mismatches or unkseq:
1502
+ idxes = {x.start(): i for i, x in enumerate(re.finditer("[^-]", p2))}
1503
+ seqnums = [str(x.seqid) for x in p]
1504
+ if mismatches:
1505
+ logger.write(" mismatches: ")
1506
+ logger.writeln(", ".join("{}({}>{})".format(seqnums[idxes[i]], p1[i], p2[i]) for i in mismatches))
1507
+ if unkseq:
1508
+ logger.write(" unknown sequence: ")
1509
+ logger.writeln(", ".join("{}({})".format(seqnums[idxes[i]], p2[i]) for i in unkseq))
1510
+
1511
+ logger.writeln("")
1512
+ for i in range(0, len(p1), wrap_width):
1513
+ logger.writeln(" seq. {}".format(p1[i:i+wrap_width]))
1514
+ logger.writeln(" {}".format(al.match_string[i:i+wrap_width]))
1515
+ logger.writeln(" model {}\n".format(p2[i:i+wrap_width]))
1516
+ else:
1517
+ logger.writeln("> Chain: {}".format(chain.name))
1518
+ logger.writeln(gemmi.one_letter_code(p.extract_sequence()))
1519
+ logger.writeln("")
1520
+ # seq()
1521
+
1522
+ def dnarna(args):
1523
+ import scipy.spatial.transform
1524
+ rna_res = {"A":"DA", "G":"DG", "C":"DC", "U":"DT"}
1525
+ dna_res = {"DA":"A", "DG":"G", "DC":"C", "DT":"U"}
1526
+ if args.chains: args.chains = sum(args.chains, [])
1527
+ model_format = fileio.check_model_format(args.model)
1528
+ if not args.output:
1529
+ args.output = fileio.splitext(os.path.basename(args.model))[0] + "_conv" + model_format
1530
+ st = fileio.read_structure(args.model)
1531
+ if st[0].has_hydrogen():
1532
+ logger.writeln("Hydrogen atoms detected; they cannot be handled in this conversion and will be removed.")
1533
+ st.remove_hydrogens()
1534
+ for chain in st[0]:
1535
+ if args.chains and chain.name not in args.chains:
1536
+ continue
1537
+ for res in chain:
1538
+ alt = "*" # XXX
1539
+ if res.name in rna_res and args.to_dna:
1540
+ logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to DNA")
1541
+ res.name = rna_res[res.name]
1542
+ res.remove_atom("O2'", alt)
1543
+ if res.name == "DT":
1544
+ C4 = res.find_atom("C4", alt)
1545
+ C5 = res.find_atom("C5", alt)
1546
+ C6 = res.find_atom("C6", alt)
1547
+ v1 = C5.pos - C4.pos
1548
+ v2 = C5.pos - C6.pos
1549
+ v = v1 + v2
1550
+ res.add_atom(C5)
1551
+ res[-1].name = "C7"
1552
+ res[-1].pos = C5.pos + v / v.length() * 1.5
1553
+ elif res.name in dna_res and args.to_rna:
1554
+ logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to RNA")
1555
+ res.name = dna_res[res.name]
1556
+ C1p = numpy.array(res.find_atom("C1'", alt).pos.tolist())
1557
+ C2p = numpy.array(res.find_atom("C2'", alt).pos.tolist())
1558
+ C3p = numpy.array(res.find_atom("C3'", alt).pos.tolist())
1559
+ rotvec = C2p - C3p
1560
+ rotvec /= numpy.linalg.norm(rotvec)
1561
+ r = scipy.spatial.transform.Rotation.from_rotvec(-rotvec * 120,
1562
+ degrees=True)
1563
+ rotated = r.apply(C1p - C2p)
1564
+ rotated *= 1.411 / numpy.linalg.norm(rotated)
1565
+ res.add_atom(res.find_atom("O3'", alt))
1566
+ res[-1].name = "O2'"
1567
+ res[-1].pos.fromlist(C2p + rotated)
1568
+ if res.name == "U":
1569
+ res.remove_atom("C7", alt)
1570
+ fileio.write_model(st, file_name=args.output)
1571
+ # dnarna()
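The O2' position above is built by rotating a bond direction about the C2'-C3' axis with scipy and rescaling it to the bond length. The same rotation can be written directly with Rodrigues' formula; a self-contained sketch with an illustrative axis and angle:

    import numpy

    def rotate_about_axis(v, axis, angle_deg):
        # Rodrigues' rotation of vector v about a unit axis by angle_deg
        axis = numpy.asarray(axis, dtype=float)
        axis = axis / numpy.linalg.norm(axis)
        v = numpy.asarray(v, dtype=float)
        t = numpy.radians(angle_deg)
        return (v * numpy.cos(t)
                + numpy.cross(axis, v) * numpy.sin(t)
                + axis * numpy.dot(axis, v) * (1.0 - numpy.cos(t)))

    print(rotate_about_axis([1.0, 0.0, 0.0], [0.0, 0.0, 1.0], 120.0))  # ~[-0.5, 0.866, 0.0]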
1572
+
1573
+ def show(args):
1574
+ for filename in args.files:
1575
+ ext = fileio.splitext(filename)[1]
1576
+ if ext in (".mrc", ".ccp4", ".map"):
1577
+ fileio.read_ccp4_map(filename)
1578
+ logger.writeln("\n")
1579
+ # show()
1580
+
1581
+ def json2csv(args):
1582
+ if not args.output_prefix:
1583
+ args.output_prefix = fileio.splitext(os.path.basename(args.json))[0]
1584
+
1585
+ df = pandas.read_json(args.json)
1586
+ df.to_csv(args.output_prefix+".csv", index=False)
1587
+ logger.writeln("Output: {}".format(args.output_prefix+".csv"))
1588
+ # json2csv()
1589
+
1590
+ def main(args):
1591
+ comms = dict(show=show,
1592
+ json2csv=json2csv,
1593
+ symmodel=symmodel,
1594
+ helical_biomt=helical_biomt,
1595
+ expand=symexpand,
1596
+ h_add=h_add,
1597
+ add_op3=add_op3,
1598
+ map_peaks=map_peaks,
1599
+ h_density=h_density_analysis,
1600
+ fix_link=fix_link,
1601
+ merge_models=merge_models,
1602
+ merge_dicts=merge_dicts,
1603
+ geom=geometry,
1604
+ conf=compare_conf,
1605
+ adp=adp_stats,
1606
+ power=show_power,
1607
+ fcalc=fcalc,
1608
+ nemap=nemap,
1609
+ blur=blur,
1610
+ mask_from_model=mask_from_model,
1611
+ applymask=applymask,
1612
+ map2mtz=map2mtz,
1613
+ sm2mm=sm2mm,
1614
+ mm2ins=mm2ins,
1615
+ seq=seq,
1616
+ dnarna=dnarna)
1617
+
1618
+ com = args.subcommand
1619
+ f = comms.get(com)
1620
+ if f:
1621
+ return f(args)
1622
+ else:
1623
+ raise SystemExit("Unknown subcommand: {}".format(com))
1624
+ # main()
1625
+
1626
+ if __name__ == "__main__":
1627
+ import sys
1628
+ args = parse_args(sys.argv[1:])
1629
+ main(args)