servalcat-0.4.99-cp310-cp310-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of servalcat might be problematic.

Files changed (45)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-310-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
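
The servalcat/utils/commands.py hunk below adds an argparse-based dispatcher for the utility subcommands (show, json2csv, symmodel, expand, geom, ...): add_arguments() registers one subparser per command and parse_args() builds the parser and parses an argument list. As a rough illustration only, and not part of the released wheel, here is a minimal sketch of driving that parser directly, assuming servalcat 0.4.99 and its dependencies are installed; the input file name is hypothetical:

    # Illustrative sketch only -- not part of the package contents shown in this diff.
    # Assumes servalcat 0.4.99 is installed; "model.pdb" is a hypothetical file name.
    from servalcat.utils import commands

    # "show" is the simplest subcommand registered in add_arguments(); it only takes file paths.
    args = commands.parse_args(["show", "model.pdb"])
    print(args.subcommand)  # "show"
    print(args.files)       # ["model.pdb"]

In normal use these subcommands are presumably reached through servalcat/__main__.py and the console scripts declared in entry_points.txt rather than by calling parse_args() directly.
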
servalcat/utils/commands.py
@@ -0,0 +1,1547 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import fileio
11
+ from servalcat.utils import symmetry
12
+ from servalcat.utils import model
13
+ from servalcat.utils import hkl
14
+ from servalcat.utils import restraints
15
+ from servalcat.utils import maps
16
+ from servalcat.refmac import refmac_keywords
17
+ from servalcat.refine.refine import Geom
18
+ from servalcat import ext
19
+ import os
20
+ import gemmi
21
+ import numpy
22
+ import scipy.spatial
23
+ import pandas
24
+ import json
25
+ import re
26
+ import argparse
27
+
28
+ def add_arguments(p):
29
+ subparsers = p.add_subparsers(dest="subcommand")
30
+
31
+ # show
32
+ parser = subparsers.add_parser("show", description = 'Show file info supported by the program')
33
+ parser.add_argument('files', nargs='+')
34
+
35
+ # json2csv
36
+ parser = subparsers.add_parser("json2csv", description = 'Convert json to csv for plotting')
37
+ parser.add_argument('json')
38
+ parser.add_argument('-o', '--output_prefix')
39
+
40
+ # symmodel
41
+ parser = subparsers.add_parser("symmodel", description="Add symmetry annotation to model")
42
+ parser.add_argument('--model', required=True)
43
+ group = parser.add_mutually_exclusive_group()
44
+ group.add_argument('--map', help="Take box size from the map")
45
+ group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
46
+ help="Box size")
47
+ sym_group = parser.add_argument_group("symmetry")
48
+ symmetry.add_symmetry_args(sym_group, require_pg=True)
49
+ parser.add_argument('--contacting_only', action="store_true", help="Filter out non-contacting NCS")
50
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
51
+ parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
52
+ help="How to decide new chain IDs in expanded model (default: short); "
53
+ "dup: use original chain IDs (with different segment IDs), "
54
+ "short: use unique new IDs, "
55
+ "number: add number to original chain ID")
56
+ parser.add_argument('--biomt', action="store_true", help="Add BIOMT also")
57
+ parser.add_argument('-o', '--output_prfix')
58
+ parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
59
+ parser.add_argument('--cif', action="store_true", help="Write a cif file")
60
+
61
+ # helical_biomt
62
+ parser = subparsers.add_parser("helical_biomt", description="generate BIOMT of helical reconstruction for PDB deposition")
63
+ parser.add_argument('--model', required=True)
64
+ group = parser.add_mutually_exclusive_group()
65
+ group.add_argument('--map', help="Take box size from the map")
66
+ group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
67
+ help="Box size")
68
+ sym_group = parser.add_argument_group("symmetry")
69
+ symmetry.add_symmetry_args(sym_group, require_pg=True)
70
+ parser.add_argument('--start', type=int)
71
+ parser.add_argument('--end', type=int)
72
+ parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
73
+ help="How to decide new chain IDs in expanded model (default: short); "
74
+ "dup: use original chain IDs (with different segment IDs), "
75
+ "short: use unique new IDs, "
76
+ "number: add number to original chain ID")
77
+ parser.add_argument('-o', '--output_prfix')
78
+
79
+ # expand
80
+ parser = subparsers.add_parser("expand", description="Expand symmetry")
81
+ parser.add_argument('--model', required=True)
82
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
83
+ group = parser.add_mutually_exclusive_group()
84
+ group.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
85
+ help="How to decide new chain IDs in expanded model (default: short); "
86
+ "dup: use original chain IDs (with different segment IDs), "
87
+ "short: use unique new IDs, "
88
+ "number: add number to original chain ID")
89
+ group.add_argument("--split", action="store_true", help="split file for each operator")
90
+ parser.add_argument('-o', '--output_prfix')
91
+ parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
92
+ parser.add_argument('--cif', action="store_true", help="Write a cif file")
93
+
94
+ # h_add
95
+ parser = subparsers.add_parser("h_add", description = 'Add hydrogen in riding position')
96
+ parser.add_argument('model')
97
+ parser.add_argument('--ligand', nargs="*", action="append")
98
+ parser.add_argument("--monlib",
99
+ help="Monomer library path. Default: $CLIBD_MON")
100
+ parser.add_argument('-o','--output')
101
+ parser.add_argument("--pos", choices=["elec", "nucl"], default="elec")
102
+
103
+ # add_op3
104
+ parser = subparsers.add_parser("add_op3", description = "Add OP3 atoms to 5' ends")
105
+ parser.add_argument('model')
106
+ parser.add_argument('--ligand', nargs="*", action="append")
107
+ parser.add_argument("--monlib",
108
+ help="Monomer library path. Default: $CLIBD_MON")
109
+ parser.add_argument('-o','--output')
110
+
111
+ # map_peaks
112
+ parser = subparsers.add_parser("map_peaks", description = 'List density peaks and write a coot script')
113
+ parser.add_argument('--model', required=True, help="Model")
114
+ group = parser.add_mutually_exclusive_group(required=True)
115
+ group.add_argument('--map', help="Map file")
116
+ group.add_argument('--mtz', help="MTZ for map file")
117
+ parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
118
+ parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
119
+ group = parser.add_mutually_exclusive_group(required=True)
120
+ group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
121
+ group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
122
+ parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
123
+ help="default: %(default)s")
124
+ parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
125
+ parser.add_argument('--max_volume', type=float, help="maximum blob volume (default: none)")
126
+ parser.add_argument('-o','--output_prefix', default="peaks")
127
+
128
+ # h_density
129
+ parser = subparsers.add_parser("h_density", description = 'Hydrogen density analysis')
130
+ parser.add_argument('--model', required=True, help="Model with hydrogen atoms")
131
+ group = parser.add_mutually_exclusive_group(required=True)
132
+ group.add_argument('--map', help="Fo-Fc map file")
133
+ group.add_argument('--mtz', help="MTZ for Fo-Fc map file")
134
+ parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
135
+ parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
136
+ #parser.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
137
+ group = parser.add_mutually_exclusive_group(required=True)
138
+ group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
139
+ group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
140
+ parser.add_argument('--max_dist', type=float, default=0.5, help="max distance between peak and hydrogen position in the model (default: %(default).1f)")
141
+ parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
142
+ help="default: %(default)s")
143
+ parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
144
+ parser.add_argument('--max_volume', type=float, default=3, help="maximum blob volume (default: %(default).1f)")
145
+ parser.add_argument('-o','--output_prefix')
146
+
147
+ # fix_link
148
+ parser = subparsers.add_parser("fix_link", description = 'Fix LINKR/_struct_conn records in the model')
149
+ parser.add_argument('model')
150
+ parser.add_argument('--ligand', nargs="*", action="append")
151
+ parser.add_argument("--monlib",
152
+ help="Monomer library path. Default: $CLIBD_MON")
153
+ parser.add_argument('--bond_margin', type=float, default=1.3, help='(default: %(default).1f)')
154
+ parser.add_argument('--metal_margin', type=float, default=1.1, help='(default: %(default).1f)')
155
+ parser.add_argument('-o','--output', help="Default: input_fixlink.{pdb|mmcif}")
156
+
157
+ # merge_models
158
+ parser = subparsers.add_parser("merge_models", description = 'Merge multiple model files')
159
+ parser.add_argument('models', nargs="+")
160
+ parser.add_argument('-o','--output', required=True)
161
+
162
+ # merge_dicts
163
+ parser = subparsers.add_parser("merge_dicts", description = 'Merge restraint dictionary cif files')
164
+ parser.add_argument('cifs', nargs="+")
165
+ parser.add_argument('-o','--output', default="merged.cif", help="Output cif file (default: %(default)s)")
166
+
167
+ # geom
168
+ parser = subparsers.add_parser("geom", description = 'Calculate geometry and show outliers')
169
+ parser.add_argument('model')
170
+ parser.add_argument('--ligand', nargs="*", action="append")
171
+ parser.add_argument("--monlib",
172
+ help="Monomer library path. Default: $CLIBD_MON")
173
+ parser.add_argument('--keywords', nargs='+', action="append",
174
+ help="refmac keyword(s)")
175
+ parser.add_argument('--keyword_file', nargs='+', action="append",
176
+ help="refmac keyword file(s)")
177
+ parser.add_argument('--sigma', type=float, default=5,
178
+ help="sigma cutoff to print outliers (default: %(default).1f)")
179
+ parser.add_argument('--per_atom_score_as_b', action='store_true',
180
+ help="write model file with per-atom score as B values")
181
+ parser.add_argument("--check_skew", action='store_true', help="(experimental) check bond skew to test magnification")
182
+ parser.add_argument('-n', '--nucleus', action="store_true", help="Use nucleus distances (for neutron)")
183
+ parser.add_argument("--ignore_h", action='store_true', help="ignore hydrogen")
184
+ parser.add_argument("--selection", help="evaluate part of the model")
185
+ parser.add_argument('-o', '--output_prefix',
186
+ help="default: taken from input file")
187
+
188
+ # conf
189
+ parser = subparsers.add_parser("conf", description = 'Compare conformations')
190
+ parser.add_argument('models', nargs="+")
191
+ parser.add_argument("--min_diff", type=float, default=60.)
192
+ parser.add_argument('--ligand', nargs="*", action="append")
193
+ parser.add_argument("--monlib",
194
+ help="Monomer library path. Default: $CLIBD_MON")
195
+ parser.add_argument("--same_chain", action='store_true', help="Only between same chains (more than one file)")
196
+ parser.add_argument('-o', '--output_prefix', default="conf",
197
+ help="")
198
+
199
+ # adp
200
+ parser = subparsers.add_parser("adp", description = 'ADP analysis')
201
+ parser.add_argument('model')
202
+ parser.add_argument('-o', '--output_prefix',
203
+ help="default: taken from input file")
204
+
205
+ # power
206
+ parser = subparsers.add_parser("power", description = 'Show power spectrum')
207
+ parser.add_argument("--map", nargs="*", action="append")
208
+ parser.add_argument("--halfmaps", nargs="*", action="append")
209
+ parser.add_argument('--mask', help='Mask file')
210
+ parser.add_argument('-d', '--resolution', type=float)
211
+ parser.add_argument('-o', '--output_prefix', default="power")
212
+
213
+ # fcalc
214
+ parser = subparsers.add_parser("fcalc", description = 'Structure factor from model')
215
+ parser.add_argument('--model', required=True)
216
+ parser.add_argument("--no_expand_ncs", action='store_true', help="Do not expand strict NCS in MTRIX or _struct_ncs_oper")
217
+ parser.add_argument("--method", choices=["fft", "direct"], default="fft")
218
+ parser.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
219
+ parser.add_argument('--ligand', nargs="*", action="append")
220
+ parser.add_argument("--monlib",
221
+ help="Monomer library path. Default: $CLIBD_MON")
222
+ parser.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
223
+ help="Override unit cell")
224
+ parser.add_argument('--auto_box_with_padding', type=float, help="Determine box size from model with specified padding")
225
+ parser.add_argument('--cutoff', type=float, default=1e-5)
226
+ parser.add_argument('--rate', type=float, default=1.5)
227
+ parser.add_argument('--add_dummy_sigma', action='store_true', help="write dummy SIGF")
228
+ parser.add_argument('--as_intensity', action='store_true', help="if you want |F|^2")
229
+ parser.add_argument('--keep_charges', action='store_true',
230
+ help="Use scattering factor for charged atoms. Use it with care.")
231
+ parser.add_argument('-d', '--resolution', type=float, required=True)
232
+ parser.add_argument('-o', '--output_prefix')
233
+
234
+ # nemap
235
+ parser = subparsers.add_parser("nemap", description = 'Normalized expected map calculation from half maps')
236
+ parser.add_argument("--halfmaps", required=True, nargs=2)
237
+ parser.add_argument('--pixel_size', type=float, help='Override pixel size (A)')
238
+ parser.add_argument("--half1_only", action='store_true', help="Only use half 1 for map calculation (use half 2 only for noise estimation)")
239
+ parser.add_argument('-B', type=float, help="local B value")
240
+ parser.add_argument("--no_fsc_weights", action='store_true',
241
+ help="Just for debugging purpose: turn off FSC-based weighting")
242
+ parser.add_argument("--sharpening_b", type=float,
243
+ help="Use B value (negative value for sharpening) instead of standard deviation of the signal")
244
+ parser.add_argument("-d", '--resolution', type=float)
245
+ parser.add_argument('-m', '--mask', help="mask file")
246
+ parser.add_argument('-o', '--output_prefix', default='nemap')
247
+ parser.add_argument("--trim", action='store_true', help="Write trimmed maps")
248
+ parser.add_argument("--trim_mtz", action='store_true', help="Write trimmed mtz")
249
+ parser.add_argument("--local_fourier_weighting_with", type=float, default=0,
250
+ help="Experimental: give kernel size in A^-1 unit to use local Fourier weighting instead of resolution-dependent weights")
251
+
252
+ # blur
253
+ parser = subparsers.add_parser("blur", description = 'Blur data by specified B value')
254
+ parser.add_argument('--hklin', required=True, help="input MTZ file")
255
+ parser.add_argument('-B', type=float, required=True, help="B value for blurring (negative value for sharpening)")
256
+ parser.add_argument('-o', '--output_prefix')
257
+
258
+ # mask_from_model
259
+ parser = subparsers.add_parser("mask_from_model", description = 'Make a mask from model')
260
+ parser.add_argument("--map", required=True, help="For unit cell and pixel size reference")
261
+ parser.add_argument("--model", required=True)
262
+ parser.add_argument("--selection")
263
+ parser.add_argument('--radius', type=float, required=True,
264
+ help='Radius in angstrom')
265
+ parser.add_argument('--soft_edge', type=float, default=0,
266
+ help='Soft edge (default: %(default).1f)')
267
+ parser.add_argument('-o', '--output', default="mask_from_model.mrc")
268
+
269
+ # applymask (and normalize within mask)
270
+ parser = subparsers.add_parser("applymask", description = 'Apply mask and optionally normalize map within mask')
271
+ parser.add_argument("--map", required=True)
272
+ parser.add_argument('--mask', required=True, help='Mask file')
273
+ parser.add_argument("--normalize", action='store_true',
274
+ help="Normalize map values using mean and sd within the mask")
275
+ parser.add_argument("--trim", action='store_true', help="Write trimmed map")
276
+ parser.add_argument('--mask_cutoff', type=float, default=0.5,
277
+ help="cutoff value for normalization and trimming (default: %(default)s)")
278
+ parser.add_argument('-o', '--output_prefix')
279
+
280
+ # map2mtz
281
+ parser = subparsers.add_parser("map2mtz", description = 'FFT map and write an mtz')
282
+ parser.add_argument("--map", required=True)
283
+ parser.add_argument("-d", '--resolution', type=float)
284
+ parser.add_argument('-o', '--output')
285
+
286
+ # sm2mm
287
+ parser = subparsers.add_parser("sm2mm", description = 'Small molecule files (cif/hkl/res/ins) to macromolecules (pdb/mmcif/mtz)')
288
+ parser.add_argument('files', nargs='+', help='Cif/ins/res/hkl files')
289
+ parser.add_argument('-o', '--output_prefix')
290
+
291
+ # seq
292
+ parser = subparsers.add_parser("seq", description = 'Print/align model sequence')
293
+ parser.add_argument("--model", required=True)
294
+ parser.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
295
+ parser.add_argument('--scoring', nargs=6, type=int, default=(1, 0, -1, -1, 0, -1),
296
+ metavar=("match", "mismatch", "gapo", "gape", "good_gapo", "bad_gapo"),
297
+ help="scoring function. default: %(default)s")
298
+
299
+ # dnarna
300
+ parser = subparsers.add_parser("dnarna", description = 'DNA to RNA or RNA to DNA model conversion')
301
+ parser.add_argument("model")
302
+ group = parser.add_mutually_exclusive_group(required=True)
303
+ group.add_argument('--to_dna', action='store_true', help="To DNA")
304
+ group.add_argument('--to_rna', action='store_true', help="To RNA")
305
+ parser.add_argument('--chains', nargs="*", action="append", help="Select chains to convert")
306
+ parser.add_argument('-o', '--output')
307
+
308
+ # add_arguments()
309
+
310
+ def parse_args(arg_list):
311
+ parser = argparse.ArgumentParser()
312
+ add_arguments(parser)
313
+ return parser.parse_args(arg_list)
314
+ # parse_args()
315
+
316
+ def symmodel(args):
317
+ if args.chains: args.chains = sum(args.chains, [])
318
+ model_format = fileio.check_model_format(args.model)
319
+
320
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
321
+ short=gemmi.HowToNameCopiedChain.Short,
322
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
323
+
324
+ if (args.twist, args.rise).count(None) == 1:
325
+ raise SystemExit("ERROR: give both helical parameters --twist and --rise")
326
+
327
+ is_helical = args.twist is not None
328
+ st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
329
+ st.spacegroup_hm = "P 1"
330
+ map_and_start = None
331
+ if args.map:
332
+ logger.writeln("Reading cell from map")
333
+ map_and_start = fileio.read_ccp4_map(args.map)
334
+ st.cell = map_and_start[0].unit_cell
335
+ elif args.cell:
336
+ st.cell = gemmi.UnitCell(*args.cell)
337
+ elif not st.cell.is_crystal():
338
+ raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
339
+
340
+ if args.chains:
341
+ logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
342
+ chains = set(args.chains)
343
+ for m in st:
344
+ to_del = [c.name for c in m if c.name not in chains]
345
+ for c in to_del: m.remove_chain(c)
346
+ if st[0].count_atom_sites() == 0:
347
+ raise SystemExit("ERROR: no atoms left. Check --chains option.")
348
+
349
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
350
+
351
+ symmetry.update_ncs_from_args(args, st, map_and_start=map_and_start, filter_contacting=args.contacting_only)
352
+
353
+ if args.biomt:
354
+ st.assemblies.clear()
355
+ st.raw_remarks = []
356
+ a = model.prepare_assembly("1", all_chains, st.ncs, is_helical=is_helical)
357
+ st.assemblies.append(a)
358
+
359
+ if not args.output_prfix:
360
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_asu"
361
+
362
+ if args.pdb or args.cif:
363
+ fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif, cif_ref=cif_ref)
364
+ else:
365
+ fileio.write_model(st, file_name=args.output_prfix+model_format, cif_ref=cif_ref)
366
+
367
+ # Sym expand
368
+ model.expand_ncs(st, howtoname=howtoname)
369
+ st.assemblies.clear()
370
+ args.output_prfix += "_expanded"
371
+ if args.pdb or args.cif:
372
+ fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif)
373
+ else:
374
+ fileio.write_model(st, file_name=args.output_prfix+model_format)
375
+ # symmodel()
376
+
377
+ def helical_biomt(args):
378
+ if (args.twist, args.rise).count(None) > 0:
379
+ raise SystemExit("ERROR: give helical parameters --twist and --rise")
380
+
381
+ model_format = fileio.check_model_format(args.model)
382
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
383
+ short=gemmi.HowToNameCopiedChain.Short,
384
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
385
+
386
+ st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
387
+ st.spacegroup_hm = "P 1"
388
+ map_and_start = None
389
+ if args.map:
390
+ logger.writeln("Reading cell from map")
391
+ map_and_start = fileio.read_ccp4_map(args.map)
392
+ st.cell = map_and_start[0].unit_cell
393
+ elif args.cell:
394
+ st.cell = gemmi.UnitCell(*args.cell)
395
+ elif not st.cell.is_crystal():
396
+ raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
397
+
398
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
399
+
400
+ ncsops = symmetry.ncsops_from_args(args, st.cell, map_and_start=map_and_start, st=st,
401
+ helical_min_n=args.start, helical_max_n=args.end)
402
+ #ncsops = [x for x in ncsops if not x.tr.is_identity()] # remove identity
403
+
404
+ logger.writeln("")
405
+ logger.writeln("-------------------------------------------------------------")
406
+ logger.writeln("You may need to write following matrices in OneDep interface:")
407
+ for idx, op in enumerate(ncsops):
408
+ logger.writeln("")
409
+ logger.writeln("operator {}".format(idx+1))
410
+ mat = op.tr.mat.tolist()
411
+ vec = op.tr.vec.tolist()
412
+ for i in range(3):
413
+ mstr = ["{:10.6f}".format(mat[i][j]) for j in range(3)]
414
+ logger.writeln("{} {:14.5f}".format(" ".join(mstr), vec[i]))
415
+ logger.writeln("-------------------------------------------------------------")
416
+ logger.writeln("")
417
+
418
+ # BIOMT
419
+ st.assemblies.clear()
420
+ st.raw_remarks = []
421
+ a = model.prepare_assembly("1", all_chains, ncsops, is_helical=True)
422
+ st.assemblies.append(a)
423
+
424
+ if not args.output_prfix:
425
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_biomt"
426
+
427
+ fileio.write_model(st, args.output_prfix, pdb=(model_format == ".pdb"), cif=True, cif_ref=cif_ref)
428
+ logger.writeln("")
429
+ logger.writeln("These {}.* files may be used for deposition (once OneDep implemented reading BIOMT from file..)".format(args.output_prfix))
430
+ logger.writeln("")
431
+ # BIOMT expand
432
+ st.transform_to_assembly("1", howtoname)
433
+ args.output_prfix += "_expanded"
434
+ fileio.write_model(st, file_name=args.output_prfix+model_format)
435
+ logger.writeln(" note that this expanded model file is just for visual inspection, *not* for deposition!")
436
+ # helical_biomt()
437
+
438
+ def symexpand(args):
439
+ if args.chains: args.chains = sum(args.chains, [])
440
+ model_format = fileio.check_model_format(args.model)
441
+ if not args.split:
442
+ howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
443
+ short=gemmi.HowToNameCopiedChain.Short,
444
+ number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
445
+
446
+ st = fileio.read_structure(args.model)
447
+
448
+ if args.chains:
449
+ logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
450
+ chains = set(args.chains)
451
+ for m in st:
452
+ to_del = [c.name for c in m if c.name not in chains]
453
+ for c in to_del: m.remove_chain(c)
454
+
455
+ all_chains = [c.name for c in st[0] if c.name not in st[0]]
456
+
457
+ if not args.output_prfix:
458
+ args.output_prfix = fileio.splitext(os.path.basename(args.model))[0]
459
+
460
+ if len(st.ncs) > 0:
461
+ symmetry.show_ncs_operators_axis_angle(st.ncs)
462
+ non_given = [op for op in st.ncs if not op.given]
463
+ if len(non_given) > 0:
464
+ if args.split:
465
+ for i, op in enumerate(st.ncs):
466
+ if op.given: continue
467
+ st_tmp = st.clone()
468
+ for m in st_tmp: m.transform_pos_and_adp(op.tr)
469
+ output_prfix = args.output_prfix + "_ncs_{:02d}".format(i+1)
470
+ if args.pdb or args.cif:
471
+ fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
472
+ else:
473
+ fileio.write_model(st_tmp, file_name=output_prfix+model_format)
474
+ else:
475
+ st_tmp = st.clone()
476
+ model.expand_ncs(st_tmp, howtoname=howtoname)
477
+ output_prfix = args.output_prfix + "_ncs_expanded"
478
+ if args.pdb or args.cif:
479
+ fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
480
+ else:
481
+ fileio.write_model(st_tmp, file_name=output_prfix+model_format)
482
+ else:
483
+ logger.writeln("All operators are already expanded (marked as given). Exiting.")
484
+ else:
485
+ logger.writeln("No NCS operators found. Exiting.")
486
+
487
+ if len(st.assemblies) > 0: # should we support BIOMT?
488
+ pass
489
+ # symexpand()
490
+
491
+ def h_add(args):
492
+ st = fileio.read_structure(args.model)
493
+ model_format = fileio.check_model_format(args.model)
494
+
495
+ if not args.output:
496
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
497
+ args.output = tmp + "_h" + model_format
498
+ logger.writeln("Output file: {}".format(args.output))
499
+
500
+ args.ligand = sum(args.ligand, []) if args.ligand else []
501
+ monlib = restraints.load_monomer_library(st,
502
+ monomer_dir=args.monlib,
503
+ cif_files=args.ligand)
504
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
505
+ try:
506
+ restraints.add_hydrogens(st, monlib, args.pos)
507
+ except RuntimeError as e:
508
+ raise SystemExit("Error: {}".format(e))
509
+
510
+ fileio.write_model(st, file_name=args.output)
511
+ # h_add()
512
+
513
+ def add_op3(args):
514
+ st = fileio.read_structure(args.model)
515
+ model_format = fileio.check_model_format(args.model)
516
+
517
+ if not args.output:
518
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
519
+ args.output = tmp + "_op3" + model_format
520
+ logger.writeln("Output file: {}".format(args.output))
521
+
522
+ args.ligand = sum(args.ligand, []) if args.ligand else []
523
+ monlib = restraints.load_monomer_library(st,
524
+ monomer_dir=args.monlib,
525
+ cif_files=args.ligand)
526
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
527
+
528
+ for chain in st[0]:
529
+ p = chain.get_polymer()
530
+ if not p: continue
531
+ p_type = p.check_polymer_type()
532
+ if p_type not in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna): continue
533
+ r0 = p[0]
534
+ # TODO: alias
535
+ # TODO: altlocs
536
+ alt = "*"
537
+ if r0.find_atom("OP3", alt): continue
538
+ a_op1 = r0.find_atom("OP1", alt)
539
+ a_op2 = r0.find_atom("OP2", alt)
540
+ a_o5p = r0.find_atom("O5'", alt)
541
+ a_p = r0.find_atom("P", alt)
542
+ if None in (a_op1, a_op2, a_o5p, a_p):
543
+ logger.writeln(f"Error: atoms not found. skipping {chain.name}/{r0}")
544
+ continue
545
+ logger.writeln(f"Adding OP3 to {chain.name}/{r0}")
546
+ a_op3 = r0.add_atom(a_p) # inherit ADP and occupancy
547
+ a_op3.name = "OP3"
548
+ a_op3.element = gemmi.Element("O")
549
+ v1 = a_p.pos - a_op1.pos
550
+ v2 = a_p.pos - a_op2.pos
551
+ v3 = a_p.pos - a_o5p.pos
552
+ v = v1 + v2 + v3
553
+ a_op3.pos = a_p.pos + v / v.length() * 1.517
554
+
555
+ fileio.write_model(st, file_name=args.output)
556
+ # add_op3()
557
+
558
+ def read_map_and_oversample(map_in=None, mtz_in=None, mtz_labs=None, oversample_pixel=None):
559
+ if mtz_in is not None:
560
+ mtz = fileio.read_mmhkl(mtz_in)
561
+ lab_f, lab_phi = mtz_labs.split(",")
562
+ asu = mtz.get_f_phi(lab_f, lab_phi)
563
+ if oversample_pixel is not None:
564
+ d_min = numpy.min(asu.make_d_array())
565
+ sample_rate = d_min / oversample_pixel
566
+ else:
567
+ sample_rate = 3
568
+ gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
569
+ elif map_in is not None:
570
+ gr = fileio.read_ccp4_map(map_in)[0]
571
+ if oversample_pixel is not None:
572
+ asu = gemmi.transform_map_to_f_phi(gr).prepare_asu_data()
573
+ d_min = numpy.min(asu.make_d_array())
574
+ sample_rate = d_min / oversample_pixel
575
+ gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
576
+ else:
577
+ raise SystemExit("Invalid input")
578
+
579
+ if oversample_pixel is not None:
580
+ logger.writeln("--oversample_pixel= {} is requested.".format(oversample_pixel))
581
+ logger.writeln(" recalculated grid:")
582
+ logger.writeln(" {:4d} {:4d} {:4d}".format(*gr.shape))
583
+ logger.writeln(" spacings:")
584
+ logger.writeln(" {:.6f} {:.6f} {:.6f}".format(*gr.spacing))
585
+ #maps.write_ccp4_map("{}_oversampled.mrc".format(output_prefix), gr)
586
+
587
+ return gr
588
+ # read_map_and_oversample()
589
+
590
+ def map_peaks(args):
591
+ st = fileio.read_structure(args.model)
592
+ gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
593
+ oversample_pixel=args.oversample_pixel)
594
+ gr_sigma = numpy.std(gr)
595
+ if args.abs_level is not None:
596
+ cutoff = args.abs_level
597
+ else:
598
+ cutoff = args.sigma_level * gr_sigma # assuming mean(gr) = 0
599
+
600
+ blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
601
+ min_volume=args.min_volume, min_score=0)
602
+ blobs.extend(gemmi.find_blobs_by_flood_fill(gr, cutoff, negate=True,
603
+ min_volume=args.min_volume, min_score=0))
604
+ getpos = dict(peak=lambda x: x.peak_pos,
605
+ centroid=lambda x: x.centroid)[args.blob_pos]
606
+ st_peaks = model.st_from_positions([getpos(b) for b in blobs])
607
+ st_peaks.cell = st.cell
608
+ st_peaks.ncs = st.ncs
609
+ st_peaks.setup_cell_images()
610
+ logger.writeln("{} peaks detected".format(len(blobs)))
611
+ #st_peaks.write_pdb("peaks.pdb")
612
+
613
+ # Filter symmetry related
614
+ ns = gemmi.NeighborSearch(st_peaks[0], st_peaks.cell, 5.).populate()
615
+ cs = gemmi.ContactSearch(1.)
616
+ cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
617
+ results = cs.find_contacts(ns)
618
+ del_idxes = set()
619
+ for r in results:
620
+ if r.partner1.residue.seqid.num not in del_idxes:
621
+ del_idxes.add(r.partner2.residue.seqid.num)
622
+ for i in reversed(sorted(del_idxes)):
623
+ del st_peaks[0][0][i]
624
+ del blobs[i]
625
+ #st_peaks.write_pdb("peaks_asu.pdb")
626
+ logger.writeln("{} peaks after removing symmetry equivalents".format(len(blobs)))
627
+
628
+ # Assign to nearest atom
629
+ ns = gemmi.NeighborSearch(st[0], st.cell, 10.).populate() # blob is rejected if > 10 A. ok?
630
+ peaks = []
631
+ for b in blobs:
632
+ bpos = getpos(b)
633
+ map_val = gr.interpolate_value(bpos)
634
+ if (args.max_volume is not None and b.volume > args.max_volume) or abs(map_val) < cutoff: continue
635
+ x = ns.find_nearest_atom(bpos)
636
+ if x is None: # this should not happen
637
+ logger.writeln("no nearest atom: value={:.2e} volume= {:.2f} pos= {}".format(map_val, b.volume, bpos))
638
+ continue
639
+ chain = st[0][x.chain_idx]
640
+ res = chain[x.residue_idx]
641
+ atom = res[x.atom_idx]
642
+ im = st.cell.find_nearest_image(atom.pos, bpos, gemmi.Asu.Any)
643
+ mpos = st.cell.find_nearest_pbc_position(atom.pos, bpos, im.sym_idx)
644
+ dist = atom.pos.dist(mpos)
645
+ peaks.append((map_val, b.volume, mpos, dist, chain, res, atom))
646
+
647
+ if len(peaks) == 0:
648
+ logger.writeln("No peaks found. Change parameter(s).")
649
+ return
650
+
651
+ # Print and write coot script
652
+ peaks.sort(reverse=True, key=lambda x:(abs(x[0]), x[1]))
653
+ for_coot = []
654
+ for_df = []
655
+ for i, p in enumerate(peaks):
656
+ map_val, volume, mpos, dist, chain, res, atom = p
657
+ mpos_str = "({: 7.2f},{: 7.2f},{: 7.2f})".format(mpos.x, mpos.y, mpos.z)
658
+ atom_name = atom.name + ("." + atom.altloc if atom.altloc != "\0" else "")
659
+ atom_str = "{}/{}/{}".format(chain.name, res.seqid, atom_name)
660
+ if args.abs_level is None:
661
+ map_val /= gr_sigma
662
+ lab_str = "Peak {:4d} value= {: .2e} volume= {:5.1f} pos= {} closest= {:10s} dist= {:.2f}".format(i+1, map_val, volume, mpos_str, atom_str, dist)
663
+ for_coot.append((lab_str, (mpos.x, mpos.y, mpos.z)))
664
+ for_df.append((map_val, volume, mpos.x, mpos.y, mpos.z, chain.name, str(res.seqid), atom_name, dist))
665
+ df = pandas.DataFrame(for_df, columns=["map_value" if args.abs_level is not None else "sigma_level",
666
+ "volume", "x", "y", "z", "chain", "residue", "atom", "dist"])
667
+ logger.writeln(df.to_string())
668
+ with open(args.output_prefix + ".json", "w") as ofs:
669
+ df.to_json(ofs, orient="records", indent=2)
670
+ logger.writeln("saved: {}".format(ofs.name))
671
+ coot_out = args.output_prefix + "_coot.py"
672
+ with open(coot_out, "w") as ofs:
673
+ ofs.write("""\
674
+ from __future__ import absolute_import, division, print_function
675
+ import gtk
676
+ class coot_serval_map_peak_list:
677
+ def __init__(self):
678
+ window = gtk.Window(gtk.WINDOW_TOPLEVEL)
679
+ window.set_title("Map peaks (Servalcat)")
680
+ window.set_default_size(600, 600)
681
+ scrolled_win = gtk.ScrolledWindow()
682
+ scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
683
+ vbox = gtk.VBox(False, 2)
684
+ frame_vbox = gtk.VBox(False, 0)
685
+ frame_vbox.set_border_width(3)
686
+ self.btns = []
687
+ self.data = {}
688
+ self.add_data(frame_vbox)
689
+ scrolled_win.add_with_viewport(frame_vbox)
690
+ vbox.pack_start(scrolled_win, True, True, 0)
691
+ window.add(vbox)
692
+ window.show_all()
693
+ self.toggled(self.btns[0], 0)
694
+
695
+ def toggled(self, btn, i):
696
+ if btn.get_active():
697
+ set_rotation_centre(*self.data[i][1])
698
+ add_status_bar_text(self.data[i][0])
699
+
700
+ def add_data(self, vbox):
701
+ for i, d in enumerate(self.data):
702
+ self.btns.append(gtk.RadioButton(None if i == 0 else self.btns[0], d[0]))
703
+ vbox.pack_start(self.btns[-1], False, False, 0)
704
+ self.btns[-1].connect('toggled', self.toggled, i)
705
+
706
+ gui = coot_serval_map_peak_list()
707
+ """.format(for_coot))
708
+ logger.writeln("\nRun:")
709
+ logger.writeln("coot --script {}".format(coot_out))
710
+ # map_peaks()
711
+
712
+ def h_density_analysis(args):
713
+ #if args.source != "electron":
714
+ # raise SystemExit("Only electron source is supported.")
715
+ model_format = fileio.check_model_format(args.model)
716
+ st = fileio.read_structure(args.model)
717
+ if not st[0].has_hydrogen():
718
+ raise SystemExit("No hydrogen in model.")
719
+
720
+ if args.output_prefix is None:
721
+ args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_hana"
722
+
723
+ gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
724
+ oversample_pixel=args.oversample_pixel)
725
+
726
+ if args.abs_level is not None:
727
+ cutoff = args.abs_level
728
+ else:
729
+ cutoff = args.sigma_level * numpy.std(gr) # assuming mean(gr) = 0
730
+
731
+ blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
732
+ min_volume=args.min_volume, min_score=0)
733
+ getpos = dict(peak=lambda x: x.peak_pos,
734
+ centroid=lambda x: x.centroid)[args.blob_pos]
735
+
736
+ peaks = [getpos(b).tolist() for b in blobs]
737
+ kdtree = scipy.spatial.cKDTree(peaks)
738
+ found = []
739
+ n_hydr = 0
740
+ h_assigned = [0 for _ in range(len(blobs))]
741
+ st2 = st.clone()
742
+ for ic, chain in enumerate(st[0]):
743
+ for ir, res in enumerate(chain):
744
+ for ia, atom in reversed(list(enumerate(res))):
745
+ if not atom.is_hydrogen(): continue
746
+ n_hydr += 1
747
+ dist, idx = kdtree.query(atom.pos.tolist(), k=1, p=2)
748
+ map_val = gr.interpolate_value(getpos(blobs[idx]))
749
+ if dist < args.max_dist and blobs[idx].volume < args.max_volume and map_val > cutoff:
750
+ found.append((getpos(blobs[idx]), map_val, dist, blobs[idx].volume,
751
+ chain.name, str(res.seqid), res.name,
752
+ atom.name, atom.altloc.replace("\0","")))
753
+ h_assigned[idx] = 1
754
+ else:
755
+ del st2[0][ic][ir][ia]
756
+
757
+ found.sort(key=lambda x: x[1], reverse=True)
758
+ logger.writeln("")
759
+ logger.writeln("Found hydrogen peaks:")
760
+ logger.writeln("dist map vol atom")
761
+ for _, map_val, dist, volume, chain, resi, resn, atom, alt in found:
762
+ logger.writeln("{:.2f} {:.2f} {:.2f} {}/{} {}/{}{}".format(dist, map_val, volume,
763
+ chain, resn, resi,
764
+ atom, "."+alt if alt else ""))
765
+
766
+ logger.writeln("")
767
+ logger.writeln("Result:")
768
+ logger.writeln(" number of hydrogen in the model : {}".format(n_hydr))
769
+ logger.writeln(" number of peaks close to hydrogen: {} ({:.1%})".format(len(found), len(found)/n_hydr))
770
+ logger.writeln("")
771
+
772
+ st_peaks = model.st_from_positions([getpos(b) for b in blobs],
773
+ bs=[gr.interpolate_value(getpos(b)) for b in blobs],
774
+ qs=h_assigned)
775
+ fileio.write_model(st_peaks, file_name="{}_peaks.mmcif".format(args.output_prefix))
776
+ logger.writeln(" this file includes peak positions")
777
+ logger.writeln(" occ=1: hydrogen assigned, occ=0: unassigned.")
778
+ logger.writeln(" B: density value at {}".format(args.blob_pos))
779
+ logger.writeln("")
780
+
781
+ fileio.write_model(st2, file_name="{}_h_with_peak{}".format(args.output_prefix, model_format))
782
+ logger.writeln(" this file is a copy of input model, where hydrogen atoms without peaks are removed.")
783
+ # h_density_analysis()
784
+
785
+ def fix_link(args):
786
+ st = fileio.read_structure(args.model)
787
+ model_format = fileio.check_model_format(args.model)
788
+
789
+ if not args.output:
790
+ tmp = fileio.splitext(os.path.basename(args.model))[0]
791
+ args.output = tmp + "_fixlink" + model_format
792
+ logger.writeln("Output file: {}".format(args.output))
793
+
794
+ args.ligand = sum(args.ligand, []) if args.ligand else []
795
+ monlib = restraints.load_monomer_library(st,
796
+ monomer_dir=args.monlib,
797
+ cif_files=args.ligand)
798
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
799
+ restraints.find_and_fix_links(st, monlib, bond_margin=args.bond_margin,
800
+ metal_margin=args.metal_margin)
801
+ fileio.write_model(st, file_name=args.output)
802
+ # fix_link()
803
+
804
+ def merge_models(args):
805
+ logger.writeln("Reading file 1: {}".format(args.models[0]))
806
+ st = fileio.read_structure(args.models[0])
807
+ logger.writeln(" chains {}".format(" ".join([c.name for c in st[0]])))
808
+
809
+ for i, f in enumerate(args.models[1:]):
810
+ logger.writeln("Reading file {:3d}: {}".format(i+2, f))
811
+ st2 = fileio.read_structure(f)
812
+ for c in st2[0]:
813
+ org_id = c.name
814
+ c2 = st[0].add_chain(c, unique_name=True)
815
+ if c.name != c2.name:
816
+ logger.writeln(" chain {} merged (ID changed to {})".format(c.name, c2.name))
817
+ else:
818
+ logger.writeln(" chain {} merged".format(c.name))
819
+
820
+ fileio.write_model(st, file_name=args.output)
821
+ # merge_models()
822
+
823
+ def merge_dicts(args):
824
+ fileio.merge_ligand_cif(args.cifs, args.output)
825
+ # merge_dicts()
826
+
827
+ def geometry(args):
828
+ if args.ligand: args.ligand = sum(args.ligand, [])
829
+ if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_geom"
830
+ keywords = []
831
+ if args.keywords or args.keyword_file:
832
+ if args.keywords: keywords = sum(args.keywords, [])
833
+ if args.keyword_file: keywords.extend(l for f in sum(args.keyword_file, []) for l in open(f))
834
+ params = refmac_keywords.parse_keywords(keywords)
835
+ st = fileio.read_structure(args.model)
836
+ if args.ignore_h:
837
+ st.remove_hydrogens()
838
+ try:
839
+ monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
840
+ stop_for_unknowns=True, params=params)
841
+ except RuntimeError as e:
842
+ raise SystemExit("Error: {}".format(e))
843
+
844
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
845
+ restraints.find_and_fix_links(st, monlib)
846
+ try:
847
+ topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
848
+ check_hydrogen=True, params=params)
849
+ except RuntimeError as e:
850
+ raise SystemExit("Error: {}".format(e))
851
+
852
+ if args.selection:
853
+ sel = gemmi.Selection(args.selection)
854
+ atom_pos = [-1 for _ in range(st[0].count_atom_sites())]
855
+ n = 0
856
+ for chain in sel.chains(st[0]):
857
+ for res in sel.residues(chain):
858
+ for atom in sel.atoms(res):
859
+ atom_pos[atom.serial-1] = n
860
+ n += 1
861
+ logger.writeln("Using selection '{}': {} atoms out of {}".format(args.selection, n, len(atom_pos)))
862
+ else:
863
+ atom_pos = None
864
+
865
+ geom = Geom(st, topo, monlib, params=params, atom_pos=atom_pos, use_nucleus=args.nucleus)
866
+ for k in geom.outlier_sigmas: geom.outlier_sigmas[k] = args.sigma
867
+ geom.setup_nonbonded(True)
868
+ ret = geom.show_model_stats()
869
+
870
+ with open(args.output_prefix + "_summary.json", "w") as ofs:
871
+ ret["summary"].to_json(ofs, indent=2)
872
+ logger.writeln("saved: {}".format(ofs.name))
873
+ with open(args.output_prefix + "_outliers.json", "w") as ofs:
874
+ for k in ret["outliers"]:
875
+ ret["outliers"][k] = ret["outliers"][k].to_dict(orient="records")
876
+ json.dump(ret["outliers"], ofs, indent=2)
877
+ logger.writeln("saved: {}".format(ofs.name))
878
+
879
+ if args.check_skew:
880
+ logger.writeln("\nChecking skewness of bond length deviation")
881
+ # better to ignore hydrogen
882
+ tab = geom.geom.reporting.get_bond_outliers(use_nucleus=geom.use_nucleus, min_z=0)
883
+ for a in "atom1", "atom2":
884
+ tab[a] = [str(geom.lookup[x]) for x in tab[a]]
885
+ df = pandas.DataFrame(tab)
886
+ df["dev"] = df["value"] - df["ideal"]
887
+ df = df.reindex(df.dev.abs().sort_values(ascending=False).index)
888
+ logger.writeln("Bond length deviations:")
889
+ logger.writeln(df.to_string(max_rows=20))
890
+ q1, q2, q3 = numpy.percentile(df["dev"], [25, 50, 75])
891
+ sk2 = (q1 + q3 - 2 * q2) / (q3 - q1)
892
+ logger.writeln("bond_dev_median= {:.6f}".format(q2))
893
+ logger.writeln("bond_dev_skew= {:.4f}".format(df["dev"].skew()))
894
+ logger.writeln("bond_dev_sk2= {:.4f}".format(sk2))
895
+ with open(args.output_prefix + "_bond_dev.html", "w") as ofs:
896
+ ofs.write("""\
897
+ <html>
898
+ <head>
899
+ <meta charset="utf-8" />
900
+ <script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
901
+ </head>
902
+ <body>
903
+ <div id="hist"></div>
904
+ <script>
905
+ var trace = {
906
+ x: %s,
907
+ type: 'histogram'
908
+ };
909
+ var layout = {
910
+ title: "median: %.4f, sk2: %.4f",
911
+ xaxis: {title: "bond distance - ideal"},
912
+ yaxis: {title: "count"},
913
+ shapes: [{
914
+ type: 'line',
915
+ yref: 'paper',
916
+ x0: 0, y0: 0,
917
+ x1: 0, y1: 1}]
918
+ };
919
+ target = document.getElementById('hist');
920
+ Plotly.newPlot(target, [trace], layout);
921
+ </script>
922
+ </body>
923
+ </html>
924
+ """ % (str(list(df.dev)), q2, sk2))
925
+ logger.writeln("check histogram: {}".format(ofs.name))
926
+
927
+ # Note that this modifies st
928
+ if args.per_atom_score_as_b:
929
+ model_format = fileio.check_model_format(args.model)
930
+ peratom = geom.geom.reporting.per_atom_score(len(geom.atoms), geom.use_nucleus, "mean")
931
+ for i, score in enumerate(peratom["total"]):
932
+ geom.atoms[i].b_iso = score
933
+ fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
934
+ # geometry()
935
+
936
+ def compare_conf(args):
937
+ def angle_abs_diff(a, b, full=360.):
938
+ # from gemmi/math.hpp
939
+ d = abs(a - b)
940
+ if d > full:
941
+ d -= numpy.floor(d / full) * full
942
+ return min(d, full - d)
943
+ # angle_abs_diff()
944
+
945
+ if args.ligand: args.ligand = sum(args.ligand, [])
946
+ st = None
947
+ for i, f in enumerate(args.models):
948
+ tmp = fileio.read_structure(f)
949
+ if len(args.models) > 1:
950
+ for chain in tmp[0]:
951
+ chain.name = f"{i+1}_{chain.name}"
952
+ if i == 0:
953
+ st = tmp
954
+ else:
955
+ for chain in tmp[0]:
956
+ st[0].add_chain(chain)
957
+ try:
958
+ monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
959
+ stop_for_unknowns=True)
960
+ except RuntimeError as e:
961
+ raise SystemExit(f"Error: {e}")
962
+
963
+ model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
964
+ try:
965
+ topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
966
+ check_hydrogen=False)
967
+ except RuntimeError as e:
968
+ raise SystemExit(f"Error: {e}")
969
+ ncslist = restraints.prepare_ncs_restraints(st)
970
+ lookup = {x.atom: x for x in st[0].all()}
971
+ ptypes = {x.name: x.polymer_type for x in st.entities}
972
+ resn_lookup = {(chain.name, res.seqid): res.name for chain in st[0] for res in chain}
973
+ confs = {}
974
+ for t in topo.torsions:
975
+ cra = lookup[t.atoms[0]]
976
+ ptype = ptypes[cra.residue.entity_id]
977
+ is_peptide = ptype in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD)
978
+ is_peptide_tors = t.restr.label.startswith("chi") or t.restr.label in ("omega", "phi", "psi")
979
+ is_na = ptype in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
980
+ is_na_tors = t.restr.label in ("C2e-chi", "alpha", "beta", "gamma", "C2e-nyu0", "epsilon", "zeta")
981
+ if (is_peptide and is_peptide_tors) or (is_na and is_na_tors):
982
+ confs.setdefault(cra.chain.name, {}).setdefault(cra.residue.seqid, {})[t.restr.label] = numpy.rad2deg(t.calculate())
983
+ fulls = {("ARG", "chi5"): 180., ("TYR", "chi2"): 180., ("PHE", "chi2"): 180., ("ASP", "chi2"): 180., ("GLU", "chi3"): 180.}
984
+ ret = []
985
+ for_coot = []
986
+ for ncs in ncslist.ncss:
987
+ c1, c2 = ncs.chains
988
+ if args.same_chain and len(args.models) > 1 and c1[c1.index("_"):] != c2[c2.index("_"):]:
989
+ continue
990
+ for s1, s2 in ncs.seqids:
991
+ if c1 in confs and s1 in confs[c1] and c2 in confs and s2 in confs[c2]:
992
+ conf1, conf2 = confs[c1][s1], confs[c2][s2]
993
+ resn = resn_lookup[(c1, s1)]
994
+ for t in conf1:
995
+ if t in conf2:
996
+ d = angle_abs_diff(conf1[t], conf2[t], fulls.get((resn, t), 360.))
997
+ ret.append((c1, s1, c2, s2, resn, t, conf1[t], conf2[t], d))
998
+ if d > args.min_diff:
999
+ for_coot.append((c1, s1.num, c2, s2.num, resn, t, d))
1000
+ df = pandas.DataFrame(ret, columns=["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "conf_1", "conf_2", "diff"])
1001
+ df.sort_values("diff", ascending=False, inplace=True)
1002
+ logger.writeln(f"\nList of torsion angle differences (>{args.min_diff})")
1003
+ logger.writeln(df[df["diff"] > args.min_diff].to_string(index=False))
1004
+
1005
+ for_coot.sort(key=lambda x:-x[-1])
1006
+ coot_out = args.output_prefix + "_coot.py"
1007
+ with open(coot_out, "w") as ofs:
1008
+ # https://python-gtk-3-tutorial.readthedocs.io/en/latest/treeview.html
1009
+ ofs.write("""\
1010
+ from __future__ import absolute_import, division, print_function
1011
+ import re
1012
+ import gtk
1013
+ class coot_serval_conf_list:
1014
+ def __init__(self):
1015
+ window = gtk.Window(gtk.WINDOW_TOPLEVEL)
1016
+ window.set_title("Different conformations (Servalcat)")
1017
+ window.set_default_size(600, 600)
1018
+ scrolled_win = gtk.ScrolledWindow()
1019
+ scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
1020
+ vbox = gtk.VBox(False, 2)
1021
+ self.liststore = gtk.ListStore(str, int, str, int, str, str, float)
1022
+ self.filter = self.liststore.filter_new()
1023
+ self.treeview = gtk.TreeView(model=self.filter)
1024
+ for i, column_title in enumerate(["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "diff"]):
1025
+ renderer = gtk.CellRendererText()
1026
+ column = gtk.TreeViewColumn(column_title, renderer, text=i)
1027
+ self.treeview.append_column(column)
1028
+ self.data = {}
1029
+ self.add_data()
1030
+ scrolled_win.add_with_viewport(self.treeview) # add?
1031
+ vbox.pack_start(scrolled_win, True, True, 0)
1032
+ window.add(vbox)
1033
+ window.show_all()
1034
+ self.treeview.connect("row-activated", self.on_row_activated)
1035
+
1036
+ def on_row_activated(self, treeview, path, column):
1037
+ assert len(path) == 1
1038
+ col_idx = [i for i, c in enumerate(treeview.get_columns()) if column == c][0]
1039
+ row = self.liststore[path[0]]
1040
+ if col_idx < 2:
1041
+ chain, resi = row[0], row[1]
1042
+ elif col_idx < 4:
1043
+ chain, resi = row[2], row[3]
1044
+ else:
1045
+ return
1046
+ if re.search("^[0-9]+_[0-9A-Za-z]", chain):
1047
+ chain = chain[chain.index("_")+1:]
1048
+ imol = active_atom_spec()[1][0]
1049
+ for name in (" CA ", " C1'"):
1050
+ a = get_atom(imol, chain, resi, "", name)
1051
+ if a:
1052
+ set_rotation_center(*a[2])
1053
+ break
1054
+
1055
+ def add_data(self):
1056
+ for i, d in enumerate(self.data):
1057
+ self.liststore.append(d)
1058
+
1059
+ gui = coot_serval_conf_list()
1060
+ """.format(for_coot))
1061
+ logger.writeln("\nRun:")
1062
+ logger.writeln(f"coot --script {coot_out}")
1063
+ # compare_conf()
1064
+
1065
+ def adp_stats(args):
1066
+ if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
1067
+ st = fileio.read_structure(args.model)
1068
+ model.adp_analysis(st)
1069
+ b_all = [cra.atom.b_iso for cra in st[0].all() if cra.atom.occ > 0]
1070
+
1071
+ # bin width from Freedman–Diaconis rule
1072
+ qs = numpy.quantile(b_all, [0, 0.25, 0.75, 1])
1073
+ bin_h = 2 * (qs[2] - qs[1]) / len(b_all)**(1/3.)
1074
+
1075
+ # for plotly
1076
+ traces = []
1077
+ traces.append("x: [%s], type: 'histogram', name: 'All', xbins: {size: %f}"
1078
+ % (",".join("%.2f"%x for x in b_all), bin_h))
1079
+ if len(st[0]) > 1:
1080
+ b_chain = {}
1081
+ for c in st[0]:
1082
+ b_chain.setdefault(c.name, []).extend(a.b_iso for r in c for a in r if a.occ > 0)
1083
+ for c in b_chain:
1084
+ bs = ",".join("%.2f" % x for x in b_chain[c])
1085
+ traces.append("x: [%s], type: 'histogram', name: 'Chain %s'" % (bs, c))
1086
+ with open(args.output_prefix + "_hist.html", "w") as ofs:
1087
+ ofs.write("""\
1088
+ <html>
1089
+ <head>
1090
+ <meta charset="utf-8" />
1091
+ <script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
1092
+ </head>
1093
+ <body>
1094
+ <div id="hist"></div>
1095
+ <script>
1096
+ """)
1097
+ for i, t in enumerate(traces):
1098
+ ofs.write("var trace%d = {%s};\n" % (i+1, t))
1099
+ ofs.write("""\
1100
+ var layout = {
1101
+ title: "isotropic B histogram",
1102
+ xaxis: {title: "B"},
1103
+ yaxis: {title: "count"},
1104
+ barmode: "stack"
1105
+ };
1106
+ target = document.getElementById('hist');
1107
+ Plotly.newPlot(target, [%s], layout);
1108
+ </script>
1109
+ </body>
1110
+ </html>
1111
+ """ % (",".join("trace%d" % (i+1) for i in range(len(traces)))))
1112
+ logger.writeln("check histogram: {}".format(ofs.name))
1113
+ # adp_stats()
1114
+
1115
+ def show_power(args):
1116
+ maps_in = []
1117
+ if args.map:
1118
+ print(args.map)
1119
+ print(sum(args.map, []))
1120
+ maps_in = [(f,) for f in sum(args.map, [])]
1121
+
1122
+ if args.halfmaps:
1123
+ args.halfmaps = sum(args.halfmaps, [])
1124
+ if len(args.halfmaps)%2 != 0:
1125
+ raise RuntimeError("Number of half maps is not even.")
1126
+ maps_in.extend([(args.halfmaps[2*i],args.halfmaps[2*i+1]) for i in range(len(args.halfmaps)//2)])
1127
+
1128
+ if args.mask:
1129
+ mask = fileio.read_ccp4_map(args.mask)[0]
1130
+ else:
1131
+ mask = None
1132
+
1133
+ hkldata = None
1134
+ labs = []
1135
+ for mapin in maps_in: # TODO rewrite in faster way
1136
+ ms = [fileio.read_ccp4_map(f) for f in mapin]
1137
+ d_min = args.resolution
1138
+ if d_min is None:
1139
+ d_min = maps.nyquist_resolution(ms[0][0])
1140
+ logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(d_min))
1141
+ tmp = maps.mask_and_fft_maps(ms, d_min, mask)
1142
+ labs.append("F{:02d}".format(len(labs)+1))
1143
+ tmp.df.rename(columns=dict(FP=labs[-1]), inplace=True)
1144
+ if hkldata is None:
1145
+ hkldata = tmp
1146
+ else:
1147
+ if hkldata.cell.parameters != tmp.cell.parameters: raise RuntimeError("Different unit cell!")
1148
+ hkldata.merge(tmp.df[["H","K","L",labs[-1]]])
1149
+
1150
+ if not labs:
1151
+ raise SystemExit("No map files given. Exiting.")
1152
+
1153
+ hkldata.setup_relion_binning()
1154
+
1155
+ ofs = open(args.output_prefix+".log", "w")
1156
+ ofs.write("Input:\n")
1157
+ for i in range(len(maps_in)):
1158
+ ofs.write("{} from {}\n".format(labs[i], " ".join(maps_in[i])))
1159
+ ofs.write("\n")
1160
+
1161
+ ofs.write("""$TABLE: Power spectrum :
1162
+ $GRAPHS
1163
+ : log10(Mn(|F|^2)) :A:1,{}:
1164
+ $$
1165
+ 1/resol^2 n d_max d_min {}
1166
+ $$
1167
+ $$
1168
+ """.format(",".join([str(i+5) for i in range(len(labs))]), " ".join(labs)))
1169
+ print(hkldata.df)
1170
+ abssqr = dict((lab, numpy.abs(hkldata.df[lab].to_numpy())**2) for lab in labs)
1171
+ for i_bin, idxes in hkldata.binned():
1172
+ bin_d_min = hkldata.binned_df.d_min[i_bin]
1173
+ bin_d_max = hkldata.binned_df.d_max[i_bin]
1174
+ ofs.write("{:.4f} {:7d} {:7.3f} {:7.3f}".format(1/bin_d_min**2, len(idxes), bin_d_max, bin_d_min,))
1175
+ for lab in labs:
1176
+ pwr = numpy.log10(numpy.average(abssqr[lab][idxes]))
1177
+ ofs.write(" {:.4e}".format(pwr))
1178
+ ofs.write("\n")
1179
+ ofs.write("$$\n")
1180
+ ofs.close()
1181
+ # show_power()
1182
+
1183
+ def fcalc(args):
+     # Calculate structure factors (FC) from a model and write them to an MTZ file,
+     # optionally as intensities (IC = |FC|^2) with dummy sigmas.
+     if (args.auto_box_with_padding, args.cell).count(None) == 0:
+         raise SystemExit("Error: you cannot specify both --auto_box_with_padding and --cell")
+
+     if args.ligand: args.ligand = sum(args.ligand, [])
+     if not args.output_prefix: args.output_prefix = "{}_fcalc_{}".format(fileio.splitext(os.path.basename(args.model))[0], args.source)
+
+     st = fileio.read_structure(args.model)
+     if not args.keep_charges:
+         model.remove_charge([st])
+     model.check_atomsf([st], args.source)
+     if not args.no_expand_ncs:
+         model.expand_ncs(st)
+
+     if args.cell is not None:
+         st.cell = gemmi.UnitCell(*args.cell)
+     elif args.auto_box_with_padding is not None:
+         st.cell = model.box_from_model(st[0], args.auto_box_with_padding)
+         st.spacegroup_hm = "P 1"
+         logger.writeln("Box size from the model with padding of {}: {}".format(args.auto_box_with_padding, st.cell.parameters))
+
+     if not st.cell.is_crystal():
+         raise SystemExit("ERROR: No unit cell information. Give --cell or --auto_box_with_padding.")
+
+     if args.source=="electron" and st[0].has_hydrogen():
+         monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
+                                                  stop_for_unknowns=False)
+     else:
+         monlib = None
+
+     if args.method == "fft":
+         fc_asu = model.calc_fc_fft(st, args.resolution, cutoff=args.cutoff, rate=args.rate,
+                                    mott_bethe=args.source=="electron",
+                                    monlib=monlib, source=args.source)
+     else:
+         fc_asu = model.calc_fc_direct(st, args.resolution, source=args.source,
+                                       mott_bethe=args.source=="electron", monlib=monlib)
+
+     hkldata = hkl.hkldata_from_asu_data(fc_asu, "FC")
+     if args.as_intensity:
+         hkldata.df["IC"] = numpy.abs(hkldata.df.FC)**2
+         labout = ["IC"]
+         if args.add_dummy_sigma:
+             hkldata.df["SIGIC"] = 1.
+             labout.append("SIGIC")
+     else:
+         labout = ["FC"]
+         if args.add_dummy_sigma:
+             hkldata.df["SIGFC"] = 1.
+             labout.append("SIGFC")
+
+     hkldata.write_mtz(args.output_prefix+".mtz", labout, types=dict(IC="J", SIGIC="Q", SIGFC="Q"))
+ # fcalc()
+
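Because fcalc only reads attributes from an argparse-style namespace, it can in principle also be driven from Python. A minimal sketch, assuming the attribute names referenced inside the function above; the file name and option values here are illustrative, and the real defaults come from add_arguments, defined elsewhere in this module:

from types import SimpleNamespace
from servalcat.utils import commands

# every attribute read by fcalc() must be present; values are illustrative only
args = SimpleNamespace(
    model="model.pdb", resolution=2.0, source="electron", method="fft",
    cell=None, auto_box_with_padding=5.0, ligand=None, monlib=None,
    keep_charges=False, no_expand_ncs=False, cutoff=1e-5, rate=1.5,
    as_intensity=False, add_dummy_sigma=False, output_prefix=None)
commands.fcalc(args)
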
+ def nemap(args):
+     # Make a weighted map from two half maps: either global FSC weighting per resolution bin
+     # (via spa.fofc) or local Fourier-correlation weighting with a hard-sphere kernel.
+     from servalcat.spa import fofc
+
+     if (args.trim or args.trim_mtz) and args.mask is None:
+         raise SystemExit("\nError: You need to give --mask as you requested --trim or --trim_mtz.\n")
+
+     if args.mask:
+         mask = fileio.read_ccp4_map(args.mask)[0]
+     else:
+         mask = None
+
+     halfmaps = fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
+     if args.resolution is None:
+         args.resolution = maps.nyquist_resolution(halfmaps[0][0])
+         logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
+
+     d_min = args.resolution
+     if args.local_fourier_weighting_with > 0:
+         d_min = 1 / (args.local_fourier_weighting_with + 1 / d_min)
+         logger.writeln("adjusting d_min= {:.2f} for local correlation".format(d_min))
+     hkldata = maps.mask_and_fft_maps(halfmaps, d_min, mask)
+
+     if args.local_fourier_weighting_with > 0:
+         asu1 = hkldata.as_asu_data("F_map1")
+         asu2 = hkldata.as_asu_data("F_map2")
+         size = asu1.get_size_for_hkl(sample_rate=3)
+         logger.writeln("using grid {}".format(size))
+         gr1 = asu1.get_f_phi_on_grid(size)
+         gr2 = asu2.get_f_phi_on_grid(size)
+         kernel = ext.hard_sphere_kernel_recgrid(size, asu1.unit_cell, args.local_fourier_weighting_with)
+         cc = maps.local_cc(gr1, gr2, kernel.array.real, method="simple")
+         cc.array[cc.array < 0] = 0 # negative cc cannot be used anyway
+         cc.array[:] = 2 * cc.array.real / (1 + cc.array.real) # to full map cc
+         hkldata.df["cc"] = numpy.real(cc.get_value_by_hkl(hkldata.miller_array()))
+         grf = type(gr1)((gr1.array + gr2.array) / 2, gr1.unit_cell, gr1.spacegroup)
+         var_f = maps.local_var(grf, kernel.array.real, method="simple")
+         hkldata.df["var_f"] = numpy.real(var_f.get_value_by_hkl(hkldata.miller_array()))
+         if args.B is not None:
+             k2_l = numpy.exp(-args.B / hkldata.d_spacings()**2 / 2)
+             hkldata.df.cc = k2_l * hkldata.df.cc / (1 + (k2_l - 1) * hkldata.df.cc)
+         hkldata.df["FWT"] = hkldata.df.FP * numpy.sqrt(hkldata.df.cc / hkldata.df.var_f)
+         hkldata.df["kernel"] = numpy.real(kernel.get_value_by_hkl(hkldata.miller_array()))
+         hkldata.write_mtz(args.output_prefix+"_cc.mtz", ["cc", "kernel"])
+         hkldata = hkldata.copy(d_min=args.resolution)
+         map_labs = ["FWT"]
+     else:
+         hkldata.setup_relion_binning()
+         maps.calc_noise_var_from_halfmaps(hkldata)
+         map_labs = fofc.calc_maps(hkldata, B=args.B, has_halfmaps=True, half1_only=args.half1_only,
+                                   no_fsc_weights=args.no_fsc_weights, sharpening_b=args.sharpening_b)
+     fofc.write_files(hkldata, map_labs, grid_start=halfmaps[0][1], stats_str=None,
+                      mask=mask, output_prefix=args.output_prefix,
+                      trim_map=args.trim, trim_mtz=args.trim_mtz)
+ # nemap()
+
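In the local-weighting branch above, the half-map correlation is first mapped to a full-map correlation and, if a B value is given, attenuated before the amplitudes are weighted. A sketch of those two steps on plain arrays, using only the standard half-to-full relation CCfull = 2*CChalf/(1 + CChalf) and the factors that appear in the code; the input values are made up:

import numpy

cc_half = numpy.array([0.2, 0.5, 0.9])   # local half-map CC (illustrative)
d = numpy.array([8.0, 4.0, 2.5])         # resolution of each reflection in Angstrom
B = 40.0

cc_full = 2 * cc_half / (1 + cc_half)            # half-map CC -> full-map CC
k2 = numpy.exp(-B / d**2 / 2)                    # same attenuation factor as in nemap()
cc_b = k2 * cc_full / (1 + (k2 - 1) * cc_full)   # CC after applying B, as in the code
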
+ def blur(args):
+     if args.output_prefix is None:
+         args.output_prefix = fileio.splitext(os.path.basename(args.hklin))[0]
+
+     if fileio.is_mmhkl_file(args.hklin):
+         mtz = fileio.read_mmhkl(args.hklin)
+         hkl.blur_mtz(mtz, args.B)
+         suffix = ("_blur" if args.B > 0 else "_sharpen") + "_{:.2f}.mtz".format(abs(args.B))
+         mtz.write_to_file(args.output_prefix+suffix)
+         logger.writeln("Written: {}".format(args.output_prefix+suffix))
+     else:
+         raise SystemExit("ERROR: Unsupported file type: {}".format(args.hklin))
+ # blur()
+
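The suffix logic above treats a positive B as blurring and a negative B as sharpening, which matches the usual isotropic Debye-Waller convention. A small sketch of the textbook amplitude scale factor; this illustrates the convention only and is not a claim about exactly which columns hkl.blur_mtz modifies:

import numpy

def amplitude_blur_factor(B, d):
    # standard isotropic Debye-Waller factor for amplitudes at resolution d (Angstrom);
    # B > 0 attenuates (blurs), B < 0 amplifies (sharpens); intensities use twice the exponent
    return numpy.exp(-B / (4 * d**2))

amplitude_blur_factor(20.0, numpy.array([4.0, 3.0, 2.0]))
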
+ def mask_from_model(args):
+     st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
+     if args.selection:
+         gemmi.Selection(args.selection).remove_not_selected(st)
+     gr, grid_start, _ = fileio.read_ccp4_map(args.map)
+     mask = maps.mask_from_model(st, args.radius, soft_edge=args.soft_edge, grid=gr)
+     maps.write_ccp4_map(args.output, mask, grid_start=grid_start)
+ # mask_from_model()
+
+ def applymask(args):
+     if args.output_prefix is None:
+         args.output_prefix = fileio.splitext(os.path.basename(args.map))[0] + "_masked"
+
+     grid, grid_start, _ = fileio.read_ccp4_map(args.map)
+     mask = fileio.read_ccp4_map(args.mask)[0]
+     logger.writeln("Applying mask")
+     logger.writeln(" mask min: {:.3f} max: {:.3f}".format(numpy.min(mask), numpy.max(mask)))
+     grid.array[:] *= mask.array
+
+     if args.normalize:
+         masked = grid.array[mask.array>args.mask_cutoff]
+         masked_mean = numpy.average(masked)
+         masked_std = numpy.std(masked)
+         logger.writeln("Normalizing map values within mask")
+         logger.writeln(" masked volume: {} mean: {:.3e} sd: {:.3e}".format(len(masked), masked_mean, masked_std))
+         grid.array[:] = (grid.array - masked_mean) / masked_std
+
+     maps.write_ccp4_map(args.output_prefix+".mrc", grid,
+                         grid_start=grid_start,
+                         mask_for_extent=mask.array if args.trim else None,
+                         mask_threshold=args.mask_cutoff)
+ # applymask()
+
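The optional normalization above rescales the whole masked map using the mean and standard deviation of the voxels inside the mask only. A compact numpy equivalent on toy arrays, not the servalcat API:

import numpy

rng = numpy.random.default_rng(0)
grid = rng.normal(size=(8, 8, 8)).astype(numpy.float32)
mask = numpy.zeros_like(grid); mask[2:6, 2:6, 2:6] = 1.0
cutoff = 0.5

grid *= mask                                  # apply the mask, as applymask() does first
inside = grid[mask > cutoff]                  # statistics from voxels inside the mask only
grid = (grid - inside.mean()) / inside.std()  # ...applied to the whole grid
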
+ def map2mtz(args):
+     if args.output is None:
+         args.output = fileio.splitext(os.path.basename(args.map))[0] + "_fft.mtz"
+     grid, grid_start, grid_shape = fileio.read_ccp4_map(args.map)
+     if args.resolution is None:
+         args.resolution = maps.nyquist_resolution(grid)
+         logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
+
+     if grid_start != (0,0,0) or grid.shape != tuple(grid_shape):
+         # If the map stores only a subregion of the whole grid, the unit cell needs to be re-defined.
+         if grid.shape != tuple(grid_shape):
+             new_abc = [grid.unit_cell.parameters[i] * grid_shape[i] / grid.shape[i] for i in range(3)]
+             cell = gemmi.UnitCell(*new_abc, *grid.unit_cell.parameters[3:])
+             logger.writeln("Changing unit cell to {}".format(cell.parameters))
+         else:
+             cell = grid.unit_cell
+         grid = gemmi.FloatGrid(grid.get_subarray(grid_start, grid_shape),
+                                cell, grid.spacegroup)
+
+     f_grid = gemmi.transform_map_to_f_phi(grid)
+     asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
+     hkldata = hkl.hkldata_from_asu_data(asudata, "F")
+     if grid_start != (0,0,0):
+         shifts = grid.get_position(*grid_start)
+         hkldata.translate("F", shifts)
+         logger.writeln("Applying phase shift with translation {}".format(shifts.tolist()))
+     hkldata.write_mtz(args.output, ["F"])
+ # map2mtz()
+
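When the stored map does not start at the grid origin, the Fourier coefficients are phase-shifted to account for the translation (hkldata.translate above). The underlying relation is the standard Fourier shift theorem; a sketch with made-up indices and a shift given in fractional coordinates (the sign of the exponent depends on the direction convention used):

import numpy

hkl_idx = numpy.array([[1, 0, 0], [0, 2, 1]])   # Miller indices (illustrative)
f = numpy.array([100 + 0j, 50 + 25j])           # coefficients before the shift
t_frac = numpy.array([0.1, 0.0, 0.25])          # origin translation in fractional coordinates

f_shifted = f * numpy.exp(2j * numpy.pi * hkl_idx @ t_frac)   # shift theorem: F'(h) = F(h) exp(2 pi i h.t)
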
+ def sm2mm(args):
+     if args.output_prefix is None:
+         args.output_prefix = fileio.splitext(args.files[0])[0]
+     st, mtz = fileio.read_small_molecule_files(args.files)
+     if st is not None:
+         fileio.write_model(st, prefix=args.output_prefix, pdb=True, cif=True)
+     if mtz is not None:
+         mtz_out = args.output_prefix + ".mtz"
+         logger.writeln("Writing MTZ file: {}".format(mtz_out))
+         mtz.write_to_file(mtz_out)
+ # sm2mm()
+
+ def seq(args):
+     wrap_width = 100
+     seqs = []
+     if args.seq:
+         args.seq = sum(args.seq, [])
+         for sf in args.seq:
+             seqs.extend(fileio.read_sequence_file(sf))
+
+     sc = gemmi.AlignmentScoring()
+     sc.match, sc.mismatch, sc.gapo, sc.gape, sc.good_gapo, sc.bad_gapo = args.scoring
+
+     st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
+     model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
+     for chain in st[0]:
+         p = chain.get_polymer()
+         if not p: continue
+         p_type = p.check_polymer_type()
+         if p_type in (gemmi.PolymerType.SaccharideD, gemmi.PolymerType.SaccharideL): continue
+         p_seq = gemmi.one_letter_code(p.extract_sequence())
+         results = []
+         for name, seq in seqs:
+             # what if DnaRnaHybrid?
+             kind = {gemmi.PolymerType.Dna: gemmi.ResidueKind.DNA,
+                     gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}.get(p_type, gemmi.ResidueKind.AA)
+             s = [gemmi.expand_one_letter(x, kind) for x in seq]
+             if None in s: continue
+             #als = [gemmi.align_sequence_to_polymer(s, p, p_type, gemmi.AlignmentScoring(x)) for x in ("s", "p")]
+             #results.append([name, max(als, key=lambda x: x.match_count), seq])
+             results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type, sc), seq])
+
+         if results:
+             logger.writeln("Chain: {}".format(chain.name))
+             logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
+             name, al, s1 = max(results, key=lambda x: (x[1].match_count, x[1].score))
+             logger.writeln(" match: {}".format(name))
+             logger.writeln(" aligned: {}".format(al.match_count))
+             logger.writeln(" score: {}".format(al.score))
+             p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
+             unkseq = [x.start() for x in re.finditer(r"\-", p1)]
+             mismatches = [x.start() for x in re.finditer(r"\.", al.match_string)]
+             if mismatches or unkseq:
+                 idxes = {x.start(): i for i, x in enumerate(re.finditer("[^-]", p2))}
+                 seqnums = [str(x.seqid) for x in p]
+                 if mismatches:
+                     logger.write(" mismatches: ")
+                     logger.writeln(", ".join("{}({}>{})".format(seqnums[idxes[i]], p1[i], p2[i]) for i in mismatches))
+                 if unkseq:
+                     logger.write(" unknown sequence: ")
+                     logger.writeln(", ".join("{}({})".format(seqnums[idxes[i]], p2[i]) for i in unkseq))
+
+             logger.writeln("")
+             for i in range(0, len(p1), wrap_width):
+                 logger.writeln(" seq. {}".format(p1[i:i+wrap_width]))
+                 logger.writeln(" {}".format(al.match_string[i:i+wrap_width]))
+                 logger.writeln(" model {}\n".format(p2[i:i+wrap_width]))
+         else:
+             logger.writeln("> Chain: {}".format(chain.name))
+             logger.writeln(gemmi.one_letter_code(p.extract_sequence()))
+             logger.writeln("")
+ # seq()
+
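The alignment above works on three-letter residue names, so one-letter input sequences are expanded first with the same gemmi helpers the function relies on. A quick round trip; the values in the comments are what these helpers are expected to return for standard residues:

import gemmi

three = [gemmi.expand_one_letter(c, gemmi.ResidueKind.AA) for c in "MKV"]  # expected: ['MET', 'LYS', 'VAL']
one = gemmi.one_letter_code(three)                                         # expected: 'MKV'
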
+ def dnarna(args):
+     import scipy.spatial.transform
+     rna_res = {"A":"DA", "G":"DG", "C":"DC", "U":"DT"}
+     dna_res = {"DA":"A", "DG":"G", "DC":"C", "DT":"U"}
+     if args.chains: args.chains = sum(args.chains, [])
+     model_format = fileio.check_model_format(args.model)
+     if not args.output:
+         args.output = fileio.splitext(os.path.basename(args.model))[0] + "_conv" + model_format
+     st = fileio.read_structure(args.model)
+     if st[0].has_hydrogen():
+         logger.writeln("Hydrogen atoms are detected. I cannot take care of them, so I will remove them.")
+         st.remove_hydrogens()
+     for chain in st[0]:
+         if args.chains and chain.name not in args.chains:
+             continue
+         for res in chain:
+             alt = "*" # XXX
+             if res.name in rna_res and args.to_dna:
+                 logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to DNA")
+                 res.name = rna_res[res.name]
+                 res.remove_atom("O2'", alt)
+                 if res.name == "DT":
+                     # build the thymine C7 methyl: 1.5 A from C5 along the ring-exocyclic direction
+                     C4 = res.find_atom("C4", alt)
+                     C5 = res.find_atom("C5", alt)
+                     C6 = res.find_atom("C6", alt)
+                     v1 = C5.pos - C4.pos
+                     v2 = C5.pos - C6.pos
+                     v = v1 + v2
+                     res.add_atom(C5)
+                     res[-1].name = "C7"
+                     res[-1].pos = C5.pos + v / v.length() * 1.5
+             elif res.name in dna_res and args.to_rna:
+                 logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to RNA")
+                 res.name = dna_res[res.name]
+                 # build O2': rotate the C2'->C1' bond direction by -120 deg about the C3'->C2' axis
+                 # and place the new atom 1.411 A from C2'
+                 C1p = numpy.array(res.find_atom("C1'", alt).pos.tolist())
+                 C2p = numpy.array(res.find_atom("C2'", alt).pos.tolist())
+                 C3p = numpy.array(res.find_atom("C3'", alt).pos.tolist())
+                 rotvec = C2p - C3p
+                 rotvec /= numpy.linalg.norm(rotvec)
+                 r = scipy.spatial.transform.Rotation.from_rotvec(-rotvec * 120,
+                                                                  degrees=True)
+                 rotated = r.apply(C1p - C2p)
+                 rotated *= 1.411 / numpy.linalg.norm(rotated)
+                 res.add_atom(res.find_atom("O3'", alt))
+                 res[-1].name = "O2'"
+                 res[-1].pos.fromlist(C2p + rotated)
+                 if res.name == "U":
+                     res.remove_atom("C7", alt)
+     fileio.write_model(st, file_name=args.output)
+ # dnarna()
+
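The O2' construction above is just an axis-angle rotation followed by rescaling to the target bond length. The same scipy call in isolation, with made-up coordinates:

import numpy
from scipy.spatial.transform import Rotation

C1p = numpy.array([1.0, 0.0, 0.0])
C2p = numpy.array([0.0, 0.0, 0.0])
C3p = numpy.array([0.0, 1.5, 0.0])

axis = (C2p - C3p) / numpy.linalg.norm(C2p - C3p)      # unit axis along C3'->C2'
rot = Rotation.from_rotvec(-axis * 120, degrees=True)  # -120 degree rotation, as in dnarna()
v = rot.apply(C1p - C2p)
o2p = C2p + 1.411 * v / numpy.linalg.norm(v)           # new O2' position 1.411 A from C2'
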
+ def show(args):
+     for filename in args.files:
+         ext = fileio.splitext(filename)[1]
+         if ext in (".mrc", ".ccp4", ".map"):
+             fileio.read_ccp4_map(filename)
+             logger.writeln("\n")
+ # show()
+
+ def json2csv(args):
+     if not args.output_prefix:
+         args.output_prefix = fileio.splitext(os.path.basename(args.json))[0]
+
+     df = pandas.read_json(args.json)
+     df.to_csv(args.output_prefix+".csv", index=False)
+     logger.writeln("Output: {}".format(args.output_prefix+".csv"))
+ # json2csv()
+
+ def main(args):
+     comms = dict(show=show,
+                  json2csv=json2csv,
+                  symmodel=symmodel,
+                  helical_biomt=helical_biomt,
+                  expand=symexpand,
+                  h_add=h_add,
+                  add_op3=add_op3,
+                  map_peaks=map_peaks,
+                  h_density=h_density_analysis,
+                  fix_link=fix_link,
+                  merge_models=merge_models,
+                  merge_dicts=merge_dicts,
+                  geom=geometry,
+                  conf=compare_conf,
+                  adp=adp_stats,
+                  power=show_power,
+                  fcalc=fcalc,
+                  nemap=nemap,
+                  blur=blur,
+                  mask_from_model=mask_from_model,
+                  applymask=applymask,
+                  map2mtz=map2mtz,
+                  sm2mm=sm2mm,
+                  seq=seq,
+                  dnarna=dnarna)
+
+     com = args.subcommand
+     f = comms.get(com)
+     if f:
+         return f(args)
+     else:
+         raise SystemExit("Unknown subcommand: {}".format(com))
+ # main()
+
+ if __name__ == "__main__":
+     import sys
+     args = parse_args(sys.argv[1:])
+     main(args)