servalcat 0.4.60__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of servalcat might be problematic.

Files changed (44)
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-312-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
--- /dev/null
+++ b/servalcat/utils/commands.py
@@ -0,0 +1,1277 @@
+"""
+Author: "Keitaro Yamashita, Garib N. Murshudov"
+MRC Laboratory of Molecular Biology
+
+This software is released under the
+Mozilla Public License, version 2.0; see LICENSE.
+"""
+from __future__ import absolute_import, division, print_function, generators
+from servalcat.utils import logger
+from servalcat.utils import fileio
+from servalcat.utils import symmetry
+from servalcat.utils import model
+from servalcat.utils import hkl
+from servalcat.utils import restraints
+from servalcat.utils import maps
+from servalcat.refine.refine import Geom
+from servalcat import ext
+import os
+import gemmi
+import numpy
+import scipy.spatial
+import pandas
+import json
+import re
+import argparse
+
+def add_arguments(p):
+    subparsers = p.add_subparsers(dest="subcommand")
+
+    # show
+    parser = subparsers.add_parser("show", description = 'Show file info supported by the program')
+    parser.add_argument('files', nargs='+')
+
+    # json2csv
+    parser = subparsers.add_parser("json2csv", description = 'Convert json to csv for plotting')
+    parser.add_argument('json')
+    parser.add_argument('-o', '--output_prefix')
+
+    # symmodel
+    parser = subparsers.add_parser("symmodel", description="Add symmetry annotation to model")
+    parser.add_argument('--model', required=True)
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--map', help="Take box size from the map")
+    group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
+                       help="Box size")
+    sym_group = parser.add_argument_group("symmetry")
+    symmetry.add_symmetry_args(sym_group, require_pg=True)
+    parser.add_argument('--contacting_only', action="store_true", help="Filter out non-contacting NCS")
+    parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
+    parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
+                        help="How to decide new chain IDs in expanded model (default: short); "
+                             "dup: use original chain IDs (with different segment IDs), "
+                             "short: use unique new IDs, "
+                             "number: add number to original chain ID")
+    parser.add_argument('--biomt', action="store_true", help="Add BIOMT also")
+    parser.add_argument('-o', '--output_prfix')
+    parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
+    parser.add_argument('--cif', action="store_true", help="Write a cif file")
+
+    # helical_biomt
+    parser = subparsers.add_parser("helical_biomt", description="generate BIOMT of helical reconstruction for PDB deposition")
+    parser.add_argument('--model', required=True)
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--map', help="Take box size from the map")
+    group.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
+                       help="Box size")
+    sym_group = parser.add_argument_group("symmetry")
+    symmetry.add_symmetry_args(sym_group, require_pg=True)
+    parser.add_argument('--start', type=int)
+    parser.add_argument('--end', type=int)
+    parser.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
+                        help="How to decide new chain IDs in expanded model (default: short); "
+                             "dup: use original chain IDs (with different segment IDs), "
+                             "short: use unique new IDs, "
+                             "number: add number to original chain ID")
+    parser.add_argument('-o', '--output_prfix')
+
+    # expand
+    parser = subparsers.add_parser("expand", description="Expand symmetry")
+    parser.add_argument('--model', required=True)
+    parser.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
+                       help="How to decide new chain IDs in expanded model (default: short); "
+                            "dup: use original chain IDs (with different segment IDs), "
+                            "short: use unique new IDs, "
+                            "number: add number to original chain ID")
+    group.add_argument("--split", action="store_true", help="split file for each operator")
+    parser.add_argument('-o', '--output_prfix')
+    parser.add_argument('--pdb', action="store_true", help="Write a pdb file")
+    parser.add_argument('--cif', action="store_true", help="Write a cif file")
+
+    # h_add
+    parser = subparsers.add_parser("h_add", description = 'Add hydrogen in riding position')
+    parser.add_argument('model')
+    parser.add_argument('--ligand', nargs="*", action="append")
+    parser.add_argument("--monlib",
+                        help="Monomer library path. Default: $CLIBD_MON")
+    parser.add_argument('-o','--output')
+    parser.add_argument("--pos", choices=["elec", "nucl"], default="elec")
+
+    # map_peaks
+    parser = subparsers.add_parser("map_peaks", description = 'List density peaks and write a coot script')
+    parser.add_argument('--model', required=True, help="Model")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--map', help="Map file")
+    group.add_argument('--mtz', help="MTZ for map file")
+    parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
+    parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
+    group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
+    parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
+                        help="default: %(default)s")
+    parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
+    parser.add_argument('--max_volume', type=float, help="maximum blob volume (default: none)")
+    parser.add_argument('-o','--output_prefix', default="peaks")
+
+    # h_density
+    parser = subparsers.add_parser("h_density", description = 'Hydrogen density analysis')
+    parser.add_argument('--model', required=True, help="Model with hydrogen atoms")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--map', help="Fo-Fc map file")
+    group.add_argument('--mtz', help="MTZ for Fo-Fc map file")
+    parser.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
+    parser.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')
+    #parser.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
+    group.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")
+    parser.add_argument('--max_dist', type=float, default=0.5, help="max distance between peak and hydrogen position in the model (default: %(default).1f)")
+    parser.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
+                        help="default: %(default)s")
+    parser.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")
+    parser.add_argument('--max_volume', type=float, default=3, help="maximum blob volume (default: %(default).1f)")
+    parser.add_argument('-o','--output_prefix')
+
+    # fix_link
+    parser = subparsers.add_parser("fix_link", description = 'Fix LINKR/_struct_conn records in the model')
+    parser.add_argument('model')
+    parser.add_argument('--ligand', nargs="*", action="append")
+    parser.add_argument("--monlib",
+                        help="Monomer library path. Default: $CLIBD_MON")
+    parser.add_argument('--bond_margin', type=float, default=1.3, help='(default: %(default).1f)')
+    parser.add_argument('-o','--output', help="Default: input_fixlink.{pdb|mmcif}")
+
+    # merge_models
+    parser = subparsers.add_parser("merge_models", description = 'Merge multiple model files')
+    parser.add_argument('models', nargs="+")
+    parser.add_argument('-o','--output', required=True)
+
+    # merge_dicts
+    parser = subparsers.add_parser("merge_dicts", description = 'Merge restraint dictionary cif files')
+    parser.add_argument('cifs', nargs="+")
+    parser.add_argument('-o','--output', default="merged.cif", help="Output cif file (default: %(default)s)")
+
+    # geom
+    parser = subparsers.add_parser("geom", description = 'Calculate geometry and show outliers')
+    parser.add_argument('model')
+    parser.add_argument('--ligand', nargs="*", action="append")
+    parser.add_argument("--monlib",
+                        help="Monomer library path. Default: $CLIBD_MON")
+    parser.add_argument('--keywords', nargs='+', action="append",
+                        help="refmac keyword(s)")
+    parser.add_argument('--keyword_file', nargs='+', action="append",
+                        help="refmac keyword file(s)")
+    parser.add_argument('--sigma', type=float, default=5,
+                        help="sigma cutoff to print outliers (default: %(default).1f)")
+    parser.add_argument('--per_atom_score_as_b', action='store_true',
+                        help="write model file with per-atom score as B values")
+    parser.add_argument("--check_skew", action='store_true', help="(experimental) check bond skew to test magnification")
+    parser.add_argument("--ignore_h", action='store_true', help="ignore hydrogen")
+    parser.add_argument("--selection", help="evaluate part of the model")
+    parser.add_argument('-o', '--output_prefix',
+                        help="default: taken from input file")
+
+    # adp
+    parser = subparsers.add_parser("adp", description = 'ADP analysis')
+    parser.add_argument('model')
+    parser.add_argument('-o', '--output_prefix',
+                        help="default: taken from input file")
+
+    # power
+    parser = subparsers.add_parser("power", description = 'Show power spectrum')
+    parser.add_argument("--map", nargs="*", action="append")
+    parser.add_argument("--halfmaps", nargs="*", action="append")
+    parser.add_argument('--mask', help='Mask file')
+    parser.add_argument('-d', '--resolution', type=float)
+    parser.add_argument('-o', '--output_prefix', default="power")
+
+    # fcalc
+    parser = subparsers.add_parser("fcalc", description = 'Structure factor from model')
+    parser.add_argument('--model', required=True)
+    parser.add_argument("--no_expand_ncs", action='store_true', help="Do not expand strict NCS in MTRIX or _struct_ncs_oper")
+    parser.add_argument("--method", choices=["fft", "direct"], default="fft")
+    parser.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
+    parser.add_argument('--ligand', nargs="*", action="append")
+    parser.add_argument("--monlib",
+                        help="Monomer library path. Default: $CLIBD_MON")
+    parser.add_argument('--cell', type=float, nargs=6, metavar=("a", "b", "c", "alpha", "beta", "gamma"),
+                        help="Override unit cell")
+    parser.add_argument('--auto_box_with_padding', type=float, help="Determine box size from model with specified padding")
+    parser.add_argument('--cutoff', type=float, default=1e-5)
+    parser.add_argument('--rate', type=float, default=1.5)
+    parser.add_argument('--add_dummy_sigma', action='store_true', help="write dummy SIGF")
+    parser.add_argument('--as_intensity', action='store_true', help="if you want |F|^2")
+    parser.add_argument('--keep_charges', action='store_true',
+                        help="Use scattering factor for charged atoms. Use it with care.")
+    parser.add_argument('-d', '--resolution', type=float, required=True)
+    parser.add_argument('-o', '--output_prefix')
+
+    # nemap
+    parser = subparsers.add_parser("nemap", description = 'Normalized expected map calculation from half maps')
+    parser.add_argument("--halfmaps", required=True, nargs=2)
+    parser.add_argument('--pixel_size', type=float, help='Override pixel size (A)')
+    parser.add_argument("--half1_only", action='store_true', help="Only use half 1 for map calculation (use half 2 only for noise estimation)")
+    parser.add_argument('-B', type=float, help="local B value")
+    parser.add_argument("--no_fsc_weights", action='store_true',
+                        help="Just for debugging purpose: turn off FSC-based weighting")
+    parser.add_argument("--sharpening_b", type=float,
+                        help="Use B value (negative value for sharpening) instead of standard deviation of the signal")
+    parser.add_argument("-d", '--resolution', type=float)
+    parser.add_argument('-m', '--mask', help="mask file")
+    parser.add_argument('-o', '--output_prefix', default='nemap')
+    parser.add_argument("--trim", action='store_true', help="Write trimmed maps")
+    parser.add_argument("--trim_mtz", action='store_true', help="Write trimmed mtz")
+    parser.add_argument("--local_fourier_weighting_with", type=float, default=0,
+                        help="Experimental: give kernel size in A^-1 unit to use local Fourier weighting instead of resolution-dependent weights")
+
+    # blur
+    parser = subparsers.add_parser("blur", description = 'Blur data by specified B value')
+    parser.add_argument('--hklin', required=True, help="input MTZ file")
+    parser.add_argument('-B', type=float, required=True, help="B value for blurring (negative value for sharpening)")
+    parser.add_argument('-o', '--output_prefix')
+
+    # mask_from_model
+    parser = subparsers.add_parser("mask_from_model", description = 'Make a mask from model')
+    parser.add_argument("--map", required=True, help="For unit cell and pixel size reference")
+    parser.add_argument("--model", required=True)
+    parser.add_argument("--selection")
+    parser.add_argument('--radius', type=float, required=True,
+                        help='Radius in angstrom')
+    parser.add_argument('--soft_edge', type=float, default=0,
+                        help='Soft edge (default: %(default).1f)')
+    parser.add_argument('-o', '--output', default="mask_from_model.mrc")
+
+    # applymask (and normalize within mask)
+    parser = subparsers.add_parser("applymask", description = 'Apply mask and optionally normalize map within mask')
+    parser.add_argument("--map", required=True)
+    parser.add_argument('--mask', required=True, help='Mask file')
+    parser.add_argument("--normalize", action='store_true',
+                        help="Normalize map values using mean and sd within the mask")
+    parser.add_argument("--trim", action='store_true', help="Write trimmed map")
+    parser.add_argument('--mask_cutoff', type=float, default=0.5,
+                        help="cutoff value for normalization and trimming (default: %(default)s)")
+    parser.add_argument('-o', '--output_prefix')
+
+    # map2mtz
+    parser = subparsers.add_parser("map2mtz", description = 'FFT map and write an mtz')
+    parser.add_argument("--map", required=True)
+    parser.add_argument("-d", '--resolution', type=float)
+    parser.add_argument('-o', '--output')
+
+    # sm2mm
+    parser = subparsers.add_parser("sm2mm", description = 'Small molecule files (cif/hkl/res/ins) to macromolecules (pdb/mmcif/mtz)')
+    parser.add_argument('files', nargs='+', help='Cif/ins/res/hkl files')
+    parser.add_argument('-o', '--output_prefix')
+
+    # seq
+    parser = subparsers.add_parser("seq", description = 'Print/align model sequence')
+    parser.add_argument("--model", required=True)
+    parser.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
+
+# add_arguments()
+
+def parse_args(arg_list):
+    parser = argparse.ArgumentParser()
+    add_arguments(parser)
+    return parser.parse_args(arg_list)
+# parse_args()
+
+def symmodel(args):
+    if args.chains: args.chains = sum(args.chains, [])
+    model_format = fileio.check_model_format(args.model)
+
+    howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
+                     short=gemmi.HowToNameCopiedChain.Short,
+                     number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
+
+    if (args.twist, args.rise).count(None) == 1:
+        raise SystemExit("ERROR: give both helical parameters --twist and --rise")
+
+    is_helical = args.twist is not None
+    st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
+    st.spacegroup_hm = "P 1"
+    map_and_start = None
+    if args.map:
+        logger.writeln("Reading cell from map")
+        map_and_start = fileio.read_ccp4_map(args.map)
+        st.cell = map_and_start[0].unit_cell
+    elif args.cell:
+        st.cell = gemmi.UnitCell(*args.cell)
+    elif not st.cell.is_crystal():
+        raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
+
+    if args.chains:
+        logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
+        chains = set(args.chains)
+        for m in st:
+            to_del = [c.name for c in m if c.name not in chains]
+            for c in to_del: m.remove_chain(c)
+        if st[0].count_atom_sites() == 0:
+            raise SystemExit("ERROR: no atoms left. Check --chains option.")
+
+    all_chains = [c.name for c in st[0] if c.name not in st[0]]
+
+    symmetry.update_ncs_from_args(args, st, map_and_start=map_and_start, filter_contacting=args.contacting_only)
+
+    if args.biomt:
+        st.assemblies.clear()
+        st.raw_remarks = []
+        a = model.prepare_assembly("1", all_chains, st.ncs, is_helical=is_helical)
+        st.assemblies.append(a)
+
+    if not args.output_prfix:
+        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_asu"
+
+    if args.pdb or args.cif:
+        fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif, cif_ref=cif_ref)
+    else:
+        fileio.write_model(st, file_name=args.output_prfix+model_format, cif_ref=cif_ref)
+
+    # Sym expand
+    model.expand_ncs(st, howtoname=howtoname)
+    st.assemblies.clear()
+    args.output_prfix += "_expanded"
+    if args.pdb or args.cif:
+        fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif)
+    else:
+        fileio.write_model(st, file_name=args.output_prfix+model_format)
+# symmodel()
+
+def helical_biomt(args):
+    if (args.twist, args.rise).count(None) > 0:
+        raise SystemExit("ERROR: give helical parameters --twist and --rise")
+
+    model_format = fileio.check_model_format(args.model)
+    howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
+                     short=gemmi.HowToNameCopiedChain.Short,
+                     number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
+
+    st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
+    st.spacegroup_hm = "P 1"
+    map_and_start = None
+    if args.map:
+        logger.writeln("Reading cell from map")
+        map_and_start = fileio.read_ccp4_map(args.map)
+        st.cell = map_and_start[0].unit_cell
+    elif args.cell:
+        st.cell = gemmi.UnitCell(*args.cell)
+    elif not st.cell.is_crystal():
+        raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")
+
+    all_chains = [c.name for c in st[0] if c.name not in st[0]]
+
+    ncsops = symmetry.ncsops_from_args(args, st.cell, map_and_start=map_and_start, st=st,
+                                       helical_min_n=args.start, helical_max_n=args.end)
+    #ncsops = [x for x in ncsops if not x.tr.is_identity()] # remove identity
+
+    logger.writeln("")
+    logger.writeln("-------------------------------------------------------------")
+    logger.writeln("You may need to write following matrices in OneDep interface:")
+    for idx, op in enumerate(ncsops):
+        logger.writeln("")
+        logger.writeln("operator {}".format(idx+1))
+        mat = op.tr.mat.tolist()
+        vec = op.tr.vec.tolist()
+        for i in range(3):
+            mstr = ["{:10.6f}".format(mat[i][j]) for j in range(3)]
+            logger.writeln("{} {:14.5f}".format(" ".join(mstr), vec[i]))
+    logger.writeln("-------------------------------------------------------------")
+    logger.writeln("")
+
+    # BIOMT
+    st.assemblies.clear()
+    st.raw_remarks = []
+    a = model.prepare_assembly("1", all_chains, ncsops, is_helical=True)
+    st.assemblies.append(a)
+
+    if not args.output_prfix:
+        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_biomt"
+
+    fileio.write_model(st, args.output_prfix, pdb=(model_format == ".pdb"), cif=True, cif_ref=cif_ref)
+    logger.writeln("")
+    logger.writeln("These {}.* files may be used for deposition (once OneDep implemented reading BIOMT from file..)".format(args.output_prfix))
+    logger.writeln("")
+    # BIOMT expand
+    st.transform_to_assembly("1", howtoname)
+    args.output_prfix += "_expanded"
+    fileio.write_model(st, file_name=args.output_prfix+model_format)
+    logger.writeln(" note that this expanded model file is just for visual inspection, *not* for deposition!")
+# helical_biomt()
+
+def symexpand(args):
+    if args.chains: args.chains = sum(args.chains, [])
+    model_format = fileio.check_model_format(args.model)
+    if not args.split:
+        howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
+                         short=gemmi.HowToNameCopiedChain.Short,
+                         number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]
+
+    st = fileio.read_structure(args.model)
+
+    if args.chains:
+        logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
+        chains = set(args.chains)
+        for m in st:
+            to_del = [c.name for c in m if c.name not in chains]
+            for c in to_del: m.remove_chain(c)
+
+    all_chains = [c.name for c in st[0] if c.name not in st[0]]
+
+    if not args.output_prfix:
+        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0]
+
+    if len(st.ncs) > 0:
+        symmetry.show_ncs_operators_axis_angle(st.ncs)
+        non_given = [op for op in st.ncs if not op.given]
+        if len(non_given) > 0:
+            if args.split:
+                for i, op in enumerate(st.ncs):
+                    if op.given: continue
+                    st_tmp = st.clone()
+                    for m in st_tmp: m.transform_pos_and_adp(op.tr)
+                    output_prfix = args.output_prfix + "_ncs_{:02d}".format(i+1)
+                    if args.pdb or args.cif:
+                        fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
+                    else:
+                        fileio.write_model(st_tmp, file_name=output_prfix+model_format)
+            else:
+                st_tmp = st.clone()
+                model.expand_ncs(st_tmp, howtoname=howtoname)
+                output_prfix = args.output_prfix + "_ncs_expanded"
+                if args.pdb or args.cif:
+                    fileio.write_model(st_tmp, output_prfix, pdb=args.pdb, cif=args.cif)
+                else:
+                    fileio.write_model(st_tmp, file_name=output_prfix+model_format)
+        else:
+            logger.writeln("All operators are already expanded (marked as given). Exiting.")
+    else:
+        logger.writeln("No NCS operators found. Exiting.")
+
+    if len(st.assemblies) > 0: # should we support BIOMT?
+        pass
+# symexpand()
+
+def h_add(args):
+    st = fileio.read_structure(args.model)
+    model_format = fileio.check_model_format(args.model)
+
+    if not args.output:
+        tmp = fileio.splitext(os.path.basename(args.model))[0]
+        args.output = tmp + "_h" + model_format
+        logger.writeln("Output file: {}".format(args.output))
+
+    args.ligand = sum(args.ligand, []) if args.ligand else []
+    monlib = restraints.load_monomer_library(st,
+                                             monomer_dir=args.monlib,
+                                             cif_files=args.ligand)
+    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
+    try:
+        restraints.add_hydrogens(st, monlib, args.pos)
+    except RuntimeError as e:
+        raise SystemExit("Error: {}".format(e))
+
+    fileio.write_model(st, file_name=args.output)
+# h_add()
+
+def read_map_and_oversample(map_in=None, mtz_in=None, mtz_labs=None, oversample_pixel=None):
+    if mtz_in is not None:
+        mtz = fileio.read_mmhkl(mtz_in)
+        lab_f, lab_phi = mtz_labs.split(",")
+        asu = mtz.get_f_phi(lab_f, lab_phi)
+        if oversample_pixel is not None:
+            d_min = numpy.min(asu.make_d_array())
+            sample_rate = d_min / oversample_pixel
+        else:
+            sample_rate = 3
+        gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
+    elif map_in is not None:
+        gr = fileio.read_ccp4_map(map_in)[0]
+        if oversample_pixel is not None:
+            asu = gemmi.transform_map_to_f_phi(gr).prepare_asu_data()
+            d_min = numpy.min(asu.make_d_array())
+            sample_rate = d_min / oversample_pixel
+            gr = asu.transform_f_phi_to_map(sample_rate=sample_rate)
+    else:
+        raise SystemExit("Invalid input")
+
+    if oversample_pixel is not None:
+        logger.writeln("--oversample_pixel= {} is requested.".format(oversample_pixel))
+        logger.writeln(" recalculated grid:")
+        logger.writeln(" {:4d} {:4d} {:4d}".format(*gr.shape))
+        logger.writeln(" spacings:")
+        logger.writeln(" {:.6f} {:.6f} {:.6f}".format(*gr.spacing))
+        #maps.write_ccp4_map("{}_oversampled.mrc".format(output_prefix), gr)
+
+    return gr
+# read_map_and_oversample()
+
+def map_peaks(args):
+    st = fileio.read_structure(args.model)
+    gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
+                                 oversample_pixel=args.oversample_pixel)
+    gr_sigma = numpy.std(gr)
+    if args.abs_level is not None:
+        cutoff = args.abs_level
+    else:
+        cutoff = args.sigma_level * gr_sigma # assuming mean(gr) = 0
+
+    blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
+                                           min_volume=args.min_volume, min_score=0)
+    blobs.extend(gemmi.find_blobs_by_flood_fill(gr, cutoff, negate=True,
+                                                min_volume=args.min_volume, min_score=0))
+    getpos = dict(peak=lambda x: x.peak_pos,
+                  centroid=lambda x: x.centroid)[args.blob_pos]
+    st_peaks = model.st_from_positions([getpos(b) for b in blobs])
+    st_peaks.cell = st.cell
+    st_peaks.ncs = st.ncs
+    st_peaks.setup_cell_images()
+    logger.writeln("{} peaks detected".format(len(blobs)))
+    #st_peaks.write_pdb("peaks.pdb")
+
+    # Filter symmetry related
+    ns = gemmi.NeighborSearch(st_peaks[0], st_peaks.cell, 5.).populate()
+    cs = gemmi.ContactSearch(1.)
+    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
+    results = cs.find_contacts(ns)
+    del_idxes = set()
+    for r in results:
+        if r.partner1.residue.seqid.num not in del_idxes:
+            del_idxes.add(r.partner2.residue.seqid.num)
+    for i in reversed(sorted(del_idxes)):
+        del st_peaks[0][0][i]
+        del blobs[i]
+    #st_peaks.write_pdb("peaks_asu.pdb")
+    logger.writeln("{} peaks after removing symmetry equivalents".format(len(blobs)))
+
+    # Assign to nearest atom
+    ns = gemmi.NeighborSearch(st[0], st.cell, 10.).populate() # blob is rejected if > 10 A. ok?
+    peaks = []
+    for b in blobs:
+        bpos = getpos(b)
+        map_val = gr.interpolate_value(bpos)
+        if (args.max_volume is not None and b.volume > args.max_volume) or abs(map_val) < cutoff: continue
+        x = ns.find_nearest_atom(bpos)
+        if x is None: # this should not happen
+            logger.writeln("no nearest atom: value={:.2e} volume= {:.2f} pos= {}".format(map_val, b.volume, bpos))
+            continue
+        chain = st[0][x.chain_idx]
+        res = chain[x.residue_idx]
+        atom = res[x.atom_idx]
+        im = st.cell.find_nearest_image(atom.pos, bpos, gemmi.Asu.Any)
+        mpos = st.cell.find_nearest_pbc_position(atom.pos, bpos, im.sym_idx)
+        dist = atom.pos.dist(mpos)
+        peaks.append((map_val, b.volume, mpos, dist, chain, res, atom))
+
+    if len(peaks) == 0:
+        logger.writeln("No peaks found. Change parameter(s).")
+        return
+
+    # Print and write coot script
+    peaks.sort(reverse=True, key=lambda x:(abs(x[0]), x[1]))
+    for_coot = []
+    for_df = []
+    for i, p in enumerate(peaks):
+        map_val, volume, mpos, dist, chain, res, atom = p
+        mpos_str = "({: 7.2f},{: 7.2f},{: 7.2f})".format(mpos.x, mpos.y, mpos.z)
+        atom_name = atom.name + ("." + atom.altloc if atom.altloc != "\0" else "")
+        atom_str = "{}/{}/{}".format(chain.name, res.seqid, atom_name)
+        if args.abs_level is None:
+            map_val /= gr_sigma
+        lab_str = "Peak {:4d} value= {: .2e} volume= {:5.1f} pos= {} closest= {:10s} dist= {:.2f}".format(i+1, map_val, volume, mpos_str, atom_str, dist)
+        for_coot.append((lab_str, (mpos.x, mpos.y, mpos.z)))
+        for_df.append((map_val, volume, mpos.x, mpos.y, mpos.z, chain.name, str(res.seqid), atom_name, dist))
+    df = pandas.DataFrame(for_df, columns=["map_value" if args.abs_level is not None else "sigma_level",
+                                           "volume", "x", "y", "z", "chain", "residue", "atom", "dist"])
+    logger.writeln(df.to_string())
+    with open(args.output_prefix + ".json", "w") as ofs:
+        df.to_json(ofs, orient="records", indent=2)
+        logger.writeln("saved: {}".format(ofs.name))
+    coot_out = args.output_prefix + "_coot.py"
+    with open(coot_out, "w") as ofs:
+        ofs.write("""\
+from __future__ import absolute_import, division, print_function
+import gtk
+class coot_serval_map_peak_list:
+    def __init__(self):
+        window = gtk.Window(gtk.WINDOW_TOPLEVEL)
+        window.set_title("Map peaks (Servalcat)")
+        window.set_default_size(600, 600)
+        scrolled_win = gtk.ScrolledWindow()
+        scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
+        vbox = gtk.VBox(False, 2)
+        frame_vbox = gtk.VBox(False, 0)
+        frame_vbox.set_border_width(3)
+        self.btns = []
+        self.data = {}
+        self.add_data(frame_vbox)
+        scrolled_win.add_with_viewport(frame_vbox)
+        vbox.pack_start(scrolled_win, True, True, 0)
+        window.add(vbox)
+        window.show_all()
+        self.toggled(self.btns[0], 0)
+
+    def toggled(self, btn, i):
+        if btn.get_active():
+            set_rotation_centre(*self.data[i][1])
+            add_status_bar_text(self.data[i][0])
+
+    def add_data(self, vbox):
+        for i, d in enumerate(self.data):
+            self.btns.append(gtk.RadioButton(None if i == 0 else self.btns[0], d[0]))
+            vbox.pack_start(self.btns[-1], False, False, 0)
+            self.btns[-1].connect('toggled', self.toggled, i)
+
+gui = coot_serval_map_peak_list()
+""".format(for_coot))
+    logger.writeln("\nRun:")
+    logger.writeln("coot --script {}".format(coot_out))
+# map_peaks()
+
+def h_density_analysis(args):
+    #if args.source != "electron":
+    #    raise SystemExit("Only electron source is supported.")
+    model_format = fileio.check_model_format(args.model)
+    st = fileio.read_structure(args.model)
+    if not st[0].has_hydrogen():
+        raise SystemExit("No hydrogen in model.")
+
+    if args.output_prefix is None:
+        args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_hana"
+
+    gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
+                                 oversample_pixel=args.oversample_pixel)
+
+    if args.abs_level is not None:
+        cutoff = args.abs_level
+    else:
+        cutoff = args.sigma_level * numpy.std(gr) # assuming mean(gr) = 0
+
+    blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
+                                           min_volume=args.min_volume, min_score=0)
+    getpos = dict(peak=lambda x: x.peak_pos,
+                  centroid=lambda x: x.centroid)[args.blob_pos]
+
+    peaks = [getpos(b).tolist() for b in blobs]
+    kdtree = scipy.spatial.cKDTree(peaks)
+    found = []
+    n_hydr = 0
+    h_assigned = [0 for _ in range(len(blobs))]
+    st2 = st.clone()
+    for ic, chain in enumerate(st[0]):
+        for ir, res in enumerate(chain):
+            for ia, atom in reversed(list(enumerate(res))):
+                if not atom.is_hydrogen(): continue
+                n_hydr += 1
+                dist, idx = kdtree.query(atom.pos.tolist(), k=1, p=2)
+                map_val = gr.interpolate_value(getpos(blobs[idx]))
+                if dist < args.max_dist and blobs[idx].volume < args.max_volume and map_val > cutoff:
+                    found.append((getpos(blobs[idx]), map_val, dist, blobs[idx].volume,
+                                  chain.name, str(res.seqid), res.name,
+                                  atom.name, atom.altloc.replace("\0","")))
+                    h_assigned[idx] = 1
+                else:
+                    del st2[0][ic][ir][ia]
+
+    found.sort(key=lambda x: x[1], reverse=True)
+    logger.writeln("")
+    logger.writeln("Found hydrogen peaks:")
+    logger.writeln("dist map vol atom")
+    for _, map_val, dist, volume, chain, resi, resn, atom, alt in found:
+        logger.writeln("{:.2f} {:.2f} {:.2f} {}/{} {}/{}{}".format(dist, map_val, volume,
+                                                                   chain, resn, resi,
+                                                                   atom, "."+alt if alt else ""))
+
+    logger.writeln("")
+    logger.writeln("Result:")
+    logger.writeln(" number of hydrogen in the model : {}".format(n_hydr))
+    logger.writeln(" number of peaks close to hydrogen: {} ({:.1%})".format(len(found), len(found)/n_hydr))
+    logger.writeln("")
+
+    st_peaks = model.st_from_positions([getpos(b) for b in blobs],
+                                       bs=[gr.interpolate_value(getpos(b)) for b in blobs],
+                                       qs=h_assigned)
+    fileio.write_model(st_peaks, file_name="{}_peaks.mmcif".format(args.output_prefix))
+    logger.writeln(" this file includes peak positions")
+    logger.writeln(" occ=1: hydrogen assigned, occ=0: unassigned.")
+    logger.writeln(" B: density value at {}".format(args.blob_pos))
+    logger.writeln("")
+
+    fileio.write_model(st2, file_name="{}_h_with_peak{}".format(args.output_prefix, model_format))
+    logger.writeln(" this file is a copy of input model, where hydrogen atoms without peaks are removed.")
+# h_density_analysis()
+
+def fix_link(args):
+    st = fileio.read_structure(args.model)
+    model_format = fileio.check_model_format(args.model)
+
+    if not args.output:
+        tmp = fileio.splitext(os.path.basename(args.model))[0]
+        args.output = tmp + "_fixlink" + model_format
+        logger.writeln("Output file: {}".format(args.output))
+
+    args.ligand = sum(args.ligand, []) if args.ligand else []
+    monlib = restraints.load_monomer_library(st,
+                                             monomer_dir=args.monlib,
+                                             cif_files=args.ligand)
+    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
+    restraints.find_and_fix_links(st, monlib, bond_margin=args.bond_margin)
+    fileio.write_model(st, file_name=args.output)
+# fix_link()
+
+def merge_models(args):
+    logger.writeln("Reading file 1: {}".format(args.models[0]))
+    st = fileio.read_structure(args.models[0])
+    logger.writeln(" chains {}".format(" ".join([c.name for c in st[0]])))
+
+    for i, f in enumerate(args.models[1:]):
+        logger.writeln("Reading file {:3d}: {}".format(i+2, f))
+        st2 = fileio.read_structure(f)
+        for c in st2[0]:
+            org_id = c.name
+            c2 = st[0].add_chain(c, unique_name=True)
+            if c.name != c2.name:
+                logger.writeln(" chain {} merged (ID changed to {})".format(c.name, c2.name))
+            else:
+                logger.writeln(" chain {} merged".format(c.name))
+
+    fileio.write_model(st, file_name=args.output)
+# merge_models()
+
+def merge_dicts(args):
+    fileio.merge_ligand_cif(args.cifs, args.output)
+# merge_dicts()
+
+def geometry(args):
+    if args.ligand: args.ligand = sum(args.ligand, [])
+    if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_geom"
+    keywords = []
+    if args.keywords or args.keyword_file:
+        if args.keywords: keywords = sum(args.keywords, [])
+        if args.keyword_file: keywords.extend(l for f in sum(args.keyword_file, []) for l in open(f))
+    st = fileio.read_structure(args.model)
+    if args.ignore_h:
+        st.remove_hydrogens()
+    try:
+        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
+                                                 stop_for_unknowns=True)
+    except RuntimeError as e:
+        raise SystemExit("Error: {}".format(e))
+
+    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
+    restraints.find_and_fix_links(st, monlib)
+    try:
+        topo, metal_keywords = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
+                                                           check_hydrogen=True)
+    except RuntimeError as e:
+        raise SystemExit("Error: {}".format(e))
+
+    if args.selection:
+        sel = gemmi.Selection(args.selection)
+        atom_pos = [-1 for _ in range(st[0].count_atom_sites())]
+        n = 0
+        for chain in sel.chains(st[0]):
+            for res in sel.residues(chain):
+                for atom in sel.atoms(res):
+                    atom_pos[atom.serial-1] = n
+                    n += 1
+        logger.writeln("Using selection '{}': {} atoms out of {}".format(args.selection, n, len(atom_pos)))
+    else:
+        atom_pos = None
+
+    geom = Geom(st, topo, monlib, refmac_keywords=metal_keywords + keywords, atom_pos=atom_pos)
+    for k in geom.outlier_sigmas: geom.outlier_sigmas[k] = args.sigma
+    geom.setup_nonbonded(True)
+    ret = geom.show_model_stats()
+
+    with open(args.output_prefix + "_summary.json", "w") as ofs:
+        ret["summary"].to_json(ofs, indent=2)
+        logger.writeln("saved: {}".format(ofs.name))
+    with open(args.output_prefix + "_outliers.json", "w") as ofs:
+        for k in ret["outliers"]:
+            ret["outliers"][k] = ret["outliers"][k].to_dict(orient="records")
+        json.dump(ret["outliers"], ofs, indent=2)
+        logger.writeln("saved: {}".format(ofs.name))
+
+    if args.check_skew:
+        logger.writeln("\nChecking skewness of bond length deviation")
+        # better to ignore hydrogen
+        tab = geom.geom.reporting.get_bond_outliers(use_nucleus=geom.use_nucleus, min_z=0)
+        for a in "atom1", "atom2":
+            tab[a] = [str(geom.lookup[x]) for x in tab[a]]
+        df = pandas.DataFrame(tab)
+        df["dev"] = df["value"] - df["ideal"]
+        df = df.reindex(df.dev.abs().sort_values(ascending=False).index)
+        logger.writeln("Bond length deviations:")
+        logger.writeln(df.to_string(max_rows=20))
+        q1, q2, q3 = numpy.percentile(df["dev"], [25, 50, 75])
+        sk2 = (q1 + q3 - 2 * q2) / (q3 - q1)
+        logger.writeln("bond_dev_median= {:.6f}".format(q2))
+        logger.writeln("bond_dev_skew= {:.4f}".format(df["dev"].skew()))
+        logger.writeln("bond_dev_sk2= {:.4f}".format(sk2))
+        with open(args.output_prefix + "_bond_dev.html", "w") as ofs:
+            ofs.write("""\
+<html>
+<head>
+<meta charset="utf-8" />
+<script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
+</head>
+<body>
+<div id="hist"></div>
+<script>
+var trace = {
+x: %s,
+type: 'histogram'
+};
+var layout = {
+title: "median: %.4f, sk2: %.4f",
+xaxis: {title: "bond distance - ideal"},
+yaxis: {title: "count"},
+shapes: [{
+type: 'line',
+yref: 'paper',
+x0: 0, y0: 0,
+x1: 0, y1: 1}]
+};
+target = document.getElementById('hist');
+Plotly.newPlot(target, [trace], layout);
+</script>
+</body>
+</html>
+""" % (str(list(df.dev)), q2, sk2))
+        logger.writeln("check histogram: {}".format(ofs.name))
+
+    # Note that this modifies st
+    if args.per_atom_score_as_b:
+        model_format = fileio.check_model_format(args.model)
+        peratom = geom.geom.reporting.per_atom_score(len(geom.atoms), geom.use_nucleus, "mean")
+        for i, score in enumerate(peratom["total"]):
+            geom.atoms[i].b_iso = score
+        fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
+# geometry()
+
+def adp_stats(args):
+    if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
+    st = fileio.read_structure(args.model)
+    model.adp_analysis(st)
+    b_all = [cra.atom.b_iso for cra in st[0].all() if cra.atom.occ > 0]
+
+    # bin width from Freedman–Diaconis rule
+    qs = numpy.quantile(b_all, [0, 0.25, 0.75, 1])
+    bin_h = 2 * (qs[2] - qs[1]) / len(b_all)**(1/3.)
+
+    # for plotly
+    traces = []
+    traces.append("x: [%s], type: 'histogram', name: 'All', xbins: {size: %f}"
+                  % (",".join("%.2f"%x for x in b_all), bin_h))
+    if len(st[0]) > 1:
+        b_chain = {}
+        for c in st[0]:
+            b_chain.setdefault(c.name, []).extend(a.b_iso for r in c for a in r if a.occ > 0)
+        for c in b_chain:
+            bs = ",".join("%.2f" % x for x in b_chain[c])
+            traces.append("x: [%s], type: 'histogram', name: 'Chain %s'" % (bs, c))
+    with open(args.output_prefix + "_hist.html", "w") as ofs:
+        ofs.write("""\
+<html>
+<head>
+<meta charset="utf-8" />
+<script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
+</head>
+<body>
+<div id="hist"></div>
+<script>
+""")
+        for i, t in enumerate(traces):
+            ofs.write("var trace%d = {%s};\n" % (i+1, t))
+        ofs.write("""\
+var layout = {
+title: "isotropic B histogram",
+xaxis: {title: "B"},
+yaxis: {title: "count"},
+barmode: "stack"
+};
+target = document.getElementById('hist');
+Plotly.newPlot(target, [%s], layout);
+</script>
+</body>
+</html>
+""" % (",".join("trace%d" % (i+1) for i in range(len(traces)))))
+        logger.writeln("check histogram: {}".format(ofs.name))
+# adp_stats()
+
+def show_power(args):
+    maps_in = []
+    if args.map:
+        print(args.map)
+        print(sum(args.map, []))
+        maps_in = [(f,) for f in sum(args.map, [])]
+
+    if args.halfmaps:
+        args.halfmaps = sum(args.halfmaps, [])
+        if len(args.halfmaps)%2 != 0:
+            raise RuntimeError("Number of half maps is not even.")
+        maps_in.extend([(args.halfmaps[2*i],args.halfmaps[2*i+1]) for i in range(len(args.halfmaps)//2)])
+
+    if args.mask:
+        mask = fileio.read_ccp4_map(args.mask)[0]
+    else:
+        mask = None
+
+    hkldata = None
+    labs = []
+    for mapin in maps_in: # TODO rewrite in faster way
+        ms = [fileio.read_ccp4_map(f) for f in mapin]
+        d_min = args.resolution
+        if d_min is None:
+            d_min = maps.nyquist_resolution(ms[0][0])
+            logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(d_min))
+        tmp = maps.mask_and_fft_maps(ms, d_min, mask)
+        labs.append("F{:02d}".format(len(labs)+1))
+        tmp.df.rename(columns=dict(FP=labs[-1]), inplace=True)
+        if hkldata is None:
+            hkldata = tmp
+        else:
+            if hkldata.cell.parameters != tmp.cell.parameters: raise RuntimeError("Different unit cell!")
+            hkldata.merge(tmp.df[["H","K","L",labs[-1]]])
+
+    if not labs:
+        raise SystemExit("No map files given. Exiting.")
+
+    hkldata.setup_relion_binning()
+
+    ofs = open(args.output_prefix+".log", "w")
+    ofs.write("Input:\n")
+    for i in range(len(maps_in)):
+        ofs.write("{} from {}\n".format(labs[i], " ".join(maps_in[i])))
+    ofs.write("\n")
+
+    ofs.write("""$TABLE: Power spectrum :
+$GRAPHS
+: log10(Mn(|F|^2)) :A:1,{}:
+$$
+1/resol^2 n d_max d_min {}
+$$
+$$
+""".format(",".join([str(i+5) for i in range(len(labs))]), " ".join(labs)))
+    print(hkldata.df)
+    abssqr = dict((lab, numpy.abs(hkldata.df[lab].to_numpy())**2) for lab in labs)
+    for i_bin, idxes in hkldata.binned():
+        bin_d_min = hkldata.binned_df.d_min[i_bin]
+        bin_d_max = hkldata.binned_df.d_max[i_bin]
+        ofs.write("{:.4f} {:7d} {:7.3f} {:7.3f}".format(1/bin_d_min**2, len(idxes), bin_d_max, bin_d_min,))
+        for lab in labs:
+            pwr = numpy.log10(numpy.average(abssqr[lab][idxes]))
+            ofs.write(" {:.4e}".format(pwr))
+        ofs.write("\n")
+    ofs.write("$$\n")
+    ofs.close()
+# show_power()
+
+def fcalc(args):
+    if (args.auto_box_with_padding, args.cell).count(None) == 0:
+        raise SystemExit("Error: you cannot specify both --auto_box_with_padding and --cell")
+
+    if args.ligand: args.ligand = sum(args.ligand, [])
+    if not args.output_prefix: args.output_prefix = "{}_fcalc_{}".format(fileio.splitext(os.path.basename(args.model))[0], args.source)
+
+    st = fileio.read_structure(args.model)
+    if not args.keep_charges:
+        model.remove_charge([st])
+    model.check_atomsf([st], args.source)
+    if not args.no_expand_ncs:
+        model.expand_ncs(st)
+
+    if args.cell is not None:
+        st.cell = gemmi.UnitCell(*args.cell)
+    elif args.auto_box_with_padding is not None:
+        st.cell = model.box_from_model(st[0], args.auto_box_with_padding)
+        st.spacegroup_hm = "P 1"
+        logger.writeln("Box size from the model with padding of {}: {}".format(args.auto_box_with_padding, st.cell.parameters))
+
+    if not st.cell.is_crystal():
+        raise SystemExit("ERROR: No unit cell information. Give --cell or --auto_box_with_padding.")
+
+    if args.source=="electron" and st[0].has_hydrogen():
+        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
+                                                 stop_for_unknowns=False)
+    else:
+        monlib = None
+
+    if args.method == "fft":
+        fc_asu = model.calc_fc_fft(st, args.resolution, cutoff=args.cutoff, rate=args.rate,
+                                   mott_bethe=args.source=="electron",
+                                   monlib=monlib, source=args.source)
+    else:
+        fc_asu = model.calc_fc_direct(st, args.resolution, source=args.source,
+                                      mott_bethe=args.source=="electron", monlib=monlib)
+
+    hkldata = hkl.hkldata_from_asu_data(fc_asu, "FC")
+    if args.as_intensity:
+        hkldata.df["IC"] = numpy.abs(hkldata.df.FC)**2
+        labout = ["IC"]
+        if args.add_dummy_sigma:
+            hkldata.df["SIGIC"] = 1.
+            labout.append("SIGIC")
+    else:
+        labout = ["FC"]
+        if args.add_dummy_sigma:
+            hkldata.df["SIGFC"] = 1.
+            labout.append("SIGFC")
+
+    hkldata.write_mtz(args.output_prefix+".mtz", labout, types=dict(IC="J", SIGIC="Q", SIGFC="Q"))
+# fcalc()
+
+def nemap(args):
+    from servalcat.spa import fofc
+
+    if (args.trim or args.trim_mtz) and args.mask is None:
+        raise SystemExit("\nError: You need to give --mask as you requested --trim or --trim_mtz.\n")
+
+    if args.mask:
+        mask = fileio.read_ccp4_map(args.mask)[0]
+    else:
+        mask = None
+
+    halfmaps = fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
+    if args.resolution is None:
+        args.resolution = maps.nyquist_resolution(halfmaps[0][0])
+        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
+
+    d_min = args.resolution
+    if args.local_fourier_weighting_with > 0:
+        d_min = 1 / (args.local_fourier_weighting_with + 1 / d_min)
+        logger.writeln("adjusting d_min= {:.2f} for local correlation".format(d_min))
+    hkldata = maps.mask_and_fft_maps(halfmaps, d_min, mask)
+
+    if args.local_fourier_weighting_with > 0:
+        asu1 = hkldata.as_asu_data("F_map1")
+        asu2 = hkldata.as_asu_data("F_map2")
+        size = asu1.get_size_for_hkl(sample_rate=3)
+        logger.writeln("using grid {}".format(size))
+        gr1 = asu1.get_f_phi_on_grid(size)
+        gr2 = asu2.get_f_phi_on_grid(size)
+        kernel = ext.hard_sphere_kernel_recgrid(size, asu1.unit_cell, args.local_fourier_weighting_with)
+        cc = maps.local_cc(gr1, gr2, kernel.array.real, method="simple")
+        cc.array[cc.array < 0] = 0 # negative cc cannot be used anyway
+        cc.array[:] = 2 * cc.array.real / (1 + cc.array.real) # to full map cc
+        hkldata.df["cc"] = numpy.real(cc.get_value_by_hkl(hkldata.miller_array()))
+        grf = type(gr1)((gr1.array + gr2.array) / 2, gr1.unit_cell, gr1.spacegroup)
+        var_f = maps.local_var(grf, kernel.array.real, method="simple")
+        hkldata.df["var_f"] = numpy.real(var_f.get_value_by_hkl(hkldata.miller_array()))
+        if args.B is not None:
+            k2_l = numpy.exp(-args.B / hkldata.d_spacings()**2 / 2)
+            hkldata.df.cc = k2_l * hkldata.df.cc / (1 + (k2_l - 1) * hkldata.df.cc)
+        hkldata.df["FWT"] = hkldata.df.FP * numpy.sqrt(hkldata.df.cc / hkldata.df.var_f)
+        hkldata.df["kernel"] = numpy.real(kernel.get_value_by_hkl(hkldata.miller_array()))
+        hkldata.write_mtz(args.output_prefix+"_cc.mtz", ["cc", "kernel"])
+        hkldata = hkldata.copy(d_min=args.resolution)
+        map_labs = ["FWT"]
+    else:
+        hkldata.setup_relion_binning()
+        maps.calc_noise_var_from_halfmaps(hkldata)
+        map_labs = fofc.calc_maps(hkldata, B=args.B, has_halfmaps=True, half1_only=args.half1_only,
+                                  no_fsc_weights=args.no_fsc_weights, sharpening_b=args.sharpening_b)
+    fofc.write_files(hkldata, map_labs, grid_start=halfmaps[0][1], stats_str=None,
+                     mask=mask, output_prefix=args.output_prefix,
+                     trim_map=args.trim, trim_mtz=args.trim_mtz)
+# nemap()
+
+def blur(args):
+    if args.output_prefix is None:
+        args.output_prefix = fileio.splitext(os.path.basename(args.hklin))[0]
+
+    if fileio.is_mmhkl_file(args.hklin):
+        mtz = fileio.read_mmhkl(args.hklin)
+        hkl.blur_mtz(mtz, args.B)
+        suffix = ("_blur" if args.B > 0 else "_sharpen") + "_{:.2f}.mtz".format(abs(args.B))
+        mtz.write_to_file(args.output_prefix+suffix)
+        logger.writeln("Written: {}".format(args.output_prefix+suffix))
+    else:
+        raise SystemExit("ERROR: Unsupported file type: {}".format(args.hklin))
+# blur()
+
+def mask_from_model(args):
+    st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
+    if args.selection:
+        gemmi.Selection(args.selection).remove_not_selected(st)
+    gr, grid_start, _ = fileio.read_ccp4_map(args.map)
+    mask = maps.mask_from_model(st, args.radius, soft_edge=args.soft_edge, grid=gr)
+    maps.write_ccp4_map(args.output, mask, grid_start=grid_start)
+# mask_from_model()
+
+def applymask(args):
+    if args.output_prefix is None:
+        args.output_prefix = fileio.splitext(os.path.basename(args.map))[0] + "_masked"
+
+    grid, grid_start, _ = fileio.read_ccp4_map(args.map)
+    mask = fileio.read_ccp4_map(args.mask)[0]
+    logger.writeln("Applying mask")
+    logger.writeln(" mask min: {:.3f} max: {:.3f}".format(numpy.min(mask), numpy.max(mask)))
+    grid.array[:] *= mask.array
+
+    if args.normalize:
+        masked = grid.array[mask.array>args.mask_cutoff]
+        masked_mean = numpy.average(masked)
+        masked_std = numpy.std(masked)
+        logger.writeln("Normalizing map values within mask")
+        logger.writeln(" masked volume: {} mean: {:.3e} sd: {:.3e}".format(len(masked), masked_mean, masked_std))
+        grid.array[:] = (grid.array - masked_mean) / masked_std
+
+    maps.write_ccp4_map(args.output_prefix+".mrc", grid,
+                        grid_start=grid_start,
+                        mask_for_extent=mask.array if args.trim else None,
+                        mask_threshold=args.mask_cutoff)
+# applymask()
+
+def map2mtz(args):
+    if args.output is None:
+        args.output = fileio.splitext(os.path.basename(args.map))[0] + "_fft.mtz"
+    grid, grid_start, grid_shape = fileio.read_ccp4_map(args.map)
+    if args.resolution is None:
+        args.resolution = maps.nyquist_resolution(grid)
+        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))
+
+    if grid_start != (0,0,0) or grid.shape != tuple(grid_shape):
+        # If only subregion of whole grid in map, unit cell needs to be re-defined.
+        if grid.shape != tuple(grid_shape):
+            new_abc = [grid.unit_cell.parameters[i] * grid_shape[i] / grid.shape[i] for i in range(3)]
+            cell = gemmi.UnitCell(*new_abc, *grid.unit_cell.parameters[3:])
+            logger.writeln("Changing unit cell to {}".format(cell.parameters))
+        else:
+            cell = grid.unit_cell
+        grid = gemmi.FloatGrid(grid.get_subarray(grid_start, grid_shape),
+                               cell, grid.spacegroup)
+
+    f_grid = gemmi.transform_map_to_f_phi(grid)
+    asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
+    hkldata = hkl.hkldata_from_asu_data(asudata, "F")
+    if grid_start != (0,0,0):
+        shifts = grid.get_position(*grid_start)
+        hkldata.translate("F", shifts)
+        logger.writeln("Applying phase shift with translation {}".format(shifts.tolist()))
+    hkldata.write_mtz(args.output, ["F"])
+# map2mtz()
+
+def sm2mm(args):
+    if args.output_prefix is None:
+        args.output_prefix = fileio.splitext(args.files[0])[0]
+    st, mtz = fileio.read_small_molecule_files(args.files)
+    if st is not None:
+        fileio.write_model(st, prefix=args.output_prefix, pdb=True, cif=True)
+    if mtz is not None:
+        mtz_out = args.output_prefix + ".mtz"
+        logger.writeln("Writing MTZ file: {}".format(mtz_out))
+        mtz.write_to_file(mtz_out)
+# sm2mm()
+
+def seq(args):
+    wrap_width = 100
+    seqs = []
+    if args.seq:
+        args.seq = sum(args.seq, [])
+        for sf in args.seq:
+            seqs.extend(fileio.read_sequence_file(sf))
+
+    st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
+    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
+    for chain in st[0]:
+        p = chain.get_polymer()
+        if not p: continue
+        p_type = p.check_polymer_type()
+        if p_type in (gemmi.PolymerType.SaccharideD, gemmi.PolymerType.SaccharideL): continue
+        p_seq = gemmi.one_letter_code(p.extract_sequence())
+        results = []
+        for name, seq in seqs:
+            # what if DnaRnaHybrid?
+            kind = {gemmi.PolymerType.Dna: gemmi.ResidueKind.DNA,
+                    gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}.get(p_type, gemmi.ResidueKind.AA)
+            s = [gemmi.expand_one_letter(x, kind) for x in seq]
+            if None in s: continue
+            results.append([name, gemmi.align_sequence_to_polymer(s, p, p_type), seq])
+
+        if results:
+            logger.writeln("Chain: {}".format(chain.name))
+            logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
+            name, al, s1 = max(results, key=lambda x: x[1].score)
+            logger.writeln(" match: {}".format(name))
+            logger.writeln(" score: {}".format(al.score))
+            p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
+            unkseq = [x.start() for x in re.finditer("\-", p1)]
+            mismatches = [x.start() for x in re.finditer("\.", al.match_string)]
+            if mismatches or unkseq:
+                idxes = {x.start(): i for i, x in enumerate(re.finditer("[^-]", p2))}
+                seqnums = [str(x.seqid) for x in p]
+                if mismatches:
+                    logger.write(" mismatches: ")
+                    logger.writeln(", ".join("{}({}>{})".format(seqnums[idxes[i]], p1[i], p2[i]) for i in mismatches))
+                if unkseq:
+                    logger.write(" unknown sequence: ")
+                    logger.writeln(", ".join("{}({})".format(seqnums[idxes[i]], p2[i]) for i in unkseq))
+
+            logger.writeln("")
+            for i in range(0, len(p1), wrap_width):
+                logger.writeln(" seq.  {}".format(p1[i:i+wrap_width]))
+                logger.writeln("       {}".format(al.match_string[i:i+wrap_width]))
+                logger.writeln(" model {}\n".format(p2[i:i+wrap_width]))
+        else:
+            logger.writeln("> Chain: {}".format(chain.name))
+            logger.writeln(gemmi.one_letter_code(p.extract_sequence()))
+            logger.writeln("")
+# seq()
+
+def show(args):
+    for filename in args.files:
+        ext = fileio.splitext(filename)[1]
+        if ext in (".mrc", ".ccp4", ".map"):
+            fileio.read_ccp4_map(filename)
+            logger.writeln("\n")
+# show()
+
+def json2csv(args):
+    if not args.output_prefix:
+        args.output_prefix = fileio.splitext(os.path.basename(args.json))[0]
+
+    df = pandas.read_json(args.json)
+    df.to_csv(args.output_prefix+".csv", index=False)
+    logger.writeln("Output: {}".format(args.output_prefix+".csv"))
+# json2csv()
+
+def main(args):
+    comms = dict(show=show,
+                 json2csv=json2csv,
+                 symmodel=symmodel,
+                 helical_biomt=helical_biomt,
+                 expand=symexpand,
+                 h_add=h_add,
+                 map_peaks=map_peaks,
+                 h_density=h_density_analysis,
+                 fix_link=fix_link,
+                 merge_models=merge_models,
+                 merge_dicts=merge_dicts,
+                 geom=geometry,
+                 adp=adp_stats,
+                 power=show_power,
+                 fcalc=fcalc,
+                 nemap=nemap,
+                 blur=blur,
+                 mask_from_model=mask_from_model,
+                 applymask=applymask,
+                 map2mtz=map2mtz,
+                 sm2mm=sm2mm,
+                 seq=seq)
+
+    com = args.subcommand
+    f = comms.get(com)
+    if f:
+        return f(args)
+    else:
+        raise SystemExit("Unknown subcommand: {}".format(com))
+# main()
+
+if __name__ == "__main__":
+    import sys
+    args = parse_args(sys.argv[1:])
+    main(args)
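
The module above exposes add_arguments, parse_args and main, and the __main__ guard simply feeds sys.argv[1:] through them. A minimal sketch of driving one of these subcommands programmatically follows; the "geom" subcommand name is taken from the code above, while the model path and sigma value are illustrative assumptions, not part of the release.

    # Hypothetical usage sketch -- not shipped with servalcat 0.4.60.
    # Any subcommand registered in add_arguments() above can be invoked the same way.
    from servalcat.utils import commands

    args = commands.parse_args(["geom", "model.pdb", "--sigma", "5"])
    commands.main(args)  # dispatches to geometry(args) via the comms dict in main()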