servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +1162 -0
- servalcat/refine/refine_geom.py +245 -0
- servalcat/refine/refine_spa.py +400 -0
- servalcat/refine/refine_xtal.py +339 -0
- servalcat/refine/spa.py +151 -0
- servalcat/refine/xtal.py +312 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +191 -0
- servalcat/refmac/refmac_keywords.py +660 -0
- servalcat/refmac/refmac_wrapper.py +423 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +488 -0
- servalcat/spa/fsc.py +391 -0
- servalcat/spa/localcc.py +197 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +979 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1629 -0
- servalcat/utils/fileio.py +836 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +811 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +933 -0
- servalcat/utils/refmac.py +759 -0
- servalcat/utils/restraints.py +888 -0
- servalcat/utils/symmetry.py +298 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +262 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1954 -0
- servalcat/xtal/twin.py +316 -0
- servalcat-0.4.131.dist-info/METADATA +60 -0
- servalcat-0.4.131.dist-info/RECORD +45 -0
- servalcat-0.4.131.dist-info/WHEEL +6 -0
- servalcat-0.4.131.dist-info/entry_points.txt +4 -0
- servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
|
@@ -0,0 +1,1629 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
from servalcat.utils import logger
|
|
10
|
+
from servalcat.utils import fileio
|
|
11
|
+
from servalcat.utils import symmetry
|
|
12
|
+
from servalcat.utils import model
|
|
13
|
+
from servalcat.utils import hkl
|
|
14
|
+
from servalcat.utils import restraints
|
|
15
|
+
from servalcat.utils import maps
|
|
16
|
+
from servalcat.refmac import refmac_keywords
|
|
17
|
+
from servalcat.refine.refine import Geom, RefineParams
|
|
18
|
+
from servalcat import ext
|
|
19
|
+
import os
|
|
20
|
+
import gemmi
|
|
21
|
+
import numpy
|
|
22
|
+
import scipy.spatial
|
|
23
|
+
import pandas
|
|
24
|
+
import json
|
|
25
|
+
import re
|
|
26
|
+
import argparse
|
|
27
|
+
|
|
28
|
+
def add_arguments(p):
    """Attach every utility subcommand and its options to parser *p*.

    A sub-parser collection is created on *p* with ``dest="subcommand"``, so
    the selected command name is stored in ``args.subcommand``.  Option
    groups that several commands share are registered through the small
    local helpers below; each helper is called at the position where the
    options belong, so the order of registration is unchanged.

    The output option of ``symmodel``, ``helical_biomt`` and ``expand`` is
    spelled ``--output_prfix``; the handlers in this file read
    ``args.output_prfix``, so the spelling is kept as is.
    """
    subparsers = p.add_subparsers(dest="subcommand")
    cell_names = ("a", "b", "c", "alpha", "beta", "gamma")

    def command(name, description):
        # One sub-parser per subcommand.
        return subparsers.add_parser(name, description=description)

    def box_options(cmd):
        # --map and --cell are alternative sources of the box size.
        box = cmd.add_mutually_exclusive_group()
        box.add_argument('--map', help="Take box size from the map")
        box.add_argument('--cell', type=float, nargs=6, metavar=cell_names,
                         help="Box size")

    def symmetry_options(cmd):
        # Symmetry options are defined by servalcat.utils.symmetry.
        symmetry.add_symmetry_args(cmd.add_argument_group("symmetry"), require_pg=True)

    def howtoname_option(target):
        # *target* is a parser or a mutually exclusive group.
        target.add_argument('--howtoname', choices=["dup", "short", "number"], default="short",
                            help="How to decide new chain IDs in expanded model (default: short); "
                                 "dup: use original chain IDs (with different segment IDs), "
                                 "short: use unique new IDs, "
                                 "number: add number to original chain ID")

    def format_flags(cmd):
        cmd.add_argument('--pdb', action="store_true", help="Write a pdb file")
        cmd.add_argument('--cif', action="store_true", help="Write a cif file")

    def monlib_options(cmd):
        cmd.add_argument('--ligand', nargs="*", action="append")
        cmd.add_argument("--monlib",
                         help="Monomer library path. Default: $CLIBD_MON")

    def density_input(cmd, map_help, mtz_help):
        # Exactly one of --map / --mtz must be given.
        source = cmd.add_mutually_exclusive_group(required=True)
        source.add_argument('--map', help=map_help)
        source.add_argument('--mtz', help=mtz_help)
        cmd.add_argument('--mtz_labels', default="DELFWT,PHDELWT", help='F,PHI labels (default: %(default)s)')
        cmd.add_argument('--oversample_pixel', type=float, help='Desired pixel spacing in map (Angstrom)')

    def level_options(cmd):
        # Exactly one threshold (sigma or absolute) must be given.
        level = cmd.add_mutually_exclusive_group(required=True)
        level.add_argument('--sigma_level', type=float, help="Threshold map level in sigma unit")
        level.add_argument('--abs_level', type=float, help="Threshold map level in absolute unit")

    def blob_options(cmd):
        cmd.add_argument('--blob_pos', choices=["peak", "centroid"], default="centroid",
                         help="default: %(default)s")
        cmd.add_argument('--min_volume', type=float, default=0.3, help="minimum blob volume (default: %(default).1f)")

    # show
    cmd = command("show", 'Show file info supported by the program')
    cmd.add_argument('files', nargs='+')

    # json2csv
    cmd = command("json2csv", 'Convert json to csv for plotting')
    cmd.add_argument('json')
    cmd.add_argument('-o', '--output_prefix')

    # symmodel
    cmd = command("symmodel", "Add symmetry annotation to model")
    cmd.add_argument('--model', required=True)
    box_options(cmd)
    symmetry_options(cmd)
    cmd.add_argument('--contacting_only', action="store_true", help="Filter out non-contacting NCS")
    cmd.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
    howtoname_option(cmd)
    cmd.add_argument('--biomt', action="store_true", help="Add BIOMT also")
    cmd.add_argument('-o', '--output_prfix')
    format_flags(cmd)

    # helical_biomt
    cmd = command("helical_biomt", "generate BIOMT of helical reconstruction for PDB deposition")
    cmd.add_argument('--model', required=True)
    box_options(cmd)
    symmetry_options(cmd)
    cmd.add_argument('--start', type=int)
    cmd.add_argument('--end', type=int)
    howtoname_option(cmd)
    cmd.add_argument('-o', '--output_prfix')

    # expand
    cmd = command("expand", "Expand symmetry")
    cmd.add_argument('--model', required=True)
    cmd.add_argument('--chains', nargs="*", action="append", help="Select chains to keep")
    naming_or_split = cmd.add_mutually_exclusive_group()
    howtoname_option(naming_or_split)
    naming_or_split.add_argument("--split", action="store_true", help="split file for each operator")
    cmd.add_argument('-o', '--output_prfix')
    format_flags(cmd)

    # h_add
    cmd = command("h_add", 'Add hydrogen in riding position')
    cmd.add_argument('model')
    monlib_options(cmd)
    cmd.add_argument('-o', '--output')
    cmd.add_argument("--pos", choices=["elec", "nucl"], default="elec")

    # add_op3
    cmd = command("add_op3", "Add OP3 atoms to 5' ends")
    cmd.add_argument('model')
    cmd.add_argument('--chains', nargs="*", action="append", help="For selected chains only")
    monlib_options(cmd)
    cmd.add_argument('-o', '--output')

    # map_peaks
    cmd = command("map_peaks", 'List density peaks and write a coot script')
    cmd.add_argument('--model', required=True, help="Model")
    density_input(cmd, "Map file", "MTZ for map file")
    level_options(cmd)
    blob_options(cmd)
    cmd.add_argument('--max_volume', type=float, help="maximum blob volume (default: none)")
    cmd.add_argument('-o', '--output_prefix', default="peaks")

    # h_density
    cmd = command("h_density", 'Hydrogen density analysis')
    cmd.add_argument('--model', required=True, help="Model with hydrogen atoms")
    density_input(cmd, "Fo-Fc map file", "MTZ for Fo-Fc map file")
    #cmd.add_argument("--source", choices=["electron", "xray", "neutron"], default="electron")
    level_options(cmd)
    cmd.add_argument('--max_dist', type=float, default=0.5, help="max distance between peak and hydrogen position in the model (default: %(default).1f)")
    blob_options(cmd)
    cmd.add_argument('--max_volume', type=float, default=3, help="maximum blob volume (default: %(default).1f)")
    cmd.add_argument('-o', '--output_prefix')

    # fix_link
    cmd = command("fix_link", 'Fix LINKR/_struct_conn records in the model')
    cmd.add_argument('model')
    monlib_options(cmd)
    cmd.add_argument('--bond_margin', type=float, default=1.3, help='(default: %(default).1f)')
    cmd.add_argument('--metal_margin', type=float, default=1.1, help='(default: %(default).1f)')
    cmd.add_argument('-o', '--output', help="Default: input_fixlink.{pdb|mmcif}")

    # merge_models
    cmd = command("merge_models", 'Merge multiple model files')
    cmd.add_argument('models', nargs="+")
    cmd.add_argument('-o', '--output', required=True)

    # merge_dicts
    cmd = command("merge_dicts", 'Merge restraint dictionary cif files')
    cmd.add_argument('cifs', nargs="+")
    cmd.add_argument('-o', '--output', default="merged.cif", help="Output cif file (default: %(default)s)")

    # geom
    cmd = command("geom", 'Calculate geometry and show outliers')
    cmd.add_argument('model')
    monlib_options(cmd)
    cmd.add_argument('--keywords', nargs='+', action="append",
                     help="refmac keyword(s)")
    cmd.add_argument('--keyword_file', nargs='+', action="append",
                     help="refmac keyword file(s)")
    cmd.add_argument('--sigma', type=float, default=5,
                     help="sigma cutoff to print outliers (default: %(default).1f)")
    cmd.add_argument('--per_atom_score_as_b', action='store_true',
                     help="write model file with per-atom score as B values")
    cmd.add_argument("--check_skew", action='store_true', help="(experimental) check bond skew to test magnification")
    cmd.add_argument('-n', '--nucleus', action="store_true", help="Use nucleus distances (for neutron)")
    cmd.add_argument("--ignore_h", action='store_true', help="ignore hydrogen")
    cmd.add_argument("--selection", help="evaluate part of the model")
    cmd.add_argument('--dump_all', action="store_true", help=argparse.SUPPRESS)
    cmd.add_argument('-o', '--output_prefix',
                     help="default: taken from input file")

    # conf
    cmd = command("conf", 'Compare conformations')
    cmd.add_argument('models', nargs="+")
    cmd.add_argument("--min_diff", type=float, default=60.)
    monlib_options(cmd)
    cmd.add_argument("--same_chain", action='store_true', help="Only between same chains (more than one file)")
    cmd.add_argument('-o', '--output_prefix', default="conf",
                     help="")

    # adp
    cmd = command("adp", 'ADP analysis')
    cmd.add_argument('model')
    cmd.add_argument('-o', '--output_prefix',
                     help="default: taken from input file")

    # power
    cmd = command("power", 'Show power spectrum')
    cmd.add_argument("--map", nargs="*", action="append")
    cmd.add_argument("--halfmaps", nargs="*", action="append")
    cmd.add_argument('--mask', help='Mask file')
    cmd.add_argument('-d', '--resolution', type=float)
    cmd.add_argument('-o', '--output_prefix', default="power")

    # fcalc
    cmd = command("fcalc", 'Structure factor from model')
    cmd.add_argument('--model', required=True)
    cmd.add_argument("--no_expand_ncs", action='store_true', help="Do not expand strict NCS in MTRIX or _struct_ncs_oper")
    cmd.add_argument("--method", choices=["fft", "direct"], default="fft")
    cmd.add_argument("--source", choices=["electron", "xray", "neutron", "custom"], default="electron")
    monlib_options(cmd)
    cmd.add_argument('--cell', type=float, nargs=6, metavar=cell_names,
                     help="Override unit cell")
    cmd.add_argument('--auto_box_with_padding', type=float, help="Determine box size from model with specified padding")
    cmd.add_argument('--cutoff', type=float, default=1e-5)
    cmd.add_argument('--rate', type=float, default=1.5)
    cmd.add_argument('--add_dummy_sigma', action='store_true', help="write dummy SIGF")
    cmd.add_argument('--as_intensity', action='store_true', help="if you want |F|^2")
    cmd.add_argument('--keep_charges', action='store_true',
                     help="Use scattering factor for charged atoms. Use it with care.")
    cmd.add_argument('-d', '--resolution', type=float, required=True)
    cmd.add_argument('-o', '--output_prefix')

    # nemap
    cmd = command("nemap", 'Normalized expected map calculation from half maps')
    cmd.add_argument("--halfmaps", required=True, nargs=2)
    cmd.add_argument('--pixel_size', type=float, help='Override pixel size (A)')
    cmd.add_argument("--half1_only", action='store_true', help="Only use half 1 for map calculation (use half 2 only for noise estimation)")
    cmd.add_argument('-B', type=float, help="local B value")
    cmd.add_argument("--no_fsc_weights", action='store_true',
                     help="Just for debugging purpose: turn off FSC-based weighting")
    cmd.add_argument("--sharpening_b", type=float,
                     help="Use B value (negative value for sharpening) instead of standard deviation of the signal")
    cmd.add_argument("-d", '--resolution', type=float)
    cmd.add_argument('-m', '--mask', help="mask file")
    cmd.add_argument('-o', '--output_prefix', default='nemap')
    cmd.add_argument("--trim", action='store_true', help="Write trimmed maps")
    cmd.add_argument("--trim_mtz", action='store_true', help="Write trimmed mtz")
    cmd.add_argument("--local_fourier_weighting_with", type=float, default=0,
                     help="Experimental: give kernel size in A^-1 unit to use local Fourier weighting instead of resolution-dependent weights")

    # blur
    cmd = command("blur", 'Blur data by specified B value')
    cmd.add_argument('--hklin', required=True, help="input MTZ file")
    cmd.add_argument('-B', type=float, required=True, help="B value for blurring (negative value for sharpening)")
    cmd.add_argument('-o', '--output_prefix')

    # mask_from_model
    cmd = command("mask_from_model", 'Make a mask from model')
    cmd.add_argument("--map", required=True, help="For unit cell and pixel size reference")
    cmd.add_argument("--model", required=True)
    cmd.add_argument("--selection")
    cmd.add_argument('--radius', type=float, required=True,
                     help='Radius in angstrom')
    cmd.add_argument('--soft_edge', type=float, default=0,
                     help='Soft edge (default: %(default).1f)')
    cmd.add_argument('-o', '--output', default="mask_from_model.mrc")

    # applymask (and normalize within mask)
    cmd = command("applymask", 'Apply mask and optionally normalize map within mask')
    cmd.add_argument("--map", required=True)
    cmd.add_argument('--mask', required=True, help='Mask file')
    cmd.add_argument("--normalize", action='store_true',
                     help="Normalize map values using mean and sd within the mask")
    cmd.add_argument("--trim", action='store_true', help="Write trimmed map")
    cmd.add_argument('--mask_cutoff', type=float, default=0.5,
                     help="cutoff value for normalization and trimming (default: %(default)s)")
    cmd.add_argument('-o', '--output_prefix')

    # map2mtz
    cmd = command("map2mtz", 'FFT map and write an mtz')
    cmd.add_argument("--map", required=True)
    cmd.add_argument("-d", '--resolution', type=float)
    cmd.add_argument('-o', '--output')

    # sm2mm
    cmd = command("sm2mm", 'Small molecule files (cif/hkl/res/ins) to macromolecules (pdb/mmcif/mtz)')
    cmd.add_argument('files', nargs='+', help='Cif/ins/res/hkl files')
    cmd.add_argument('-o', '--output_prefix')

    # mm2ins
    cmd = command("mm2ins", 'convert pdb/mmcif to ins for shelxl/olex2')
    cmd.add_argument('model')
    cmd.add_argument('--hklin')
    cmd.add_argument('-o', '--output')

    # seq
    cmd = command("seq", 'Print/align model sequence')
    cmd.add_argument("--model", required=True)
    cmd.add_argument('--seq', nargs="*", action="append", help="Sequence file(s)")
    cmd.add_argument('--scoring', nargs=6, type=int, default=(1, 0, -1, -1, 0, -1),
                     metavar=("match", "mismatch", "gapo", "gape", "good_gapo", "bad_gapo"),
                     help="scoring function. default: %(default)s")

    # dnarna
    cmd = command("dnarna", 'DNA to RNA or RNA to DNA model conversion')
    cmd.add_argument("model")
    direction = cmd.add_mutually_exclusive_group(required=True)
    direction.add_argument('--to_dna', action='store_true', help="To DNA")
    direction.add_argument('--to_rna', action='store_true', help="To RNA")
    cmd.add_argument('--chains', nargs="*", action="append", help="Select chains to convert")
    cmd.add_argument('-o', '--output')
# add_arguments()
|
|
317
|
+
|
|
318
|
+
def parse_args(arg_list):
    """Parse *arg_list* with a fresh parser that carries all subcommands.

    The parser is built by add_arguments(); the resulting namespace is
    returned unchanged.
    """
    top_parser = argparse.ArgumentParser()
    add_arguments(top_parser)
    namespace = top_parser.parse_args(arg_list)
    return namespace
# parse_args()
|
|
323
|
+
|
|
324
|
+
def symmodel(args):
    """Write the model with symmetry annotation, then a symmetry-expanded copy.

    Steps, in order:
      1. flatten ``args.chains`` and pick the chain naming mode from
         ``args.howtoname``;
      2. read the model, set the space group to "P 1" and take the cell
         from ``--map`` or ``--cell`` (otherwise the cell already in the
         model must pass ``is_crystal()``);
      3. optionally keep only the requested chains;
      4. call ``symmetry.update_ncs_from_args`` and, with ``--biomt``,
         replace the assemblies by one prepared from ``st.ncs``;
      5. write ``<prefix>`` (default: ``<model basename>_asu``) and, after
         ``model.expand_ncs``, ``<prefix>_expanded``.

    Raises SystemExit when only one of --twist/--rise is given, when no
    usable cell is available, or when chain selection leaves no atoms.
    ``args.chains`` and ``args.output_prfix`` are modified in place.
    """
    if args.chains:
        args.chains = sum(args.chains, [])
    model_format = fileio.check_model_format(args.model)

    naming_modes = {"dup": gemmi.HowToNameCopiedChain.Dup,
                    "short": gemmi.HowToNameCopiedChain.Short,
                    "number": gemmi.HowToNameCopiedChain.AddNumber}
    howtoname = naming_modes[args.howtoname]

    # Helical parameters must be given together or not at all.
    n_missing = (args.twist, args.rise).count(None)
    if n_missing == 1:
        raise SystemExit("ERROR: give both helical parameters --twist and --rise")
    is_helical = args.twist is not None

    st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
    st.spacegroup_hm = "P 1"

    map_and_start = None
    if args.map:
        logger.writeln("Reading cell from map")
        map_and_start = fileio.read_ccp4_map(args.map, header_only=True)
        st.cell = map_and_start[0].unit_cell
    elif args.cell:
        st.cell = gemmi.UnitCell(*args.cell)
    elif not st.cell.is_crystal():
        raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")

    if args.chains:
        logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
        wanted = set(args.chains)
        for mdl in st:
            # Collect names first; chains are removed after the scan.
            unwanted = [ch.name for ch in mdl if ch.name not in wanted]
            for chain_name in unwanted:
                mdl.remove_chain(chain_name)
        if st[0].count_atom_sites() == 0:
            raise SystemExit("ERROR: no atoms left. Check --chains option.")

    # NOTE(review): the filter tests a chain *name* for membership in the
    # model itself; what that evaluates to depends on gemmi's Model
    # membership semantics - confirm this yields the intended chain list.
    all_chains = [ch.name for ch in st[0] if ch.name not in st[0]]

    symmetry.update_ncs_from_args(args, st, map_and_start=map_and_start, filter_contacting=args.contacting_only)

    if args.biomt:
        st.assemblies.clear()
        st.raw_remarks = []
        assembly = model.prepare_assembly("1", all_chains, st.ncs, is_helical=is_helical)
        st.assemblies.append(assembly)

    if not args.output_prfix:
        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_asu"

    # With --pdb/--cif the requested formats are written; otherwise the
    # input format is reused.
    explicit_format = args.pdb or args.cif
    if explicit_format:
        fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif, cif_ref=cif_ref)
    else:
        fileio.write_model(st, file_name=args.output_prfix+model_format, cif_ref=cif_ref)

    # Sym expand
    model.expand_ncs(st, howtoname=howtoname)
    st.assemblies.clear()
    args.output_prfix += "_expanded"
    if explicit_format:
        fileio.write_model(st, args.output_prfix, pdb=args.pdb, cif=args.cif)
    else:
        fileio.write_model(st, file_name=args.output_prfix+model_format)
# symmodel()
|
|
384
|
+
|
|
385
|
+
def helical_biomt(args):
    """Write a model carrying a helical BIOMT assembly, plus an expanded copy.

    Both --twist and --rise are required.  The operators returned by
    ``symmetry.ncsops_from_args`` (limited by ``args.start``/``args.end``)
    are printed as 3x4 matrices, stored as assembly "1" and written to
    ``<prefix>`` (default: ``<model basename>_biomt``); a cif file is always
    requested and a pdb file too when the input format is ".pdb".  The
    structure is then transformed to that assembly and written as
    ``<prefix>_expanded`` in the input format.

    Raises SystemExit when a helical parameter is missing or no usable
    cell is available.  ``args.output_prfix`` is modified in place.
    """
    if None in (args.twist, args.rise):
        raise SystemExit("ERROR: give helical parameters --twist and --rise")

    model_format = fileio.check_model_format(args.model)
    naming_modes = {"dup": gemmi.HowToNameCopiedChain.Dup,
                    "short": gemmi.HowToNameCopiedChain.Short,
                    "number": gemmi.HowToNameCopiedChain.AddNumber}
    howtoname = naming_modes[args.howtoname]

    st, cif_ref = fileio.read_structure_from_pdb_and_mmcif(args.model)
    st.spacegroup_hm = "P 1"

    map_and_start = None
    if args.map:
        logger.writeln("Reading cell from map")
        map_and_start = fileio.read_ccp4_map(args.map, header_only=True)
        st.cell = map_and_start[0].unit_cell
    elif args.cell:
        st.cell = gemmi.UnitCell(*args.cell)
    elif not st.cell.is_crystal():
        raise SystemExit("Error: Unit cell parameters look wrong. Please use --map or --cell")

    # NOTE(review): the filter tests a chain *name* for membership in the
    # model itself; what that evaluates to depends on gemmi's Model
    # membership semantics - confirm this yields the intended chain list.
    all_chains = [ch.name for ch in st[0] if ch.name not in st[0]]

    ncsops = symmetry.ncsops_from_args(args, st.cell, map_and_start=map_and_start, st=st,
                                       helical_min_n=args.start, helical_max_n=args.end)
    #ncsops = [x for x in ncsops if not x.tr.is_identity()] # remove identity

    separator = "-------------------------------------------------------------"
    logger.writeln("")
    logger.writeln(separator)
    logger.writeln("You may need to write following matrices in OneDep interface:")
    for number, op in enumerate(ncsops, start=1):
        logger.writeln("")
        logger.writeln("operator {}".format(number))
        rotation = op.tr.mat.tolist()
        shift = op.tr.vec.tolist()
        for i in range(3):
            # One output row: three rotation elements, then the translation.
            row = " ".join("{:10.6f}".format(rotation[i][j]) for j in range(3))
            logger.writeln("{} {:14.5f}".format(row, shift[i]))
    logger.writeln(separator)
    logger.writeln("")

    # BIOMT
    st.assemblies.clear()
    st.raw_remarks = []
    assembly = model.prepare_assembly("1", all_chains, ncsops, is_helical=True)
    st.assemblies.append(assembly)

    if not args.output_prfix:
        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0] + "_biomt"

    input_is_pdb = (model_format == ".pdb")
    fileio.write_model(st, args.output_prfix, pdb=input_is_pdb, cif=True, cif_ref=cif_ref)
    logger.writeln("")
    logger.writeln("These {}.* files may be used for deposition (once OneDep implemented reading BIOMT from file..)".format(args.output_prfix))
    logger.writeln("")

    # BIOMT expand
    st.transform_to_assembly("1", howtoname)
    args.output_prfix += "_expanded"
    fileio.write_model(st, file_name=args.output_prfix+model_format)
    logger.writeln(" note that this expanded model file is just for visual inspection, *not* for deposition!")
# helical_biomt()
|
|
445
|
+
|
|
446
|
+
def symexpand(args):
    """Expand the NCS operators stored in a model file and write the result.

    With ``--split`` one file per operator that is not marked as given is
    written (``<prefix>_ncs_NN``, where NN is the 1-based position of the
    operator in ``st.ncs``); otherwise a single ``<prefix>_ncs_expanded``
    model is produced by ``model.expand_ncs``.  ``<prefix>`` defaults to the
    basename of the input model.  Output format follows ``--pdb``/``--cif``
    when given, else the format of the input file.

    Nothing is written (only a message is logged) when the model has no NCS
    operators or when all of them are already marked as given.
    ``args.chains`` and ``args.output_prfix`` are modified in place.
    """
    if args.chains:
        args.chains = sum(args.chains, [])
    model_format = fileio.check_model_format(args.model)
    if not args.split:
        # The naming mode is only needed (and only looked up) when a single
        # expanded model is written.
        howtoname = dict(dup=gemmi.HowToNameCopiedChain.Dup,
                         short=gemmi.HowToNameCopiedChain.Short,
                         number=gemmi.HowToNameCopiedChain.AddNumber)[args.howtoname]

    st = fileio.read_structure(args.model)

    if args.chains:
        logger.writeln("Keep {} chains only".format(" ".join(args.chains)))
        chains = set(args.chains)
        for m in st:
            to_del = [c.name for c in m if c.name not in chains]
            for c in to_del:
                m.remove_chain(c)

    if not args.output_prfix:
        args.output_prfix = fileio.splitext(os.path.basename(args.model))[0]

    def write_copy(st_out, prefix):
        # Explicit --pdb/--cif wins; otherwise reuse the input format.
        if args.pdb or args.cif:
            fileio.write_model(st_out, prefix, pdb=args.pdb, cif=args.cif)
        else:
            fileio.write_model(st_out, file_name=prefix+model_format)

    # TODO: st.assemblies (BIOMT) is not expanded here - should we support it?

    if len(st.ncs) == 0:
        logger.writeln("No NCS operators found. Exiting.")
        return

    symmetry.show_ncs_operators_axis_angle(st.ncs)
    if all(op.given for op in st.ncs):
        logger.writeln("All operators are already expanded (marked as given). Exiting.")
        return

    if args.split:
        for i, op in enumerate(st.ncs):
            if op.given:
                continue
            st_tmp = st.clone()
            for m in st_tmp:
                m.transform_pos_and_adp(op.tr)
            write_copy(st_tmp, args.output_prfix + "_ncs_{:02d}".format(i+1))
    else:
        st_tmp = st.clone()
        model.expand_ncs(st_tmp, howtoname=howtoname)
        write_copy(st_tmp, args.output_prfix + "_ncs_expanded")
# symexpand()
|
|
498
|
+
|
|
499
|
+
def h_add(args):
    """Add hydrogen atoms to a model and write it out.

    The monomer library is loaded for the structure (``args.monlib`` and the
    flattened ``args.ligand`` cif files), entities are set up, links are
    fixed and ``restraints.add_hydrogens`` is called with ``args.pos``.
    Without ``--output`` the file is named ``<model basename>_h`` plus the
    input format, and that name is logged.

    Raises SystemExit when adding hydrogens fails with RuntimeError.
    ``args.output`` and ``args.ligand`` are modified in place.
    """
    st = fileio.read_structure(args.model)
    model_format = fileio.check_model_format(args.model)

    if not args.output:
        stem = fileio.splitext(os.path.basename(args.model))[0]
        args.output = stem + "_h" + model_format
        logger.writeln("Output file: {}".format(args.output))

    # --ligand may be repeated; merge the per-occurrence lists into one.
    if args.ligand:
        args.ligand = sum(args.ligand, [])
    else:
        args.ligand = []

    monlib = restraints.load_monomer_library(st,
                                             monomer_dir=args.monlib,
                                             cif_files=args.ligand)
    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
    restraints.find_and_fix_links(st, monlib, find_metal_links=False, add_found=False)
    try:
        restraints.add_hydrogens(st, monlib, args.pos)
    except RuntimeError as err:
        raise SystemExit("Error: {}".format(err))

    fileio.write_model(st, file_name=args.output)
# h_add()
|
|
521
|
+
|
|
522
|
+
def add_op3(args):
    """Add a missing OP3 atom to the first residue of DNA/RNA polymers.

    Reads ``args.model``, ``args.output``, ``args.chains``, ``args.ligand``
    and ``args.monlib``.  When ``args.chains`` is given, only chains whose
    name is listed are processed.  When ``args.output`` is empty, the output
    name is the input base name plus ``_op3`` and the input model format.
    ``args.chains``, ``args.output`` and ``args.ligand`` are overwritten in
    place.

    For each selected chain whose polymer type is DNA or RNA, and whose first
    polymer residue has no OP3, a new atom is created as a copy of P, renamed
    OP3 with element O, and placed 1.517 away from P (presumably Angstrom)
    along the normalised sum of the OP1->P, OP2->P and O5'->P vectors.
    """
    if args.chains:
        args.chains = sum(args.chains, [])
    st = fileio.read_structure(args.model)
    model_format = fileio.check_model_format(args.model)

    if not args.output:
        tmp = fileio.splitext(os.path.basename(args.model))[0]
        args.output = tmp + "_op3" + model_format
        logger.writeln("Output file: {}".format(args.output))

    args.ligand = sum(args.ligand, []) if args.ligand else []
    # The returned library is not used below; the call is kept for whatever
    # checks/logging load_monomer_library performs.
    restraints.load_monomer_library(st,
                                    monomer_dir=args.monlib,
                                    cif_files=args.ligand)
    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)

    for chain in st[0]:
        if args.chains and chain.name not in args.chains:
            continue
        p = chain.get_polymer()
        if not p:
            continue
        if p.check_polymer_type() not in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna):
            continue
        r0 = p[0]
        # TODO: alias
        # TODO: altlocs
        alt = "*"
        if r0.find_atom("OP3", alt):
            continue
        a_op1 = r0.find_atom("OP1", alt)
        a_op2 = r0.find_atom("OP2", alt)
        a_o5p = r0.find_atom("O5'", alt)
        a_p = r0.find_atom("P", alt)
        # Identity test: find_atom() returns None for a missing atom.
        if any(a is None for a in (a_op1, a_op2, a_o5p, a_p)):
            logger.writeln(f"Error: atoms not found. skipping {chain.name}/{r0}")
            continue
        logger.writeln(f"Adding OP3 to {chain.name}/{r0}")

        # Compute the new position BEFORE adding the atom: add_atom() grows
        # the residue's atom list, which may invalidate the references
        # obtained from find_atom() above.
        v1 = a_p.pos - a_op1.pos
        v2 = a_p.pos - a_op2.pos
        v3 = a_p.pos - a_o5p.pos
        v = v1 + v2 + v3
        op3_pos = a_p.pos + v / v.length() * 1.517

        a_op3 = r0.add_atom(a_p)  # inherit ADP and occupancy
        a_op3.name = "OP3"
        a_op3.element = gemmi.Element("O")
        a_op3.pos = op3_pos

    fileio.write_model(st, file_name=args.output)
# add_op3()
|
|
568
|
+
|
|
569
|
+
def read_map_and_oversample(map_in=None, mtz_in=None, mtz_labs=None, oversample_pixel=None):
    """Return a map grid read from an MTZ or a CCP4 map, optionally resampled.

    ``mtz_in`` takes precedence over ``map_in``.  With ``mtz_in``,
    ``mtz_labs`` must be ``"F,PHI"`` (two comma-separated labels) and the map
    is computed with sample rate ``d_min / oversample_pixel``, or 3 when
    ``oversample_pixel`` is None.  With ``map_in``, the map is returned as
    read unless ``oversample_pixel`` is given, in which case it is
    transformed to structure factors and back with the same sample rate
    formula.

    Raises:
        SystemExit: when both ``mtz_in`` and ``map_in`` are None.
    """
    if mtz_in is None and map_in is None:
        raise SystemExit("Invalid input")

    resample = oversample_pixel is not None
    if mtz_in is not None:
        hkl = fileio.read_mmhkl(mtz_in)
        f_label, phi_label = mtz_labs.split(",")
        coeffs = hkl.get_f_phi(f_label, phi_label)
        if resample:
            rate = numpy.min(coeffs.make_d_array()) / oversample_pixel
        else:
            rate = 3
        grid = coeffs.transform_f_phi_to_map(sample_rate=rate)
    else:
        grid = fileio.read_ccp4_map(map_in)[0]
        if resample:
            coeffs = gemmi.transform_map_to_f_phi(grid).prepare_asu_data()
            rate = numpy.min(coeffs.make_d_array()) / oversample_pixel
            grid = coeffs.transform_f_phi_to_map(sample_rate=rate)

    if resample:
        logger.writeln(f"--oversample_pixel= {oversample_pixel} is requested.")
        logger.writeln(" recalculated grid:")
        logger.writeln(" {:4d} {:4d} {:4d}".format(*grid.shape))
        logger.writeln(" spacings:")
        logger.writeln(" {:.6f} {:.6f} {:.6f}".format(*grid.spacing))
        #maps.write_ccp4_map("{}_oversampled.mrc".format(output_prefix), gr)

    return grid
# read_map_and_oversample()
|
|
600
|
+
|
|
601
|
+
def map_peaks(args):
    """List positive and negative map peaks and the model atom nearest to each.

    Steps, in order:
      1. Read the model and the map (via ``read_map_and_oversample``).
      2. Find blobs above ``cutoff`` and, negated, below ``-cutoff``;
         ``cutoff`` is ``args.abs_level`` or ``args.sigma_level * std(map)``.
      3. Drop blobs that contact another blob from a different symmetry copy.
      4. Skip blobs larger than ``args.max_volume`` (if set) or whose
         interpolated value at the chosen position is below the cutoff, and
         assign the rest to the nearest model atom.
      5. Log a table, write ``<output_prefix>.json`` and a Coot script
         ``<output_prefix>_coot.py``.

    Returns None; returns early after logging when no peak survives.
    """
    st = fileio.read_structure(args.model)
    grid = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
                                   oversample_pixel=args.oversample_pixel)
    sigma = numpy.std(grid)
    use_abs = args.abs_level is not None
    # assuming mean(grid) = 0
    cutoff = args.abs_level if use_abs else args.sigma_level * sigma

    blobs = gemmi.find_blobs_by_flood_fill(grid, cutoff,
                                           min_volume=args.min_volume, min_score=0)
    blobs.extend(gemmi.find_blobs_by_flood_fill(grid, cutoff, negate=True,
                                                min_volume=args.min_volume, min_score=0))
    position_getters = {"peak": lambda blob: blob.peak_pos,
                        "centroid": lambda blob: blob.centroid}
    getpos = position_getters[args.blob_pos]
    st_peaks = model.st_from_positions([getpos(blob) for blob in blobs])
    st_peaks.cell = st.cell
    st_peaks.ncs = st.ncs
    st_peaks.setup_cell_images()
    logger.writeln(f"{len(blobs)} peaks detected")
    #st_peaks.write_pdb("peaks.pdb")

    # Filter symmetry related
    # NOTE(review): seqid.num of a peak residue is used as its index into
    # both st_peaks[0][0] and blobs -- relies on model.st_from_positions.
    peak_search = gemmi.NeighborSearch(st_peaks[0], st_peaks.cell, 5.).populate()
    contact_search = gemmi.ContactSearch(1.)
    contact_search.ignore = gemmi.ContactSearch.Ignore.SameAsu
    duplicates = set()
    for contact in contact_search.find_contacts(peak_search):
        if contact.partner1.residue.seqid.num in duplicates:
            continue
        duplicates.add(contact.partner2.residue.seqid.num)
    # Delete from the highest index down so remaining indices stay valid.
    for idx in sorted(duplicates, reverse=True):
        del st_peaks[0][0][idx]
        del blobs[idx]
    #st_peaks.write_pdb("peaks_asu.pdb")
    logger.writeln(f"{len(blobs)} peaks after removing symmetry equivalents")

    # Assign to nearest atom
    atom_search = gemmi.NeighborSearch(st[0], st.cell, 10.).populate() # blob is rejected if > 10 A. ok?
    peaks = []
    for blob in blobs:
        bpos = getpos(blob)
        map_val = grid.interpolate_value(bpos)
        too_large = args.max_volume is not None and blob.volume > args.max_volume
        if too_large or abs(map_val) < cutoff:
            continue
        mark = atom_search.find_nearest_atom(bpos)
        if mark is None: # this should not happen
            logger.writeln("no nearest atom: value={:.2e} volume= {:.2f} pos= {}".format(map_val, blob.volume, bpos))
            continue
        chain = st[0][mark.chain_idx]
        res = chain[mark.residue_idx]
        atom = res[mark.atom_idx]
        # Move the peak to the symmetry/periodic image closest to the atom.
        image = st.cell.find_nearest_image(atom.pos, bpos, gemmi.Asu.Any)
        if st.cell.is_crystal():
            bpos = st.cell.find_nearest_pbc_position(atom.pos, bpos, image.sym_idx)
        elif image.sym_idx > 0:
            op = st.cell.images[image.sym_idx - 1]
            bpos = st.cell.orthogonalize(op.apply(st.cell.fractionalize(bpos)))
        peaks.append((map_val, blob.volume, bpos, atom.pos.dist(bpos), chain, res, atom))

    if not peaks:
        logger.writeln("No peaks found. Change parameter(s).")
        return

    # Print and write coot script
    peaks.sort(reverse=True, key=lambda rec: (abs(rec[0]), rec[1]))
    for_coot = []
    for_df = []
    for rank, (map_val, volume, mpos, dist, chain, res, atom) in enumerate(peaks, start=1):
        xyz = (mpos.x, mpos.y, mpos.z)
        mpos_str = "({: 7.2f},{: 7.2f},{: 7.2f})".format(*xyz)
        altloc_suffix = "." + atom.altloc if atom.altloc != "\0" else ""
        atom_name = atom.name + altloc_suffix
        atom_str = "{}/{}/{}".format(chain.name, res.seqid, atom_name)
        if not use_abs:
            map_val = map_val / sigma
        lab_str = "Peak {:4d} value= {: .2e} volume= {:5.1f} pos= {} closest= {:10s} dist= {:.2f}".format(rank, map_val, volume, mpos_str, atom_str, dist)
        for_coot.append((lab_str, xyz))
        for_df.append((map_val, volume, *xyz, chain.name, str(res.seqid), res.name, atom_name, dist))

    value_column = "map_value" if use_abs else "sigma_level"
    df = pandas.DataFrame(for_df, columns=[value_column,
                                           "volume", "x", "y", "z", "chain", "seqid", "residue", "atom", "dist"])
    logger.writeln(df.to_string())
    with open(args.output_prefix + ".json", "w") as ofs:
        df.to_json(ofs, orient="records", indent=2)
        logger.writeln(f"saved: {ofs.name}")

    coot_out = args.output_prefix + "_coot.py"
    # The single "{}" in the template is replaced by repr(for_coot).
    coot_template = """\
from __future__ import absolute_import, division, print_function
import gtk
class coot_serval_map_peak_list:
    def __init__(self):
        window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        window.set_title("Map peaks (Servalcat)")
        window.set_default_size(600, 600)
        scrolled_win = gtk.ScrolledWindow()
        scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
        vbox = gtk.VBox(False, 2)
        frame_vbox = gtk.VBox(False, 0)
        frame_vbox.set_border_width(3)
        self.btns = []
        self.data = {}
        self.add_data(frame_vbox)
        scrolled_win.add_with_viewport(frame_vbox)
        vbox.pack_start(scrolled_win, True, True, 0)
        window.add(vbox)
        window.show_all()
        self.toggled(self.btns[0], 0)

    def toggled(self, btn, i):
        if btn.get_active():
            set_rotation_centre(*self.data[i][1])
            add_status_bar_text(self.data[i][0])

    def add_data(self, vbox):
        for i, d in enumerate(self.data):
            self.btns.append(gtk.RadioButton(None if i == 0 else self.btns[0], d[0]))
            vbox.pack_start(self.btns[-1], False, False, 0)
            self.btns[-1].connect('toggled', self.toggled, i)

gui = coot_serval_map_peak_list()
"""
    with open(coot_out, "w") as ofs:
        ofs.write(coot_template.format(for_coot))
    logger.writeln("\nRun:")
    logger.writeln(f"coot --script {coot_out}")
# map_peaks()
|
|
725
|
+
|
|
726
|
+
def h_density_analysis(args):
    """Report which hydrogen atoms of the model coincide with a map peak.

    Reads ``args.model``, ``args.map`` / ``args.mtz`` / ``args.mtz_labels``,
    ``args.oversample_pixel``, ``args.abs_level`` / ``args.sigma_level``,
    ``args.min_volume``, ``args.max_volume``, ``args.max_dist``,
    ``args.blob_pos`` and ``args.output_prefix`` (defaulted to the model base
    name plus ``_hana`` when None).

    A hydrogen is counted as "found" when the blob nearest to it is closer
    than ``args.max_dist``, has volume below ``args.max_volume`` and an
    interpolated map value above the cutoff.  Writes
    ``<prefix>_peaks.mmcif`` (all blob positions; occupancy 1 = assigned to a
    hydrogen, B = map value) and ``<prefix>_h_with_peak<format>`` (copy of
    the model with unmatched hydrogens removed).

    Returns None.  When no blob is found at all, a message is logged and no
    file is written.

    Raises:
        SystemExit: if the model has no hydrogen atoms.
    """
    #if args.source != "electron":
    #    raise SystemExit("Only electron source is supported.")
    model_format = fileio.check_model_format(args.model)
    st = fileio.read_structure(args.model)
    if not st[0].has_hydrogen():
        raise SystemExit("No hydrogen in model.")

    if args.output_prefix is None:
        args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_hana"

    gr = read_map_and_oversample(map_in=args.map, mtz_in=args.mtz, mtz_labs=args.mtz_labels,
                                 oversample_pixel=args.oversample_pixel)

    if args.abs_level is not None:
        cutoff = args.abs_level
    else:
        cutoff = args.sigma_level * numpy.std(gr) # assuming mean(gr) = 0

    blobs = gemmi.find_blobs_by_flood_fill(gr, cutoff,
                                           min_volume=args.min_volume, min_score=0)
    if not blobs:
        # Without this guard the KD-tree construction / nearest-blob lookup
        # below fails on an empty point set.
        logger.writeln("No peaks found. Change parameter(s).")
        return

    getpos = dict(peak=lambda x: x.peak_pos,
                  centroid=lambda x: x.centroid)[args.blob_pos]

    peaks = [getpos(b).tolist() for b in blobs]
    kdtree = scipy.spatial.cKDTree(peaks)
    found = []
    n_hydr = 0
    h_assigned = [0] * len(blobs)
    st2 = st.clone()
    for ic, chain in enumerate(st[0]):
        for ir, res in enumerate(chain):
            # Walk atoms backwards so that deleting index ia from the clone
            # does not shift the indices still to be visited.
            for ia, atom in reversed(list(enumerate(res))):
                if not atom.is_hydrogen():
                    continue
                n_hydr += 1
                dist, idx = kdtree.query(atom.pos.tolist(), k=1, p=2)
                blob = blobs[idx]
                blob_pos = getpos(blob)
                map_val = gr.interpolate_value(blob_pos)
                if dist < args.max_dist and blob.volume < args.max_volume and map_val > cutoff:
                    found.append((blob_pos, map_val, dist, blob.volume,
                                  chain.name, str(res.seqid), res.name,
                                  atom.name, atom.altloc.replace("\0", "")))
                    h_assigned[idx] = 1
                else:
                    del st2[0][ic][ir][ia]

    found.sort(key=lambda x: x[1], reverse=True)
    logger.writeln("")
    logger.writeln("Found hydrogen peaks:")
    logger.writeln("dist map vol atom")
    for _, map_val, dist, volume, chain, resi, resn, atom, alt in found:
        logger.writeln("{:.2f} {:.2f} {:.2f} {}/{} {}/{}{}".format(dist, map_val, volume,
                                                                   chain, resn, resi,
                                                                   atom, "."+alt if alt else ""))

    logger.writeln("")
    logger.writeln("Result:")
    logger.writeln(" number of hydrogen in the model : {}".format(n_hydr))
    # n_hydr >= 1 here because has_hydrogen() was checked above.
    logger.writeln(" number of peaks close to hydrogen: {} ({:.1%})".format(len(found), len(found)/n_hydr))
    logger.writeln("")

    st_peaks = model.st_from_positions([getpos(b) for b in blobs],
                                       bs=[gr.interpolate_value(getpos(b)) for b in blobs],
                                       qs=h_assigned)
    fileio.write_model(st_peaks, file_name="{}_peaks.mmcif".format(args.output_prefix))
    logger.writeln(" this file includes peak positions")
    logger.writeln(" occ=1: hydrogen assigned, occ=0: unassigned.")
    logger.writeln(" B: density value at {}".format(args.blob_pos))
    logger.writeln("")

    fileio.write_model(st2, file_name="{}_h_with_peak{}".format(args.output_prefix, model_format))
    logger.writeln(" this file is a copy of input model, where hydrogen atoms without peaks are removed.")
# h_density_analysis()
|
|
798
|
+
|
|
799
|
+
def fix_link(args):
    """Run link detection/fixing on ``args.model`` and write the result.

    Reads ``args.model``, ``args.output``, ``args.ligand``, ``args.monlib``,
    ``args.bond_margin`` and ``args.metal_margin``.  When ``args.output`` is
    empty, the output name is the input base name plus ``_fixlink`` and the
    input model format.  ``args.output`` and ``args.ligand`` are overwritten
    in place.
    """
    structure = fileio.read_structure(args.model)
    fmt = fileio.check_model_format(args.model)

    if not args.output:
        stem = fileio.splitext(os.path.basename(args.model))[0]
        args.output = stem + "_fixlink" + fmt
        logger.writeln(f"Output file: {args.output}")

    # args.ligand is a list of lists here; flatten it (or default to empty).
    if args.ligand:
        args.ligand = sum(args.ligand, [])
    else:
        args.ligand = []

    monomer_library = restraints.load_monomer_library(structure,
                                                      monomer_dir=args.monlib,
                                                      cif_files=args.ligand)
    model.setup_entities(structure,
                         clear=True,
                         force_subchain_names=True,
                         overwrite_entity_type=True)
    restraints.find_and_fix_links(structure, monomer_library,
                                  bond_margin=args.bond_margin,
                                  metal_margin=args.metal_margin)
    fileio.write_model(structure, file_name=args.output)
# fix_link()
|
|
817
|
+
|
|
818
|
+
def merge_models(args):
    """Append the chains of every further model file to the first model.

    Reads ``args.models`` (file names; the first one is the base structure)
    and writes the merged structure to ``args.output``.  Chains are taken
    from the first model of each file and added with ``unique_name=True``;
    a chain whose name changes on insertion is reported in the log.
    """
    first, *others = args.models
    logger.writeln("Reading file 1: {}".format(first))
    st = fileio.read_structure(first)
    logger.writeln(" chains {}".format(" ".join(c.name for c in st[0])))

    for file_no, path in enumerate(others, start=2):
        logger.writeln("Reading file {:3d}: {}".format(file_no, path))
        st2 = fileio.read_structure(path)
        for c in st2[0]:
            org_id = c.name
            added = st[0].add_chain(c, unique_name=True)
            if org_id != added.name:
                logger.writeln(" chain {} merged (ID changed to {})".format(org_id, added.name))
            else:
                logger.writeln(" chain {} merged".format(org_id))

    fileio.write_model(st, file_name=args.output)
# merge_models()
|
|
836
|
+
|
|
837
|
+
def merge_dicts(args):
    """Command entry point: pass ``args.cifs`` and ``args.output`` to
    ``fileio.merge_ligand_cif``, which does all the work.

    Presumably ``args.cifs`` are the input dictionary files and
    ``args.output`` the merged file name -- confirm against
    ``fileio.merge_ligand_cif``.
    """
    fileio.merge_ligand_cif(args.cifs, args.output)
# merge_dicts()
|
|
840
|
+
|
|
841
|
+
def geometry(args):
    """Report geometry statistics and outliers for the model in ``args.model``.

    Always writes ``<prefix>_summary.json`` and ``<prefix>_outliers.json``.
    Optional outputs:
      * ``args.dump_all``: ``<prefix>_restraints.json`` with bond and vdw
        restraints (debug, unfinished);
      * ``args.check_skew``: logs bond-length deviation statistics and writes
        a Plotly histogram to ``<prefix>_bond_dev.html``;
      * ``args.per_atom_score_as_b``: writes the model with per-atom scores
        stored in the B-factor field (this modifies the structure).

    ``args.output_prefix`` defaults to the model base name plus ``_geom``;
    ``args.ligand`` and ``args.output_prefix`` are overwritten in place.

    Raises:
        SystemExit: if loading the monomer library or preparing the topology
            raises ``RuntimeError``.
    """
    if args.ligand: args.ligand = sum(args.ligand, [])
    if not args.output_prefix: args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_geom"
    keywords = []
    if args.keywords:
        keywords = sum(args.keywords, [])
    if args.keyword_file:
        # Read each keyword file under a context manager so the handle is
        # closed deterministically.
        for kw_file in sum(args.keyword_file, []):
            with open(kw_file) as ifs:
                keywords.extend(ifs)
    params = refmac_keywords.parse_keywords(keywords)
    st = fileio.read_structure(args.model)
    if args.ignore_h:
        st.remove_hydrogens()
    try:
        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
                                                 stop_for_unknowns=True, params=params)
    except RuntimeError as e:
        raise SystemExit("Error: {}".format(e))

    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
    restraints.find_and_fix_links(st, monlib)
    try:
        topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
                                              check_hydrogen=True, params=params)
    except RuntimeError as e:
        raise SystemExit("Error: {}".format(e))

    refine_params = RefineParams(st, refine_xyz=True)
    if args.selection:
        # Weight 1 for selected atoms, 0 for the rest.
        # NOTE(review): indexes by atom.serial-1, i.e. assumes serials run
        # 1..N in atom order -- confirm they are set that way upstream.
        sel = gemmi.Selection(args.selection)
        geom_w = [0. for _ in range(st[0].count_atom_sites())]
        n = 0
        for chain in sel.chains(st[0]):
            for res in sel.residues(chain):
                for atom in sel.atoms(res):
                    geom_w[atom.serial-1] = 1.
                    n += 1
        logger.writeln("Using selection '{}': {} atoms out of {}".format(args.selection, n, len(geom_w)))
        refine_params.geom_weights[:] = geom_w

    geom = Geom(st, topo, monlib, refine_params,
                params=params, use_nucleus=args.nucleus)
    for k in geom.outlier_sigmas: geom.outlier_sigmas[k] = args.sigma
    geom.setup_nonbonded()
    ret = geom.show_model_stats()

    with open(args.output_prefix + "_summary.json", "w") as ofs:
        ret["summary"].to_json(ofs, indent=2)
        logger.writeln("saved: {}".format(ofs.name))
    with open(args.output_prefix + "_outliers.json", "w") as ofs:
        # Convert each outlier table to a list of records for json.dump.
        for k in ret["outliers"]:
            ret["outliers"][k] = ret["outliers"][k].to_dict(orient="records")
        json.dump(ret["outliers"], ofs, indent=2)
        logger.writeln("saved: {}".format(ofs.name))

    if args.dump_all: # for debug, unfinished
        dd = {"bonds": [], "vdw": []}
        for t in geom.geom.bonds:
            dd["bonds"].append({"atoms":[str(geom.lookup[x]) for x in t.atoms],
                                "ideals": [{"ideal":x.value, "sigma":x.sigma} for x in t.values],
                                "alpha": t.alpha, "type": t.type,
                                "sym_idx": t.sym_idx, "pbc_shift": t.pbc_shift})
        for t in geom.geom.vdws:
            dd["vdw"].append({"atoms":[str(geom.lookup[x]) for x in t.atoms],
                              "ideal": t.value, "sigma": t.sigma,
                              "type": t.type, "sym_idx": t.sym_idx, "pbc_shift": t.pbc_shift})
        with open(args.output_prefix + "_restraints.json", "w") as ofs:
            json.dump(dd, ofs)
            logger.writeln("saved: {}".format(ofs.name))
    if args.check_skew:
        logger.writeln("\nChecking skewness of bond length deviation")
        # better to ignore hydrogen
        tab = geom.geom.reporting.get_bond_outliers(use_nucleus=geom.use_nucleus, min_z=0)
        for i in range(2):
            tab[f"atom{i+1}"] = [str(geom.lookup[r.atoms[i]]) for r in tab["restr"]]
        del tab["restr"]
        df = pandas.DataFrame(tab)
        df["dev"] = df["value"] - df["ideal"]
        df = df.reindex(df.dev.abs().sort_values(ascending=False).index)
        logger.writeln("Bond length deviations:")
        logger.writeln(df.to_string(max_rows=20))
        q1, q2, q3 = numpy.percentile(df["dev"], [25, 50, 75])
        # Quartile-based skewness: (Q1 + Q3 - 2*median) / (Q3 - Q1).
        sk2 = (q1 + q3 - 2 * q2) / (q3 - q1)
        logger.writeln("bond_dev_median= {:.6f}".format(q2))
        logger.writeln("bond_dev_skew= {:.4f}".format(df["dev"].skew()))
        logger.writeln("bond_dev_sk2= {:.4f}".format(sk2))
        with open(args.output_prefix + "_bond_dev.html", "w") as ofs:
            ofs.write("""\
<html>
<head>
<meta charset="utf-8" />
<script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
</head>
<body>
<div id="hist"></div>
<script>
var trace = {
x: %s,
type: 'histogram'
};
var layout = {
title: "median: %.4f, sk2: %.4f",
xaxis: {title: "bond distance - ideal"},
yaxis: {title: "count"},
shapes: [{
type: 'line',
yref: 'paper',
x0: 0, y0: 0,
x1: 0, y1: 1}]
};
target = document.getElementById('hist');
Plotly.newPlot(target, [trace], layout);
</script>
</body>
</html>
""" % (str(list(df.dev)), q2, sk2))
            logger.writeln("check histogram: {}".format(ofs.name))

    # Note that this modifies st
    if args.per_atom_score_as_b:
        model_format = fileio.check_model_format(args.model)
        peratom = geom.geom.reporting.per_atom_score(len(geom.atoms), geom.use_nucleus, "mean")
        for i, score in enumerate(peratom["total"]):
            geom.atoms[i].b_iso = score
        fileio.write_model(st, file_name="{}_per_atom_score{}".format(args.output_prefix, model_format))
# geometry()
|
|
965
|
+
|
|
966
|
+
def compare_conf(args):
    """Compare torsion angles between NCS-related residues of the model(s).

    All files in ``args.models`` are merged into one structure; when more
    than one file is given, chain names are prefixed with ``<file number>_``.
    For every pair of chains/residues reported by
    ``restraints.prepare_ncs_restraints``, torsions with the same label are
    compared.  Differences above ``args.min_diff`` are logged as a table and
    written into a Coot script ``<output_prefix>_coot.py``.

    With ``args.same_chain`` and several input files, only chain pairs with
    the same name after the file-number prefix are compared.

    Raises:
        SystemExit: if loading the monomer library or preparing the topology
            raises ``RuntimeError``.
    """
    def angle_abs_diff(a, b, full=360.):
        # from gemmi/math.hpp
        delta = abs(a - b)
        if delta > full:
            delta -= numpy.floor(delta / full) * full
        return min(delta, full - delta)
    # angle_abs_diff()

    if args.ligand:
        args.ligand = sum(args.ligand, [])

    multiple_files = len(args.models) > 1
    st = None
    for file_idx, path in enumerate(args.models):
        loaded = fileio.read_structure(path)
        if multiple_files:
            for chain in loaded[0]:
                chain.name = f"{file_idx+1}_{chain.name}"
        if file_idx == 0:
            st = loaded
            continue
        for chain in loaded[0]:
            st[0].add_chain(chain)

    try:
        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
                                                 stop_for_unknowns=True)
    except RuntimeError as err:
        raise SystemExit(f"Error: {err}")

    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
    try:
        topo, _ = restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange,
                                              check_hydrogen=False)
    except RuntimeError as err:
        raise SystemExit(f"Error: {err}")

    ncslist = restraints.prepare_ncs_restraints(st)
    lookup = {cra.atom: cra for cra in st[0].all()}
    ptypes = {ent.name: ent.polymer_type for ent in st.entities}
    resn_lookup = {(chain.name, res.seqid): res.name for chain in st[0] for res in chain}

    peptide_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD)
    na_types = (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    peptide_labels = ("omega", "phi", "psi")
    na_labels = ("C2e-chi", "alpha", "beta", "gamma", "C2e-nyu0", "epsilon", "zeta")

    # confs[chain name][seqid][torsion label] = angle in degrees
    confs = {}
    for tors in topo.torsions:
        cra = lookup[tors.atoms[0]]
        ptype = ptypes[cra.residue.entity_id]
        label = tors.restr.label
        peptide_hit = ptype in peptide_types and (label.startswith("chi") or label in peptide_labels)
        na_hit = ptype in na_types and label in na_labels
        if not (peptide_hit or na_hit):
            continue
        per_chain = confs.setdefault(cra.chain.name, {})
        per_chain.setdefault(cra.residue.seqid, {})[label] = numpy.rad2deg(tors.calculate())

    # Torsions compared with a period of 180 instead of 360 degrees.
    fulls = {("ARG", "chi5"): 180., ("TYR", "chi2"): 180., ("PHE", "chi2"): 180., ("ASP", "chi2"): 180., ("GLU", "chi3"): 180.}
    ret = []
    for_coot = []
    for ncs in ncslist.ncss:
        c1, c2 = ncs.chains
        if args.same_chain and multiple_files and c1[c1.index("_"):] != c2[c2.index("_"):]:
            continue
        for s1, s2 in ncs.seqids:
            if not (c1 in confs and s1 in confs[c1] and c2 in confs and s2 in confs[c2]):
                continue
            conf1 = confs[c1][s1]
            conf2 = confs[c2][s2]
            resn = resn_lookup[(c1, s1)]
            for label in conf1:
                if label not in conf2:
                    continue
                period = fulls.get((resn, label), 360.)
                diff = float(angle_abs_diff(conf1[label], conf2[label], period))
                ret.append((c1, s1, c2, s2, resn, label, conf1[label], conf2[label], diff))
                if diff > args.min_diff:
                    for_coot.append((c1, s1.num, c2, s2.num, resn, label, diff))

    df = pandas.DataFrame(ret, columns=["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "conf_1", "conf_2", "diff"])
    df = df.sort_values("diff", ascending=False)
    logger.writeln(f"\nList of torsion angle differences (>{args.min_diff})")
    logger.writeln(df[df["diff"] > args.min_diff].to_string(index=False))

    for_coot.sort(key=lambda row: -row[-1])
    coot_out = args.output_prefix + "_coot.py"
    # https://python-gtk-3-tutorial.readthedocs.io/en/latest/treeview.html
    # The single "{}" in the template is replaced by repr(for_coot).
    coot_template = """\
from __future__ import absolute_import, division, print_function
import re
import gtk
class coot_serval_conf_list:
    def __init__(self):
        window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        window.set_title("Different conformations (Servalcat)")
        window.set_default_size(600, 600)
        scrolled_win = gtk.ScrolledWindow()
        scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)
        vbox = gtk.VBox(False, 2)
        self.liststore = gtk.ListStore(str, int, str, int, str, str, float)
        self.filter = self.liststore.filter_new()
        self.treeview = gtk.TreeView(model=self.filter)
        for i, column_title in enumerate(["chain_1", "seq_1", "chain_2", "seq_2", "resn", "label", "diff"]):
            renderer = gtk.CellRendererText()
            column = gtk.TreeViewColumn(column_title, renderer, text=i)
            self.treeview.append_column(column)
        self.data = {}
        self.add_data()
        scrolled_win.add_with_viewport(self.treeview) # add?
        vbox.pack_start(scrolled_win, True, True, 0)
        window.add(vbox)
        window.show_all()
        self.treeview.connect("row-activated", self.on_row_activated)

    def on_row_activated(self, treeview, path, column):
        assert len(path) == 1
        col_idx = [i for i, c in enumerate(treeview.get_columns()) if column == c][0]
        row = self.liststore[path[0]]
        if col_idx < 2:
            chain, resi = row[0], row[1]
        elif col_idx < 4:
            chain, resi = row[2], row[3]
        else:
            return
        if re.search("^[0-9]+_[0-9A-Za-z]", chain):
            chain = chain[chain.index("_")+1:]
        imol = active_atom_spec()[1][0]
        for name in (" CA ", " C1'"):
            a = get_atom(imol, chain, resi, "", name)
            if a:
                set_rotation_center(*a[2])
                break

    def add_data(self):
        for i, d in enumerate(self.data):
            self.liststore.append(d)

gui = coot_serval_conf_list()
"""
    with open(coot_out, "w") as ofs:
        ofs.write(coot_template.format(for_coot))
    logger.writeln("\nRun:")
    logger.writeln(f"coot --script {coot_out}")
# compare_conf()
|
|
1094
|
+
|
|
1095
|
+
def adp_stats(args):
    """Run ADP analysis on the model and write an isotropic-B histogram page.

    Reads ``args.model`` and ``args.output_prefix`` (defaulted to the model
    base name plus ``_adp``).  Calls ``model.adp_analysis`` and writes
    ``<prefix>_hist.html``, a Plotly page with one stacked histogram trace
    for all atoms and, when the model has more than one chain object, one
    trace per chain name.  Only atoms with occupancy > 0 are included.
    """
    if not args.output_prefix:
        args.output_prefix = fileio.splitext(os.path.basename(args.model))[0] + "_adp"
    st = fileio.read_structure(args.model)
    model.adp_analysis(st)
    all_b = [cra.atom.b_iso for cra in st[0].all() if cra.atom.occ > 0]

    # bin width from Freedman–Diaconis rule
    _, q25, q75, _ = numpy.quantile(all_b, [0, 0.25, 0.75, 1])
    bin_width = 2 * (q75 - q25) / len(all_b)**(1/3.)

    # for plotly
    all_values = ",".join("%.2f" % b for b in all_b)
    traces = ["x: [%s], type: 'histogram', name: 'All', xbins: {size: %f}"
              % (all_values, bin_width)]
    if len(st[0]) > 1:
        # Chain objects sharing a name are pooled into one trace.
        per_chain = {}
        for chain in st[0]:
            values = per_chain.setdefault(chain.name, [])
            values.extend(atom.b_iso for res in chain for atom in res if atom.occ > 0)
        for name, values in per_chain.items():
            joined = ",".join("%.2f" % b for b in values)
            traces.append("x: [%s], type: 'histogram', name: 'Chain %s'" % (joined, name))

    trace_names = ",".join("trace%d" % no for no in range(1, len(traces) + 1))
    with open(args.output_prefix + "_hist.html", "w") as ofs:
        ofs.write("""\
<html>
<head>
<meta charset="utf-8" />
<script src="https://cdn.plot.ly/plotly-2.20.0.min.js" charset="utf-8"></script>
</head>
<body>
<div id="hist"></div>
<script>
""")
        for no, trace in enumerate(traces, start=1):
            ofs.write("var trace%d = {%s};\n" % (no, trace))
        ofs.write("""\
var layout = {
title: "isotropic B histogram",
xaxis: {title: "B"},
yaxis: {title: "count"},
barmode: "stack"
};
target = document.getElementById('hist');
Plotly.newPlot(target, [%s], layout);
</script>
</body>
</html>
""" % trace_names)
        logger.writeln(f"check histogram: {ofs.name}")
# adp_stats()
|
|
1144
|
+
|
|
1145
|
+
def show_power(args):
    """Write a table of log10(mean |F|^2) per resolution bin for input maps.

    Inputs are ``args.map`` (single maps) and ``args.halfmaps`` (consecutive
    pairs), both flattened with ``sum(..., [])``.  Each input is passed with
    the optional ``args.mask`` to ``maps.mask_and_fft_maps`` and gets a
    column label ``F01``, ``F02``, ...  The resolution limit is
    ``args.resolution`` or, when None, the Nyquist resolution of the first
    map of each input.  The table is written to ``<output_prefix>.log``.

    Raises:
        RuntimeError: odd number of half maps, or unit cells differ between
            inputs.
        SystemExit: no map was given.
    """
    maps_in = []
    if args.map:
        # NOTE(review): these print() calls look like debug leftovers; kept
        # so that stdout output is unchanged.
        print(args.map)
        print(sum(args.map, []))
        maps_in = [(f,) for f in sum(args.map, [])]

    if args.halfmaps:
        args.halfmaps = sum(args.halfmaps, [])
        if len(args.halfmaps)%2 != 0:
            raise RuntimeError("Number of half maps is not even.")
        maps_in.extend([(args.halfmaps[2*i],args.halfmaps[2*i+1]) for i in range(len(args.halfmaps)//2)])

    if args.mask:
        mask = fileio.read_ccp4_map(args.mask)[0]
    else:
        mask = None

    hkldata = None
    labs = []
    for mapin in maps_in: # TODO rewrite in faster way
        ms = [fileio.read_ccp4_map(f) for f in mapin]
        d_min = args.resolution
        if d_min is None:
            d_min = maps.nyquist_resolution(ms[0][0])
            logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(d_min))
        tmp = maps.mask_and_fft_maps(ms, d_min, mask)
        labs.append("F{:02d}".format(len(labs)+1))
        tmp.df.rename(columns=dict(FP=labs[-1]), inplace=True)
        if hkldata is None:
            hkldata = tmp
        else:
            if hkldata.cell.parameters != tmp.cell.parameters: raise RuntimeError("Different unit cell!")
            hkldata.merge(tmp.df[["H","K","L",labs[-1]]])

    if not labs:
        raise SystemExit("No map files given. Exiting.")

    hkldata.setup_relion_binning("stat")

    # Context manager: the log file is closed even if writing the table fails.
    with open(args.output_prefix+".log", "w") as ofs:
        ofs.write("Input:\n")
        for lab, mapin in zip(labs, maps_in):
            ofs.write("{} from {}\n".format(lab, " ".join(mapin)))
        ofs.write("\n")

        # Data columns start at column 5, after 1/resol^2, n, d_max, d_min.
        ofs.write("""$TABLE: Power spectrum :
$GRAPHS
: log10(Mn(|F|^2)) :A:1,{}:
$$
1/resol^2 n d_max d_min {}
$$
$$
""".format(",".join([str(i+5) for i in range(len(labs))]), " ".join(labs)))
        print(hkldata.df)
        abssqr = dict((lab, numpy.abs(hkldata.df[lab].to_numpy())**2) for lab in labs)
        for i_bin, idxes in hkldata.binned("stat"):
            bin_d_min = hkldata.binned_df["stat"].d_min[i_bin]
            bin_d_max = hkldata.binned_df["stat"].d_max[i_bin]
            ofs.write("{:.4f} {:7d} {:7.3f} {:7.3f}".format(1/bin_d_min**2, len(idxes), bin_d_max, bin_d_min,))
            for lab in labs:
                pwr = numpy.log10(numpy.average(abssqr[lab][idxes]))
                ofs.write(" {:.4e}".format(pwr))
            ofs.write("\n")
        ofs.write("$$\n")
# show_power()
|
|
1212
|
+
|
|
1213
|
+
def fcalc(args):
    """Calculate structure factors from a model and write them to an MTZ file.

    Reads the model given by ``args.model``, optionally strips charges and
    expands NCS, assigns the unit cell (from ``--cell`` or from a box around
    the model), computes FC with either the FFT or the direct method and
    writes ``<output_prefix>.mtz``.

    Raises:
        SystemExit: when both ``--auto_box_with_padding`` and ``--cell`` are
            given, or when no usable unit cell is available.
    """
    padding = args.auto_box_with_padding
    if padding is not None and args.cell is not None:
        raise SystemExit("Error: you cannot specify both --auto_box_with_padding and --cell")

    if args.ligand:
        # --ligand may be given several times; flatten the list of lists.
        args.ligand = sum(args.ligand, [])
    if not args.output_prefix:
        model_stem = fileio.splitext(os.path.basename(args.model))[0]
        args.output_prefix = "{}_fcalc_{}".format(model_stem, args.source)

    st = fileio.read_structure(args.model)
    ccu = model.CustomCoefUtil()
    if not args.keep_charges:
        model.remove_charge([st])
    if args.source == "custom":
        # custom scattering coefficients are taken from the model file itself
        ccu.read_from_cif(st, args.model)
        ccu.show_info()
        ccu.set_coeffs(st)
    else:
        model.check_atomsf([st], args.source)
    if not args.no_expand_ncs:
        model.expand_ncs(st)

    if args.cell is not None:
        st.cell = gemmi.UnitCell(*args.cell)
    elif args.auto_box_with_padding is not None:
        st.cell = model.box_from_model(st[0], args.auto_box_with_padding)
        st.spacegroup_hm = "P 1"
        logger.writeln("Box size from the model with padding of {}: {}".format(args.auto_box_with_padding, st.cell.parameters))

    if not st.cell.is_crystal():
        raise SystemExit("ERROR: No unit cell information. Give --cell or --auto_box_with_padding.")

    # The monomer library is only loaded for electron scattering with hydrogens.
    monlib = None
    if args.source=="electron" and st[0].has_hydrogen():
        monlib = restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
                                                 stop_for_unknowns=False)

    if args.method == "fft":
        fc_asu = model.calc_fc_fft(st, args.resolution, cutoff=args.cutoff, rate=args.rate,
                                   mott_bethe=args.source=="electron",
                                   monlib=monlib, source=args.source)
    else:
        fc_asu = model.calc_fc_direct(st, args.resolution, source=args.source,
                                      mott_bethe=args.source=="electron", monlib=monlib)

    hkldata = hkl.hkldata_from_asu_data(fc_asu, "FC")
    if args.as_intensity:
        hkldata.df["IC"] = numpy.abs(hkldata.df.FC)**2
        value_label = "IC"
    else:
        value_label = "FC"

    labout = [value_label]
    if args.add_dummy_sigma:
        sigma_label = "SIG" + value_label
        hkldata.df[sigma_label] = 1.
        labout.append(sigma_label)

    hkldata.write_mtz(args.output_prefix+".mtz", labout, types=dict(IC="J", SIGIC="Q", SIGFC="Q"))
# fcalc()
|
|
1272
|
+
|
|
1273
|
+
def nemap(args):
    """Compute weighted map coefficients from half maps and write the results.

    Two modes are available. When ``args.local_fourier_weighting_with`` is
    positive, a local correlation between the two half-map Fourier grids is
    used to derive the ``FWT`` weights; otherwise the weights come from
    ``fofc.calc_maps`` after estimating the noise variance in resolution bins.

    Raises:
        SystemExit: when trimming is requested without ``--mask``.
    """
    from servalcat.spa import fofc

    trim_requested = args.trim or args.trim_mtz
    if trim_requested and args.mask is None:
        raise SystemExit("\nError: You need to give --mask as you requested --trim or --trim_mtz.\n")

    mask = fileio.read_ccp4_map(args.mask)[0] if args.mask else None

    halfmaps = fileio.read_halfmaps(args.halfmaps, pixel_size=args.pixel_size)
    if args.resolution is None:
        args.resolution = maps.nyquist_resolution(halfmaps[0][0])
        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))

    local_weighting = args.local_fourier_weighting_with > 0
    d_min = args.resolution
    if local_weighting:
        # extend the resolution limit by the kernel radius in reciprocal space
        d_min = 1 / (args.local_fourier_weighting_with + 1 / d_min)
        logger.writeln("adjusting d_min= {:.2f} for local correlation".format(d_min))
    hkldata = maps.mask_and_fft_maps(halfmaps, d_min, mask)

    if not local_weighting:
        hkldata.setup_relion_binning("ml")
        maps.calc_noise_var_from_halfmaps(hkldata)
        map_labs = fofc.calc_maps(hkldata, B=args.B, has_halfmaps=True, half1_only=args.half1_only,
                                  no_fsc_weights=args.no_fsc_weights, sharpening_b=args.sharpening_b)
    else:
        def values_at_reflections(recgrid):
            # real part of the reciprocal-space grid sampled at every reflection
            return numpy.real(recgrid.get_value_by_hkl(hkldata.miller_array()))

        asu1 = hkldata.as_asu_data("F_map1")
        asu2 = hkldata.as_asu_data("F_map2")
        size = asu1.get_size_for_hkl(sample_rate=3)
        logger.writeln("using grid {}".format(size))
        gr1 = asu1.get_f_phi_on_grid(size)
        gr2 = asu2.get_f_phi_on_grid(size)
        kernel = ext.hard_sphere_kernel_recgrid(size, asu1.unit_cell, args.local_fourier_weighting_with)

        cc = maps.local_cc(gr1, gr2, kernel.array.real, method="simple")
        cc.array[cc.array < 0] = 0 # negative cc cannot be used anyway
        cc.array[:] = 2 * cc.array.real / (1 + cc.array.real) # to full map cc
        hkldata.df["cc"] = values_at_reflections(cc)

        grf = type(gr1)((gr1.array + gr2.array) / 2, gr1.unit_cell, gr1.spacegroup)
        var_f = maps.local_var(grf, kernel.array.real, method="simple")
        hkldata.df["var_f"] = values_at_reflections(var_f)

        if args.B is not None:
            k2_l = numpy.exp(-args.B / hkldata.d_spacings()**2 / 2)
            hkldata.df.cc = k2_l * hkldata.df.cc / (1 + (k2_l - 1) * hkldata.df.cc)

        hkldata.df["FWT"] = hkldata.df.FP * numpy.sqrt(hkldata.df.cc / hkldata.df.var_f)
        hkldata.df["kernel"] = values_at_reflections(kernel)
        hkldata.write_mtz(args.output_prefix+"_cc.mtz", ["cc", "kernel"])
        # go back to the resolution originally requested
        hkldata = hkldata.copy(d_min=args.resolution)
        map_labs = ["FWT"]

    fofc.write_files(hkldata, map_labs, grid_start=halfmaps[0][1], stats_str=None,
                     mask=mask, output_prefix=args.output_prefix,
                     trim_map=args.trim, trim_mtz=args.trim_mtz)
# nemap()
|
|
1327
|
+
|
|
1328
|
+
def blur(args):
    """Apply the B value ``args.B`` to a reflection file and write a new MTZ.

    The output name is ``<output_prefix>_blur_<B>.mtz`` for positive B and
    ``<output_prefix>_sharpen_<|B|>.mtz`` otherwise.

    Raises:
        SystemExit: when ``args.hklin`` is not a supported reflection file.
    """
    if args.output_prefix is None:
        args.output_prefix = fileio.splitext(os.path.basename(args.hklin))[0]

    if not fileio.is_mmhkl_file(args.hklin):
        raise SystemExit("ERROR: Unsupported file type: {}".format(args.hklin))

    mtz = fileio.read_mmhkl(args.hklin)
    hkl.blur_mtz(mtz, args.B)
    tag = "_blur" if args.B > 0 else "_sharpen"
    suffix = tag + "_{:.2f}.mtz".format(abs(args.B))
    out_name = args.output_prefix+suffix
    mtz.write_to_file(out_name)
    logger.writeln("Written: {}".format(out_name))
# blur()
|
|
1341
|
+
|
|
1342
|
+
def mask_from_model(args):
    """Create a mask around the (optionally selected) model on the grid of ``args.map``.

    Only the header of the reference map is read; the mask is written to
    ``args.output`` with the same grid start as the reference map.
    """
    st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
    if args.selection:
        # keep only the atoms matching the selection
        selection = gemmi.Selection(args.selection)
        selection.remove_not_selected(st)

    ref_grid, grid_start, _ = fileio.read_ccp4_map(args.map, header_only=True)
    mask = maps.mask_from_model(st, args.radius,
                                soft_edge=args.soft_edge,
                                grid=ref_grid)
    maps.write_ccp4_map(args.output, mask, grid_start=grid_start)
# mask_from_model()
|
|
1350
|
+
|
|
1351
|
+
def applymask(args):
    """Multiply a map by a mask, optionally normalize it, and write ``<output_prefix>.mrc``.

    With ``args.normalize`` the whole map is shifted and scaled using the mean
    and standard deviation of the voxels whose mask value exceeds
    ``args.mask_cutoff``. With ``args.trim`` the mask is also passed to the
    writer to define the output extent.
    """
    if args.output_prefix is None:
        map_stem = fileio.splitext(os.path.basename(args.map))[0]
        args.output_prefix = map_stem + "_masked"

    grid, grid_start, _ = fileio.read_ccp4_map(args.map)
    mask = fileio.read_ccp4_map(args.mask)[0]

    logger.writeln("Applying mask")
    mask_min, mask_max = numpy.min(mask), numpy.max(mask)
    logger.writeln(" mask min: {:.3f} max: {:.3f}".format(mask_min, mask_max))
    grid.array[:] *= mask.array

    if args.normalize:
        inside = mask.array>args.mask_cutoff
        masked = grid.array[inside]
        masked_mean = numpy.average(masked)
        masked_std = numpy.std(masked)
        logger.writeln("Normalizing map values within mask")
        logger.writeln(" masked volume: {} mean: {:.3e} sd: {:.3e}".format(len(masked), masked_mean, masked_std))
        grid.array[:] = (grid.array - masked_mean) / masked_std

    extent_mask = mask.array if args.trim else None
    maps.write_ccp4_map(args.output_prefix+".mrc", grid,
                        grid_start=grid_start,
                        mask_for_extent=extent_mask,
                        mask_threshold=args.mask_cutoff)
# applymask()
|
|
1374
|
+
|
|
1375
|
+
def map2mtz(args):
    """Fourier transform a map and write the coefficients as column ``F`` of an MTZ file.

    When the map holds only a sub-region of the grid (non-zero start or a
    different shape) the sub-array is extracted first, the unit cell is
    rescaled if the shape differs, and a phase shift matching the grid start
    is applied to the coefficients.
    """
    if args.output is None:
        args.output = fileio.splitext(os.path.basename(args.map))[0] + "_fft.mtz"

    grid, grid_start, grid_shape = fileio.read_ccp4_map(args.map)
    if args.resolution is None:
        args.resolution = maps.nyquist_resolution(grid)
        logger.writeln("WARNING: --resolution is not specified. Using Nyquist resolution: {:.2f}".format(args.resolution))

    has_offset = grid_start != (0,0,0)
    if has_offset or grid.shape != tuple(grid_shape):
        # If only subregion of whole grid in map, unit cell needs to be re-defined.
        cell = grid.unit_cell
        if grid.shape != tuple(grid_shape):
            params = grid.unit_cell.parameters
            new_abc = [params[i] * grid_shape[i] / grid.shape[i] for i in range(3)]
            cell = gemmi.UnitCell(*new_abc, *params[3:])
            logger.writeln("Changing unit cell to {}".format(cell.parameters))
        sub_array = grid.get_subarray(grid_start, grid_shape)
        grid = gemmi.FloatGrid(sub_array, cell, grid.spacegroup)

    f_grid = gemmi.transform_map_to_f_phi(grid)
    asudata = f_grid.prepare_asu_data(dmin=args.resolution, with_000=True)
    hkldata = hkl.hkldata_from_asu_data(asudata, "F")
    if has_offset:
        # account for the origin of the extracted region
        shifts = grid.get_position(*grid_start)
        hkldata.translate("F", shifts)
        logger.writeln("Applying phase shift with translation {}".format(shifts.tolist()))
    hkldata.write_mtz(args.output, ["F"])
# map2mtz()
|
|
1403
|
+
|
|
1404
|
+
def sm2mm(args):
    """Convert small-molecule input files to macromolecular formats.

    Whatever ``fileio.read_small_molecule_files`` returns is written out: the
    structure as PDB and mmCIF, the reflection data as ``<output_prefix>.mtz``.
    The default prefix is the base name of the first input file.
    """
    if args.output_prefix is None:
        first_stem = fileio.splitext(args.files[0])[0]
        args.output_prefix = os.path.basename(first_stem)

    st, mtz = fileio.read_small_molecule_files(args.files)

    if st is not None:
        fileio.write_model(st, prefix=args.output_prefix, pdb=True, cif=True)

    if mtz is None:
        return
    mtz_out = args.output_prefix + ".mtz"
    logger.writeln("Writing MTZ file: {}".format(mtz_out))
    mtz.write_to_file(mtz_out)
# sm2mm()
|
|
1415
|
+
|
|
1416
|
+
def mm2ins(args):
    """Write the model as an ``.ins`` instruction file.

    The output consists of a TITL/CELL/ZERR/LATT/SYMM/SFAC/UNIT header, a
    fixed block of instructions, and one line per atom of the first model
    (name, scattering-factor index, fractional coordinates, ``10+occ`` and
    U_iso; hydrogen atoms get ``-1.2`` in place of U_iso).

    When ``args.hklin`` is given, the cell written to CELL and the wavelength
    are taken from that file; fractional coordinates are always computed with
    the cell of the model.

    Raises:
        SystemExit: when the space group of the model cannot be determined or
            its centring type has no LATT code.
    """
    from collections import Counter

    if args.output is None:
        args.output = os.path.basename(fileio.splitext(args.model)[0]) + ".ins"
    st = fileio.read_structure(args.model)
    sg = st.find_spacegroup()
    if sg is None:
        # Everything below needs symmetry operators; fail with a clear message
        # rather than an AttributeError on None.
        raise SystemExit("ERROR: space group is not known for {}".format(args.model))

    # One pass over the atoms instead of list.count() per element.
    counts = Counter(cra.atom.element.name for cra in st[0].all())
    elems = sorted(counts)
    # 1-based position of each element on the SFAC line
    sfac_index = {name: i + 1 for i, name in enumerate(elems)}

    cell = st.cell
    wavelength = None
    if args.hklin:
        mtz = fileio.read_mmhkl(args.hklin)
        cell = mtz.cell
        # `> 0` is False for NaN as well, so unset wavelengths are skipped
        wavelength = next((x.wavelength for x in mtz.datasets if x.wavelength > 0), None)

    centring = sg.centring_type()
    latt = dict(P=1, I=2, R=3, F=4, A=5, B=6, C=7).get(centring)
    if latt is None:
        raise SystemExit("ERROR: unsupported lattice centring type: {}".format(centring))
    if not sg.is_centrosymmetric():
        latt *= -1

    with open(args.output, "w") as ofs:
        ofs.write(f"TITL {os.path.basename(args.model)} in {sg.xhm()}\n")
        ofs.write(f"CELL {wavelength if wavelength else '????'} ")
        ofs.write(" ".join(str(x) for x in cell.parameters) + "\n")
        ofs.write("ZERR 1 0 0 0 0 0 0\n")
        ofs.write(f"LATT {latt}\n")
        for op in sg.operations().sym_ops[1:]: # the first is identity
            ofs.write(f"SYMM {op.triplet('X')}\n")
        ofs.write(f"SFAC {' '.join(elems)}\n")
        ofs.write(f"UNIT {' '.join(str(counts[x]) for x in elems)}\n\n")
        ofs.write("""\
L.S. 10
ACTA
LIST 6
MORE -1\n\n""")
        for cra in st[0].all():
            frac = st.cell.fractionalize(cra.atom.pos)
            u_iso = model.b_to_u * cra.atom.b_iso
            if cra.atom.is_hydrogen():
                u_iso = -1.2
            ofs.write(f"{cra.atom.name} {sfac_index[cra.atom.element.name]} {frac.x:.6f} {frac.y:.6f} {frac.z:.6f} {10+cra.atom.occ} {u_iso:.5f}\n")
    logger.writeln(f"Written: {args.output}")
# mm2ins()
|
|
1460
|
+
|
|
1461
|
+
def seq(args):
    """Print the sequence of each polymer chain, aligned to reference sequences if given.

    For every non-saccharide polymer chain of the first model, each sequence
    read from the ``--seq`` files is aligned to the chain; the one with the
    most matches (ties broken by score) is reported together with mismatches
    and residues absent from the reference. Chains with no usable reference
    sequence are printed in a FASTA-like form instead.
    """
    wrap_width = 100
    seqs = []
    if args.seq:
        args.seq = sum(args.seq, [])
        for seq_file in args.seq:
            seqs.extend(fileio.read_sequence_file(seq_file))

    scoring = gemmi.AlignmentScoring()
    (scoring.match, scoring.mismatch, scoring.gapo,
     scoring.gape, scoring.good_gapo, scoring.bad_gapo) = args.scoring

    saccharides = (gemmi.PolymerType.SaccharideD, gemmi.PolymerType.SaccharideL)
    kind_of = {gemmi.PolymerType.Dna: gemmi.ResidueKind.DNA,
               gemmi.PolymerType.Rna: gemmi.ResidueKind.RNA}

    st = fileio.read_structure(args.model) # TODO option to (or not to) expand NCS
    model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
    for chain in st[0]:
        polymer = chain.get_polymer()
        if not polymer:
            continue
        p_type = polymer.check_polymer_type()
        if p_type in saccharides:
            continue
        model_seq = gemmi.one_letter_code(polymer.extract_sequence())

        # what if DnaRnaHybrid?
        kind = kind_of.get(p_type, gemmi.ResidueKind.AA)
        candidates = []
        for name, ref_seq in seqs:
            expanded = [gemmi.expand_one_letter(x, kind) for x in ref_seq]
            if None in expanded:
                # contains a letter that cannot be expanded for this polymer kind
                continue
            alignment = gemmi.align_sequence_to_polymer(expanded, polymer, p_type, scoring)
            candidates.append((name, alignment, ref_seq))

        if not candidates:
            logger.writeln("> Chain: {}".format(chain.name))
            logger.writeln(gemmi.one_letter_code(polymer.extract_sequence()))
            logger.writeln("")
            continue

        logger.writeln("Chain: {}".format(chain.name))
        logger.writeln(" polymer type: {}".format(str(p_type).replace("PolymerType.", "")))
        name, al, best_seq = max(candidates, key=lambda x: (x[1].match_count, x[1].score))
        logger.writeln(" match: {}".format(name))
        logger.writeln(" aligned: {}".format(al.match_count))
        logger.writeln(" score: {}".format(al.score))

        gapped_ref = al.add_gaps(best_seq, 1)
        gapped_model = al.add_gaps(model_seq, 2)
        # columns where the reference has a gap, and columns flagged as mismatch
        unkseq = [m.start() for m in re.finditer(r"\-", gapped_ref)]
        mismatches = [m.start() for m in re.finditer(r"\.", al.match_string)]
        if mismatches or unkseq:
            # alignment column -> index of the residue in the polymer
            idxes = {m.start(): i for i, m in enumerate(re.finditer("[^-]", gapped_model))}
            seqnums = [str(r.seqid) for r in polymer]
        if mismatches:
            logger.write(" mismatches: ")
            logger.writeln(", ".join("{}({}>{})".format(seqnums[idxes[i]], gapped_ref[i], gapped_model[i])
                                     for i in mismatches))
        if unkseq:
            logger.write(" unknown sequence: ")
            logger.writeln(", ".join("{}({})".format(seqnums[idxes[i]], gapped_model[i])
                                     for i in unkseq))

        logger.writeln("")
        for start in range(0, len(gapped_ref), wrap_width):
            stop = start + wrap_width
            logger.writeln(" seq. {}".format(gapped_ref[start:stop]))
            logger.writeln(" {}".format(al.match_string[start:stop]))
            logger.writeln(" model {}\n".format(gapped_model[start:stop]))
# seq()
|
|
1521
|
+
|
|
1522
|
+
def dnarna(args):
    """Convert nucleotide residues between RNA and DNA and write the model.

    With ``args.to_dna``, A/G/C/U residues are renamed to DA/DG/DC/DT, their
    O2' atom is removed, and for DT a C7 atom is placed 1.5 A from C5 along
    the direction pointing away from C4 and C6. With ``args.to_rna``,
    DA/DG/DC/DT are renamed to A/G/C/U, an O2' atom is placed 1.411 A from
    C2' (the C2'->C1' direction rotated by 120 degrees about the C2'-C3'
    axis), and C7 is removed from U. Hydrogen atoms are removed beforehand.
    Only chains listed in ``args.chains`` are touched when that is given.
    """
    import scipy.spatial.transform
    rna_res = {"A":"DA", "G":"DG", "C":"DC", "U":"DT"} # RNA name -> DNA name
    dna_res = {"DA":"A", "DG":"G", "DC":"C", "DT":"U"} # DNA name -> RNA name
    if args.chains: args.chains = sum(args.chains, [])
    model_format = fileio.check_model_format(args.model)
    if not args.output:
        args.output = fileio.splitext(os.path.basename(args.model))[0] + "_conv" + model_format
    st = fileio.read_structure(args.model)
    if st[0].has_hydrogen():
        logger.writeln("Hydrogen atoms are detected. I cannot take care of them, so I will remove them.")
        st.remove_hydrogens()
    for chain in st[0]:
        if args.chains and chain.name not in args.chains:
            continue
        for res in chain:
            alt = "*" # XXX
            if res.name in rna_res and args.to_dna:
                logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to DNA")
                res.name = rna_res[res.name]
                res.remove_atom("O2'", alt)
                if res.name == "DT":
                    C4 = res.find_atom("C4", alt)
                    C5 = res.find_atom("C5", alt)
                    C6 = res.find_atom("C6", alt)
                    v1 = C5.pos - C4.pos
                    v2 = C5.pos - C6.pos
                    v = v1 + v2
                    # Work out the new position BEFORE add_atom(): C5 is a
                    # handle into the residue's atom list, and inserting an
                    # atom may relocate that storage, after which C5 must not
                    # be read any more.
                    c7_pos = C5.pos + v / v.length() * 1.5
                    res.add_atom(C5) # copy of C5, edited in place below
                    res[-1].name = "C7"
                    res[-1].pos = c7_pos
            elif res.name in dna_res and args.to_rna:
                logger.writeln(f"Changing {chain.name}/{res.seqid} {res.name} to RNA")
                res.name = dna_res[res.name]
                # coordinates are copied to numpy arrays, so they stay valid
                # after the residue is modified
                C1p = numpy.array(res.find_atom("C1'", alt).pos.tolist())
                C2p = numpy.array(res.find_atom("C2'", alt).pos.tolist())
                C3p = numpy.array(res.find_atom("C3'", alt).pos.tolist())
                rotvec = C2p - C3p
                rotvec /= numpy.linalg.norm(rotvec)
                r = scipy.spatial.transform.Rotation.from_rotvec(-rotvec * 120,
                                                                 degrees=True)
                rotated = r.apply(C1p - C2p)
                rotated *= 1.411 / numpy.linalg.norm(rotated)
                res.add_atom(res.find_atom("O3'", alt)) # copy of O3', edited in place below
                res[-1].name = "O2'"
                res[-1].pos.fromlist(C2p + rotated)
                if res.name == "U":
                    res.remove_atom("C7", alt)
    fileio.write_model(st, file_name=args.output)
# dnarna()
|
|
1572
|
+
|
|
1573
|
+
def show(args):
    """Read each map file in ``args.files`` so that the reader reports on it.

    Files whose extension is not .mrc/.ccp4/.map are not read. A blank
    separator is logged after every file.
    """
    map_exts = (".mrc", ".ccp4", ".map")
    for filename in args.files:
        suffix = fileio.splitext(filename)[1]
        if suffix in map_exts:
            # the return value is not needed; reading is done for its log output
            fileio.read_ccp4_map(filename)
        logger.writeln("\n")
# show()
|
|
1580
|
+
|
|
1581
|
+
def json2csv(args):
    """Convert the JSON file ``args.json`` to ``<output_prefix>.csv``.

    The default prefix is the base name of the JSON file without extension.
    The row index is not written.
    """
    if not args.output_prefix:
        json_name = os.path.basename(args.json)
        args.output_prefix = fileio.splitext(json_name)[0]

    csv_name = args.output_prefix+".csv"
    table = pandas.read_json(args.json)
    table.to_csv(csv_name, index=False)
    logger.writeln("Output: {}".format(csv_name))
# json2csv()
|
|
1589
|
+
|
|
1590
|
+
def main(args):
    """Dispatch to the function implementing ``args.subcommand``.

    Returns:
        Whatever the selected subcommand function returns.

    Raises:
        SystemExit: when the subcommand name is not known.
    """
    # subcommand name -> implementing function
    comms = {
        "show": show,
        "json2csv": json2csv,
        "symmodel": symmodel,
        "helical_biomt": helical_biomt,
        "expand": symexpand,
        "h_add": h_add,
        "add_op3": add_op3,
        "map_peaks": map_peaks,
        "h_density": h_density_analysis,
        "fix_link": fix_link,
        "merge_models": merge_models,
        "merge_dicts": merge_dicts,
        "geom": geometry,
        "conf": compare_conf,
        "adp": adp_stats,
        "power": show_power,
        "fcalc": fcalc,
        "nemap": nemap,
        "blur": blur,
        "mask_from_model": mask_from_model,
        "applymask": applymask,
        "map2mtz": map2mtz,
        "sm2mm": sm2mm,
        "mm2ins": mm2ins,
        "seq": seq,
        "dnarna": dnarna,
    }

    com = args.subcommand
    handler = comms.get(com)
    if not handler:
        raise SystemExit("Unknown subcommand: {}".format(com))
    return handler(args)
# main()
|
|
1625
|
+
|
|
1626
|
+
if __name__ == "__main__":
    # Script entry point: parse the command line (without the program name)
    # and run the requested subcommand.
    import sys
    args = parse_args(sys.argv[1:])
    main(args)
|