emdbva 0.0.1.dev136__tar.gz → 0.0.1.dev139__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {emdbva-0.0.1.dev136/emdbva.egg-info → emdbva-0.0.1.dev139}/PKG-INFO +1 -1
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139/emdbva.egg-info}/PKG-INFO +1 -1
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/SOURCES.txt +1 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/mainva.py +1 -0
- emdbva-0.0.1.dev139/va/metrics/map_data_validation.py +664 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/validationanalysis.py +48 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/version.py +1 -1
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/LICENSE +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/MANIFEST.in +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/README.rst +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/dependency_links.txt +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/entry_points.txt +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/requires.txt +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/top_level.txt +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/setup.cfg +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/setup.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/__init__.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/__init__.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/bars.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/connected_percentage.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/contour_level_predicator.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/emda_mmcc.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/emringer.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/inclusion.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/overlap_percentage.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phaserandomization.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phenix_cc.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phenix_mm.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/projections.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/qscore.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/residue_locres.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/resmap.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/smoc.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/strudel.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/surfaces.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/threedfsc.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/preparation.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/qscores.csv +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/Checker.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/ChimeraxViews.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/MapProcessor.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/Model.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/__init__.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/cl_weights.pth +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/log_utils.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/misc.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/rescolor.py +0 -0
- {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/stars.py +0 -0
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import warnings
|
|
5
|
+
from collections import OrderedDict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import mrcfile
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _normalise_map_input(map_input):
|
|
12
|
+
if hasattr(map_input, "fullname"):
|
|
13
|
+
return map_input.fullname
|
|
14
|
+
if hasattr(map_input, "filename"):
|
|
15
|
+
return map_input.filename
|
|
16
|
+
return str(map_input)
|
|
17
|
+
|
|
18
|
+
def _make_issue(code, category, severity, message, details=None, source="mrcfile.validate"):
|
|
19
|
+
"""
|
|
20
|
+
Create one structured validation issue.
|
|
21
|
+
|
|
22
|
+
code:
|
|
23
|
+
Stable machine-readable problem name.
|
|
24
|
+
|
|
25
|
+
category:
|
|
26
|
+
Header/data/file area affected.
|
|
27
|
+
|
|
28
|
+
severity:
|
|
29
|
+
Usually "error" for validation failure, "warning" for weaker/unknown cases.
|
|
30
|
+
|
|
31
|
+
message:
|
|
32
|
+
Original mrcfile message, preserved for debugging.
|
|
33
|
+
|
|
34
|
+
details:
|
|
35
|
+
Parsed values extracted from the message.
|
|
36
|
+
"""
|
|
37
|
+
return OrderedDict([
|
|
38
|
+
("code", code),
|
|
39
|
+
("category", category),
|
|
40
|
+
("severity", severity),
|
|
41
|
+
("source", source),
|
|
42
|
+
("message", message),
|
|
43
|
+
("details", details or OrderedDict()),
|
|
44
|
+
])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _to_int(value):
|
|
48
|
+
try:
|
|
49
|
+
return int(value)
|
|
50
|
+
except Exception:
|
|
51
|
+
return value
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _to_float(value):
|
|
55
|
+
try:
|
|
56
|
+
return float(value)
|
|
57
|
+
except Exception:
|
|
58
|
+
return value
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _exact_pattern(text):
    """Return an anchored regex that matches exactly the literal message *text*."""
    return "^" + re.escape(text) + "$"


# Numeric value inside a "Data statistics appear to be inaccurate" message.
# Besides ordinary decimal/exponent notation this also accepts "nan"/"inf",
# so statistics that are not finite numbers are still classified.
_STATISTIC_VALUE = r"[-+0-9.eE]+|[-+]?(?:nan|inf)"

_STATISTIC_TEMPLATE = (
    r"^Data statistics appear to be inaccurate: {label} is "
    r"(?P<actual>{value}) but the value in the header is (?P<header>{value})$"
)


def _statistic_pattern(label):
    """Return the regex for the data-statistics message about *label*."""
    return _STATISTIC_TEMPLATE.format(label=label, value=_STATISTIC_VALUE)


_STATISTIC_FIELDS = (("actual", "actual", "float"), ("header", "header", "float"))

# Ordered rule table. Each rule is
#     (pattern, code, category, severity, fields, constants)
# where
#     fields    = ((detail_key, regex_group, kind), ...), kind being
#                 "text", "int" or "float";
#     constants = ((detail_key, value), ...), appended after the fields.
#                 A tuple value is emitted as a new list on every call.
# The first matching rule wins.
_VALIDATION_RULE_SPECS = (
    # 1. MRC map ID/header map field
    (
        r"^Map ID string is incorrect: found (?P<found>.*), should be (?P<expected>.*)$",
        "map_id_incorrect", "header.map", "warning",
        (("found", "found", "text"), ("expected", "expected", "text")),
        (),
    ),
    (
        _exact_pattern("Map ID string not found - not an MRC file, or file is corrupt"),
        "map_id_missing_or_corrupt", "header.map", "warning",
        (),
        (),
    ),
    # 2. Machine stamp
    (
        r"^Invalid machine stamp: (?P<machine_stamp>.*)$",
        "machine_stamp_invalid", "header.machst", "error",
        (("machine_stamp", "machine_stamp", "text"),),
        (),
    ),
    (
        r"^Machine stamp '(?P<machine_stamp>.*)' does not match the apparent byte order '(?P<byte_order>.*)'$",
        "machine_stamp_byte_order_mismatch", "header.machst", "warning",
        (("machine_stamp", "machine_stamp", "text"), ("byte_order", "byte_order", "text")),
        (),
    ),
    # 3. Mode
    (
        r"^Invalid mode: (?P<mode>[-+]?\d+)$",
        "mode_invalid", "header.mode", "error",
        (("mode", "mode", "int"),),
        (),
    ),
    (
        r"^Unrecognised mode '(?P<mode>.*)' - data block cannot be read$",
        "mode_unrecognised_data_unreadable", "header.mode", "error",
        (("mode", "mode", "text"),),
        (),
    ),
    # 4. Negative header fields
    (
        r"^Header field '(?P<field>nx|ny|nz|mx|my|mz|ispg|nlabl)' is negative$",
        "header_field_negative", "header.dimension_or_count", "error",
        (("field", "field", "text"),),
        (),
    ),
    (
        r"^Cell dimension '(?P<axis>x|y|z)' is negative$",
        "cell_dimension_negative", "header.cella", "error",
        (("axis", "axis", "text"),),
        (),
    ),
    # 5. Axis mapping
    (
        r"^Invalid axis mapping: found (?P<found>.*), should be \[1, 2, 3\]$",
        "axis_mapping_invalid", "header.mapc_mapr_maps", "error",
        (("found", "found", "text"),),
        (("expected", "[1, 2, 3]"),),
    ),
    # 6. Volume stack
    (
        r"^Error in dimensions for volume stack: nz should be divisible by mz\. "
        r"Found nz = (?P<nz>[-+]?\d+), mz = (?P<mz>[-+]?\d+)\)?$",
        "volume_stack_dimensions_invalid", "header.nz_mz_ispg", "error",
        (("nz", "nz", "int"), ("mz", "mz", "int")),
        (),
    ),
    # 7. Header labels
    (
        _exact_pattern("Error in header labels: empty labels appear between text-containing labels"),
        "header_labels_empty_between_text", "header.label", "error",
        (),
        (),
    ),
    (
        r"^Error in header labels: nlabl is (?P<nlabl>[-+]?\d+) "
        r"but (?P<label_count>[-+]?\d+) labels contain text$",
        "header_labels_nlabl_mismatch", "header.nlabl", "error",
        (("nlabl", "nlabl", "int"), ("actual_label_count", "label_count", "int")),
        (),
    ),
    # 8. MRC format version
    (
        r"^File does not declare MRC format version 20140 or 20141: nversion = (?P<nversion>[-+]?\d+)$",
        "mrc_format_version_invalid", "header.nversion", "warning",
        (("nversion", "nversion", "int"),),
        (("expected", (20140, 20141)),),
    ),
    # 9. Extended header type
    (
        r"^Extended header type is undefined or unrecognised: exttyp = '(?P<exttyp>.*)'$",
        "extended_header_type_invalid", "header.exttyp", "error",
        (("exttyp", "exttyp", "text"),),
        (("expected_known_types", ("CCP4", "MRCO", "SERI", "AGAR", "FEI1", "FEI2", "HDF5")),),
    ),
    # 10. Data statistics
    (
        _statistic_pattern("RMS deviation"),
        "data_statistics_rms_mismatch", "header.rms", "error",
        _STATISTIC_FIELDS,
        (),
    ),
    (
        _statistic_pattern("minimum"),
        "data_statistics_minimum_mismatch", "header.dmin", "error",
        _STATISTIC_FIELDS,
        (),
    ),
    (
        _statistic_pattern("maximum"),
        "data_statistics_maximum_mismatch", "header.dmax", "error",
        _STATISTIC_FIELDS,
        (),
    ),
    (
        _statistic_pattern("mean"),
        "data_statistics_mean_mismatch", "header.dmean", "error",
        _STATISTIC_FIELDS,
        (),
    ),
    # 11. File size
    (
        r"^File is larger than expected\. Actual size: (?P<actual_bytes>\d+) bytes; "
        r"expected size: (?P<expected_bytes>\d+) bytes \(calculated from header\)$",
        "file_size_larger_than_expected", "file.size", "error",
        (("actual_bytes", "actual_bytes", "int"), ("expected_bytes", "expected_bytes", "int")),
        (),
    ),
    (
        _exact_pattern("Data block could not be read - file size not checked"),
        "data_block_unreadable_file_size_not_checked", "data", "error",
        (),
        (),
    ),
    # 12. Extended header/data read warnings
    (
        r"^Expected (?P<expected_bytes>\d+) bytes in extended header but could only read (?P<actual_bytes>\d+)$",
        "extended_header_too_small", "extended_header.size", "error",
        (("expected_bytes", "expected_bytes", "int"), ("actual_bytes", "actual_bytes", "int")),
        (),
    ),
    (
        r"^Expected (?P<expected_bytes>\d+) bytes in data block but could only read (?P<actual_bytes>\d+)$",
        "data_block_too_small", "data.size", "error",
        (("expected_bytes", "expected_bytes", "int"), ("actual_bytes", "actual_bytes", "int")),
        (),
    ),
    (
        r"^Expected (?P<expected_bytes>\d+) bytes in data block but limit is (?P<limit_bytes>\d+)$",
        "data_block_exceeds_read_limit", "data.size", "error",
        (("expected_bytes", "expected_bytes", "int"), ("limit_bytes", "limit_bytes", "int")),
        (),
    ),
    (
        _exact_pattern("Couldn't read enough bytes for MRC header"),
        "mrc_header_too_small", "header.size", "error",
        (),
        (),
    ),
    # 13. Exception-like final traceback lines
    (
        r"^(?P<exception_type>[A-Za-z_][A-Za-z0-9_]*Error): (?P<exception_message>.*)$",
        "exception_during_validation", "exception", "error",
        (
            ("exception_type", "exception_type", "text"),
            ("exception_message", "exception_message", "text"),
        ),
        (),
    ),
)

# Compile once at import time instead of on every parsed line.
_VALIDATION_RULES = tuple(
    (re.compile(spec[0]),) + spec[1:] for spec in _VALIDATION_RULE_SPECS
)


def _convert_detail(text, kind):
    """
    Convert one captured regex group according to *kind*.

    "int" and "float" use the best-effort module converters. A float that is
    NaN or infinite is returned as the original text, so that serialising the
    issue never produces the non-standard JSON tokens NaN/Infinity.
    """
    if kind == "int":
        return _to_int(text)

    if kind == "float":
        number = _to_float(text)
        if isinstance(number, float) and (number != number or abs(number) == float("inf")):
            return text
        return number

    return text


def _parse_validation_line(line, source="mrcfile.validate"):
    """
    Parse one line from mrcfile.validate() or one RuntimeWarning.

    The stripped line is tried against the ordered rule table above; the
    first matching rule determines the issue's code, category and severity,
    and its named groups become the issue's ``details``.

    Any message not recognised by the parser is still kept in the JSON as
    "unclassified_mrcfile_validation_message" so future mrcfile versions do
    not silently lose information.

    line:
        Raw message text; surrounding whitespace is ignored.

    source:
        Stored verbatim in the issue's "source" field.

    Returns the OrderedDict produced by _make_issue().
    """
    line = line.strip()

    for pattern, code, category, severity, fields, constants in _VALIDATION_RULES:
        match = pattern.match(line)
        if match is None:
            continue

        details = OrderedDict()
        for detail_key, group_name, kind in fields:
            details[detail_key] = _convert_detail(match.group(group_name), kind)
        for detail_key, value in constants:
            # Tuples in the table stand for lists; copy so callers never
            # share (and cannot mutate) the table's own data.
            details[detail_key] = list(value) if isinstance(value, tuple) else value

        return _make_issue(code, category, severity, line, details, source)

    # Fallback. Keep unknown messages instead of losing them.
    return _make_issue(
        "unclassified_mrcfile_validation_message",
        "unclassified",
        "warning",
        line,
        source=source,
    )
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _split_mrcfile_output(output):
|
|
475
|
+
"""
|
|
476
|
+
Convert mrcfile.validate() text output into individual validation messages.
|
|
477
|
+
|
|
478
|
+
We remove non-problem lines like:
|
|
479
|
+
Checking if ... is a valid MRC2014 file...
|
|
480
|
+
File appears to be valid.
|
|
481
|
+
"""
|
|
482
|
+
lines = []
|
|
483
|
+
|
|
484
|
+
for raw_line in output.splitlines():
|
|
485
|
+
line = raw_line.strip()
|
|
486
|
+
|
|
487
|
+
if not line:
|
|
488
|
+
continue
|
|
489
|
+
|
|
490
|
+
if line.startswith("Checking if ") and line.endswith(" is a valid MRC2014 file..."):
|
|
491
|
+
continue
|
|
492
|
+
|
|
493
|
+
if line == "File appears to be valid.":
|
|
494
|
+
continue
|
|
495
|
+
|
|
496
|
+
# If traceback is printed, keep only meaningful final error line.
|
|
497
|
+
if line == "Traceback (most recent call last):":
|
|
498
|
+
continue
|
|
499
|
+
|
|
500
|
+
if line.startswith("File ") and ", line " in line:
|
|
501
|
+
continue
|
|
502
|
+
|
|
503
|
+
if line.startswith("^"):
|
|
504
|
+
continue
|
|
505
|
+
|
|
506
|
+
lines.append(line)
|
|
507
|
+
|
|
508
|
+
return lines
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _deduplicate_issues(issues):
|
|
512
|
+
"""
|
|
513
|
+
Remove duplicates.
|
|
514
|
+
|
|
515
|
+
The same problem can sometimes appear both in mrcfile.validate() text output
|
|
516
|
+
and in captured RuntimeWarning messages.
|
|
517
|
+
"""
|
|
518
|
+
unique_issues = []
|
|
519
|
+
seen = set()
|
|
520
|
+
|
|
521
|
+
for issue in issues:
|
|
522
|
+
key = (
|
|
523
|
+
issue.get("code"),
|
|
524
|
+
issue.get("category"),
|
|
525
|
+
issue.get("message"),
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
if key in seen:
|
|
529
|
+
continue
|
|
530
|
+
|
|
531
|
+
seen.add(key)
|
|
532
|
+
unique_issues.append(issue)
|
|
533
|
+
|
|
534
|
+
return unique_issues
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def _parse_messages_and_warnings(messages_text, warning_messages):
    """
    Turn validate() text output plus warning strings into de-duplicated issues.

    messages_text:
        Text output, split into messages by _split_mrcfile_output(); each
        message is parsed with source "mrcfile.validate".

    warning_messages:
        Iterable of warning strings; each is stripped, blank ones are
        skipped, and the rest are parsed with source "RuntimeWarning".

    Returns the combined list after _deduplicate_issues().
    """
    parsed = [
        _parse_validation_line(entry, source="mrcfile.validate")
        for entry in _split_mrcfile_output(messages_text)
    ]

    stripped_warnings = (text.strip() for text in warning_messages)
    parsed.extend(
        _parse_validation_line(text, source="RuntimeWarning")
        for text in stripped_warnings
        if text
    )

    return _deduplicate_issues(parsed)
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def validate_single_map(map_input):
    """
    Validate one map file with mrcfile.validate() and return a structured report.

    map_input:
        A path-like value, or an object exposing ``fullname``/``filename``
        (resolved through _normalise_map_input()).

    Returns an OrderedDict that always contains:
        file         path as a string
        exists       whether the path exists
        valid        True only when mrcfile.validate() returned a true value
        issue_count  len(issues)
        issues       list of structured issues (see _make_issue())
        messages     raw validate() text; only filled when validation raised
        warnings     raw warning strings; only filled when validation raised
        error        None, "File does not exist", or the exception text
    and, when mrcfile.validate() completed without raising, additionally:
        mrcfile_valid            the verdict of mrcfile.validate() as a bool
        no_known_critical_error  True when no issue has severity "error"

    Exceptions from the validation step are not propagated; they are
    reported as a "mrcfile_validate_exception" issue instead.
    """
    map_path = Path(_normalise_map_input(map_input))
    exists = map_path.exists()
    messages = io.StringIO()
    warning_messages = []

    result = OrderedDict([
        ("file", str(map_path)),
        ("exists", exists),
        ("valid", False),

        # New structured fields
        ("issue_count", 0),
        ("issues", []),

        # Keep old raw fields for backward compatibility/debugging
        ("messages", ""),
        ("warnings", []),
        ("error", None),
    ])

    if not exists:
        result["error"] = "File does not exist"
        result["issues"] = [
            _make_issue(
                "file_not_found",
                "file.path",
                "error",
                "File does not exist",
                OrderedDict([
                    ("path", str(map_path)),
                ]),
                source="validationanalysis",
            )
        ]
        result["issue_count"] = len(result["issues"])
        return result

    try:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always", RuntimeWarning)

            try:
                mrcfile_valid = mrcfile.validate(str(map_path), print_file=messages)
            finally:
                # Harvest the recorded warnings even when validate() raises,
                # so the except branch below can still report them.
                warning_messages = [
                    text
                    for text in (str(caught.message) for caught in caught_warnings)
                    if text.strip()
                ]

        messages_text = messages.getvalue().strip()
        issues = _parse_messages_and_warnings(messages_text, warning_messages)
        has_error = any(issue.get("severity") == "error" for issue in issues)

        # bool() keeps the report JSON-serialisable whatever truthy/falsy
        # object validate() hands back.
        result["mrcfile_valid"] = bool(mrcfile_valid)
        result["valid"] = result["mrcfile_valid"]
        result["no_known_critical_error"] = not has_error
        # "messages" and "warnings" keep their empty defaults here; the raw
        # text is only stored when validation raised (except branch below).
        result["issues"] = issues
        result["issue_count"] = len(issues)

    except Exception as exc:
        messages_text = messages.getvalue().strip()
        exception_message = str(exc)

        issues = _parse_messages_and_warnings(messages_text, warning_messages)
        issues.append(
            _make_issue(
                "mrcfile_validate_exception",
                "exception",
                "error",
                exception_message,
                OrderedDict([
                    ("exception_type", exc.__class__.__name__),
                ]),
                source="validationanalysis",
            )
        )

        result["error"] = exception_message
        result["messages"] = messages_text
        result["warnings"] = warning_messages
        result["issues"] = _deduplicate_issues(issues)
        result["issue_count"] = len(result["issues"])

    return result
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def run_map_data_validation(map_inputs):
    """
    Validate every non-None map input and nest the reports for JSON output.

    Each report is stored under the input's basename. If that basename is
    already taken, the full resolved path is used as the key instead.

    Returns:
        {"data_integrity": {"map_data_integrity": {<key>: <report>, ...}}}
    built from OrderedDicts, where each report comes from validate_single_map().
    """
    per_map = OrderedDict()

    for candidate in map_inputs:
        if candidate is None:
            continue

        full_path = _normalise_map_input(candidate)
        key = os.path.basename(full_path)

        # Avoid overwriting when two inputs have the same basename.
        # For example:
        #   /path1/map.mrc
        #   /path2/map.mrc
        key = full_path if key in per_map else key

        per_map[key] = validate_single_map(candidate)

    integrity_section = OrderedDict([("map_data_integrity", per_map)])
    return OrderedDict([("data_integrity", integrity_section)])
|
|
@@ -77,6 +77,7 @@ from va.metrics.qscore import *
|
|
|
77
77
|
from va.metrics.inclusion import *
|
|
78
78
|
from va.metrics.connected_percentage import *
|
|
79
79
|
from va.metrics.overlap_percentage import *
|
|
80
|
+
# Package-qualified like the other va.metrics imports, so the import also
# resolves when va is used as an installed package.
from va.metrics.map_data_validation import run_map_data_validation
|
|
80
81
|
import va
|
|
81
82
|
|
|
82
83
|
try:
|
|
@@ -1158,6 +1159,53 @@ class ValidationAnalysis:
|
|
|
1158
1159
|
viewer.new_surface_view_chimerax(primary_input_map, primary_input_contour, 'mask', '',
|
|
1159
1160
|
mask_name, mask_contour)
|
|
1160
1161
|
|
|
1162
|
+
@profile_peak_memory()
|
|
1163
|
+
def map_data_validation(self):
    """
    Validate map/header data using mrcfile.validate().

    The inputs are the values of the ``map``, ``rawmap``, ``hmodd`` and
    ``hmeven`` attributes of this object that are present and not None.
    The collected results are written to
    ``<workdir>/<mapname>_map_data_validation.json``.

    Output JSON structure:
        data_integrity -> map_data_integrity -> mapname -> validation results

    Any exception raised while validating or writing the file is reported
    on stderr and not propagated. Timing information is printed to stdout.
    """

    start = timeit.default_timer()

    # getattr() with a default already covers a missing attribute, so no
    # extra AttributeError handling is needed here.
    candidates = (getattr(self, attr, None) for attr in ("map", "rawmap", "hmodd", "hmeven"))
    map_inputs = [value for value in candidates if value is not None]

    try:
        result_dict = run_map_data_validation(map_inputs)

        out_json = os.path.join(
            self.workdir,
            f"{self.mapname}_map_data_validation.json"
        )

        with codecs.open(out_json, "w", encoding="utf-8") as f:
            json.dump(
                result_dict,
                f,
                ensure_ascii=False,
                indent=2
            )

        print("Map data validation results were collected.")

    except Exception as exc:
        err = "Map data validation error: {}.".format(exc)
        sys.stderr.write(err + "\n")

    end = timeit.default_timer()
    print("Map data validation time: %s" % (end - start))
    print("------------------------------------")
|
|
1161
1209
|
|
|
1162
1210
|
# Surface Chimera way
|
|
1163
1211
|
# def new_surface_view_chimerax(self, input_map, input_contour, type='surface', raw='', mask_map=None,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|