emdbva 0.0.1.dev136__tar.gz → 0.0.1.dev139__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {emdbva-0.0.1.dev136/emdbva.egg-info → emdbva-0.0.1.dev139}/PKG-INFO +1 -1
  2. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139/emdbva.egg-info}/PKG-INFO +1 -1
  3. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/SOURCES.txt +1 -0
  4. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/mainva.py +1 -0
  5. emdbva-0.0.1.dev139/va/metrics/map_data_validation.py +664 -0
  6. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/validationanalysis.py +48 -0
  7. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/version.py +1 -1
  8. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/LICENSE +0 -0
  9. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/MANIFEST.in +0 -0
  10. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/README.rst +0 -0
  11. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/dependency_links.txt +0 -0
  12. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/entry_points.txt +0 -0
  13. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/requires.txt +0 -0
  14. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/emdbva.egg-info/top_level.txt +0 -0
  15. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/setup.cfg +0 -0
  16. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/setup.py +0 -0
  17. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/__init__.py +0 -0
  18. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/__init__.py +0 -0
  19. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/bars.py +0 -0
  20. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/connected_percentage.py +0 -0
  21. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/contour_level_predicator.py +0 -0
  22. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/emda_mmcc.py +0 -0
  23. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/emringer.py +0 -0
  24. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/inclusion.py +0 -0
  25. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/overlap_percentage.py +0 -0
  26. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phaserandomization.py +0 -0
  27. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phenix_cc.py +0 -0
  28. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/phenix_mm.py +0 -0
  29. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/projections.py +0 -0
  30. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/qscore.py +0 -0
  31. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/residue_locres.py +0 -0
  32. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/resmap.py +0 -0
  33. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/smoc.py +0 -0
  34. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/strudel.py +0 -0
  35. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/surfaces.py +0 -0
  36. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/metrics/threedfsc.py +0 -0
  37. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/preparation.py +0 -0
  38. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/qscores.csv +0 -0
  39. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/Checker.py +0 -0
  40. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/ChimeraxViews.py +0 -0
  41. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/MapProcessor.py +0 -0
  42. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/Model.py +0 -0
  43. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/__init__.py +0 -0
  44. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/cl_weights.pth +0 -0
  45. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/log_utils.py +0 -0
  46. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/misc.py +0 -0
  47. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/rescolor.py +0 -0
  48. {emdbva-0.0.1.dev136 → emdbva-0.0.1.dev139}/va/utils/stars.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev136
3
+ Version: 0.0.1.dev139
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev136
3
+ Version: 0.0.1.dev139
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -21,6 +21,7 @@ va/metrics/contour_level_predicator.py
21
21
  va/metrics/emda_mmcc.py
22
22
  va/metrics/emringer.py
23
23
  va/metrics/inclusion.py
24
+ va/metrics/map_data_validation.py
24
25
  va/metrics/overlap_percentage.py
25
26
  va/metrics/phaserandomization.py
26
27
  va/metrics/phenix_cc.py
@@ -58,6 +58,7 @@ def allruns(validationobj, runs):
58
58
 
59
59
  # Projections
60
60
  if 'projection' in runs:
61
+ validationobj.map_data_validation()
61
62
  validationobj.new_projection()
62
63
  validationobj.relion_mask_evaluation()
63
64
 
@@ -0,0 +1,664 @@
1
+ import io
2
+ import os
3
+ import re
4
+ import warnings
5
+ from collections import OrderedDict
6
+ from pathlib import Path
7
+
8
+ import mrcfile
9
+
10
+
11
+ def _normalise_map_input(map_input):
12
+ if hasattr(map_input, "fullname"):
13
+ return map_input.fullname
14
+ if hasattr(map_input, "filename"):
15
+ return map_input.filename
16
+ return str(map_input)
17
+
18
+ def _make_issue(code, category, severity, message, details=None, source="mrcfile.validate"):
19
+ """
20
+ Create one structured validation issue.
21
+
22
+ code:
23
+ Stable machine-readable problem name.
24
+
25
+ category:
26
+ Header/data/file area affected.
27
+
28
+ severity:
29
+ Usually "error" for validation failure, "warning" for weaker/unknown cases.
30
+
31
+ message:
32
+ Original mrcfile message, preserved for debugging.
33
+
34
+ details:
35
+ Parsed values extracted from the message.
36
+ """
37
+ return OrderedDict([
38
+ ("code", code),
39
+ ("category", category),
40
+ ("severity", severity),
41
+ ("source", source),
42
+ ("message", message),
43
+ ("details", details or OrderedDict()),
44
+ ])
45
+
46
+
47
+ def _to_int(value):
48
+ try:
49
+ return int(value)
50
+ except Exception:
51
+ return value
52
+
53
+
54
+ def _to_float(value):
55
+ try:
56
+ return float(value)
57
+ except Exception:
58
+ return value
59
+
60
+
61
# Messages matched verbatim: exact text -> (code, category, severity).
_FIXED_VALIDATION_MESSAGES = {
    "Map ID string not found - not an MRC file, or file is corrupt": (
        "map_id_missing_or_corrupt", "header.map", "warning"),
    "Error in header labels: empty labels appear between text-containing labels": (
        "header_labels_empty_between_text", "header.label", "error"),
    "Data block could not be read - file size not checked": (
        "data_block_unreadable_file_size_not_checked", "data", "error"),
    "Couldn't read enough bytes for MRC header": (
        "mrc_header_too_small", "header.size", "error"),
}

# Statistic named in a "Data statistics appear to be inaccurate" message
# -> (code, category). Severity is always "error".
_STATISTIC_ISSUES = {
    "RMS deviation": ("data_statistics_rms_mismatch", "header.rms"),
    "minimum": ("data_statistics_minimum_mismatch", "header.dmin"),
    "maximum": ("data_statistics_maximum_mismatch", "header.dmax"),
    "mean": ("data_statistics_mean_mismatch", "header.dmean"),
}

# Value token of the statistics messages. Besides ordinary decimal/exponent
# notation it accepts "nan" and "inf" (the text produced when a non-finite
# float is formatted), so those messages are classified instead of falling
# through to the "unclassified" fallback.
_STATISTIC_VALUE = r"(?:[-+0-9.eE]+|[-+]?(?:nan|inf))"

_STATISTICS_PATTERN = re.compile(
    r"^Data statistics appear to be inaccurate: "
    r"(?P<statistic>RMS deviation|minimum|maximum|mean) is "
    r"(?P<actual>" + _STATISTIC_VALUE + r") but the value in the header is "
    r"(?P<header>" + _STATISTIC_VALUE + r")$"
)

# Pattern-based messages, tried in order. Each rule is
#   (pattern, code, category, severity, fields, extras)
# fields: (detail_key, regex_group, convert_to_int) copied from the match.
# extras: (detail_key, constant) appended after the fields; tuple constants
#         are emitted as fresh lists.
_PATTERN_RULES = (
    # 1. MRC map ID/header map field
    (
        re.compile(r"^Map ID string is incorrect: found (?P<found>.*), should be (?P<expected>.*)$"),
        "map_id_incorrect", "header.map", "warning",
        (("found", "found", False), ("expected", "expected", False)),
        (),
    ),
    # 2. Machine stamp
    (
        re.compile(r"^Invalid machine stamp: (?P<machine_stamp>.*)$"),
        "machine_stamp_invalid", "header.machst", "error",
        (("machine_stamp", "machine_stamp", False),),
        (),
    ),
    (
        re.compile(
            r"^Machine stamp '(?P<machine_stamp>.*)' does not match the apparent byte order '(?P<byte_order>.*)'$"
        ),
        "machine_stamp_byte_order_mismatch", "header.machst", "warning",
        (("machine_stamp", "machine_stamp", False), ("byte_order", "byte_order", False)),
        (),
    ),
    # 3. Mode
    (
        re.compile(r"^Invalid mode: (?P<mode>[-+]?\d+)$"),
        "mode_invalid", "header.mode", "error",
        (("mode", "mode", True),),
        (),
    ),
    (
        re.compile(r"^Unrecognised mode '(?P<mode>.*)' - data block cannot be read$"),
        "mode_unrecognised_data_unreadable", "header.mode", "error",
        (("mode", "mode", False),),
        (),
    ),
    # 4. Negative header fields
    (
        re.compile(r"^Header field '(?P<field>nx|ny|nz|mx|my|mz|ispg|nlabl)' is negative$"),
        "header_field_negative", "header.dimension_or_count", "error",
        (("field", "field", False),),
        (),
    ),
    (
        re.compile(r"^Cell dimension '(?P<axis>x|y|z)' is negative$"),
        "cell_dimension_negative", "header.cella", "error",
        (("axis", "axis", False),),
        (),
    ),
    # 5. Axis mapping
    (
        re.compile(r"^Invalid axis mapping: found (?P<found>.*), should be \[1, 2, 3\]$"),
        "axis_mapping_invalid", "header.mapc_mapr_maps", "error",
        (("found", "found", False),),
        (("expected", "[1, 2, 3]"),),
    ),
    # 6. Volume stack
    (
        re.compile(
            r"^Error in dimensions for volume stack: nz should be divisible by mz\. "
            r"Found nz = (?P<nz>[-+]?\d+), mz = (?P<mz>[-+]?\d+)\)?$"
        ),
        "volume_stack_dimensions_invalid", "header.nz_mz_ispg", "error",
        (("nz", "nz", True), ("mz", "mz", True)),
        (),
    ),
    # 7. Header labels
    (
        re.compile(
            r"^Error in header labels: nlabl is (?P<nlabl>[-+]?\d+) "
            r"but (?P<label_count>[-+]?\d+) labels contain text$"
        ),
        "header_labels_nlabl_mismatch", "header.nlabl", "error",
        (("nlabl", "nlabl", True), ("actual_label_count", "label_count", True)),
        (),
    ),
    # 8. MRC format version
    (
        re.compile(
            r"^File does not declare MRC format version 20140 or 20141: nversion = (?P<nversion>[-+]?\d+)$"
        ),
        "mrc_format_version_invalid", "header.nversion", "warning",
        (("nversion", "nversion", True),),
        (("expected", (20140, 20141)),),
    ),
    # 9. Extended header type
    (
        re.compile(r"^Extended header type is undefined or unrecognised: exttyp = '(?P<exttyp>.*)'$"),
        "extended_header_type_invalid", "header.exttyp", "error",
        (("exttyp", "exttyp", False),),
        (("expected_known_types", ("CCP4", "MRCO", "SERI", "AGAR", "FEI1", "FEI2", "HDF5")),),
    ),
    # 11. File size
    (
        re.compile(
            r"^File is larger than expected\. Actual size: (?P<actual_bytes>\d+) bytes; "
            r"expected size: (?P<expected_bytes>\d+) bytes \(calculated from header\)$"
        ),
        "file_size_larger_than_expected", "file.size", "error",
        (("actual_bytes", "actual_bytes", True), ("expected_bytes", "expected_bytes", True)),
        (),
    ),
    # 12. Extended header/data read warnings
    (
        re.compile(
            r"^Expected (?P<expected_bytes>\d+) bytes in extended header but could only read (?P<actual_bytes>\d+)$"
        ),
        "extended_header_too_small", "extended_header.size", "error",
        (("expected_bytes", "expected_bytes", True), ("actual_bytes", "actual_bytes", True)),
        (),
    ),
    (
        re.compile(
            r"^Expected (?P<expected_bytes>\d+) bytes in data block but could only read (?P<actual_bytes>\d+)$"
        ),
        "data_block_too_small", "data.size", "error",
        (("expected_bytes", "expected_bytes", True), ("actual_bytes", "actual_bytes", True)),
        (),
    ),
    (
        re.compile(
            r"^Expected (?P<expected_bytes>\d+) bytes in data block but limit is (?P<limit_bytes>\d+)$"
        ),
        "data_block_exceeds_read_limit", "data.size", "error",
        (("expected_bytes", "expected_bytes", True), ("limit_bytes", "limit_bytes", True)),
        (),
    ),
    # 13. Exception-like final traceback lines
    (
        re.compile(r"^(?P<exception_type>[A-Za-z_][A-Za-z0-9_]*Error): (?P<exception_message>.*)$"),
        "exception_during_validation", "exception", "error",
        (("exception_type", "exception_type", False), ("exception_message", "exception_message", False)),
        (),
    ),
)


def _statistic_value(text):
    """
    Convert a statistics token to float, keeping non-finite values as text.

    NaN and infinities are returned as the original string because
    json.dump would otherwise write them as the non-standard tokens
    NaN/Infinity.
    """
    number = _to_float(text)
    if isinstance(number, float) and (
        number != number or number in (float("inf"), float("-inf"))
    ):
        return text
    return number


def _parse_validation_line(line, source="mrcfile.validate"):
    """
    Parse one line from mrcfile.validate() or one RuntimeWarning.

    line:
        Message text; surrounding whitespace is stripped before matching.

    source:
        Label stored in the issue's "source" field.

    Returns the issue record built by _make_issue(). The stripped line is
    always stored as the issue message.

    Any message not recognised by the parser is still kept as
    "unclassified_mrcfile_validation_message" (severity "warning") so future
    mrcfile versions do not silently lose information.
    """
    line = line.strip()

    fixed = _FIXED_VALIDATION_MESSAGES.get(line)
    if fixed is not None:
        code, category, severity = fixed
        return _make_issue(code, category, severity, line, source=source)

    # 10. Data statistics (RMS deviation / minimum / maximum / mean)
    match = _STATISTICS_PATTERN.match(line)
    if match:
        code, category = _STATISTIC_ISSUES[match.group("statistic")]
        details = OrderedDict([
            ("actual", _statistic_value(match.group("actual"))),
            ("header", _statistic_value(match.group("header"))),
        ])
        return _make_issue(code, category, "error", line, details, source)

    for pattern, code, category, severity, fields, extras in _PATTERN_RULES:
        match = pattern.match(line)
        if not match:
            continue

        details = OrderedDict()
        for key, group, as_int in fields:
            text = match.group(group)
            details[key] = _to_int(text) if as_int else text
        for key, constant in extras:
            # Copy tuple constants so issues never share one mutable list.
            details[key] = list(constant) if isinstance(constant, tuple) else constant

        return _make_issue(code, category, severity, line, details, source)

    # Fallback. Keep unknown messages instead of losing them.
    return _make_issue(
        "unclassified_mrcfile_validation_message",
        "unclassified",
        "warning",
        line,
        source=source,
    )
472
+
473
+
474
+ def _split_mrcfile_output(output):
475
+ """
476
+ Convert mrcfile.validate() text output into individual validation messages.
477
+
478
+ We remove non-problem lines like:
479
+ Checking if ... is a valid MRC2014 file...
480
+ File appears to be valid.
481
+ """
482
+ lines = []
483
+
484
+ for raw_line in output.splitlines():
485
+ line = raw_line.strip()
486
+
487
+ if not line:
488
+ continue
489
+
490
+ if line.startswith("Checking if ") and line.endswith(" is a valid MRC2014 file..."):
491
+ continue
492
+
493
+ if line == "File appears to be valid.":
494
+ continue
495
+
496
+ # If traceback is printed, keep only meaningful final error line.
497
+ if line == "Traceback (most recent call last):":
498
+ continue
499
+
500
+ if line.startswith("File ") and ", line " in line:
501
+ continue
502
+
503
+ if line.startswith("^"):
504
+ continue
505
+
506
+ lines.append(line)
507
+
508
+ return lines
509
+
510
+
511
+ def _deduplicate_issues(issues):
512
+ """
513
+ Remove duplicates.
514
+
515
+ The same problem can sometimes appear both in mrcfile.validate() text output
516
+ and in captured RuntimeWarning messages.
517
+ """
518
+ unique_issues = []
519
+ seen = set()
520
+
521
+ for issue in issues:
522
+ key = (
523
+ issue.get("code"),
524
+ issue.get("category"),
525
+ issue.get("message"),
526
+ )
527
+
528
+ if key in seen:
529
+ continue
530
+
531
+ seen.add(key)
532
+ unique_issues.append(issue)
533
+
534
+ return unique_issues
535
+
536
+
537
def _parse_messages_and_warnings(messages_text, warning_messages):
    """
    Turn validator text output and warning strings into de-duplicated issues.

    messages_text:
        Text output; it is split into message lines by
        _split_mrcfile_output() and each line is parsed with source
        "mrcfile.validate".

    warning_messages:
        Iterable of warning message strings; each is stripped, blank ones
        are skipped, and the rest are parsed with source "RuntimeWarning".

    Issues from the text output come first, followed by those from the
    warnings; duplicates are then removed by _deduplicate_issues().
    """
    parsed = [
        _parse_validation_line(text_line, source="mrcfile.validate")
        for text_line in _split_mrcfile_output(messages_text)
    ]

    stripped_warnings = (message.strip() for message in warning_messages)
    parsed.extend(
        _parse_validation_line(message, source="RuntimeWarning")
        for message in stripped_warnings
        if message
    )

    return _deduplicate_issues(parsed)
551
+
552
+
553
+
554
def validate_single_map(map_input):
    """
    Validate one map file with mrcfile.validate() and return a structured report.

    map_input:
        Anything accepted by _normalise_map_input() (an object carrying its
        path, or a path value).

    Returns an OrderedDict with the keys:
        file, exists, valid, issue_count, issues, messages, warnings, error
    plus, when mrcfile.validate() returned normally:
        mrcfile_valid            - the value returned by mrcfile.validate()
        no_known_critical_error  - True when no issue has severity "error"

    "valid" mirrors the truthiness of mrcfile.validate()'s return value and
    is False when the file is missing or validation raised.

    The raw "messages"/"warnings" fields are only filled when validation
    raised; otherwise they keep their empty defaults and the same
    information is available through "issues".

    This function does not raise for a missing file or for an exception
    inside mrcfile.validate(); both are reported as issues in the result.
    """
    map_path = Path(_normalise_map_input(map_input))
    messages = io.StringIO()
    # Bound before the try so it is defined even if entering the
    # catch_warnings context fails.
    caught_warnings = []

    result = OrderedDict([
        ("file", str(map_path)),
        ("exists", map_path.exists()),
        ("valid", False),

        # New structured fields
        ("issue_count", 0),
        ("issues", []),

        # Keep old raw fields for backward compatibility/debugging
        ("messages", ""),
        ("warnings", []),
        ("error", None),
    ])

    if not map_path.exists():
        result["error"] = "File does not exist"
        result["issues"] = [
            _make_issue(
                "file_not_found",
                "file.path",
                "error",
                "File does not exist",
                OrderedDict([
                    ("path", str(map_path)),
                ]),
                source="validationanalysis",
            )
        ]
        result["issue_count"] = len(result["issues"])
        return result

    failure = None
    mrcfile_valid = False
    try:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always", RuntimeWarning)
            mrcfile_valid = mrcfile.validate(str(map_path), print_file=messages)
    except Exception as exc:
        # Keep a reference: the "exc" name is cleared when the except block ends.
        failure = exc

    # Collected after the try so warnings recorded before an exception are
    # reported as well.
    warning_messages = [
        str(warning.message)
        for warning in caught_warnings
        if str(warning.message).strip()
    ]
    messages_text = messages.getvalue().strip()
    issues = _parse_messages_and_warnings(messages_text, warning_messages)

    if failure is None:
        has_error = any(issue.get("severity") == "error" for issue in issues)

        result["valid"] = bool(mrcfile_valid)
        result["mrcfile_valid"] = mrcfile_valid
        result["no_known_critical_error"] = not has_error
    else:
        exception_message = str(failure)
        issues.append(
            _make_issue(
                "mrcfile_validate_exception",
                "exception",
                "error",
                exception_message,
                OrderedDict([
                    ("exception_type", failure.__class__.__name__),
                ]),
                source="validationanalysis",
            )
        )
        issues = _deduplicate_issues(issues)

        result["error"] = exception_message
        result["messages"] = messages_text
        result["warnings"] = warning_messages

    result["issues"] = issues
    result["issue_count"] = len(issues)

    return result
639
+
640
+
641
def run_map_data_validation(map_inputs):
    """
    Validate every given map and collect the reports in one nested mapping.

    map_inputs:
        Iterable of map inputs; ``None`` entries are ignored.

    Returns an OrderedDict shaped as
        {"data_integrity": {"map_data_integrity": {<key>: <report>, ...}}}
    where each report comes from validate_single_map(). The key is the
    file's basename, or the full path when that basename is already taken
    (for example /path1/map.mrc and /path2/map.mrc).
    """
    per_map = OrderedDict()

    for candidate in (entry for entry in map_inputs if entry is not None):
        full_path = _normalise_map_input(candidate)
        base_name = os.path.basename(full_path)

        # Fall back to the full path so a second file with the same basename
        # does not overwrite the first report.
        key = full_path if base_name in per_map else base_name
        per_map[key] = validate_single_map(candidate)

    report = OrderedDict()
    report["data_integrity"] = OrderedDict()
    report["data_integrity"]["map_data_integrity"] = per_map
    return report
@@ -77,6 +77,7 @@ from va.metrics.qscore import *
77
77
  from va.metrics.inclusion import *
78
78
  from va.metrics.connected_percentage import *
79
79
  from va.metrics.overlap_percentage import *
80
# Import through the "va" package like the neighbouring metric imports; a bare
# "metrics" package is only importable when the va/ directory itself is on sys.path.
from va.metrics.map_data_validation import run_map_data_validation
80
81
  import va
81
82
 
82
83
  try:
@@ -1158,6 +1159,53 @@ class ValidationAnalysis:
1158
1159
  viewer.new_surface_view_chimerax(primary_input_map, primary_input_contour, 'mask', '',
1159
1160
  mask_name, mask_contour)
1160
1161
 
1162
@profile_peak_memory()
def map_data_validation(self):
    """
    Validate map/header data using mrcfile.validate().

    Collects the "map", "rawmap", "hmodd" and "hmeven" attributes of this
    object that are set (not None), validates them with
    run_map_data_validation(), and writes the result to
    "<workdir>/<mapname>_map_data_validation.json".

    Output JSON structure:
        data_integrity -> map_data_integrity -> map file name -> validation results

    Any exception raised while validating or writing is reported on stderr
    and not propagated. Timing information is printed to stdout.

    Returns None.
    """

    start = timeit.default_timer()

    # getattr with a default already covers attributes that do not exist.
    map_inputs = [
        value
        for value in (
            getattr(self, attr, None)
            for attr in ("map", "rawmap", "hmodd", "hmeven")
        )
        if value is not None
    ]

    try:
        result_dict = run_map_data_validation(map_inputs)

        out_json = os.path.join(
            self.workdir,
            f"{self.mapname}_map_data_validation.json"
        )

        # newline="\n" writes line endings untranslated, matching the
        # binary-mode behaviour of the codecs.open() call it replaces.
        with open(out_json, "w", encoding="utf-8", newline="\n") as f:
            json.dump(
                result_dict,
                f,
                ensure_ascii=False,
                indent=2
            )

        print("Map data validation results were collected.")

    except Exception as exc:
        # Best-effort step: report the problem and continue.
        sys.stderr.write("Map data validation error: {}.\n".format(exc))

    end = timeit.default_timer()
    print("Map data validation time: %s" % (end - start))
    print("------------------------------------")

    return None
1161
1209
 
1162
1210
  # Surface Chimera way
1163
1211
  # def new_surface_view_chimerax(self, input_map, input_contour, type='surface', raw='', mask_map=None,
@@ -18,5 +18,5 @@ under the License.
18
18
 
19
19
  """
20
20
 
21
- __version__ = '0.0.1.dev136'
21
+ __version__ = '0.0.1.dev139'
22
22
  __em_statistics_version__ = '202505.v01'
File without changes
File without changes
File without changes
File without changes
File without changes