emdbva 0.0.1.dev137__tar.gz → 0.0.1.dev139__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. {emdbva-0.0.1.dev137/emdbva.egg-info → emdbva-0.0.1.dev139}/PKG-INFO +1 -1
  2. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139/emdbva.egg-info}/PKG-INFO +1 -1
  3. emdbva-0.0.1.dev139/va/metrics/map_data_validation.py +664 -0
  4. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/validationanalysis.py +7 -2
  5. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/version.py +1 -1
  6. emdbva-0.0.1.dev137/va/metrics/map_data_validation.py +0 -59
  7. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/LICENSE +0 -0
  8. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/MANIFEST.in +0 -0
  9. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/README.rst +0 -0
  10. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/emdbva.egg-info/SOURCES.txt +0 -0
  11. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/emdbva.egg-info/dependency_links.txt +0 -0
  12. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/emdbva.egg-info/entry_points.txt +0 -0
  13. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/emdbva.egg-info/requires.txt +0 -0
  14. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/emdbva.egg-info/top_level.txt +0 -0
  15. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/setup.cfg +0 -0
  16. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/setup.py +0 -0
  17. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/__init__.py +0 -0
  18. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/mainva.py +0 -0
  19. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/__init__.py +0 -0
  20. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/bars.py +0 -0
  21. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/connected_percentage.py +0 -0
  22. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/contour_level_predicator.py +0 -0
  23. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/emda_mmcc.py +0 -0
  24. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/emringer.py +0 -0
  25. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/inclusion.py +0 -0
  26. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/overlap_percentage.py +0 -0
  27. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/phaserandomization.py +0 -0
  28. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/phenix_cc.py +0 -0
  29. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/phenix_mm.py +0 -0
  30. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/projections.py +0 -0
  31. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/qscore.py +0 -0
  32. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/residue_locres.py +0 -0
  33. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/resmap.py +0 -0
  34. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/smoc.py +0 -0
  35. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/strudel.py +0 -0
  36. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/surfaces.py +0 -0
  37. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/metrics/threedfsc.py +0 -0
  38. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/preparation.py +0 -0
  39. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/qscores.csv +0 -0
  40. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/Checker.py +0 -0
  41. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/ChimeraxViews.py +0 -0
  42. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/MapProcessor.py +0 -0
  43. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/Model.py +0 -0
  44. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/__init__.py +0 -0
  45. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/cl_weights.pth +0 -0
  46. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/log_utils.py +0 -0
  47. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/misc.py +0 -0
  48. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/rescolor.py +0 -0
  49. {emdbva-0.0.1.dev137 → emdbva-0.0.1.dev139}/va/utils/stars.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev137
3
+ Version: 0.0.1.dev139
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev137
3
+ Version: 0.0.1.dev139
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -0,0 +1,664 @@
1
+ import io
2
+ import os
3
+ import re
4
+ import warnings
5
+ from collections import OrderedDict
6
+ from pathlib import Path
7
+
8
+ import mrcfile
9
+
10
+
11
def _normalise_map_input(map_input):
    """
    Reduce a map input to the value used as its path.

    Objects exposing a ``fullname`` attribute yield that attribute; failing
    that, a ``filename`` attribute is used. Anything else is converted with
    ``str()``.
    """
    for attribute in ("fullname", "filename"):
        if hasattr(map_input, attribute):
            return getattr(map_input, attribute)
    return str(map_input)
17
+
18
def _make_issue(code, category, severity, message, details=None, source="mrcfile.validate"):
    """
    Build the ordered record describing a single validation issue.

    code:
        Stable machine-readable problem name.

    category:
        Header/data/file area affected.

    severity:
        Usually "error" for validation failure, "warning" for weaker/unknown cases.

    message:
        Original message text, preserved for debugging.

    details:
        Parsed values extracted from the message. A falsy value (None or an
        empty mapping) is replaced by a new empty OrderedDict.

    source:
        Label for where the message came from.
    """
    issue = OrderedDict()
    issue["code"] = code
    issue["category"] = category
    issue["severity"] = severity
    issue["source"] = source
    issue["message"] = message
    issue["details"] = details if details else OrderedDict()
    return issue
45
+
46
+
47
def _to_int(value):
    """
    Convert *value* to ``int``, returning it unchanged if it cannot be converted.

    Only the errors ``int()`` raises for unsuitable input are handled
    (``TypeError``, ``ValueError``, ``OverflowError``); any other exception
    indicates a programming error and is allowed to propagate.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return value
52
+
53
+
54
def _to_float(value):
    """
    Convert *value* to ``float``, returning it unchanged if it cannot be converted.

    Only the errors ``float()`` raises for unsuitable input are handled
    (``TypeError``, ``ValueError``, ``OverflowError``); any other exception
    indicates a programming error and is allowed to propagate.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return value
59
+
60
+
61
def _validation_rule(pattern, code, category, severity, fields, extras=()):
    """Bundle one compiled message pattern with the issue it maps to."""
    return (re.compile(pattern), code, category, severity, fields, extras)


# Fixed-text messages (no variable part): message -> (code, category, severity).
_LITERAL_VALIDATION_MESSAGES = {
    "Map ID string not found - not an MRC file, or file is corrupt":
        ("map_id_missing_or_corrupt", "header.map", "warning"),
    "Error in header labels: empty labels appear between text-containing labels":
        ("header_labels_empty_between_text", "header.label", "error"),
    "Data block could not be read - file size not checked":
        ("data_block_unreadable_file_size_not_checked", "data", "error"),
    "Couldn't read enough bytes for MRC header":
        ("mrc_header_too_small", "header.size", "error"),
}

# Data-statistics messages share one shape and differ only in the statistic
# name. Values are matched with \S+ (not just digits) so that "nan" and "inf"
# are classified as statistics mismatches too.
_STATISTICS_RULES = tuple(
    _validation_rule(
        r"^Data statistics appear to be inaccurate: " + label + r" is "
        r"(?P<actual>\S+) but the value in the header is (?P<header>\S+)$",
        code,
        category,
        "error",
        (("actual", "float"), ("header", "float")),
    )
    for label, code, category in (
        ("RMS deviation", "data_statistics_rms_mismatch", "header.rms"),
        ("minimum", "data_statistics_minimum_mismatch", "header.dmin"),
        ("maximum", "data_statistics_maximum_mismatch", "header.dmax"),
        ("mean", "data_statistics_mean_mismatch", "header.dmean"),
    )
)

# Pattern rules, tried in order; the first match wins.
# Each rule is (compiled pattern, code, category, severity, fields, extras):
#   fields - (group name, kind) pairs copied into "details" in this order,
#            kind being "str", "int" or "float";
#   extras - constant (key, value) pairs appended to "details"; tuple values
#            are emitted as fresh lists.
_VALIDATION_RULES = (
    # 1. MRC map ID/header map field
    _validation_rule(
        r"^Map ID string is incorrect: found (?P<found>.*), should be (?P<expected>.*)$",
        "map_id_incorrect", "header.map", "warning",
        (("found", "str"), ("expected", "str")),
    ),
    # 2. Machine stamp
    _validation_rule(
        r"^Invalid machine stamp: (?P<machine_stamp>.*)$",
        "machine_stamp_invalid", "header.machst", "error",
        (("machine_stamp", "str"),),
    ),
    _validation_rule(
        r"^Machine stamp '(?P<machine_stamp>.*)' does not match the apparent byte order '(?P<byte_order>.*)'$",
        "machine_stamp_byte_order_mismatch", "header.machst", "warning",
        (("machine_stamp", "str"), ("byte_order", "str")),
    ),
    # 3. Mode
    _validation_rule(
        r"^Invalid mode: (?P<mode>[-+]?\d+)$",
        "mode_invalid", "header.mode", "error",
        (("mode", "int"),),
    ),
    _validation_rule(
        r"^Unrecognised mode '(?P<mode>.*)' - data block cannot be read$",
        "mode_unrecognised_data_unreadable", "header.mode", "error",
        (("mode", "str"),),
    ),
    # 4. Negative header fields
    _validation_rule(
        r"^Header field '(?P<field>nx|ny|nz|mx|my|mz|ispg|nlabl)' is negative$",
        "header_field_negative", "header.dimension_or_count", "error",
        (("field", "str"),),
    ),
    _validation_rule(
        r"^Cell dimension '(?P<axis>x|y|z)' is negative$",
        "cell_dimension_negative", "header.cella", "error",
        (("axis", "str"),),
    ),
    # 5. Axis mapping
    _validation_rule(
        r"^Invalid axis mapping: found (?P<found>.*), should be \[1, 2, 3\]$",
        "axis_mapping_invalid", "header.mapc_mapr_maps", "error",
        (("found", "str"),),
        (("expected", "[1, 2, 3]"),),
    ),
    # 6. Volume stack
    _validation_rule(
        r"^Error in dimensions for volume stack: nz should be divisible by mz\. "
        r"Found nz = (?P<nz>[-+]?\d+), mz = (?P<mz>[-+]?\d+)\)?$",
        "volume_stack_dimensions_invalid", "header.nz_mz_ispg", "error",
        (("nz", "int"), ("mz", "int")),
    ),
    # 7. Header labels
    _validation_rule(
        r"^Error in header labels: nlabl is (?P<nlabl>[-+]?\d+) "
        r"but (?P<actual_label_count>[-+]?\d+) labels contain text$",
        "header_labels_nlabl_mismatch", "header.nlabl", "error",
        (("nlabl", "int"), ("actual_label_count", "int")),
    ),
    # 8. MRC format version
    _validation_rule(
        r"^File does not declare MRC format version 20140 or 20141: nversion = (?P<nversion>[-+]?\d+)$",
        "mrc_format_version_invalid", "header.nversion", "warning",
        (("nversion", "int"),),
        (("expected", (20140, 20141)),),
    ),
    # 9. Extended header type
    _validation_rule(
        r"^Extended header type is undefined or unrecognised: exttyp = '(?P<exttyp>.*)'$",
        "extended_header_type_invalid", "header.exttyp", "error",
        (("exttyp", "str"),),
        (("expected_known_types", ("CCP4", "MRCO", "SERI", "AGAR", "FEI1", "FEI2", "HDF5")),),
    ),
) + _STATISTICS_RULES + (  # 10. Data statistics
    # 11. File size
    _validation_rule(
        r"^File is larger than expected\. Actual size: (?P<actual_bytes>\d+) bytes; "
        r"expected size: (?P<expected_bytes>\d+) bytes \(calculated from header\)$",
        "file_size_larger_than_expected", "file.size", "error",
        (("actual_bytes", "int"), ("expected_bytes", "int")),
    ),
    # 12. Extended header/data read warnings
    _validation_rule(
        r"^Expected (?P<expected_bytes>\d+) bytes in extended header but could only read (?P<actual_bytes>\d+)$",
        "extended_header_too_small", "extended_header.size", "error",
        (("expected_bytes", "int"), ("actual_bytes", "int")),
    ),
    _validation_rule(
        r"^Expected (?P<expected_bytes>\d+) bytes in data block but could only read (?P<actual_bytes>\d+)$",
        "data_block_too_small", "data.size", "error",
        (("expected_bytes", "int"), ("actual_bytes", "int")),
    ),
    _validation_rule(
        r"^Expected (?P<expected_bytes>\d+) bytes in data block but limit is (?P<limit_bytes>\d+)$",
        "data_block_exceeds_read_limit", "data.size", "error",
        (("expected_bytes", "int"), ("limit_bytes", "int")),
    ),
    # 13. Exception-like final traceback lines
    _validation_rule(
        r"^(?P<exception_type>[A-Za-z_][A-Za-z0-9_]*Error): (?P<exception_message>.*)$",
        "exception_during_validation", "exception", "error",
        (("exception_type", "str"), ("exception_message", "str")),
    ),
)


def _convert_validation_value(text, kind):
    """Convert one captured group according to its declared kind."""
    if kind == "int":
        return _to_int(text)
    if kind == "float":
        value = _to_float(text)
        # json.dump writes non-finite floats as NaN/Infinity, which is not
        # valid JSON, so those values are kept as their original text.
        if isinstance(value, float) and (value != value or abs(value) == float("inf")):
            return text
        return value
    return text


def _parse_validation_line(line, source="mrcfile.validate"):
    """
    Parse one line from mrcfile.validate() or one RuntimeWarning.

    The line is stripped, then looked up among the fixed-text messages and,
    failing that, matched against each pattern rule in order. The first hit
    determines the issue's code, category, severity and parsed details.

    Any message not recognised by the parser is still kept in the JSON as
    "unclassified_mrcfile_validation_message" so future mrcfile versions do
    not silently lose information.

    line:
        Message text to classify.

    source:
        Label stored in the issue's "source" field.

    Returns the issue record built by _make_issue().
    """
    line = line.strip()

    literal = _LITERAL_VALIDATION_MESSAGES.get(line)
    if literal is not None:
        code, category, severity = literal
        return _make_issue(code, category, severity, line, source=source)

    for pattern, code, category, severity, fields, extras in _VALIDATION_RULES:
        match = pattern.match(line)
        if match is None:
            continue

        details = OrderedDict()
        for key, kind in fields:
            details[key] = _convert_validation_value(match.group(key), kind)
        for key, value in extras:
            # Copy sequence constants so callers never share one mutable list.
            details[key] = list(value) if isinstance(value, tuple) else value

        return _make_issue(code, category, severity, line, details, source)

    # Fallback. Keep unknown messages instead of losing them.
    return _make_issue(
        "unclassified_mrcfile_validation_message",
        "unclassified",
        "warning",
        line,
        source=source,
    )
472
+
473
+
474
def _split_mrcfile_output(output):
    """
    Convert mrcfile.validate() text output into individual validation messages.

    We remove non-problem lines like:
    Checking if ... is a valid MRC2014 file...
    File appears to be valid.

    If a traceback is present, every indented line after the
    "Traceback (most recent call last):" header (frame locations, the quoted
    source code beneath them, caret markers) is dropped, so only the final
    unindented exception line is kept.

    output:
        Captured text, possibly spanning several lines.

    Returns a list of stripped, non-empty message lines in their original order.
    """
    lines = []
    in_traceback = False

    for raw_line in output.splitlines():
        line = raw_line.strip()

        if not line:
            continue

        if line == "Traceback (most recent call last):":
            in_traceback = True
            continue

        if in_traceback:
            # Everything inside a traceback body is indented; the first
            # unindented line is the exception summary and ends the traceback.
            if raw_line[:1].isspace():
                continue
            in_traceback = False

        if line.startswith("Checking if ") and line.endswith(" is a valid MRC2014 file..."):
            continue

        if line == "File appears to be valid.":
            continue

        # Stray frame/caret lines seen without a traceback header.
        if line.startswith("File ") and ", line " in line:
            continue

        if line.startswith("^"):
            continue

        lines.append(line)

    return lines
509
+
510
+
511
def _deduplicate_issues(issues):
    """
    Drop repeated issues, keeping the first occurrence of each.

    Two issues count as the same when their "code", "category" and "message"
    values are all equal; other fields (such as "source") are ignored. This
    matters because the same problem can sometimes appear both in the
    mrcfile.validate() text output and in captured RuntimeWarning messages.
    """
    first_seen = OrderedDict()

    for issue in issues:
        identity = tuple(issue.get(field) for field in ("code", "category", "message"))
        # setdefault leaves an existing entry alone, so the earliest issue wins.
        first_seen.setdefault(identity, issue)

    return list(first_seen.values())
535
+
536
+
537
def _parse_messages_and_warnings(messages_text, warning_messages):
    """
    Turn validation text output and warning strings into one issue list.

    Lines of *messages_text* are parsed first (source "mrcfile.validate"),
    followed by each non-blank entry of *warning_messages* (source
    "RuntimeWarning"). Duplicates are removed from the combined list.
    """
    parsed = [
        _parse_validation_line(entry, source="mrcfile.validate")
        for entry in _split_mrcfile_output(messages_text)
    ]

    stripped_warnings = (text.strip() for text in warning_messages)
    parsed.extend(
        _parse_validation_line(text, source="RuntimeWarning")
        for text in stripped_warnings
        if text
    )

    return _deduplicate_issues(parsed)
551
+
552
+
553
+
554
def validate_single_map(map_input):
    """
    Validate one map file with mrcfile.validate() and return a structured report.

    map_input:
        A path, or an object accepted by _normalise_map_input().

    Returns an OrderedDict with these keys:
        file, exists:
            The path checked and whether it exists.
        valid:
            bool() of the mrcfile.validate() return value; False when the
            file is missing or validation raised.
        issue_count, issues:
            Structured issues parsed from the validation output and warnings.
        messages, warnings:
            Raw captured text output and warning strings.
        error:
            Exception text when validation raised, "File does not exist" for
            a missing file, otherwise None.
        mrcfile_valid, no_known_critical_error:
            Present only when validation completed without raising: the raw
            return value, and whether no parsed issue has severity "error".

    Exceptions raised by mrcfile.validate() are recorded in the report rather
    than propagated.
    """
    map_path = Path(_normalise_map_input(map_input))
    exists = map_path.exists()

    result = OrderedDict([
        ("file", str(map_path)),
        ("exists", exists),
        ("valid", False),

        # Structured fields
        ("issue_count", 0),
        ("issues", []),

        # Raw fields kept for backward compatibility/debugging
        ("messages", ""),
        ("warnings", []),
        ("error", None),
    ])

    if not exists:
        result["error"] = "File does not exist"
        result["issues"] = [
            _make_issue(
                "file_not_found",
                "file.path",
                "error",
                "File does not exist",
                OrderedDict([
                    ("path", str(map_path)),
                ]),
                source="validationanalysis",
            )
        ]
        result["issue_count"] = len(result["issues"])
        return result

    messages = io.StringIO()
    caught_warnings = []
    failure = None
    mrcfile_valid = None

    try:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always", RuntimeWarning)
            mrcfile_valid = mrcfile.validate(str(map_path), print_file=messages)
    except Exception as exc:
        failure = exc

    # Read the recorded warnings after the block so that warnings issued
    # before an exception are still reported.
    warning_messages = [
        text
        for text in (str(warning.message) for warning in caught_warnings)
        if text.strip()
    ]
    messages_text = messages.getvalue().strip()
    issues = _parse_messages_and_warnings(messages_text, warning_messages)

    result["messages"] = messages_text
    result["warnings"] = warning_messages

    if failure is None:
        result["valid"] = bool(mrcfile_valid)
        result["mrcfile_valid"] = mrcfile_valid
        result["no_known_critical_error"] = not any(
            issue.get("severity") == "error" for issue in issues
        )
    else:
        exception_message = str(failure)
        issues.append(
            _make_issue(
                "mrcfile_validate_exception",
                "exception",
                "error",
                exception_message,
                OrderedDict([
                    ("exception_type", failure.__class__.__name__),
                ]),
                source="validationanalysis",
            )
        )
        issues = _deduplicate_issues(issues)
        result["error"] = exception_message

    result["issues"] = issues
    result["issue_count"] = len(issues)

    return result
639
+
640
+
641
def run_map_data_validation(map_inputs):
    """
    Validate every non-None entry of *map_inputs* and collect the reports.

    Reports are keyed by the file's basename. When that basename is already
    used by an earlier input (for example /path1/map.mrc and /path2/map.mrc),
    the normalised path is used as the key instead so the earlier report is
    not overwritten.

    Returns {"data_integrity": {"map_data_integrity": {key: report, ...}}}
    built from OrderedDicts.
    """
    per_map = OrderedDict()

    for candidate in (entry for entry in map_inputs if entry is not None):
        full_path = _normalise_map_input(candidate)
        key = os.path.basename(full_path)
        key = full_path if key in per_map else key
        per_map[key] = validate_single_map(candidate)

    report = OrderedDict()
    report["data_integrity"] = OrderedDict([("map_data_integrity", per_map)])
    return report
@@ -1188,11 +1188,16 @@ class ValidationAnalysis:
1188
1188
  )
1189
1189
 
1190
1190
  with codecs.open(out_json, "w", encoding="utf-8") as f:
1191
- json.dump(result_dict, f)
1191
+ json.dump(
1192
+ result_dict,
1193
+ f,
1194
+ ensure_ascii=False,
1195
+ indent=2
1196
+ )
1192
1197
 
1193
1198
  print("Map data validation results were collected.")
1194
1199
 
1195
- except:
1200
+ except Exception:
1196
1201
  err = "Map data validation error: {}.".format(sys.exc_info()[1])
1197
1202
  sys.stderr.write(err + "\n")
1198
1203
 
@@ -18,5 +18,5 @@ under the License.
18
18
 
19
19
  """
20
20
 
21
- __version__ = '0.0.1.dev137'
21
+ __version__ = '0.0.1.dev139'
22
22
  __em_statistics_version__ = '202505.v01'
@@ -1,59 +0,0 @@
1
- import io
2
- import os
3
- from collections import OrderedDict
4
- from pathlib import Path
5
- import mrcfile
6
-
7
-
8
- def _normalise_map_input(map_input):
9
- if hasattr(map_input, "fullname"):
10
- return map_input.fullname
11
- if hasattr(map_input, "filename"):
12
- return map_input.filename
13
- return str(map_input)
14
-
15
-
16
- def validate_single_map(map_input):
17
- map_path = Path(_normalise_map_input(map_input))
18
- messages = io.StringIO()
19
-
20
- result = OrderedDict([
21
- ("file", str(map_path)),
22
- ("exists", map_path.exists()),
23
- ("valid", False),
24
- ("messages", ""),
25
- ("error", None),
26
- ])
27
-
28
- if not map_path.exists():
29
- result["error"] = "File does not exist"
30
- return result
31
-
32
- try:
33
- valid = mrcfile.validate(str(map_path), print_file=messages)
34
- result["valid"] = bool(valid)
35
- result["messages"] = messages.getvalue().strip()
36
- except Exception as exc:
37
- result["error"] = str(exc)
38
- result["messages"] = messages.getvalue().strip()
39
-
40
- return result
41
-
42
-
43
- def run_map_data_validation(map_inputs):
44
- map_results = OrderedDict()
45
-
46
- for map_input in map_inputs:
47
- if map_input is None:
48
- continue
49
-
50
- map_path = _normalise_map_input(map_input)
51
- map_name = os.path.basename(map_path)
52
-
53
- map_results[map_name] = validate_single_map(map_input)
54
-
55
- return OrderedDict([
56
- ("data_validation", OrderedDict([
57
- ("map_data_validation", map_results)
58
- ]))
59
- ])
File without changes
File without changes
File without changes
File without changes
File without changes