emdbva 0.0.1.dev139__tar.gz → 0.0.1.dev140__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {emdbva-0.0.1.dev139/emdbva.egg-info → emdbva-0.0.1.dev140}/PKG-INFO +1 -1
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140/emdbva.egg-info}/PKG-INFO +1 -1
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/SOURCES.txt +1 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/mainva.py +3 -3
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/map_data_validation.py +123 -6
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/preparation.py +307 -11
- emdbva-0.0.1.dev140/va/prepareandrun_codon_airflow.py +1170 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/validationanalysis.py +60 -18
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/version.py +1 -1
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/LICENSE +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/MANIFEST.in +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/README.rst +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/dependency_links.txt +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/entry_points.txt +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/requires.txt +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/top_level.txt +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/setup.cfg +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/setup.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/__init__.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/__init__.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/bars.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/connected_percentage.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/contour_level_predicator.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/emda_mmcc.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/emringer.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/inclusion.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/overlap_percentage.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/phaserandomization.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/phenix_cc.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/phenix_mm.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/projections.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/qscore.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/residue_locres.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/resmap.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/smoc.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/strudel.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/surfaces.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/threedfsc.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/qscores.csv +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/Checker.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/ChimeraxViews.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/MapProcessor.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/Model.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/__init__.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/cl_weights.pth +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/log_utils.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/misc.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/rescolor.py +0 -0
- {emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/utils/stars.py +0 -0
|
@@ -121,9 +121,9 @@ def allruns(validationobj, runs):
|
|
|
121
121
|
if 'symmetry' in runs:
|
|
122
122
|
validationobj.symmetry()
|
|
123
123
|
|
|
124
|
-
# Strudel
|
|
125
|
-
if 'strudel' in runs:
|
|
126
|
-
|
|
124
|
+
# Strudel (turning off)
|
|
125
|
+
# if 'strudel' in runs:
|
|
126
|
+
# validationobj.strudel()
|
|
127
127
|
|
|
128
128
|
# Q-score
|
|
129
129
|
if 'qscore' in runs:
|
|
@@ -59,13 +59,127 @@ def _to_float(value):
|
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def _parse_validation_line(line, source="mrcfile.validate"):
|
|
62
|
-
"""
|
|
63
|
-
Parse one line from mrcfile.validate() or one RuntimeWarning.
|
|
64
62
|
|
|
65
|
-
Any message not recognised by the parser is still kept in the JSON as
|
|
66
|
-
"unclassified_mrcfile_validation_message" so future mrcfile versions do
|
|
67
|
-
not silently lose information.
|
|
68
63
|
"""
|
|
64
|
+
Parse one line from ``mrcfile.validate()`` output or one captured
|
|
65
|
+
``RuntimeWarning`` into a structured validation issue.
|
|
66
|
+
The returned issue is created by ``_make_issue()`` and contains:
|
|
67
|
+
code
|
|
68
|
+
Stable machine-readable issue code.
|
|
69
|
+
category
|
|
70
|
+
Header/data/file area affected by the issue.
|
|
71
|
+
severity
|
|
72
|
+
Local VA severity assigned to the issue. This is intentionally
|
|
73
|
+
not always identical to the strict ``mrcfile.validate()`` result.
|
|
74
|
+
VA uses the parsed severity to decide its own pipeline-level
|
|
75
|
+
validity.
|
|
76
|
+
source
|
|
77
|
+
Message source, usually ``"mrcfile.validate"`` or
|
|
78
|
+
``"RuntimeWarning"``.
|
|
79
|
+
message
|
|
80
|
+
Original mrcfile message, preserved verbatim for debugging.
|
|
81
|
+
details
|
|
82
|
+
Parsed values extracted from the message, such as header field
|
|
83
|
+
names, expected values, actual values, byte counts, or exception
|
|
84
|
+
information.
|
|
85
|
+
Severity mapping used by this parser:
|
|
86
|
+
map_id_incorrect
|
|
87
|
+
warning
|
|
88
|
+
The MAP ID header string differs from the expected value.
|
|
89
|
+
map_id_missing_or_corrupt
|
|
90
|
+
warning
|
|
91
|
+
The MAP ID string is absent, suggesting the file may not be an
|
|
92
|
+
MRC file or may be corrupt.
|
|
93
|
+
machine_stamp_invalid
|
|
94
|
+
error
|
|
95
|
+
The machine stamp is invalid.
|
|
96
|
+
machine_stamp_byte_order_mismatch
|
|
97
|
+
warning
|
|
98
|
+
The machine stamp does not match the apparent byte order.
|
|
99
|
+
mode_invalid
|
|
100
|
+
error
|
|
101
|
+
The MRC mode value is invalid.
|
|
102
|
+
mode_unrecognised_data_unreadable
|
|
103
|
+
error
|
|
104
|
+
The MRC mode is unrecognised and the data block cannot be read.
|
|
105
|
+
header_field_negative
|
|
106
|
+
error
|
|
107
|
+
A non-negative integer header field such as nx, ny, nz, mx, my,
|
|
108
|
+
mz, ispg, or nlabl is negative.
|
|
109
|
+
cell_dimension_negative
|
|
110
|
+
error
|
|
111
|
+
One of the cell dimensions x, y, or z is negative.
|
|
112
|
+
axis_mapping_invalid
|
|
113
|
+
error
|
|
114
|
+
The map axis mapping is not the expected [1, 2, 3] permutation.
|
|
115
|
+
volume_stack_dimensions_invalid
|
|
116
|
+
error
|
|
117
|
+
For a volume stack, nz is not divisible by mz.
|
|
118
|
+
header_labels_empty_between_text
|
|
119
|
+
error
|
|
120
|
+
Empty labels appear between labels containing text.
|
|
121
|
+
header_labels_nlabl_mismatch
|
|
122
|
+
error
|
|
123
|
+
The nlabl header value does not match the number of non-empty
|
|
124
|
+
label records.
|
|
125
|
+
mrc_format_version_invalid
|
|
126
|
+
warning
|
|
127
|
+
The nversion field does not declare MRC2014 version 20140 or
|
|
128
|
+
20141.
|
|
129
|
+
extended_header_type_invalid
|
|
130
|
+
error
|
|
131
|
+
The extended-header type is undefined or unrecognised.
|
|
132
|
+
data_statistics_rms_mismatch
|
|
133
|
+
error
|
|
134
|
+
The calculated RMS deviation differs from the header value.
|
|
135
|
+
data_statistics_minimum_mismatch
|
|
136
|
+
error
|
|
137
|
+
The calculated minimum differs from the header value.
|
|
138
|
+
data_statistics_maximum_mismatch
|
|
139
|
+
error
|
|
140
|
+
The calculated maximum differs from the header value.
|
|
141
|
+
data_statistics_mean_mismatch
|
|
142
|
+
error
|
|
143
|
+
The calculated mean differs from the header value.
|
|
144
|
+
file_size_larger_than_expected
|
|
145
|
+
error
|
|
146
|
+
The physical file size is larger than the size calculated from
|
|
147
|
+
the header.
|
|
148
|
+
data_block_unreadable_file_size_not_checked
|
|
149
|
+
error
|
|
150
|
+
The data block could not be read, so file size could not be
|
|
151
|
+
checked.
|
|
152
|
+
extended_header_too_small
|
|
153
|
+
error
|
|
154
|
+
The file contains fewer extended-header bytes than expected.
|
|
155
|
+
data_block_too_small
|
|
156
|
+
error
|
|
157
|
+
The file contains fewer data-block bytes than expected.
|
|
158
|
+
data_block_exceeds_read_limit
|
|
159
|
+
error
|
|
160
|
+
The expected data block exceeds the configured read limit.
|
|
161
|
+
mrc_header_too_small
|
|
162
|
+
error
|
|
163
|
+
The file does not contain enough bytes for a full MRC header.
|
|
164
|
+
exception_during_validation
|
|
165
|
+
error
|
|
166
|
+
A Python exception-like message was emitted during validation.
|
|
167
|
+
unclassified_mrcfile_validation_message
|
|
168
|
+
warning
|
|
169
|
+
Fallback for any unrecognised message. The original message is
|
|
170
|
+
still preserved so new mrcfile messages are not silently lost.
|
|
171
|
+
Notes
|
|
172
|
+
-----
|
|
173
|
+
Some messages that ``mrcfile.validate()`` treats as validation failures
|
|
174
|
+
are intentionally classified as ``warning`` here, for example
|
|
175
|
+
``map_id_incorrect`` and ``mrc_format_version_invalid``. This lets VA
|
|
176
|
+
distinguish strict mrcfile validity from VA's own pipeline-level
|
|
177
|
+
acceptability.
|
|
178
|
+
Unknown messages are kept as
|
|
179
|
+
``unclassified_mrcfile_validation_message`` instead of being discarded.
|
|
180
|
+
This makes the parser forward-compatible with future mrcfile output.
|
|
181
|
+
"""
|
|
182
|
+
|
|
69
183
|
line = line.strip()
|
|
70
184
|
|
|
71
185
|
# 1. MRC map ID/header map field
|
|
@@ -552,6 +666,9 @@ def _parse_messages_and_warnings(messages_text, warning_messages):
|
|
|
552
666
|
|
|
553
667
|
|
|
554
668
|
def validate_single_map(map_input):
|
|
669
|
+
"""
|
|
670
|
+
Validate a single map file using mrcfile.validate() and capture any warnings.
|
|
671
|
+
"""
|
|
555
672
|
map_path = Path(_normalise_map_input(map_input))
|
|
556
673
|
messages = io.StringIO()
|
|
557
674
|
warning_messages = []
|
|
@@ -559,7 +676,7 @@ def validate_single_map(map_input):
|
|
|
559
676
|
result = OrderedDict([
|
|
560
677
|
("file", str(map_path)),
|
|
561
678
|
("exists", map_path.exists()),
|
|
562
|
-
("
|
|
679
|
+
("no_known_critical_error", False),
|
|
563
680
|
|
|
564
681
|
# New structured fields
|
|
565
682
|
("issue_count", 0),
|
|
@@ -294,12 +294,12 @@ class PreParation:
|
|
|
294
294
|
# ----------------- Schema validation helpers -----------------
|
|
295
295
|
|
|
296
296
|
def _schema_file_path(self):
|
|
297
|
-
"""Return the on-disk path to
|
|
298
|
-
# preparation.py
|
|
299
|
-
return os.path.join(os.path.dirname(__file__), 'emdb_entry_full.schema.json')
|
|
297
|
+
"""Return the on-disk path to docs/emdb_entry_full.schema.json."""
|
|
298
|
+
# preparation.py lives under va/; the schema is stored in sibling docs/.
|
|
299
|
+
return os.path.join(os.path.dirname(os.path.dirname(__file__)), 'docs', 'emdb_entry_full.schema.json')
|
|
300
300
|
|
|
301
301
|
def _load_json_schema(self):
|
|
302
|
-
"""Load a JSON Schema from
|
|
302
|
+
"""Load a JSON Schema from docs/emdb_entry_full.schema.json. Returns dict or None."""
|
|
303
303
|
path = self._schema_file_path()
|
|
304
304
|
if not os.path.isfile(path):
|
|
305
305
|
sys.stderr.write(f'[validation] Schema file not found: {path}\n')
|
|
@@ -344,7 +344,7 @@ class PreParation:
|
|
|
344
344
|
|
|
345
345
|
def _validate_merged_entry(self, entry_obj):
|
|
346
346
|
"""
|
|
347
|
-
Validate a merged entry object against the schema in
|
|
347
|
+
Validate a merged entry object against the schema in docs/emdb_entry_full.schema.json.
|
|
348
348
|
|
|
349
349
|
Returns:
|
|
350
350
|
(status: bool, errors: list[str]) where errors is empty if OK.
|
|
@@ -384,7 +384,13 @@ class PreParation:
|
|
|
384
384
|
errs = sorted(validator.iter_errors(entry_obj), key=lambda e: e.path)
|
|
385
385
|
else:
|
|
386
386
|
# Fallback: assume top-level schema expects { "<id>": entry }
|
|
387
|
-
|
|
387
|
+
if getattr(self, 'emdid', None):
|
|
388
|
+
key = str(self.emdid)
|
|
389
|
+
else:
|
|
390
|
+
mapname = getattr(self, 'mapname', None)
|
|
391
|
+
if not mapname:
|
|
392
|
+
return False, ['Validator error: unable to determine output root key (missing emdid and mapname).']
|
|
393
|
+
key = os.path.basename(mapname)
|
|
388
394
|
wrapped = {key: entry_obj}
|
|
389
395
|
validator = Validator(schema)
|
|
390
396
|
errs = sorted(validator.iter_errors(wrapped), key=lambda e: e.path)
|
|
@@ -400,6 +406,281 @@ class PreParation:
|
|
|
400
406
|
|
|
401
407
|
return (len(errors_out) == 0), errors_out
|
|
402
408
|
|
|
409
|
+
def _build_input_json_validation(self, args, argsdata):
|
|
410
|
+
"""Build a lightweight validation report for the raw input.json payload."""
|
|
411
|
+
|
|
412
|
+
issues = []
|
|
413
|
+
required = {
|
|
414
|
+
'inputs.map': False,
|
|
415
|
+
'inputs.workdir': False
|
|
416
|
+
}
|
|
417
|
+
checked_fields = []
|
|
418
|
+
present_optional_fields = []
|
|
419
|
+
missing_optional_fields = []
|
|
420
|
+
|
|
421
|
+
def add_issue(code, severity, path, message):
|
|
422
|
+
issues.append({
|
|
423
|
+
'code': code,
|
|
424
|
+
'severity': severity,
|
|
425
|
+
'path': path,
|
|
426
|
+
'message': message
|
|
427
|
+
})
|
|
428
|
+
|
|
429
|
+
def is_numeric_string(value):
|
|
430
|
+
return isinstance(value, str) and re.match(r'^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$', value) is not None
|
|
431
|
+
|
|
432
|
+
def is_string_list(value):
|
|
433
|
+
return isinstance(value, list) and all(isinstance(item, str) for item in value)
|
|
434
|
+
|
|
435
|
+
def is_number_like(value):
|
|
436
|
+
return isinstance(value, (int, float)) and not isinstance(value, bool)
|
|
437
|
+
|
|
438
|
+
checked_fields.append('$')
|
|
439
|
+
if not isinstance(args, dict):
|
|
440
|
+
add_issue('invalid_root_type', 'error', '$', 'Root JSON value must be an object.')
|
|
441
|
+
inputs_obj = None
|
|
442
|
+
if isinstance(args, dict):
|
|
443
|
+
checked_fields.append('inputs')
|
|
444
|
+
if 'inputs' not in args:
|
|
445
|
+
add_issue('missing_required_key', 'error', 'inputs', 'Required key inputs is missing.')
|
|
446
|
+
elif not isinstance(args['inputs'], dict):
|
|
447
|
+
add_issue('invalid_type', 'error', 'inputs', 'Key inputs must be an object.')
|
|
448
|
+
else:
|
|
449
|
+
inputs_obj = args['inputs']
|
|
450
|
+
|
|
451
|
+
if inputs_obj is None:
|
|
452
|
+
return {
|
|
453
|
+
'data_validation': {
|
|
454
|
+
'input_json_validation': {
|
|
455
|
+
'valid': not any(issue.get('severity') == 'error' for issue in issues),
|
|
456
|
+
'issue_count': len(issues),
|
|
457
|
+
'issues': issues,
|
|
458
|
+
'required': required,
|
|
459
|
+
'checked_fields': checked_fields,
|
|
460
|
+
'present_optional_fields': present_optional_fields,
|
|
461
|
+
'missing_optional_fields': missing_optional_fields
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
checked_fields.append('inputs.map')
|
|
467
|
+
if 'map' in inputs_obj and isinstance(inputs_obj['map'], str):
|
|
468
|
+
required['inputs.map'] = True
|
|
469
|
+
else:
|
|
470
|
+
add_issue(
|
|
471
|
+
'missing_required_key' if 'map' not in inputs_obj else 'invalid_type',
|
|
472
|
+
'error',
|
|
473
|
+
'inputs.map',
|
|
474
|
+
'Required key inputs.map is missing.' if 'map' not in inputs_obj else 'Required key inputs.map must be a string.'
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
checked_fields.append('inputs.workdir')
|
|
478
|
+
if 'workdir' in inputs_obj and isinstance(inputs_obj['workdir'], str):
|
|
479
|
+
required['inputs.workdir'] = True
|
|
480
|
+
else:
|
|
481
|
+
add_issue(
|
|
482
|
+
'missing_required_key' if 'workdir' not in inputs_obj else 'invalid_type',
|
|
483
|
+
'error',
|
|
484
|
+
'inputs.workdir',
|
|
485
|
+
'Required key inputs.workdir is missing.' if 'workdir' not in inputs_obj else 'Required key inputs.workdir must be a string.'
|
|
486
|
+
)
|
|
487
|
+
|
|
488
|
+
optional_string_fields = (
|
|
489
|
+
'inputs.evenmap',
|
|
490
|
+
'inputs.oddmap',
|
|
491
|
+
'inputs.fscfile',
|
|
492
|
+
'inputs.method',
|
|
493
|
+
'inputs.update_resolution_bin_file',
|
|
494
|
+
'inputs.3dfscdir',
|
|
495
|
+
'inputs.strudellib'
|
|
496
|
+
)
|
|
497
|
+
optional_number_fields = ('inputs.contour_level', 'inputs.resolution')
|
|
498
|
+
optional_list_fields = ('inputs.runs', 'inputs.run_exclude')
|
|
499
|
+
optional_bool_fields = ('inputs.modelmap', 'inputs.onlybar')
|
|
500
|
+
optional_object_fields = ('inputs.models', 'inputs.masks')
|
|
501
|
+
|
|
502
|
+
def record_optional_presence(field_name, present):
|
|
503
|
+
checked_fields.append(field_name)
|
|
504
|
+
if present:
|
|
505
|
+
present_optional_fields.append(field_name)
|
|
506
|
+
else:
|
|
507
|
+
missing_optional_fields.append(field_name)
|
|
508
|
+
|
|
509
|
+
for field_name in optional_string_fields:
|
|
510
|
+
key = field_name.split('.', 1)[1]
|
|
511
|
+
if key in inputs_obj:
|
|
512
|
+
value = inputs_obj[key]
|
|
513
|
+
record_optional_presence(field_name, value is not None)
|
|
514
|
+
if value is None:
|
|
515
|
+
continue
|
|
516
|
+
if field_name == 'inputs.method':
|
|
517
|
+
if not isinstance(value, str):
|
|
518
|
+
add_issue(
|
|
519
|
+
'invalid_type',
|
|
520
|
+
'error',
|
|
521
|
+
field_name,
|
|
522
|
+
f'{field_name} must be a string if provided.'
|
|
523
|
+
)
|
|
524
|
+
elif not isinstance(value, str):
|
|
525
|
+
add_issue(
|
|
526
|
+
'invalid_type',
|
|
527
|
+
'warning',
|
|
528
|
+
field_name,
|
|
529
|
+
f'{field_name} must be a string or null.'
|
|
530
|
+
)
|
|
531
|
+
else:
|
|
532
|
+
record_optional_presence(field_name, False)
|
|
533
|
+
|
|
534
|
+
for field_name in optional_number_fields:
|
|
535
|
+
key = field_name.split('.', 1)[1]
|
|
536
|
+
if key in inputs_obj:
|
|
537
|
+
value = inputs_obj[key]
|
|
538
|
+
record_optional_presence(field_name, value is not None)
|
|
539
|
+
if value is None:
|
|
540
|
+
continue
|
|
541
|
+
if not (is_number_like(value) or is_numeric_string(value)):
|
|
542
|
+
add_issue(
|
|
543
|
+
'invalid_type',
|
|
544
|
+
'warning',
|
|
545
|
+
field_name,
|
|
546
|
+
f'{field_name} must be a number, numeric string, or null.'
|
|
547
|
+
)
|
|
548
|
+
else:
|
|
549
|
+
record_optional_presence(field_name, False)
|
|
550
|
+
|
|
551
|
+
for field_name in optional_list_fields:
|
|
552
|
+
key = field_name.split('.', 1)[1]
|
|
553
|
+
if key in inputs_obj:
|
|
554
|
+
value = inputs_obj[key]
|
|
555
|
+
record_optional_presence(field_name, value is not None)
|
|
556
|
+
if value is None:
|
|
557
|
+
continue
|
|
558
|
+
if not (isinstance(value, str) or is_string_list(value)):
|
|
559
|
+
add_issue(
|
|
560
|
+
'invalid_type',
|
|
561
|
+
'warning',
|
|
562
|
+
field_name,
|
|
563
|
+
f'{field_name} must be a string or an array of strings.'
|
|
564
|
+
)
|
|
565
|
+
else:
|
|
566
|
+
record_optional_presence(field_name, False)
|
|
567
|
+
|
|
568
|
+
for field_name in optional_bool_fields:
|
|
569
|
+
key = field_name.split('.', 1)[1]
|
|
570
|
+
if key in inputs_obj:
|
|
571
|
+
value = inputs_obj[key]
|
|
572
|
+
record_optional_presence(field_name, value is not None)
|
|
573
|
+
if value is None:
|
|
574
|
+
continue
|
|
575
|
+
if not (isinstance(value, bool) or (isinstance(value, int) and value in (0, 1))):
|
|
576
|
+
add_issue(
|
|
577
|
+
'invalid_type',
|
|
578
|
+
'warning',
|
|
579
|
+
field_name,
|
|
580
|
+
f'{field_name} must be a boolean or 0/1 integer.'
|
|
581
|
+
)
|
|
582
|
+
else:
|
|
583
|
+
record_optional_presence(field_name, False)
|
|
584
|
+
|
|
585
|
+
if 'platform' in inputs_obj:
|
|
586
|
+
platform_value = inputs_obj['platform']
|
|
587
|
+
record_optional_presence('inputs.platform', platform_value is not None)
|
|
588
|
+
if platform_value is None:
|
|
589
|
+
pass
|
|
590
|
+
elif platform_value not in ('emdb', 'wwpdb'):
|
|
591
|
+
add_issue(
|
|
592
|
+
'invalid_value',
|
|
593
|
+
'warning',
|
|
594
|
+
'inputs.platform',
|
|
595
|
+
'inputs.platform must be "emdb" or "wwpdb".'
|
|
596
|
+
)
|
|
597
|
+
else:
|
|
598
|
+
record_optional_presence('inputs.platform', False)
|
|
599
|
+
|
|
600
|
+
for field_name in optional_object_fields:
|
|
601
|
+
key = field_name.split('.', 1)[1]
|
|
602
|
+
if key not in inputs_obj or inputs_obj[key] is None:
|
|
603
|
+
record_optional_presence(field_name, False)
|
|
604
|
+
continue
|
|
605
|
+
value = inputs_obj[key]
|
|
606
|
+
record_optional_presence(field_name, True)
|
|
607
|
+
if not isinstance(value, dict):
|
|
608
|
+
add_issue(
|
|
609
|
+
'invalid_type',
|
|
610
|
+
'error',
|
|
611
|
+
field_name,
|
|
612
|
+
f'{field_name} must be an object keyed by arbitrary IDs.'
|
|
613
|
+
)
|
|
614
|
+
continue
|
|
615
|
+
if field_name == 'inputs.models':
|
|
616
|
+
for model_id, model_entry in value.items():
|
|
617
|
+
model_path = f'inputs.models.{model_id}'
|
|
618
|
+
checked_fields.append(model_path)
|
|
619
|
+
if not isinstance(model_entry, dict):
|
|
620
|
+
add_issue(
|
|
621
|
+
'invalid_type',
|
|
622
|
+
'error',
|
|
623
|
+
model_path,
|
|
624
|
+
f'{model_path} must be an object.'
|
|
625
|
+
)
|
|
626
|
+
continue
|
|
627
|
+
if 'name' not in model_entry:
|
|
628
|
+
add_issue(
|
|
629
|
+
'missing_required_key',
|
|
630
|
+
'error',
|
|
631
|
+
f'{model_path}.name',
|
|
632
|
+
f'Required key {model_path}.name is missing.'
|
|
633
|
+
)
|
|
634
|
+
elif not isinstance(model_entry['name'], str):
|
|
635
|
+
add_issue(
|
|
636
|
+
'invalid_type',
|
|
637
|
+
'error',
|
|
638
|
+
f'{model_path}.name',
|
|
639
|
+
f'{model_path}.name must be a string.'
|
|
640
|
+
)
|
|
641
|
+
else:
|
|
642
|
+
for mask_id, mask_entry in value.items():
|
|
643
|
+
mask_path = f'inputs.masks.{mask_id}'
|
|
644
|
+
checked_fields.append(mask_path)
|
|
645
|
+
if not isinstance(mask_entry, dict):
|
|
646
|
+
add_issue(
|
|
647
|
+
'invalid_type',
|
|
648
|
+
'error',
|
|
649
|
+
mask_path,
|
|
650
|
+
f'{mask_path} must be an object.'
|
|
651
|
+
)
|
|
652
|
+
continue
|
|
653
|
+
if 'name' in mask_entry and not isinstance(mask_entry['name'], str):
|
|
654
|
+
add_issue(
|
|
655
|
+
'invalid_type',
|
|
656
|
+
'error',
|
|
657
|
+
f'{mask_path}.name',
|
|
658
|
+
f'{mask_path}.name must be a string.'
|
|
659
|
+
)
|
|
660
|
+
if 'contour' in mask_entry:
|
|
661
|
+
contour_value = mask_entry['contour']
|
|
662
|
+
if contour_value is not None and not (is_number_like(contour_value) or is_numeric_string(contour_value)):
|
|
663
|
+
add_issue(
|
|
664
|
+
'invalid_type',
|
|
665
|
+
'warning',
|
|
666
|
+
f'{mask_path}.contour',
|
|
667
|
+
f'{mask_path}.contour must be a number, numeric string, or null.'
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
return {
|
|
671
|
+
'input_json_validation': {
|
|
672
|
+
'input_json_validation': {
|
|
673
|
+
'valid': not any(issue.get('severity') == 'error' for issue in issues),
|
|
674
|
+
'issue_count': len(issues),
|
|
675
|
+
'issues': issues,
|
|
676
|
+
'required': required,
|
|
677
|
+
'checked_fields': checked_fields,
|
|
678
|
+
'present_optional_fields': present_optional_fields,
|
|
679
|
+
'missing_optional_fields': missing_optional_fields
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
}
|
|
683
|
+
|
|
403
684
|
def findfscxml(self):
|
|
404
685
|
"""Finds the `fsc.xml` file in the working directory if it exists.
|
|
405
686
|
|
|
@@ -457,7 +738,25 @@ class PreParation:
|
|
|
457
738
|
if injson:
|
|
458
739
|
with open(injson, 'r') as f:
|
|
459
740
|
args = json.load(f)
|
|
460
|
-
|
|
741
|
+
try:
|
|
742
|
+
argsdata = args['inputs']
|
|
743
|
+
except Exception:
|
|
744
|
+
validation = self._build_input_json_validation(args, None)
|
|
745
|
+
validation_dir = getattr(self, 'vadir', None) or (os.getcwd() + '/')
|
|
746
|
+
validation_name = f'{getattr(self, "mapname", None)}_input_json_validation.json' if getattr(self, 'mapname', None) else 'input_json_validation.json'
|
|
747
|
+
out_json(validation, f'{validation_dir}{validation_name}')
|
|
748
|
+
raise
|
|
749
|
+
|
|
750
|
+
validation = self._build_input_json_validation(args, argsdata)
|
|
751
|
+
validation_dir = getattr(self, 'vadir', None)
|
|
752
|
+
if validation_dir is None and isinstance(argsdata, dict):
|
|
753
|
+
maybe_workdir = argsdata.get('workdir')
|
|
754
|
+
if isinstance(maybe_workdir, str) and maybe_workdir:
|
|
755
|
+
validation_dir = maybe_workdir.rstrip('/') + '/'
|
|
756
|
+
if validation_dir is None:
|
|
757
|
+
validation_dir = os.getcwd() + '/'
|
|
758
|
+
validation_name = f'{getattr(self, "mapname", None)}_input_json_validation.json' if getattr(self, 'mapname', None) else 'input_json_validation.json'
|
|
759
|
+
out_json(validation, f'{validation_dir}{validation_name}')
|
|
461
760
|
map = argsdata['map']
|
|
462
761
|
assert map is not None, "There must be a map needed in the input JSON file."
|
|
463
762
|
assert argsdata['workdir'] is not None, "Working directory must be provided in the input JSON file."
|
|
@@ -2205,7 +2504,7 @@ class PreParation:
|
|
|
2205
2504
|
|
|
2206
2505
|
fuldata = self.merge_json_files(jsonfiles)
|
|
2207
2506
|
|
|
2208
|
-
# ---------- NEW: Validate against schema in
|
|
2507
|
+
# ---------- NEW: Validate against schema in docs/emdb_entry_full.schema.json ----------
|
|
2209
2508
|
status, errors = self._validate_merged_entry(fuldata)
|
|
2210
2509
|
validation_block = {
|
|
2211
2510
|
'status': bool(status),
|
|
@@ -2407,6 +2706,3 @@ class PreParation:
|
|
|
2407
2706
|
else:
|
|
2408
2707
|
print('No memory data available for prediction yet')
|
|
2409
2708
|
return None
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|