PyPI - emdbva - Versions diffs - 0.0.1.dev139.tar.gz → 0.0.1.dev140.tar.gz - Mend

emdbva 0.0.1.dev139.tar.gz → 0.0.1.dev140.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

{emdbva-0.0.1.dev139/emdbva.egg-info → emdbva-0.0.1.dev140}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: emdbva
-Version: 0.0.1.dev139
+Version: 0.0.1.dev140
 Summary: CryoEM validation toolkit
 Home-page: https://test.pypi.org/project/va/
 Author: Zhe Wang

{emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140/emdbva.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: emdbva
-Version: 0.0.1.dev139
+Version: 0.0.1.dev140
 Summary: CryoEM validation toolkit
 Home-page: https://test.pypi.org/project/va/
 Author: Zhe Wang

{emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/emdbva.egg-info/SOURCES.txt RENAMED Viewed

@@ -11,6 +11,7 @@ emdbva.egg-info/top_level.txt
 va/__init__.py
 va/mainva.py
 va/preparation.py
+va/prepareandrun_codon_airflow.py
 va/qscores.csv
 va/validationanalysis.py
 va/version.py

{emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/mainva.py RENAMED Viewed

@@ -121,9 +121,9 @@ def allruns(validationobj, runs):
     if 'symmetry' in runs:
         validationobj.symmetry()
-    # Strudel
-    if 'strudel' in runs:
-        validationobj.strudel()
+    # Strudel (turning off)
+    # if 'strudel' in runs:
+    #     validationobj.strudel()
     # Q-score
     if 'qscore' in runs:

{emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/metrics/map_data_validation.py RENAMED Viewed

@@ -59,13 +59,127 @@ def _to_float(value):
 def _parse_validation_line(line, source="mrcfile.validate"):
-    """
-    Parse one line from mrcfile.validate() or one RuntimeWarning.
-    Any message not recognised by the parser is still kept in the JSON as
-    "unclassified_mrcfile_validation_message" so future mrcfile versions do
-    not silently lose information.
     """
+    Parse one line from ``mrcfile.validate()`` output or one captured
+    ``RuntimeWarning`` into a structured validation issue.
+    The returned issue is created by ``_make_issue()`` and contains:
+        code
+            Stable machine-readable issue code.
+        category
+            Header/data/file area affected by the issue.
+        severity
+            Local VA severity assigned to the issue. This is intentionally
+            not always identical to the strict ``mrcfile.validate()`` result.
+            VA uses the parsed severity to decide its own pipeline-level
+            validity.
+        source
+            Message source, usually ``"mrcfile.validate"`` or
+            ``"RuntimeWarning"``.
+        message
+            Original mrcfile message, preserved verbatim for debugging.
+        details
+            Parsed values extracted from the message, such as header field
+            names, expected values, actual values, byte counts, or exception
+            information.
+    Severity mapping used by this parser:
+        map_id_incorrect
+            warning
+            The MAP ID header string differs from the expected value.
+        map_id_missing_or_corrupt
+            warning
+            The MAP ID string is absent, suggesting the file may not be an
+            MRC file or may be corrupt.
+        machine_stamp_invalid
+            error
+            The machine stamp is invalid.
+        machine_stamp_byte_order_mismatch
+            warning
+            The machine stamp does not match the apparent byte order.
+        mode_invalid
+            error
+            The MRC mode value is invalid.
+        mode_unrecognised_data_unreadable
+            error
+            The MRC mode is unrecognised and the data block cannot be read.
+        header_field_negative
+            error
+            A non-negative integer header field such as nx, ny, nz, mx, my,
+            mz, ispg, or nlabl is negative.
+        cell_dimension_negative
+            error
+            One of the cell dimensions x, y, or z is negative.
+        axis_mapping_invalid
+            error
+            The map axis mapping is not the expected [1, 2, 3] permutation.
+        volume_stack_dimensions_invalid
+            error
+            For a volume stack, nz is not divisible by mz.
+        header_labels_empty_between_text
+            error
+            Empty labels appear between labels containing text.
+        header_labels_nlabl_mismatch
+            error
+            The nlabl header value does not match the number of non-empty
+            label records.
+        mrc_format_version_invalid
+            warning
+            The nversion field does not declare MRC2014 version 20140 or
+            20141.
+        extended_header_type_invalid
+            error
+            The extended-header type is undefined or unrecognised.
+        data_statistics_rms_mismatch
+            error
+            The calculated RMS deviation differs from the header value.
+        data_statistics_minimum_mismatch
+            error
+            The calculated minimum differs from the header value.
+        data_statistics_maximum_mismatch
+            error
+            The calculated maximum differs from the header value.
+        data_statistics_mean_mismatch
+            error
+            The calculated mean differs from the header value.
+        file_size_larger_than_expected
+            error
+            The physical file size is larger than the size calculated from
+            the header.
+        data_block_unreadable_file_size_not_checked
+            error
+            The data block could not be read, so file size could not be
+            checked.
+        extended_header_too_small
+            error
+            The file contains fewer extended-header bytes than expected.
+        data_block_too_small
+            error
+            The file contains fewer data-block bytes than expected.
+        data_block_exceeds_read_limit
+            error
+            The expected data block exceeds the configured read limit.
+        mrc_header_too_small
+            error
+            The file does not contain enough bytes for a full MRC header.
+        exception_during_validation
+            error
+            A Python exception-like message was emitted during validation.
+        unclassified_mrcfile_validation_message
+            warning
+            Fallback for any unrecognised message. The original message is
+            still preserved so new mrcfile messages are not silently lost.
+    Notes
+    -----
+    Some messages that ``mrcfile.validate()`` treats as validation failures
+    are intentionally classified as ``warning`` here, for example
+    ``map_id_incorrect`` and ``mrc_format_version_invalid``. This lets VA
+    distinguish strict mrcfile validity from VA's own pipeline-level
+    acceptability.
+    Unknown messages are kept as
+    ``unclassified_mrcfile_validation_message`` instead of being discarded.
+    This makes the parser forward-compatible with future mrcfile output.
+    """
     line = line.strip()
     # 1. MRC map ID/header map field
@@ -552,6 +666,9 @@ def _parse_messages_and_warnings(messages_text, warning_messages):
 def validate_single_map(map_input):
+    """
+    Validate a single map file using mrcfile.validate() and capture any warnings.
+    """
     map_path = Path(_normalise_map_input(map_input))
     messages = io.StringIO()
     warning_messages = []
@@ -559,7 +676,7 @@ def validate_single_map(map_input):
     result = OrderedDict([
         ("file", str(map_path)),
         ("exists", map_path.exists()),
-        ("valid", False),
+        ("no_known_critical_error", False),
         # New structured fields
         ("issue_count", 0),

{emdbva-0.0.1.dev139 → emdbva-0.0.1.dev140}/va/preparation.py RENAMED Viewed

@@ -294,12 +294,12 @@ class PreParation:
     # ----------------- Schema validation helpers -----------------
     def _schema_file_path(self):
-        """Return the on-disk path to va/emdb_entry_full.schema.json."""
-        # preparation.py is inside package 'va'; schema file sits next to it.
-        return os.path.join(os.path.dirname(__file__), 'emdb_entry_full.schema.json')
+        """Return the on-disk path to docs/emdb_entry_full.schema.json."""
+        # preparation.py lives under va/; the schema is stored in sibling docs/.
+        return os.path.join(os.path.dirname(os.path.dirname(__file__)), 'docs', 'emdb_entry_full.schema.json')
     def _load_json_schema(self):
-        """Load a JSON Schema from va/emdb_entry_full.schema.json. Returns dict or None."""
+        """Load a JSON Schema from docs/emdb_entry_full.schema.json. Returns dict or None."""
         path = self._schema_file_path()
         if not os.path.isfile(path):
             sys.stderr.write(f'[validation] Schema file not found: {path}\n')
@@ -344,7 +344,7 @@ class PreParation:
     def _validate_merged_entry(self, entry_obj):
         """
-        Validate a merged entry object against the schema in va/emdb_entry_full.schema.json.
+        Validate a merged entry object against the schema in docs/emdb_entry_full.schema.json.
         Returns:
           (status: bool, errors: list[str]) where errors is empty if OK.
@@ -384,7 +384,13 @@ class PreParation:
                 errs = sorted(validator.iter_errors(entry_obj), key=lambda e: e.path)
             else:
                 # Fallback: assume top-level schema expects { "<id>": entry }
-                key = str(self.emdid) if getattr(self, 'emdid', None) else 'ENTRY'
+                if getattr(self, 'emdid', None):
+                    key = str(self.emdid)
+                else:
+                    mapname = getattr(self, 'mapname', None)
+                    if not mapname:
+                        return False, ['Validator error: unable to determine output root key (missing emdid and mapname).']
+                    key = os.path.basename(mapname)
                 wrapped = {key: entry_obj}
                 validator = Validator(schema)
                 errs = sorted(validator.iter_errors(wrapped), key=lambda e: e.path)
@@ -400,6 +406,281 @@ class PreParation:
         return (len(errors_out) == 0), errors_out
+    def _build_input_json_validation(self, args, argsdata):
+        """Build a lightweight validation report for the raw input.json payload."""
+        issues = []
+        required = {
+            'inputs.map': False,
+            'inputs.workdir': False
+        }
+        checked_fields = []
+        present_optional_fields = []
+        missing_optional_fields = []
+        def add_issue(code, severity, path, message):
+            issues.append({
+                'code': code,
+                'severity': severity,
+                'path': path,
+                'message': message
+            })
+        def is_numeric_string(value):
+            return isinstance(value, str) and re.match(r'^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$', value) is not None
+        def is_string_list(value):
+            return isinstance(value, list) and all(isinstance(item, str) for item in value)
+        def is_number_like(value):
+            return isinstance(value, (int, float)) and not isinstance(value, bool)
+        checked_fields.append('$')
+        if not isinstance(args, dict):
+            add_issue('invalid_root_type', 'error', '$', 'Root JSON value must be an object.')
+        inputs_obj = None
+        if isinstance(args, dict):
+            checked_fields.append('inputs')
+            if 'inputs' not in args:
+                add_issue('missing_required_key', 'error', 'inputs', 'Required key inputs is missing.')
+            elif not isinstance(args['inputs'], dict):
+                add_issue('invalid_type', 'error', 'inputs', 'Key inputs must be an object.')
+            else:
+                inputs_obj = args['inputs']
+        if inputs_obj is None:
+            return {
+                'data_validation': {
+                    'input_json_validation': {
+                        'valid': not any(issue.get('severity') == 'error' for issue in issues),
+                        'issue_count': len(issues),
+                        'issues': issues,
+                        'required': required,
+                        'checked_fields': checked_fields,
+                        'present_optional_fields': present_optional_fields,
+                        'missing_optional_fields': missing_optional_fields
+                    }
+                }
+            }
+        checked_fields.append('inputs.map')
+        if 'map' in inputs_obj and isinstance(inputs_obj['map'], str):
+            required['inputs.map'] = True
+        else:
+            add_issue(
+                'missing_required_key' if 'map' not in inputs_obj else 'invalid_type',
+                'error',
+                'inputs.map',
+                'Required key inputs.map is missing.' if 'map' not in inputs_obj else 'Required key inputs.map must be a string.'
+            )
+        checked_fields.append('inputs.workdir')
+        if 'workdir' in inputs_obj and isinstance(inputs_obj['workdir'], str):
+            required['inputs.workdir'] = True
+        else:
+            add_issue(
+                'missing_required_key' if 'workdir' not in inputs_obj else 'invalid_type',
+                'error',
+                'inputs.workdir',
+                'Required key inputs.workdir is missing.' if 'workdir' not in inputs_obj else 'Required key inputs.workdir must be a string.'
+            )
+        optional_string_fields = (
+            'inputs.evenmap',
+            'inputs.oddmap',
+            'inputs.fscfile',
+            'inputs.method',
+            'inputs.update_resolution_bin_file',
+            'inputs.3dfscdir',
+            'inputs.strudellib'
+        )
+        optional_number_fields = ('inputs.contour_level', 'inputs.resolution')
+        optional_list_fields = ('inputs.runs', 'inputs.run_exclude')
+        optional_bool_fields = ('inputs.modelmap', 'inputs.onlybar')
+        optional_object_fields = ('inputs.models', 'inputs.masks')
+        def record_optional_presence(field_name, present):
+            checked_fields.append(field_name)
+            if present:
+                present_optional_fields.append(field_name)
+            else:
+                missing_optional_fields.append(field_name)
+        for field_name in optional_string_fields:
+            key = field_name.split('.', 1)[1]
+            if key in inputs_obj:
+                value = inputs_obj[key]
+                record_optional_presence(field_name, value is not None)
+                if value is None:
+                    continue
+                if field_name == 'inputs.method':
+                    if not isinstance(value, str):
+                        add_issue(
+                            'invalid_type',
+                            'error',
+                            field_name,
+                            f'{field_name} must be a string if provided.'
+                        )
+                elif not isinstance(value, str):
+                    add_issue(
+                        'invalid_type',
+                        'warning',
+                        field_name,
+                        f'{field_name} must be a string or null.'
+                    )
+            else:
+                record_optional_presence(field_name, False)
+        for field_name in optional_number_fields:
+            key = field_name.split('.', 1)[1]
+            if key in inputs_obj:
+                value = inputs_obj[key]
+                record_optional_presence(field_name, value is not None)
+                if value is None:
+                    continue
+                if not (is_number_like(value) or is_numeric_string(value)):
+                    add_issue(
+                        'invalid_type',
+                        'warning',
+                        field_name,
+                        f'{field_name} must be a number, numeric string, or null.'
+                    )
+            else:
+                record_optional_presence(field_name, False)
+        for field_name in optional_list_fields:
+            key = field_name.split('.', 1)[1]
+            if key in inputs_obj:
+                value = inputs_obj[key]
+                record_optional_presence(field_name, value is not None)
+                if value is None:
+                    continue
+                if not (isinstance(value, str) or is_string_list(value)):
+                    add_issue(
+                        'invalid_type',
+                        'warning',
+                        field_name,
+                        f'{field_name} must be a string or an array of strings.'
+                    )
+            else:
+                record_optional_presence(field_name, False)
+        for field_name in optional_bool_fields:
+            key = field_name.split('.', 1)[1]
+            if key in inputs_obj:
+                value = inputs_obj[key]
+                record_optional_presence(field_name, value is not None)
+                if value is None:
+                    continue
+                if not (isinstance(value, bool) or (isinstance(value, int) and value in (0, 1))):
+                    add_issue(
+                        'invalid_type',
+                        'warning',
+                        field_name,
+                        f'{field_name} must be a boolean or 0/1 integer.'
+                    )
+            else:
+                record_optional_presence(field_name, False)
+        if 'platform' in inputs_obj:
+            platform_value = inputs_obj['platform']
+            record_optional_presence('inputs.platform', platform_value is not None)
+            if platform_value is None:
+                pass
+            elif platform_value not in ('emdb', 'wwpdb'):
+                add_issue(
+                    'invalid_value',
+                    'warning',
+                    'inputs.platform',
+                    'inputs.platform must be "emdb" or "wwpdb".'
+                )
+        else:
+            record_optional_presence('inputs.platform', False)
+        for field_name in optional_object_fields:
+            key = field_name.split('.', 1)[1]
+            if key not in inputs_obj or inputs_obj[key] is None:
+                record_optional_presence(field_name, False)
+                continue
+            value = inputs_obj[key]
+            record_optional_presence(field_name, True)
+            if not isinstance(value, dict):
+                add_issue(
+                    'invalid_type',
+                    'error',
+                    field_name,
+                    f'{field_name} must be an object keyed by arbitrary IDs.'
+                )
+                continue
+            if field_name == 'inputs.models':
+                for model_id, model_entry in value.items():
+                    model_path = f'inputs.models.{model_id}'
+                    checked_fields.append(model_path)
+                    if not isinstance(model_entry, dict):
+                        add_issue(
+                            'invalid_type',
+                            'error',
+                            model_path,
+                            f'{model_path} must be an object.'
+                        )
+                        continue
+                    if 'name' not in model_entry:
+                        add_issue(
+                            'missing_required_key',
+                            'error',
+                            f'{model_path}.name',
+                            f'Required key {model_path}.name is missing.'
+                        )
+                    elif not isinstance(model_entry['name'], str):
+                        add_issue(
+                            'invalid_type',
+                            'error',
+                            f'{model_path}.name',
+                            f'{model_path}.name must be a string.'
+                        )
+            else:
+                for mask_id, mask_entry in value.items():
+                    mask_path = f'inputs.masks.{mask_id}'
+                    checked_fields.append(mask_path)
+                    if not isinstance(mask_entry, dict):
+                        add_issue(
+                            'invalid_type',
+                            'error',
+                            mask_path,
+                            f'{mask_path} must be an object.'
+                        )
+                        continue
+                    if 'name' in mask_entry and not isinstance(mask_entry['name'], str):
+                        add_issue(
+                            'invalid_type',
+                            'error',
+                            f'{mask_path}.name',
+                            f'{mask_path}.name must be a string.'
+                        )
+                    if 'contour' in mask_entry:
+                        contour_value = mask_entry['contour']
+                        if contour_value is not None and not (is_number_like(contour_value) or is_numeric_string(contour_value)):
+                            add_issue(
+                                'invalid_type',
+                                'warning',
+                                f'{mask_path}.contour',
+                                f'{mask_path}.contour must be a number, numeric string, or null.'
+                            )
+        return {
+            'input_json_validation': {
+                'input_json_validation': {
+                    'valid': not any(issue.get('severity') == 'error' for issue in issues),
+                    'issue_count': len(issues),
+                    'issues': issues,
+                    'required': required,
+                    'checked_fields': checked_fields,
+                    'present_optional_fields': present_optional_fields,
+                    'missing_optional_fields': missing_optional_fields
+                }
+            }
+        }
     def findfscxml(self):
         """Finds the `fsc.xml` file in the working directory if it exists.
@@ -457,7 +738,25 @@ class PreParation:
         if injson:
             with open(injson, 'r') as f:
                 args = json.load(f)
-            argsdata = args['inputs']
+            try:
+                argsdata = args['inputs']
+            except Exception:
+                validation = self._build_input_json_validation(args, None)
+                validation_dir = getattr(self, 'vadir', None) or (os.getcwd() + '/')
+                validation_name = f'{getattr(self, "mapname", None)}_input_json_validation.json' if getattr(self, 'mapname', None) else 'input_json_validation.json'
+                out_json(validation, f'{validation_dir}{validation_name}')
+                raise
+            validation = self._build_input_json_validation(args, argsdata)
+            validation_dir = getattr(self, 'vadir', None)
+            if validation_dir is None and isinstance(argsdata, dict):
+                maybe_workdir = argsdata.get('workdir')
+                if isinstance(maybe_workdir, str) and maybe_workdir:
+                    validation_dir = maybe_workdir.rstrip('/') + '/'
+            if validation_dir is None:
+                validation_dir = os.getcwd() + '/'
+            validation_name = f'{getattr(self, "mapname", None)}_input_json_validation.json' if getattr(self, 'mapname', None) else 'input_json_validation.json'
+            out_json(validation, f'{validation_dir}{validation_name}')
             map = argsdata['map']
             assert map is not None, "There must be a map needed in the input JSON file."
             assert argsdata['workdir'] is not None, "Working directory must be provided in the input JSON file."
@@ -2205,7 +2504,7 @@ class PreParation:
         fuldata = self.merge_json_files(jsonfiles)
-        # ---------- NEW: Validate against schema in va/emdb_entry_full.schema.json ----------
+        # ---------- NEW: Validate against schema in docs/emdb_entry_full.schema.json ----------
         status, errors = self._validate_merged_entry(fuldata)
         validation_block = {
             'status': bool(status),
@@ -2407,6 +2706,3 @@ class PreParation:
         else:
             print('No memory data available for prediction yet')
             return None

emdbva 0.0.1.dev139.tar.gz → 0.0.1.dev140.tar.gz

emdbva 0.0.1.dev139.tar.gz → 0.0.1.dev140.tar.gz