modacor 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,23 @@
1
+ # src/modacor/dataclasses/databundle.py
2
+ # -*- coding: utf-8 -*-
3
+ __author__ = "Jerome Kieffer"
4
+ __copyright__ = "MoDaCor team"
5
+ __license__ = "BSD3"
6
+ __date__ = "21/05/2025"
7
+ __version__ = "20250521.1"
8
+ __status__ = "Production" # "Development", "Production"
9
+ # end of header and standard imports
10
+
11
+
12
+ class DataBundle(dict):
13
+ """
14
+ DataBundle is a specialized data class for storing related data.
15
+ It contains a dictionary of BaseData data elements, for example Signal,
16
+ a wavelength and flux spectrum, Qx, Qy, Qz, Psi, etc. Process steps can
17
+ add further BaseData objects to this bundle.
18
+
19
+ """
20
+
21
+ description: str | None = None
22
+ # as per NXcanSAS, tells which basedata to plot
23
+ default_plot: str | None = None
@@ -0,0 +1,45 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Brian R. Pauw", "Armin Moser"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from modacor import ureg
15
+ from modacor.dataclasses.basedata import BaseData
16
+ from modacor.io.io_sources import IoSources
17
+
18
+ __all__ = ["basedata_from_sources"]
19
+
20
+
21
def basedata_from_sources(
    io_sources: IoSources,
    signal_source: str,
    units_source: str | None = None,
    uncertainty_sources: dict[str, str] | None = None,
) -> BaseData:
    """Helper function to build a BaseData object from IoSources.

    Parameters
    ----------
    io_sources : IoSources
        The IoSources object to load data from.
    signal_source : str
        The source key for the signal data.
    units_source : str | None, optional
        The source key for the units data, by default None (dimensionless).
        For io sources that support attributes, the units can also be stored
        as an attribute. In that case, it can be specified by
        'key to the dataset@[units_attribute_name]'.
    uncertainty_sources : dict[str, str] | None, optional
        A dictionary mapping uncertainty names to their source keys,
        by default None (no uncertainties).

    Returns
    -------
    BaseData
        BaseData assembled from the loaded signal, units and uncertainties.
    """
    # ``None`` default instead of a shared mutable ``{}`` avoids the classic
    # mutable-default-argument pitfall.
    if uncertainty_sources is None:
        uncertainty_sources = {}
    signal = io_sources.get_data(signal_source)
    # fall back to dimensionless units when no units source is specified
    units = (
        ureg.Unit(io_sources.get_static_metadata(units_source))
        if units_source is not None
        else ureg.dimensionless
    )
    uncertainties = {name: io_sources.get_data(source) for name, source in uncertainty_sources.items()}
    return BaseData(signal=signal, units=units, uncertainties=uncertainties)
@@ -0,0 +1,75 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Tim Snow", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+
15
+ import logging
16
+
17
+ _default_handler: MessageHandler | None = None
18
+
19
+
20
def get_default_handler(level: int = logging.INFO) -> MessageHandler:
    """
    MoDaCor-wide default message handler, created lazily on first call.

    Useful for overarching logging like in the pipeline runner. For specific
    modules or classes, it's better to create dedicated named MessageHandler
    instances.  Note: ``level`` only takes effect on the call that creates
    the singleton; later calls return the existing handler unchanged.
    """
    global _default_handler
    if _default_handler is not None:
        return _default_handler
    _default_handler = MessageHandler(level=level, name="MoDaCor")
    return _default_handler
29
+
30
+
31
class MessageHandler:
    """
    A simple class to handle logging messages at different levels.
    This class should be replaced to match the messaging system used at a given location.

    Args:
        level (int): The logging level to use. Defaults to logging.INFO.
        name (str): Logger name (typically __name__).
        **kwargs: Ignored; accepted for forward compatibility with
            alternative messaging backends.
    """

    def __init__(self, level: int = logging.INFO, name: str = "MoDaCor", **kwargs):
        self.level = level
        self.name = name

        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        # Avoid adding multiple console handlers if this handler is created
        # multiple times for the same logger name.
        if not any(isinstance(h, logging.StreamHandler) for h in self.logger.handlers):
            console_handler = logging.StreamHandler()
            console_handler.setLevel(level)

            formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

    def log(self, message: str, level: int | None = None) -> None:
        """Log *message* at *level*; falls back to this handler's own level.

        Note: annotation fixed from ``int`` to ``int | None`` — ``None`` is
        a valid (and the default) argument value.
        """
        if level is None:
            level = self.level
        self.logger.log(msg=message, level=level)

    def info(self, message: str) -> None:
        """Log an INFO-level message."""
        self.log(message, level=logging.INFO)

    def warning(self, message: str) -> None:
        """Log a WARNING-level message."""
        self.log(message, level=logging.WARNING)

    def error(self, message: str) -> None:
        """Log an ERROR-level message."""
        self.log(message, level=logging.ERROR)

    def critical(self, message: str) -> None:
        """Log a CRITICAL-level message."""
        self.log(message, level=logging.CRITICAL)

    def debug(self, message: str) -> None:
        """Log a DEBUG-level message."""
        self.log(message, level=logging.DEBUG)
@@ -0,0 +1,233 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Tim Snow", "Brian R. Pauw", "Anja Hörmann"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+ __version__ = "20251121.1"
14
+
15
+ from abc import abstractmethod
16
+ from numbers import Integral
17
+ from pathlib import Path
18
+ from typing import Any, Iterable, Type
19
+
20
+ from attrs import define, field
21
+ from attrs import validators as v
22
+
23
+ from ..io.io_sinks import IoSinks
24
+ from ..io.io_sources import IoSources
25
+ from .databundle import DataBundle
26
+ from .messagehandler import MessageHandler
27
+ from .process_step_describer import ProcessStepDescriber
28
+ from .processing_data import ProcessingData
29
+
30
+ # from .validators import is_list_of_ints
31
+
32
+
33
@define(eq=False)
class ProcessStep:
    """A base class defining a processing step.

    Subclasses implement :meth:`calculate`; :meth:`execute` attaches the
    incoming :class:`ProcessingData`, runs one-time preparation and merges
    the produced DataBundles back into the processing data.
    """

    # Schema for the configuration keys common to all process steps:
    # expected value type, whether iterables / None are accepted, and the
    # default value used by default_config().
    CONFIG_KEYS = {
        "with_processing_keys": {
            "type": str,
            "allow_iterable": True,
            "allow_none": True,
            "default": None,
        },
        "output_processing_key": {
            "type": str,
            "allow_iterable": False,
            "allow_none": True,
            "default": None,
        },
    }

    # three input items for the process step. For backward compatibility, the first is io_sources
    # The configuration keys for the process step instantiation
    io_sources: IoSources | None = field(default=None, validator=v.optional(v.instance_of(IoSources)))
    # the processing data to work on
    processing_data: ProcessingData = field(default=None, validator=v.optional(v.instance_of(ProcessingData)))
    # optional IO sinks if needed
    io_sinks: IoSinks | None = field(default=None, validator=v.optional(v.instance_of(IoSinks)))

    # class attribute for a machine-readable description of the process step
    documentation = ProcessStepDescriber(
        calling_name="Generic Process step",
        calling_id="",  # to be filled in by the process
        calling_module_path=Path(__file__),
        calling_version=__version__,
    )

    # dynamic instance configuration
    configuration: dict = field(
        factory=dict,
        validator=lambda inst, attrs, val: ProcessStep.is_process_step_dict(inst, attrs.name if attrs else None, val),
    )

    # flags and attributes for running the pipeline
    requires_steps: list[str] = field(factory=list)
    step_id: int | str = field(default=-1, validator=v.instance_of((Integral, str)))
    executed: bool = field(default=False, validator=v.instance_of(bool))
    short_title: str | None = field(default=None, validator=v.optional(v.instance_of(str)))

    # if the process produces intermediate arrays, they are stored here, optionally cached
    produced_outputs: dict[str, Any] = field(factory=dict)
    # intermediate prepared data for the process step
    _prepared_data: dict[str, Any] = field(factory=dict)

    # a message handler, supporting logging, warnings, errors, etc. emitted by the process
    # during execution
    logger: MessageHandler = field(factory=MessageHandler, validator=v.instance_of(MessageHandler))

    # internal flag: True once prepare_execution() has run
    __prepared: bool = field(default=False, validator=v.instance_of(bool))

    def __attrs_post_init__(self):
        """
        Post-initialization: seed the configuration from the class defaults,
        then overlay the documented argument defaults.
        """
        self.configuration = self.default_config()
        self.configuration.update(self.documentation.initial_configuration())

    def __call__(self, processing_data: ProcessingData) -> None:
        """Allow the process step to be called like a function."""
        self.execute(processing_data)

    def __hash__(self) -> int:
        # identity-based hash: with eq=False attrs does not generate
        # __eq__/__hash__, so instances behave like plain objects
        return object.__hash__(self)

    def prepare_execution(self):
        """
        Prepare the execution of the ProcessStep.

        This method can be used to run any costly setup code that is needed
        once before the process step can be executed.
        """
        pass

    def _normalised_processing_keys(self, cfg_key: str = "with_processing_keys") -> list[str]:
        """
        Normalize a ProcessingData key selection into a non-empty list of strings.

        Behavior:
        - None: if processing_data has exactly one key, use it; otherwise error.
        - str: wrap into a one-item list.
        - iterable: materialize into a list (must be non-empty).

        Raises
        ------
        RuntimeError
            If processing_data has not been attached yet.
        ValueError
            If the selection cannot be resolved to a non-empty list of keys.
        """
        if self.processing_data is None:
            raise RuntimeError(f"{self.__class__.__name__}: processing_data is None in _normalised_processing_keys.")

        cfg_value = self.configuration.get(cfg_key, None)

        if cfg_value is None:
            if len(self.processing_data) == 0:
                raise ValueError(f"{self.__class__.__name__}: {cfg_key} is None and processing_data is empty.")
            if len(self.processing_data) == 1:
                only_key = next(iter(self.processing_data.keys()))
                self.logger.info(
                    f"{self.__class__.__name__}: {cfg_key} not set; using the only key {only_key!r}."  # noqa: E702
                )
                return [only_key]
            raise ValueError(f"{self.__class__.__name__}: {cfg_key} is None but multiple databundles are present.")

        if isinstance(cfg_value, str):
            return [cfg_value]

        try:
            keys = list(cfg_value)
        except TypeError as exc:  # not iterable
            raise ValueError(
                f"{self.__class__.__name__}: {cfg_key} must be a string, an iterable of strings, or None."
            ) from exc

        if not keys:
            raise ValueError(f"{self.__class__.__name__}: {cfg_key} must not be an empty list.")
        return keys

    @abstractmethod
    def calculate(self) -> dict[str, DataBundle]:
        """Calculate the process step on the given data."""
        raise NotImplementedError("Subclasses must implement this method")

    def execute(self, data: ProcessingData) -> None:
        """Execute the process step on the given data, merging produced
        DataBundles back into *data* (updating existing bundles in place)."""
        self.processing_data = data
        if not self.__prepared:
            self.prepare_execution()
            self.__prepared = True
        self.produced_outputs = self.calculate()
        for _key, value in self.produced_outputs.items():
            if _key in data:
                data[_key].update(value)
            else:
                data[_key] = value
        self.executed = True

    def reset(self):
        """Reset the process step to its initial state."""
        self.__prepared = False
        self.executed = False
        self.produced_outputs = {}
        self._prepared_data = {}

    def modify_config_by_dict(self, by_dict: dict | None = None) -> None:
        """Modify the configuration of the process step by a dictionary.

        Unknown keys (not in the configuration nor among the documented
        arguments) raise a KeyError.  Any change invalidates the prepared
        state so prepare_execution() runs again on the next execute().
        """
        # ``None`` default (instead of a shared mutable ``{}``) fixes the
        # mutable-default-argument pitfall; behaviour is otherwise unchanged.
        if by_dict is None:
            by_dict = {}
        for key, value in by_dict.items():
            if key in self.configuration:
                self.configuration[key] = value
            elif key in self.documentation.arguments:
                # Allow setting documented arguments even if they were not part of the
                # current configuration snapshot yet.
                self.configuration[key] = value
            else:
                known_keys = ", ".join(sorted(self.configuration.keys()))
                raise KeyError(f"Key {key} not found in configuration. Known keys: {known_keys}")  # noqa
        # restart preparation after configuration change:
        self.__prepared = False

    def modify_config_by_kwargs(self, **kwargs) -> None:
        """Modify the configuration of the process step by keyword arguments."""
        if kwargs:
            self.modify_config_by_dict(kwargs)

    @classmethod
    def is_process_step_dict(cls, instance: Type | None, attribute: str | None, item: Any) -> bool:
        """
        Check if the value is a dictionary with the correct keys and types
        according to CONFIG_KEYS (used as an attrs validator).
        """
        if not isinstance(item, dict):
            return False
        for _key, _value in item.items():
            if _key not in cls.CONFIG_KEYS:
                return False
            _config = cls.CONFIG_KEYS[_key]
            if _value is None:
                if _config["allow_none"]:
                    continue
                return False
            # strings are iterable but are treated as scalar values here
            if isinstance(_value, Iterable) and not isinstance(_value, str):
                if not (_config["allow_iterable"] and all([isinstance(_i, _config["type"]) for _i in _value])):
                    return False
                continue
            if not isinstance(_value, _config["type"]):
                return False
        return True

    @classmethod
    def default_config(cls) -> dict[str, Any]:
        """
        Create an initial dictionary for the process step configuration.
        """
        return {_k: _v["default"] for _k, _v in cls.CONFIG_KEYS.items()}
@@ -0,0 +1,146 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Jérôme Kieffer", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from copy import deepcopy
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from attrs import define, evolve, field
19
+ from attrs import validators as v
20
+
21
+ __all__ = ["ProcessStepDescriber"]
22
+
23
+
24
+ NXCite = str
25
+ ArgumentSpec = dict[str, Any]
26
+
27
+
28
+ _MISSING = object()
29
+
30
+
31
+ def _normalize_str_list(value: Any, field_name: str) -> list[str]:
32
+ if value is None:
33
+ return []
34
+ if isinstance(value, (str, dict)):
35
+ raise TypeError(f"{field_name} must be a list of strings, got {type(value).__name__}.")
36
+ if isinstance(value, (list, tuple, set)):
37
+ return [item.strip() if isinstance(item, str) else item for item in value]
38
+ raise TypeError(f"{field_name} must be a list of strings, got {type(value).__name__}.")
39
+
40
+
41
+ def _normalize_arguments(value: Any, field_name: str) -> dict[str, ArgumentSpec]:
42
+ if value is None:
43
+ return {}
44
+ if not isinstance(value, dict):
45
+ raise TypeError(f"{field_name} must be a mapping, got {type(value).__name__}.")
46
+
47
+ normalized: dict[str, ArgumentSpec] = {}
48
+ for key, spec in value.items():
49
+ if not isinstance(spec, dict):
50
+ raise TypeError(f"{field_name} entries must be mappings, got {type(spec).__name__} for key {key!r}.")
51
+ normalized_key = str(key).strip()
52
+ if not normalized_key:
53
+ raise ValueError(f"{field_name} keys must be non-empty strings.")
54
+
55
+ normalized_spec = dict(spec)
56
+ default = normalized_spec.get("default", _MISSING)
57
+ normalized_spec["default"] = default
58
+
59
+ required = normalized_spec.get("required", False)
60
+ if not isinstance(required, bool):
61
+ raise TypeError(f"{field_name}[{normalized_key!r}]['required'] must be a boolean.")
62
+ normalized_spec["required"] = required
63
+
64
+ normalized[normalized_key] = normalized_spec
65
+ return normalized
66
+
67
+
68
+ def _deepcopy_default(value: Any) -> Any:
69
+ return deepcopy(value)
70
+
71
+
72
@define
class ProcessStepDescriber:
    """Machine-readable description of a ProcessStep.

    Holds identifying metadata (name, id, module path, version), the schema
    of configurable arguments, required data keys, and free-text/keyword
    documentation.  List- and mapping-valued fields are normalised on
    assignment via the module-level ``_normalize_str_list`` /
    ``_normalize_arguments`` converters.
    """

    calling_name: str = field()  # short name to identify the calling process for the UI
    calling_id: str = field()  # not sure what we were planning here. some UID perhaps? difference with calling_module
    calling_module_path: Path = field(
        validator=v.instance_of(Path)
    )  # partial path to the module from src/modacor/modules onwards
    calling_version: str = field()  # module version being executed
    required_data_keys: list[str] = field(
        factory=list,
        converter=lambda value: _normalize_str_list(value, "required_data_keys"),
        validator=v.deep_iterable(member_validator=v.instance_of(str), iterable_validator=v.instance_of(list)),
    )  # list of data keys required by the process
    arguments: dict[str, ArgumentSpec] = field(
        factory=dict,
        converter=lambda value: _normalize_arguments(value, "arguments"),
        validator=v.deep_mapping(key_validator=v.instance_of(str), value_validator=v.instance_of(dict)),
    )  # schema describing configurable arguments
    modifies: dict[str, list] = field(
        factory=dict, validator=v.instance_of(dict)
    )  # which aspects of BaseData are modified by this
    step_keywords: list[str] = field(
        factory=list,
        converter=lambda value: _normalize_str_list(value, "step_keywords"),
        validator=v.deep_iterable(member_validator=v.instance_of(str), iterable_validator=v.instance_of(list)),
    )  # list of keywords that can be used to identify the process (allowing for searches)
    step_doc: str = field(default="")  # documentation for the process
    step_reference: NXCite = field(default="")  # NXCite to the paper describing the process
    step_note: str | None = field(default=None)  # optional free-form note about the step
    # use_frames_cache: list[str] = field(factory=list)
    # # for produced_values dictionary key names in this list, the produced_values are cached
    # # on first run, and reused on subsequent runs. Maybe two chaches, one for per-file and
    # # one for per-execution.
    # use_overall_cache: list[str] = field(factory=list)
    # # for produced_values dictionary key names in this list, the produced_values are cached
    # # on first run, and reused on subsequent runs. Maybe two chaches, one for per-file and
    # # one for per-execution.

    def copy(self) -> "ProcessStepDescriber":
        """Return a copy of this describer via :func:`attrs.evolve`."""
        return evolve(self)

    def argument_names(self) -> tuple[str, ...]:
        """Return the names of all configurable arguments."""
        return tuple(self.arguments.keys())

    def required_argument_names(self) -> tuple[str, ...]:
        """Return the names of arguments whose spec marks them as required."""
        return tuple(name for name, spec in self.arguments.items() if spec.get("required", False))

    def initial_configuration(self) -> dict[str, Any]:
        """Build the initial configuration dict from the argument defaults.

        Arguments without a declared default map to ``None``; declared
        defaults are deep-copied so instances never share mutable state.
        """
        configuration: dict[str, Any] = {}
        for name, spec in self.arguments.items():
            default = spec.get("default", _MISSING)
            if default is _MISSING:
                configuration[name] = None
            else:
                configuration[name] = _deepcopy_default(default)
        return configuration

    @classmethod
    def from_module(
        cls,
        *,
        calling_name: str,
        calling_id: str,
        module_file: str | Path,
        version: str,
        **kwargs: Any,
    ) -> "ProcessStepDescriber":
        """Convenience constructor that normalises ``module_file`` to :class:`Path`."""
        return cls(
            calling_name=calling_name,
            calling_id=calling_id,
            calling_module_path=Path(module_file),
            calling_version=version,
            **kwargs,
        )
@@ -0,0 +1,59 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from typing import Any
15
+
16
+ from modacor.dataclasses.databundle import DataBundle
17
+
18
+
19
class ProcessingData(dict):
    """
    Dictionary-like container mapping string keys to DataBundle references.
    """

    def __setitem__(self, key: str, item: DataBundle | Any):
        """
        Assign a value to a dictionary key, enforcing types.

        Parameters
        ----------
        key : str
            The dictionary key.
        item : DataBundle | Any
            The value / object to be added to the dictionary.

        Raises
        ------
        TypeError
            If the item is not an instance of DataBundle, or the key is
            not a string.
        """
        # item is validated before key, matching the established behaviour
        if not isinstance(item, DataBundle):
            raise TypeError(f"Expected a DataBundle instance, got {type(item).__name__}.")
        if not isinstance(key, str):
            raise TypeError(f"Expected a string key, got {type(key).__name__}.")
        super().__setitem__(key, item)

    def __repr__(self):
        """
        Summarise every stored DataBundle, its datasets and uncertainties.
        """
        lines = []
        for bundle_key, bundle in self.items():
            lines.append(f"DataBundle '{bundle_key}': contains datasets {list(bundle.keys())}")
            for dkey, dataset in bundle.items():
                lines.append(f" Dataset '{dkey}': shape {dataset.signal.shape}, units {dataset.units}")
                lines.append(f" available uncertainties: {list(dataset.uncertainties.keys())}")
        return "\n".join(lines)