ChessAnalysisPipeline 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)
  1. CHAP/__init__.py +1 -1
  2. CHAP/common/__init__.py +13 -0
  3. CHAP/common/models/integration.py +29 -26
  4. CHAP/common/models/map.py +395 -224
  5. CHAP/common/processor.py +1725 -93
  6. CHAP/common/reader.py +265 -28
  7. CHAP/common/writer.py +191 -18
  8. CHAP/edd/__init__.py +9 -2
  9. CHAP/edd/models.py +886 -665
  10. CHAP/edd/processor.py +2592 -936
  11. CHAP/edd/reader.py +889 -0
  12. CHAP/edd/utils.py +846 -292
  13. CHAP/foxden/__init__.py +6 -0
  14. CHAP/foxden/processor.py +42 -0
  15. CHAP/foxden/writer.py +65 -0
  16. CHAP/giwaxs/__init__.py +8 -0
  17. CHAP/giwaxs/models.py +100 -0
  18. CHAP/giwaxs/processor.py +520 -0
  19. CHAP/giwaxs/reader.py +5 -0
  20. CHAP/giwaxs/writer.py +5 -0
  21. CHAP/pipeline.py +48 -10
  22. CHAP/runner.py +161 -72
  23. CHAP/tomo/models.py +31 -29
  24. CHAP/tomo/processor.py +169 -118
  25. CHAP/utils/__init__.py +1 -0
  26. CHAP/utils/fit.py +1292 -1315
  27. CHAP/utils/general.py +411 -53
  28. CHAP/utils/models.py +594 -0
  29. CHAP/utils/parfile.py +10 -2
  30. ChessAnalysisPipeline-0.0.16.dist-info/LICENSE +60 -0
  31. {ChessAnalysisPipeline-0.0.14.dist-info → ChessAnalysisPipeline-0.0.16.dist-info}/METADATA +1 -1
  32. ChessAnalysisPipeline-0.0.16.dist-info/RECORD +62 -0
  33. {ChessAnalysisPipeline-0.0.14.dist-info → ChessAnalysisPipeline-0.0.16.dist-info}/WHEEL +1 -1
  34. CHAP/utils/scanparsers.py +0 -1431
  35. ChessAnalysisPipeline-0.0.14.dist-info/LICENSE +0 -21
  36. ChessAnalysisPipeline-0.0.14.dist-info/RECORD +0 -54
  37. {ChessAnalysisPipeline-0.0.14.dist-info → ChessAnalysisPipeline-0.0.16.dist-info}/entry_points.txt +0 -0
  38. {ChessAnalysisPipeline-0.0.14.dist-info → ChessAnalysisPipeline-0.0.16.dist-info}/top_level.txt +0 -0
CHAP/common/processor.py CHANGED
@@ -8,6 +8,9 @@ Description: Module for Processors used in multiple experiment-specific
8
8
  workflows.
9
9
  """
10
10
 
11
+ # System modules
12
+ import os
13
+
11
14
  # Third party modules
12
15
  import numpy as np
13
16
 
@@ -58,12 +61,6 @@ class AnimationProcessor(Processor):
58
61
  :return: The matplotlib animation.
59
62
  :rtype: matplotlib.animation.ArtistAnimation
60
63
  """
61
- # System modules
62
- from os.path import (
63
- isabs,
64
- join,
65
- )
66
-
67
64
  # Third party modules
68
65
  import matplotlib.animation as animation
69
66
  import matplotlib.pyplot as plt
@@ -134,6 +131,7 @@ class AnimationProcessor(Processor):
134
131
  a_max = frames[0].max()
135
132
  for n in range(1, num_frames):
136
133
  a_max = min(a_max, frames[n].max())
134
+ a_max = float(a_max)
137
135
  if vmin is None:
138
136
  vmin = -a_max
139
137
  if vmax is None:
@@ -248,17 +246,9 @@ class BinarizeProcessor(Processor):
248
246
  :raises ValueError: Upon invalid input parameters.
249
247
  :return: The binarized dataset with a return type equal to
250
248
  that of the input dataset.
251
- :rtype: numpy.ndarray, nexusformat.nexus.NXobject
249
+ :rtype: typing.Union[numpy.ndarray, nexusformat.nexus.NXobject]
252
250
  """
253
- # System modules
254
- from os.path import join as os_join
255
- from os.path import relpath
256
-
257
- # Local modules
258
- from CHAP.utils.general import (
259
- is_int,
260
- nxcopy,
261
- )
251
+ # Third party modules
262
252
  from nexusformat.nexus import (
263
253
  NXdata,
264
254
  NXfield,
@@ -267,6 +257,12 @@ class BinarizeProcessor(Processor):
267
257
  nxsetconfig,
268
258
  )
269
259
 
260
+ # Local modules
261
+ from CHAP.utils.general import (
262
+ is_int,
263
+ nxcopy,
264
+ )
265
+
270
266
  if method not in [
271
267
  'CHAP', 'manual', 'otsu', 'yen', 'isodata', 'minimum']:
272
268
  raise ValueError(f'Invalid parameter method ({method})')
@@ -344,19 +340,21 @@ class BinarizeProcessor(Processor):
344
340
  exclude_nxpaths = []
345
341
  if nxdefault is not None:
346
342
  exclude_nxpaths.append(
347
- os_join(relpath(nxdefault.nxpath, dataset.nxpath)))
343
+ os.path.join(os.path.relpath(
344
+ nxdefault.nxpath, dataset.nxpath)))
348
345
  if remove_original_data:
349
346
  if (nxdefault is None
350
347
  or nxdefault.nxpath != nxdata.nxpath):
351
- relpath_nxdata = relpath(nxdata.nxpath, dataset.nxpath)
348
+ relpath_nxdata = os.path.relpath(
349
+ nxdata.nxpath, dataset.nxpath)
352
350
  keys = list(nxdata.keys())
353
351
  keys.remove(nxsignal.nxname)
354
352
  for axis in nxdata.axes:
355
353
  keys.remove(axis)
356
354
  if len(keys):
357
355
  raise RuntimeError('Not tested yet')
358
- exclude_nxpaths.append(os_join(
359
- relpath(nxsignal.nxpath, dataset.nxpath)))
356
+ exclude_nxpaths.append(os.path.join(
357
+ os.path.relpath(nxsignal.nxpath, dataset.nxpath)))
360
358
  elif relpath_nxdata == '.':
361
359
  exclude_nxpaths.append(nxsignal.nxname)
362
360
  if dataset.nxclass != 'NXdata':
@@ -373,11 +371,11 @@ class BinarizeProcessor(Processor):
373
371
  keys.remove(axis)
374
372
  if len(keys):
375
373
  raise RuntimeError('Not tested yet')
376
- exclude_nxpaths.append(os_join(
377
- relpath(nxsignal.nxpath, dataset.nxpath)))
374
+ exclude_nxpaths.append(os.path.join(
375
+ os.path.relpath(nxsignal.nxpath, dataset.nxpath)))
378
376
  else:
379
- exclude_nxpaths.append(os_join(
380
- relpath(nxgroup.nxpath, dataset.nxpath)))
377
+ exclude_nxpaths.append(os.path.join(
378
+ os.path.relpath(nxgroup.nxpath, dataset.nxpath)))
381
379
  nxobject = nxcopy(dataset, exclude_nxpaths=exclude_nxpaths)
382
380
 
383
381
  # Get a histogram of the data
@@ -494,12 +492,11 @@ class BinarizeProcessor(Processor):
494
492
  # Select the ROI's orthogonal to the selected averaging direction
495
493
  bounds = []
496
494
  for i, bound in enumerate(['"0"', '"1"']):
497
- _, roi = select_roi_2d(
495
+ roi = select_roi_2d(
498
496
  mean_data,
499
497
  title=f'Select the ROI to obtain the {bound} data value',
500
498
  title_a=f'Data averaged in the {axes[axis]}-direction',
501
499
  row_label=subaxes[0], column_label=subaxes[1])
502
- plt.close()
503
500
 
504
501
  # Select the index range in the selected averaging direction
505
502
  if not axis:
@@ -512,12 +509,11 @@ class BinarizeProcessor(Processor):
512
509
  mean_roi_data = data[roi[2]:roi[3],roi[0]:roi[1],:].mean(
513
510
  axis=(0,1))
514
511
 
515
- _, _range = select_roi_1d(
512
+ _range = select_roi_1d(
516
513
  mean_roi_data, preselected_roi=(0, data.shape[axis]),
517
514
  title=f'Select the {axes[axis]}-direction range to obtain '
518
515
  f'the {bound} data bound',
519
516
  xlabel=axes[axis], ylabel='Average data')
520
- plt.close()
521
517
 
522
518
  # Obtain the lower/upper data bound
523
519
  if not axis:
@@ -573,10 +569,261 @@ class BinarizeProcessor(Processor):
573
569
  nxdata = nxentry[name].data
574
570
  nxentry.data = NXdata(
575
571
  NXlink(nxdata.nxsignal.nxpath),
576
- [NXlink(os_join(nxdata.nxpath, axis)) for axis in nxdata.axes])
572
+ [NXlink(os.path.join(nxdata.nxpath, axis))
573
+ for axis in nxdata.axes])
574
+ nxentry.data.set_default()
577
575
  return nxobject
578
576
 
579
577
 
578
+ class ConstructBaseline(Processor):
579
+ """A Processor to construct a baseline for a dataset.
580
+ """
581
+ def process(
582
+ self, data, mask=None, tol=1.e-6, lam=1.e6, max_iter=20,
583
+ save_figures=False, outputdir='.', interactive=False):
584
+ """Construct and return the baseline for a dataset.
585
+
586
+ :param data: Input data.
587
+ :type data: list[PipelineData]
588
+ :param mask: A mask to apply to the spectrum before baseline
589
+ construction, default to `None`.
590
+ :type mask: array-like, optional
591
+ :param tol: The convergence tolerence, defaults to `1.e-6`.
592
+ :type tol: float, optional
593
+ :param lam: The &lambda (smoothness) parameter (the balance
594
+ between the residual of the data and the baseline and the
595
+ smoothness of the baseline). The suggested range is between
596
+ 100 and 10^8, defaults to `10^6`.
597
+ :type lam: float, optional
598
+ :param max_iter: The maximum number of iterations,
599
+ defaults to `20`.
600
+ :type max_iter: int, optional
601
+ :param save_figures: Save .pngs of plots for checking inputs &
602
+ outputs of this Processor, defaults to False.
603
+ :type save_figures: bool, optional
604
+ :param outputdir: Directory to which any output figures will
605
+ be saved, defaults to '.'
606
+ :type outputdir: str, optional
607
+ :param interactive: Allows for user interactions, defaults to
608
+ False.
609
+ :type interactive: bool, optional
610
+ :return: The smoothed baseline and the configuration.
611
+ :rtype: numpy.array, dict
612
+ """
613
+ try:
614
+ data = np.asarray(self.unwrap_pipelinedata(data)[0])
615
+ except:
616
+ raise ValueError(
617
+ f'The structure of {data} contains no valid data')
618
+
619
+ return self.construct_baseline(
620
+ data, mask, tol, lam, max_iter, save_figures, outputdir,
621
+ interactive)
622
+
623
+ @staticmethod
624
+ def construct_baseline(
625
+ y, x=None, mask=None, tol=1.e-6, lam=1.e6, max_iter=20, title=None,
626
+ xlabel=None, ylabel=None, interactive=False, filename=None):
627
+ """Construct and return the baseline for a dataset.
628
+
629
+ :param y: Input data.
630
+ :type y: numpy.array
631
+ :param x: Independent dimension (only used when interactive is
632
+ `True` of when filename is set), defaults to `None`.
633
+ :type x: array-like, optional
634
+ :param mask: A mask to apply to the spectrum before baseline
635
+ construction, default to `None`.
636
+ :type mask: array-like, optional
637
+ :param tol: The convergence tolerence, defaults to `1.e-6`.
638
+ :type tol: float, optional
639
+ :param lam: The &lambda (smoothness) parameter (the balance
640
+ between the residual of the data and the baseline and the
641
+ smoothness of the baseline). The suggested range is between
642
+ 100 and 10^8, defaults to `10^6`.
643
+ :type lam: float, optional
644
+ :param max_iter: The maximum number of iterations,
645
+ defaults to `20`.
646
+ :type max_iter: int, optional
647
+ :param xlabel: Label for the x-axis of the displayed figure,
648
+ defaults to `None`.
649
+ :param title: Title for the displayed figure, defaults to `None`.
650
+ :type title: str, optional
651
+ :type xlabel: str, optional
652
+ :param ylabel: Label for the y-axis of the displayed figure,
653
+ defaults to `None`.
654
+ :type ylabel: str, optional
655
+ :param interactive: Allows for user interactions, defaults to
656
+ False.
657
+ :type interactive: bool, optional
658
+ :param filename: Save a .png of the plot to filename, defaults to
659
+ `None`, in which case the plot is not saved.
660
+ :type filename: str, optional
661
+ :return: The smoothed baseline and the configuration.
662
+ :rtype: numpy.array, dict
663
+ """
664
+ # Third party modules
665
+ if interactive or filename is not None:
666
+ from matplotlib.widgets import TextBox, Button
667
+ import matplotlib.pyplot as plt
668
+
669
+ # Local modules
670
+ from CHAP.utils.general import baseline_arPLS
671
+
672
+ def change_fig_subtitle(maxed_out=False, subtitle=None):
673
+ if fig_subtitles:
674
+ fig_subtitles[0].remove()
675
+ fig_subtitles.pop()
676
+ if subtitle is None:
677
+ subtitle = r'$\lambda$ = 'f'{lambdas[-1]:.2e}, '
678
+ if maxed_out:
679
+ subtitle += f'# iter = {num_iters[-1]} (maxed out) '
680
+ else:
681
+ subtitle += f'# iter = {num_iters[-1]} '
682
+ subtitle += f'error = {errors[-1]:.2e}'
683
+ fig_subtitles.append(
684
+ plt.figtext(*subtitle_pos, subtitle, **subtitle_props))
685
+
686
+ def select_lambda(expression):
687
+ """Callback function for the "Select lambda" TextBox.
688
+ """
689
+ if not len(expression):
690
+ return
691
+ try:
692
+ lam = float(expression)
693
+ if lam < 0:
694
+ raise ValueError
695
+ except ValueError:
696
+ change_fig_subtitle(
697
+ subtitle=f'Invalid lambda, enter a positive number')
698
+ else:
699
+ lambdas.pop()
700
+ lambdas.append(10**lam)
701
+ baseline, _, w, num_iter, error = baseline_arPLS(
702
+ y, mask=mask, tol=tol, lam=lambdas[-1], max_iter=max_iter,
703
+ full_output=True)
704
+ num_iters.pop()
705
+ num_iters.append(num_iter)
706
+ errors.pop()
707
+ errors.append(error)
708
+ if num_iter < max_iter:
709
+ change_fig_subtitle()
710
+ else:
711
+ change_fig_subtitle(maxed_out=True)
712
+ baseline_handle.set_ydata(baseline)
713
+ lambda_box.set_val('')
714
+ plt.draw()
715
+
716
+ def continue_iter(event):
717
+ """Callback function for the "Continue" button."""
718
+ baseline, _, w, n_iter, error = baseline_arPLS(
719
+ y, mask=mask, w=weights[-1], tol=tol, lam=lambdas[-1],
720
+ max_iter=max_iter, full_output=True)
721
+ num_iters[-1] += n_iter
722
+ errors.pop()
723
+ errors.append(error)
724
+ if n_iter < max_iter:
725
+ change_fig_subtitle()
726
+ else:
727
+ change_fig_subtitle(maxed_out=True)
728
+ baseline_handle.set_ydata(baseline)
729
+ plt.draw()
730
+ weights.pop()
731
+ weights.append(w)
732
+
733
+ def confirm(event):
734
+ """Callback function for the "Confirm" button."""
735
+ plt.close()
736
+
737
+ baseline, _, w, num_iter, error = baseline_arPLS(
738
+ y, mask=mask, tol=tol, lam=lam, max_iter=max_iter,
739
+ full_output=True)
740
+
741
+ if not interactive and filename is None:
742
+ return baseline
743
+
744
+ lambdas = [lam]
745
+ weights = [w]
746
+ num_iters = [num_iter]
747
+ errors = [error]
748
+ fig_subtitles = []
749
+
750
+ # Check inputs
751
+ if x is None:
752
+ x = np.arange(y.size)
753
+
754
+ # Setup the Matplotlib figure
755
+ title_pos = (0.5, 0.95)
756
+ title_props = {'fontsize': 'xx-large', 'horizontalalignment': 'center',
757
+ 'verticalalignment': 'bottom'}
758
+ subtitle_pos = (0.5, 0.90)
759
+ subtitle_props = {'fontsize': 'x-large',
760
+ 'horizontalalignment': 'center',
761
+ 'verticalalignment': 'bottom'}
762
+ fig, ax = plt.subplots(figsize=(11, 8.5))
763
+ if mask is None:
764
+ ax.plot(x, y, label='input data')
765
+ else:
766
+ ax.plot(
767
+ x[mask.astype(bool)], y[mask.astype(bool)], label='input data')
768
+ baseline_handle = ax.plot(x, baseline, label='baseline')[0]
769
+ # ax.plot(x, y-baseline, label='baseline corrected data')
770
+ ax.set_xlabel(xlabel, fontsize='x-large')
771
+ ax.set_ylabel(ylabel, fontsize='x-large')
772
+ ax.legend()
773
+ if title is None:
774
+ fig_title = plt.figtext(*title_pos, 'Baseline', **title_props)
775
+ else:
776
+ fig_title = plt.figtext(*title_pos, title, **title_props)
777
+ if num_iter < max_iter:
778
+ change_fig_subtitle()
779
+ else:
780
+ change_fig_subtitle(maxed_out=True)
781
+ fig.subplots_adjust(bottom=0.0, top=0.85)
782
+
783
+ if interactive:
784
+
785
+ fig.subplots_adjust(bottom=0.2)
786
+
787
+ # Setup TextBox
788
+ lambda_box = TextBox(
789
+ plt.axes([0.15, 0.05, 0.15, 0.075]), r'log($\lambda$)')
790
+ lambda_cid = lambda_box.on_submit(select_lambda)
791
+
792
+ # Setup "Continue" button
793
+ continue_btn = Button(
794
+ plt.axes([0.45, 0.05, 0.15, 0.075]), 'Continue smoothing')
795
+ continue_cid = continue_btn.on_clicked(continue_iter)
796
+
797
+ # Setup "Confirm" button
798
+ confirm_btn = Button(plt.axes([0.75, 0.05, 0.15, 0.075]), 'Confirm')
799
+ confirm_cid = confirm_btn.on_clicked(confirm)
800
+
801
+ # Show figure for user interaction
802
+ plt.show()
803
+
804
+ # Disconnect all widget callbacks when figure is closed
805
+ lambda_box.disconnect(lambda_cid)
806
+ continue_btn.disconnect(continue_cid)
807
+ confirm_btn.disconnect(confirm_cid)
808
+
809
+ # ... and remove the buttons before returning the figure
810
+ lambda_box.ax.remove()
811
+ continue_btn.ax.remove()
812
+ confirm_btn.ax.remove()
813
+
814
+ if filename is not None:
815
+ fig_title.set_in_layout(True)
816
+ fig_subtitles[-1].set_in_layout(True)
817
+ fig.tight_layout(rect=(0, 0, 1, 0.90))
818
+ fig.savefig(filename)
819
+ plt.close()
820
+
821
+ config = {
822
+ 'tol': tol, 'lambda': lambdas[-1], 'max_iter': max_iter,
823
+ 'num_iter': num_iters[-1], 'error': errors[-1], 'mask': mask}
824
+ return baseline, config
825
+
826
+
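For reference, a minimal sketch of how the new `ConstructBaseline` helper could be called directly on a 1-D spectrum outside of a pipeline run. The spectrum below is synthetic and the parameter values are illustrative only; with `interactive=False` and no `filename`, the static method returns just the baseline array, as in the code above.

```python
# Illustrative only -- synthetic spectrum, default arPLS settings.
import numpy as np
from CHAP.common.processor import ConstructBaseline

x = np.linspace(0., 10., 500)
y = np.exp(-0.5*((x - 5.)/0.2)**2) + 0.05*x    # peak on a sloping background
baseline = ConstructBaseline.construct_baseline(y, x=x, lam=1.e6)
corrected = y - baseline                        # baseline-corrected spectrum
```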
580
827
  class ImageProcessor(Processor):
581
828
  """A Processor to plot an image (slice) from a NeXus object.
582
829
  """
@@ -584,9 +831,9 @@ class ImageProcessor(Processor):
584
831
  self, data, vmin=None, vmax=None, axis=0, index=None,
585
832
  coord=None, interactive=False, save_figure=True, outputdir='.',
586
833
  filename='image.png'):
587
- """Plot and/or save an image (slice) from a NeXus NXobject object with
588
- a default data path contained in `data` and return the NeXus NXdata
589
- data object.
834
+ """Plot and/or save an image (slice) from a NeXus NXobject
835
+ object with a default data path contained in `data` and return
836
+ the NeXus NXdata data object.
590
837
 
591
838
  :param data: Input data.
592
839
  :type data: list[PipelineData]
@@ -618,12 +865,6 @@ class ImageProcessor(Processor):
618
865
  :return: The input data object.
619
866
  :rtype: nexusformat.nexus.NXdata
620
867
  """
621
- # System modules
622
- from os.path import (
623
- isabs,
624
- join,
625
- )
626
-
627
868
  # Third party modules
628
869
  import matplotlib.pyplot as plt
629
870
 
@@ -639,8 +880,8 @@ class ImageProcessor(Processor):
639
880
  raise ValueError(f'Invalid parameter outputdir ({outputdir})')
640
881
  if not isinstance(filename, str):
641
882
  raise ValueError(f'Invalid parameter filename ({filename})')
642
- if not isabs(filename):
643
- filename = join(outputdir, filename)
883
+ if not os.path.isabs(filename):
884
+ filename = os.path.join(outputdir, filename)
644
885
 
645
886
  # Get the default Nexus NXdata object
646
887
  data = self.unwrap_pipelinedata(data)[0]
@@ -796,8 +1037,9 @@ class IntegrateMapProcessor(Processor):
796
1037
  containing a map of the integrated detector data requested.
797
1038
 
798
1039
  :param data: Input data, containing at least one item
799
- with the value `'MapConfig'` for the `'schema'` key, and at
800
- least one item with the value `'IntegrationConfig'` for the
1040
+ with the value `'common.models.map.MapConfig'` for the
1041
+ `'schema'` key, and at least one item with the value
1042
+ `'common.models.integration.IntegrationConfig'` for the
801
1043
  `'schema'` key.
802
1044
  :type data: list[PipelineData]
803
1045
  :return: Integrated data and process metadata.
@@ -815,10 +1057,11 @@ class IntegrateMapProcessor(Processor):
815
1057
  """Use a `MapConfig` and `IntegrationConfig` to construct a
816
1058
  NeXus NXprocess object.
817
1059
 
818
- :param map_config: A valid map configuration.
819
- :type map_config: MapConfig
820
- :param integration_config: A valid integration configuration
821
- :type integration_config: IntegrationConfig.
1060
+ :param map_config: A valid map configuration.
1061
+ :type map_config: common.models.map.MapConfig
1062
+ :param integration_config: A valid integration configuration.
1063
+ :type integration_config:
1064
+ common.models.integration.IntegrationConfig
822
1065
  :return: The integrated detector data and metadata.
823
1066
  :rtype: nexusformat.nexus.NXprocess
824
1067
  """
@@ -871,7 +1114,7 @@ class IntegrateMapProcessor(Processor):
871
1114
  *map_config.dims,
872
1115
  *integration_config.integrated_data_dims
873
1116
  )
874
- for i, dim in enumerate(map_config.independent_dimensions[::-1]):
1117
+ for i, dim in enumerate(map_config.independent_dimensions):
875
1118
  nxprocess.data[dim.label] = NXfield(
876
1119
  value=map_config.coords[dim.label],
877
1120
  units=dim.units,
@@ -901,7 +1144,7 @@ class IntegrateMapProcessor(Processor):
901
1144
  value=np.empty(
902
1145
  (*tuple(
903
1146
  [len(coord_values) for coord_name, coord_values
904
- in map_config.coords.items()][::-1]),
1147
+ in map_config.coords.items()]),
905
1148
  *integration_config.integrated_data_shape)),
906
1149
  units='a.u',
907
1150
  attrs={'long_name':'Intensity (a.u)'})
@@ -958,33 +1201,256 @@ class MapProcessor(Processor):
958
1201
  NXentry object representing that map's metadata and any
959
1202
  scalar-valued raw data requested by the supplied map configuration.
960
1203
  """
961
- def process(self, data):
1204
+ def process(
1205
+ self, data, config=None, detector_names=None, num_proc=1,
1206
+ comm=None, inputdir=None):
962
1207
  """Process the output of a `Reader` that contains a map
963
1208
  configuration and returns a NeXus NXentry object representing
964
1209
  the map.
965
1210
 
966
1211
  :param data: Result of `Reader.read` where at least one item
967
- has the value `'MapConfig'` for the `'schema'` key.
1212
+ has the value `'common.models.map.MapConfig'` for the
1213
+ `'schema'` key.
968
1214
  :type data: list[PipelineData]
1215
+ :param config: Initialization parameters for an instance of
1216
+ common.models.map.MapConfig, defaults to `None`.
1217
+ :type config: dict, optional
1218
+ :param detector_names: Detector names/prefixes to include raw
1219
+ data for in the returned NeXus NXentry object,
1220
+ defaults to `None`.
1221
+ :type detector_names: Union(int, str, list[int], list[str]),
1222
+ optional
1223
+ :param num_proc: Number of processors used to read map,
1224
+ defaults to `1`.
1225
+ :type num_proc: int, optional
969
1226
  :return: Map data and metadata.
970
1227
  :rtype: nexusformat.nexus.NXentry
971
1228
  """
972
- map_config = self.get_config(data, 'common.models.map.MapConfig')
973
- nxentry = self.__class__.get_nxentry(map_config)
1229
+ # System modules
1230
+ from copy import deepcopy
1231
+ import logging
1232
+ from tempfile import NamedTemporaryFile
1233
+
1234
+ # Third party modules
1235
+ import yaml
1236
+
1237
+ # Local modules
1238
+ from CHAP.runner import (
1239
+ RunConfig,
1240
+ runner,
1241
+ )
1242
+ from CHAP.utils.general import (
1243
+ is_str_series,
1244
+ string_to_list,
1245
+ )
1246
+
1247
+ # Get the validated map configuration
1248
+ try:
1249
+ map_config = self.get_config(
1250
+ data, 'common.models.map.MapConfig', inputdir=inputdir)
1251
+ except Exception as data_exc:
1252
+ self.logger.info('No valid Map configuration in input pipeline '
1253
+ 'data, using config parameter instead.')
1254
+ try:
1255
+ # Local modules
1256
+ from CHAP.common.models.map import MapConfig
1257
+
1258
+ map_config = MapConfig(**config, inputdir=inputdir)
1259
+ except Exception as dict_exc:
1260
+ raise RuntimeError from dict_exc
1261
+
1262
+ # Validate the number of processors
1263
+ if not isinstance(num_proc, int):
1264
+ self.logger.warning('Ignoring invalid parameter num_proc '
1265
+ f'({num_proc}), running serially')
1266
+ num_proc = 1
1267
+ elif num_proc > 1:
1268
+ try:
1269
+ # System modules
1270
+ from os import cpu_count
1271
+
1272
+ # Third party modules
1273
+ from mpi4py import MPI
1274
+
1275
+ if num_proc > cpu_count():
1276
+ self.logger.warning(
1277
+ f'The requested number of processors ({num_proc}) '
1278
+ 'exceeds the maximum number of processors '
1279
+ f'({cpu_count()}): reset it to {cpu_count()}')
1280
+ num_proc = cpu_count()
1281
+ except:
1282
+ self.logger.warning('Unable to load mpi4py, running serially')
1283
+ num_proc = 1
1284
+
1285
+ # Validate the detector names/prefixes
1286
+ if map_config.experiment_type == 'EDD':
1287
+ if detector_names is None:
1288
+ detector_indices = None
1289
+ else:
1290
+ # Local modules
1291
+ from CHAP.utils.general import is_str_series
1292
+
1293
+ if isinstance(detector_names, int):
1294
+ detector_names = [str(detector_names)]
1295
+ elif isinstance(detector_names, str):
1296
+ try:
1297
+ detector_names = [
1298
+ str(v) for v in string_to_list(
1299
+ detector_names, raise_error=True)]
1300
+ except:
1301
+ raise ValueError('Invalid parameter detector_names '
1302
+ f'({detector_names})')
1303
+ else:
1304
+ detector_names = [str(v) for v in detector_names]
1305
+ detector_indices = [int(name) for name in detector_names]
1306
+ else:
1307
+ if detector_names is None:
1308
+ raise ValueError(
1309
+ 'Missing "detector_names" parameter')
1310
+ if isinstance(detector_names, str):
1311
+ detector_names = [detector_names]
1312
+ if not is_str_series(detector_names, log=False):
1313
+ raise ValueError(
1314
+ f'Invalid "detector_names" parameter ({detector_names})')
1315
+
1316
+ # Create the sub-pipeline configuration for each processor
1317
+ # FIX: catered to EDD with one spec scan
1318
+ assert len(map_config.spec_scans) == 1
1319
+ spec_scans = map_config.spec_scans[0]
1320
+ scan_numbers = spec_scans.scan_numbers
1321
+ num_scan = len(scan_numbers)
1322
+ if num_scan < num_proc:
1323
+ self.logger.warning(
1324
+ f'The requested number of processors ({num_proc}) exceeds '
1325
+ f'the number of scans ({num_scan}): reset it to {num_scan}')
1326
+ num_proc = num_scan
1327
+ if num_proc == 1:
1328
+ common_comm = comm
1329
+ offsets = [0]
1330
+ else:
1331
+ scans_per_proc = num_scan//num_proc
1332
+ num = scans_per_proc
1333
+ if num_scan - scans_per_proc*num_proc > 0:
1334
+ num += 1
1335
+ spec_scans.scan_numbers = scan_numbers[:num]
1336
+ n_scan = num
1337
+ pipeline_config = []
1338
+ offsets = [0]
1339
+ for n_proc in range(1, num_proc):
1340
+ num = scans_per_proc
1341
+ if n_proc < num_scan - scans_per_proc*num_proc:
1342
+ num += 1
1343
+ config = deepcopy(map_config.dict())
1344
+ config['spec_scans'][0]['scan_numbers'] = \
1345
+ scan_numbers[n_scan:n_scan+num]
1346
+ pipeline_config.append(
1347
+ [{'common.MapProcessor': {
1348
+ 'config': config, 'detector_names': detector_names}}])
1349
+ offsets.append(n_scan)
1350
+ n_scan += num
1351
+
1352
+ # Spawn the workers to run the sub-pipeline
1353
+ run_config = RunConfig(
1354
+ config={'log_level': logging.getLevelName(self.logger.level),
1355
+ 'spawn': 1})
1356
+ tmp_names = []
1357
+ with NamedTemporaryFile(delete=False) as fp:
1358
+ fp_name = fp.name
1359
+ tmp_names.append(fp_name)
1360
+ with open(fp_name, 'w') as f:
1361
+ yaml.dump({'config': {'spawn': 1}}, f, sort_keys=False)
1362
+ for n_proc in range(1, num_proc):
1363
+ f_name = f'{fp_name}_{n_proc}'
1364
+ tmp_names.append(f_name)
1365
+ with open(f_name, 'w') as f:
1366
+ yaml.dump(
1367
+ {'config': run_config.__dict__,
1368
+ 'pipeline': pipeline_config[n_proc-1]},
1369
+ f, sort_keys=False)
1370
+ sub_comm = MPI.COMM_SELF.Spawn(
1371
+ 'CHAP', args=[fp_name], maxprocs=num_proc-1)
1372
+ common_comm = sub_comm.Merge(False)
1373
+ # Align with the barrier in RunConfig() on common_comm
1374
+ # called from the spawned main()
1375
+ common_comm.barrier()
1376
+ # Align with the barrier in run() on common_comm
1377
+ # called from the spawned main()
1378
+ common_comm.barrier()
1379
+
1380
+ if common_comm is None:
1381
+ num_proc = 1
1382
+ rank = 0
1383
+ else:
1384
+ num_proc = common_comm.Get_size()
1385
+ rank = common_comm.Get_rank()
1386
+ if num_proc == 1:
1387
+ offset = 0
1388
+ else:
1389
+ num_scan = common_comm.bcast(num_scan, root=0)
1390
+ offset = common_comm.scatter(offsets, root=0)
1391
+
1392
+ # Read the raw data
1393
+ if map_config.experiment_type == 'EDD':
1394
+ data, independent_dimensions, all_scalar_data = \
1395
+ self._read_raw_data_edd(
1396
+ map_config, detector_indices, common_comm, num_scan,
1397
+ offset)
1398
+ else:
1399
+ data, independent_dimensions, all_scalar_data = \
1400
+ self._read_raw_data(
1401
+ map_config, detector_names, common_comm, num_scan, offset)
1402
+ if not rank:
1403
+ self.logger.debug(f'Data shape: {data.shape}')
1404
+ if independent_dimensions is not None:
1405
+ self.logger.debug('Independent dimensions shape: '
1406
+ f'{independent_dimensions.shape}')
1407
+ if all_scalar_data is not None:
1408
+ self.logger.debug('Scalar data shape: '
1409
+ f'{all_scalar_data.shape}')
1410
+
1411
+ if rank:
1412
+ return None
1413
+
1414
+ if num_proc > 1:
1415
+ # Reset the scan_numbers to the original full set
1416
+ spec_scans.scan_numbers = scan_numbers
1417
+ # Disconnect spawned workers and cleanup temporary files
1418
+ common_comm.barrier()
1419
+ sub_comm.Disconnect()
1420
+ for tmp_name in tmp_names:
1421
+ os.remove(tmp_name)
1422
+
1423
+ # Construct the NeXus NXentry object
1424
+ nxentry = self._get_nxentry(
1425
+ map_config, detector_names, data, independent_dimensions,
1426
+ all_scalar_data)
974
1427
 
975
1428
  return nxentry
976
1429
 
977
- @staticmethod
978
- def get_nxentry(map_config):
1430
+ def _get_nxentry(
1431
+ self, map_config, detector_names, data, independent_dimensions,
1432
+ all_scalar_data):
979
1433
  """Use a `MapConfig` to construct a NeXus NXentry object.
980
1434
 
981
1435
  :param map_config: A valid map configuration.
982
- :type map_config: MapConfig
1436
+ :type map_config: common.models.map.MapConfig
1437
+ :param detector_names: Detector names to include raw data
1438
+ for in the returned NeXus NXentry object,
1439
+ defaults to `None`.
1440
+ :type detector_names: list[str]
1441
+ :param data: The map's raw data.
1442
+ :type data: numpy.ndarray
1443
+ :param independent_dimensions: The map's independent
1444
+ coordinates.
1445
+ :type independent_dimensions: numpy.ndarray
1446
+ :param all_scalar_data: The map's scalar data.
1447
+ :type all_scalar_data: numpy.ndarray
983
1448
  :return: The map's data and metadata contained in a NeXus
984
1449
  structure.
985
1450
  :rtype: nexusformat.nexus.NXentry
986
1451
  """
987
1452
  # System modules
1453
+ from copy import deepcopy
988
1454
  from json import dumps
989
1455
 
990
1456
  # Third party modules
@@ -996,11 +1462,16 @@ class MapProcessor(Processor):
996
1462
  NXsample,
997
1463
  )
998
1464
 
1465
+ # Local modules:
1466
+ from CHAP.common.models.map import PointByPointScanData
1467
+ from CHAP.utils.general import is_int_series
1468
+
1469
+ # Set up NeXus NXentry and add misc. CHESS-specific metadata
999
1470
  nxentry = NXentry(name=map_config.title)
1000
- nxentry.map_config = dumps(map_config.dict())
1001
- nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict())
1002
1471
  nxentry.attrs['station'] = map_config.station
1003
-
1472
+ for key, value in map_config.attrs.items():
1473
+ nxentry.attrs[key] = value
1474
+ nxentry.detector_names = detector_names
1004
1475
  nxentry.spec_scans = NXcollection()
1005
1476
  for scans in map_config.spec_scans:
1006
1477
  nxentry.spec_scans[scans.scanparsers[0].scan_name] = \
@@ -1008,44 +1479,618 @@ class MapProcessor(Processor):
1008
1479
  dtype='int8',
1009
1480
  attrs={'spec_file': str(scans.spec_file)})
1010
1481
 
1011
- nxentry.data = NXdata()
1012
- if map_config.map_type == 'structured':
1013
- nxentry.data.attrs['axes'] = map_config.dims
1014
- for i, dim in enumerate(map_config.independent_dimensions[::-1]):
1015
- nxentry.data[dim.label] = NXfield(
1016
- value=map_config.coords[dim.label],
1482
+ # Add sample metadata
1483
+ nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict())
1484
+
1485
+ # Set up default NeXus NXdata group (squeeze out constant dimensions)
1486
+ constant_dim = []
1487
+ for i, dim in enumerate(map_config.independent_dimensions):
1488
+ unique = np.unique(independent_dimensions[i])
1489
+ if unique.size == 1:
1490
+ constant_dim.append(i)
1491
+ nxentry.data = NXdata(
1492
+ NXfield(data, 'detector_data'),
1493
+ tuple([
1494
+ NXfield(
1495
+ independent_dimensions[i], dim.label,
1496
+ attrs={'units': dim.units,
1497
+ 'long_name': f'{dim.label} ({dim.units})',
1498
+ 'data_type': dim.data_type,
1499
+ 'local_name': dim.name})
1500
+ for i, dim in enumerate(map_config.independent_dimensions)
1501
+ if i not in constant_dim]))
1502
+ nxentry.data.set_default()
1503
+
1504
+ # Set up auxiliary NeXus NXdata group (add the constant dimensions)
1505
+ auxiliary_signals = []
1506
+ auxiliary_data = []
1507
+ for i, dim in enumerate(map_config.all_scalar_data):
1508
+ auxiliary_signals.append(dim.label)
1509
+ auxiliary_data.append(NXfield(
1510
+ value=all_scalar_data[i],
1017
1511
  units=dim.units,
1018
1512
  attrs={'long_name': f'{dim.label} ({dim.units})',
1019
1513
  'data_type': dim.data_type,
1020
- 'local_name': dim.name})
1021
- if map_config.map_type == 'structured':
1022
- nxentry.data.attrs[f'{dim.label}_indices'] = i
1023
-
1024
- signal = False
1025
- auxilliary_signals = []
1026
- for data in map_config.all_scalar_data:
1027
- nxentry.data[data.label] = NXfield(
1028
- value=np.empty(map_config.shape),
1029
- units=data.units,
1030
- attrs={'long_name': f'{data.label} ({data.units})',
1031
- 'data_type': data.data_type,
1032
- 'local_name': data.name})
1033
- if not signal:
1034
- signal = data.label
1514
+ 'local_name': dim.name}))
1515
+ for i, dim in enumerate(deepcopy(map_config.independent_dimensions)):
1516
+ if i in constant_dim:
1517
+ auxiliary_signals.append(dim.label)
1518
+ auxiliary_data.append(NXfield(
1519
+ independent_dimensions[i], dim.label,
1520
+ attrs={'units': dim.units,
1521
+ 'long_name': f'{dim.label} ({dim.units})',
1522
+ 'data_type': dim.data_type,
1523
+ 'local_name': dim.name}))
1524
+ map_config.all_scalar_data.append(
1525
+ PointByPointScanData(**dict(dim)))
1526
+ map_config.independent_dimensions.remove(dim)
1527
+ if auxiliary_signals:
1528
+ nxentry.auxdata = NXdata()
1529
+ for label, data in zip(auxiliary_signals, auxiliary_data):
1530
+ nxentry.auxdata[label] = data
1531
+ if 'SCAN_N' in auxiliary_signals:
1532
+ nxentry.auxdata.attrs['signal'] = 'SCAN_N'
1035
1533
  else:
1036
- auxilliary_signals.append(data.label)
1534
+ nxentry.auxdata.attrs['signal'] = auxiliary_signals[0]
1535
+ auxiliary_signals.remove(nxentry.auxdata.attrs['signal'])
1536
+ nxentry.auxdata.attrs['auxiliary_signals'] = auxiliary_signals
1037
1537
 
1038
- if signal:
1039
- nxentry.data.attrs['signal'] = signal
1040
- nxentry.data.attrs['auxilliary_signals'] = auxilliary_signals
1041
-
1042
- for data in map_config.all_scalar_data:
1043
- for map_index in np.ndindex(map_config.shape):
1044
- nxentry.data[data.label][map_index] = map_config.get_value(
1045
- data, map_index)
1538
+ nxentry.map_config = dumps(map_config.dict())
1046
1539
 
1047
1540
  return nxentry
1048
1541
 
1542
+ def _read_raw_data_edd(
1543
+ self, map_config, detector_indices, comm, num_scan, offset):
1544
+ """Read the raw EDD data for a given map configuration.
1545
+
1546
+ :param map_config: A valid map configuration.
1547
+ :type map_config: common.models.map.MapConfig
1548
+ :param detector_indices: Indices to the corresponding
1549
+ detector names.
1550
+ :type detector_indices: list[int]
1551
+ :return: The map's raw data, independent dimensions and scalar
1552
+ data
1553
+ :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
1554
+ """
1555
+ # Third party modules
1556
+ try:
1557
+ from mpi4py import MPI
1558
+ from mpi4py.util import dtlib
1559
+ except:
1560
+ pass
1561
+
1562
+ # Local modules
1563
+ from CHAP.utils.general import list_to_string
1564
+
1565
+ if comm is None:
1566
+ num_proc = 1
1567
+ rank = 0
1568
+ else:
1569
+ num_proc = comm.Get_size()
1570
+ rank = comm.Get_rank()
1571
+ if not rank:
1572
+ self.logger.debug(f'Number of processors: {num_proc}')
1573
+ self.logger.debug(f'Number of scans: {num_scan}')
1574
+
1575
+ # Create the shared data buffers
1576
+ # FIX: just one spec scan at this point
1577
+ assert len(map_config.spec_scans) == 1
1578
+ scan = map_config.spec_scans[0]
1579
+ scan_numbers = scan.scan_numbers
1580
+ scanparser = scan.get_scanparser(scan_numbers[0])
1581
+ ddata = scanparser.get_detector_data(detector_indices)
1582
+ spec_scan_shape = scanparser.spec_scan_shape
1583
+ num_dim = np.prod(spec_scan_shape)
1584
+ num_id = len(map_config.independent_dimensions)
1585
+ num_sd = len(map_config.all_scalar_data)
1586
+ if num_proc == 1:
1587
+ assert num_scan == len(scan_numbers)
1588
+ data = np.empty((num_scan, *ddata.shape), dtype=ddata.dtype)
1589
+ independent_dimensions = np.empty(
1590
+ (num_id, num_scan*num_dim), dtype=np.float64)
1591
+ all_scalar_data = np.empty(
1592
+ (num_sd, num_scan*num_dim), dtype=np.float64)
1593
+ else:
1594
+ self.logger.debug(f'Scan offset on processor {rank}: {offset}')
1595
+ self.logger.debug(f'Scan numbers on processor {rank}: '
1596
+ f'{list_to_string(scan_numbers)}')
1597
+ datatype = dtlib.from_numpy_dtype(ddata.dtype)
1598
+ itemsize = datatype.Get_size()
1599
+ if not rank:
1600
+ nbytes = num_scan * np.prod(ddata.shape) * itemsize
1601
+ else:
1602
+ nbytes = 0
1603
+ win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1604
+ buf, itemsize = win.Shared_query(0)
1605
+ assert itemsize == datatype.Get_size()
1606
+ data = np.ndarray(
1607
+ buffer=buf, dtype=ddata.dtype, shape=(num_scan, *ddata.shape))
1608
+ datatype = dtlib.from_numpy_dtype(np.float64)
1609
+ itemsize = datatype.Get_size()
1610
+ if not rank:
1611
+ nbytes = num_id * num_scan * num_dim * itemsize
1612
+ win_id = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1613
+ buf_id, _ = win_id.Shared_query(0)
1614
+ independent_dimensions = np.ndarray(
1615
+ buffer=buf_id, dtype=np.float64,
1616
+ shape=(num_id, num_scan*num_dim))
1617
+ if not rank:
1618
+ nbytes = num_sd * num_scan * num_dim * itemsize
1619
+ win_sd = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1620
+ buf_sd, _ = win_sd.Shared_query(0)
1621
+ all_scalar_data = np.ndarray(
1622
+ buffer=buf_sd, dtype=np.float64,
1623
+ shape=(num_sd, num_scan*num_dim))
1624
+
1625
+ # Read the raw data
1626
+ init = True
1627
+ for scan in map_config.spec_scans:
1628
+ for scan_number in scan.scan_numbers:
1629
+ if init:
1630
+ init = False
1631
+ else:
1632
+ scanparser = scan.get_scanparser(scan_number)
1633
+ assert spec_scan_shape == scanparser.spec_scan_shape
1634
+ ddata = scanparser.get_detector_data(detector_indices)
1635
+ data[offset] = ddata
1636
+ spec_scan_motor_mnes = scanparser.spec_scan_motor_mnes
1637
+ start_dim = offset * num_dim
1638
+ end_dim = start_dim + num_dim
1639
+ if len(spec_scan_shape) == 1:
1640
+ for i, dim in enumerate(map_config.independent_dimensions):
1641
+ v = dim.get_value(
1642
+ scan, scan_number, scan_step_index=-1,
1643
+ relative=False)
1644
+ if dim.name in spec_scan_motor_mnes:
1645
+ independent_dimensions[i][start_dim:end_dim] = v
1646
+ else:
1647
+ independent_dimensions[i][start_dim:end_dim] = \
1648
+ np.repeat(v, spec_scan_shape[0])
1649
+ for i, dim in enumerate(map_config.all_scalar_data):
1650
+ v = dim.get_value(
1651
+ scan, scan_number, scan_step_index=-1,
1652
+ relative=False)
1653
+ #if dim.name in spec_scan_motor_mnes:
1654
+ if dim.data_type == 'scan_column':
1655
+ all_scalar_data[i][start_dim:end_dim] = v
1656
+ else:
1657
+ all_scalar_data[i][start_dim:end_dim] = \
1658
+ np.repeat(v, spec_scan_shape[0])
1659
+ else:
1660
+ for i, dim in enumerate(map_config.independent_dimensions):
1661
+ v = dim.get_value(
1662
+ scan, scan_number, scan_step_index=-1,
1663
+ relative=False)
1664
+ if dim.name == spec_scan_motor_mnes[0]:
1665
+ # Fast motor
1666
+ independent_dimensions[i][start_dim:end_dim] = \
1667
+ np.concatenate((v,)*spec_scan_shape[1])
1668
+ elif dim.name == spec_scan_motor_mnes[1]:
1669
+ # Slow motor
1670
+ independent_dimensions[i][start_dim:end_dim] = \
1671
+ np.repeat(v, spec_scan_shape[0])
1672
+ else:
1673
+ independent_dimensions[i][start_dim:end_dim] = v
1674
+ for i, dim in enumerate(map_config.all_scalar_data):
1675
+ v = dim.get_value(
1676
+ scan, scan_number, scan_step_index=-1,
1677
+ relative=False)
1678
+ if dim.data_type == 'scan_column':
1679
+ all_scalar_data[i][start_dim:end_dim] = v
1680
+ elif dim.data_type == 'smb_par':
1681
+ if dim.name == spec_scan_motor_mnes[0]:
1682
+ # Fast motor
1683
+ all_scalar_data[i][start_dim:end_dim] = \
1684
+ np.concatenate((v,)*spec_scan_shape[1])
1685
+ elif dim.name == spec_scan_motor_mnes[1]:
1686
+ # Slow motor
1687
+ all_scalar_data[i][start_dim:end_dim] = \
1688
+ np.repeat(v, spec_scan_shape[0])
1689
+ else:
1690
+ all_scalar_data[i][start_dim:end_dim] = v
1691
+ else:
1692
+ raise RuntimeError(
1693
+ f'{dim.data_type} in data_type not tested')
1694
+ offset += 1
1695
+
1696
+ return (
1697
+ data.reshape((np.prod(data.shape[:2]), *data.shape[2:])),
1698
+ independent_dimensions, all_scalar_data)
1699
+
1700
+ def _read_raw_data(
1701
+ self, map_config, detector_names, comm, num_scan, offset):
1702
+ """Read the raw data for a given map configuration.
1703
+
1704
+ :param map_config: A valid map configuration.
1705
+ :type map_config: common.models.map.MapConfig
1706
+ :param detector_names: Detector names to include raw data
1707
+ for in the returned NeXus NXentry object,
1708
+ defaults to `None`.
1709
+ :type detector_names: list[str]
1710
+ :return: The map's raw data, independent dimensions and scalar
1711
+ data
1712
+ :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
1713
+ """
1714
+ # Third party modules
1715
+ try:
1716
+ from mpi4py import MPI
1717
+ from mpi4py.util import dtlib
1718
+ except:
1719
+ pass
1720
+
1721
+ # Local modules
1722
+ from CHAP.utils.general import list_to_string
1723
+
1724
+ if comm is None:
1725
+ num_proc = 1
1726
+ rank = 0
1727
+ else:
1728
+ num_proc = comm.Get_size()
1729
+ rank = comm.Get_rank()
1730
+ if not rank:
1731
+ self.logger.debug(f'Number of processors: {num_proc}')
1732
+ self.logger.debug(f'Number of scans: {num_scan}')
1733
+
1734
+ # Create the shared data buffers
1735
+ # FIX: just one spec scan and one detector at this point
1736
+ assert len(map_config.spec_scans) == 1
1737
+ assert len(detector_names) == 1
1738
+ scans = map_config.spec_scans[0]
1739
+ scan_numbers = scans.scan_numbers
1740
+ scanparser = scans.get_scanparser(scan_numbers[0])
1741
+ ddata = scanparser.get_detector_data(detector_names[0])
1742
+ num_dim = ddata.shape[0]
1743
+ num_id = len(map_config.independent_dimensions)
1744
+ num_sd = len(map_config.all_scalar_data)
1745
+ if not num_sd:
1746
+ all_scalar_data = None
1747
+ if num_proc == 1:
1748
+ assert num_scan == len(scan_numbers)
1749
+ data = np.empty((num_scan, *ddata.shape), dtype=ddata.dtype)
1750
+ independent_dimensions = np.empty(
1751
+ (num_scan, num_id, num_dim), dtype=np.float64)
1752
+ if num_sd:
1753
+ all_scalar_data = np.empty(
1754
+ (num_scan, num_sd, num_dim), dtype=np.float64)
1755
+ else:
1756
+ self.logger.debug(f'Scan offset on processor {rank}: {offset}')
1757
+ self.logger.debug(f'Scan numbers on processor {rank}: '
1758
+ f'{list_to_string(scan_numbers)}')
1759
+ datatype = dtlib.from_numpy_dtype(ddata.dtype)
1760
+ itemsize = datatype.Get_size()
1761
+ if not rank:
1762
+ nbytes = num_scan * np.prod(ddata.shape) * itemsize
1763
+ else:
1764
+ nbytes = 0
1765
+ win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1766
+ buf, _ = win.Shared_query(0)
1767
+ data = np.ndarray(
1768
+ buffer=buf, dtype=ddata.dtype, shape=(num_scan, *ddata.shape))
1769
+ datatype = dtlib.from_numpy_dtype(np.float64)
1770
+ itemsize = datatype.Get_size()
1771
+ if not rank:
1772
+ nbytes = num_scan * num_id * num_dim * itemsize
1773
+ else:
1774
+ nbytes = 0
1775
+ win_id = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1776
+ buf_id, _ = win_id.Shared_query(0)
1777
+ independent_dimensions = np.ndarray(
1778
+ buffer=buf_id, dtype=np.float64,
1779
+ shape=(num_scan, num_id, num_dim))
1780
+ if num_sd:
1781
+ if not rank:
1782
+ nbytes = num_scan * num_sd * num_dim * itemsize
1783
+ win_sd = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1784
+ buf_sd, _ = win_sd.Shared_query(0)
1785
+ all_scalar_data = np.ndarray(
1786
+ buffer=buf_sd, dtype=np.float64,
1787
+ shape=(num_scan, num_sd, num_dim))
1788
+
1789
+ # Read the raw data
1790
+ init = True
1791
+ for scans in map_config.spec_scans:
1792
+ for scan_number in scans.scan_numbers:
1793
+ if init:
1794
+ init = False
1795
+ else:
1796
+ scanparser = scans.get_scanparser(scan_number)
1797
+ ddata = scanparser.get_detector_data(detector_names[0])
1798
+ data[offset] = ddata
1799
+ for i, dim in enumerate(map_config.independent_dimensions):
1800
+ if dim.data_type == 'scan_column':
1801
+ independent_dimensions[offset,i] = dim.get_value(
1802
+ #v = dim.get_value(
1803
+ scans, scan_number, scan_step_index=-1,
1804
+ relative=False)[:num_dim]
1805
+ #print(f'\ndim: {dim}\nv {np.asarray(v).shape}: {v}')
1806
+ #independent_dimensions[offset,i] = v[:num_dim]
1807
+ elif dim.data_type in ['smb_par', 'spec_motor']:
1808
+ independent_dimensions[offset,i] = dim.get_value(
1809
+ #v = dim.get_value(
1810
+ scans, scan_number, scan_step_index=-1,
1811
+ relative=False)
1812
+ #print(f'\ndim: {dim}\nv {np.asarray(v).shape}: {v}')
1813
+ #independent_dimensions[offset,i] = v
1814
+ else:
1815
+ raise RuntimeError(
1816
+ f'{dim.data_type} in data_type not tested')
1817
+ for i, dim in enumerate(map_config.all_scalar_data):
1818
+ all_scalar_data[offset,i] = dim.get_value(
1819
+ scans, scan_number, scan_step_index=-1,
1820
+ relative=False)
1821
+ offset += 1
1822
+
1823
+ if num_sd:
1824
+ return (
1825
+ data.reshape((1, np.prod(data.shape[:2]), *data.shape[2:])),
1826
+ np.stack(tuple([independent_dimensions[:,i].flatten()
1827
+ for i in range(num_id)])),
1828
+ np.stack(tuple([all_scalar_data[:,i].flatten()
1829
+ for i in range(num_sd)])))
1830
+ return (
1831
+ data.reshape((1, np.prod(data.shape[:2]), *data.shape[2:])),
1832
+ np.stack(tuple([independent_dimensions[:,i].flatten()
1833
+ for i in range(num_id)])),
1834
+ all_scalar_data)
1835
+
1836
+
1837
+ class MPITestProcessor(Processor):
1838
+ """A test MPI Processor.
1839
+ """
1840
+ def process(self, data, sub_pipeline={}):
1841
+ # Third party modules
1842
+ import mpi4py as mpi4py
1843
+ from mpi4py import MPI
1844
+
1845
+ my_rank = MPI.COMM_WORLD.Get_rank()
1846
+ size = MPI.COMM_WORLD.Get_size()
1847
+ (version, subversion) = MPI.Get_version()
1848
+
1849
+ mpi4py_version = mpi4py.__version__
1850
+
1851
+ if (my_rank == 0):
1852
+ if (size > 1):
1853
+ print('Successful first MPI test executed in parallel on '
1854
+ f'{size} processes using mpi4py version '
1855
+ f'{mpi4py_version}.')
1856
+ if int(mpi4py_version[0]) < 3:
1857
+ print('CAUTION: You are using an mpi4py version '
1858
+ 'below 3.0.0.')
1859
+ else:
1860
+ print('CAUTION: This MPI test is executed only on one MPI '
1861
+ 'process, i.e., sequentially!')
1862
+ print('Your installation supports MPI standard version '
1863
+ f'{version}.{subversion}.')
1864
+ print(f'Finished on processor {my_rank} of {size}')
1865
+
1866
+
1867
+ class MPICollectProcessor(Processor):
1868
+ """A Processor that collects the distributed worker data from
1869
+ MPIMapProcessor on the root node
1870
+ """
1871
+ def process(self, data, comm, root_as_worker=True):
1872
+ # Third party modules
1873
+ from mpi4py import MPI
1874
+
1875
+ num_proc = comm.Get_size()
1876
+ rank = comm.Get_rank()
1877
+ if root_as_worker:
1878
+ data = self.unwrap_pipelinedata(data)[-1]
1879
+ if num_proc > 1:
1880
+ data = comm.gather(data, root=0)
1881
+ else:
1882
+ for n_worker in range(1, num_proc):
1883
+ if rank == n_worker:
1884
+ comm.send(self.unwrap_pipelinedata(data)[-1], dest=0)
1885
+ data = None
1886
+ elif not rank:
1887
+ if n_worker == 1:
1888
+ data = [comm.recv(source=n_worker)]
1889
+ else:
1890
+ data.append(comm.recv(source=n_worker))
1891
+ return data
1892
+
1893
+
1894
+ class MPIMapProcessor(Processor):
1895
+ """A Processor that applies a parallel generic sub-pipeline to
1896
+ a map configuration.
1897
+ """
1898
+ def process(self, data, sub_pipeline={}):
1899
+ # System modules
1900
+ from copy import deepcopy
1901
+
1902
+ # Third party modules
1903
+ from mpi4py import MPI
1904
+
1905
+ # Local modules
1906
+ from CHAP.runner import (
1907
+ RunConfig,
1908
+ run,
1909
+ )
1910
+ from CHAP.common.models.map import (
1911
+ SpecScans,
1912
+ SpecConfig,
1913
+ )
1914
+
1915
+ comm = MPI.COMM_WORLD
1916
+ num_proc = comm.Get_size()
1917
+ rank = comm.Get_rank()
1918
+
1919
+ # Get the map configuration from data
1920
+ map_config = self.get_config(
1921
+ data, 'common.models.map.MapConfig')
1922
+
1923
+ # Create the spec reader configuration for each processor
1924
+ spec_scans = map_config.spec_scans[0]
1925
+ scan_numbers = spec_scans.scan_numbers
1926
+ num_scan = len(scan_numbers)
1927
+ scans_per_proc = num_scan//num_proc
1928
+ n_scan = 0
1929
+ for n_proc in range(num_proc):
1930
+ num = scans_per_proc
1931
+ if n_proc == rank:
1932
+ if rank < num_scan - scans_per_proc*num_proc:
1933
+ num += 1
1934
+ scan_numbers = scan_numbers[n_scan:n_scan+num]
1935
+ n_scan += num
1936
+ spec_config = {
1937
+ 'station': map_config.station,
1938
+ 'experiment_type': map_config.experiment_type,
1939
+ 'spec_scans': [SpecScans(
1940
+ spec_file=spec_scans.spec_file, scan_numbers=scan_numbers)]}
1941
+
1942
+ # Get the run configuration to use for the sub-pipeline
1943
+ run_config = RunConfig(sub_pipeline.get('config', {}), comm)
1944
+ pipeline_config = []
1945
+ for item in sub_pipeline['pipeline']:
1946
+ if isinstance(item, dict):
1947
+ for k, v in deepcopy(item).items():
1948
+ if k.endswith('Reader'):
1949
+ v['config'] = spec_config
1950
+ item[k] = v
1951
+ if num_proc > 1 and k.endswith('Writer'):
1952
+ r, e = os.path.splitext(v['filename'])
1953
+ v['filename'] = f'{r}_{rank}{e}'
1954
+ item[k] = v
1955
+ pipeline_config.append(item)
1956
+
1957
+ # Run the sub-pipeline on each processor
1958
+ return run(
1959
+ pipeline_config, inputdir=run_config.inputdir,
1960
+ outputdir=run_config.outputdir,
1961
+ interactive=run_config.interactive, comm=comm)
1962
+
1963
+
1964
+ class MPISpawnMapProcessor(Processor):
1965
+ """A Processor that applies a parallel generic sub-pipeline to
1966
+ a map configuration by spawning workers processes.
1967
+ """
1968
+ def process(
1969
+ self, data, num_proc=1, root_as_worker=True, collect_on_root=True,
1970
+ sub_pipeline={}):
1971
+ # System modules
1972
+ from copy import deepcopy
1973
+ from tempfile import NamedTemporaryFile
1974
+
1975
+ # Third party modules
1976
+ try:
1977
+ from mpi4py import MPI
1978
+ except:
1979
+ raise ImportError('Unable to import mpi4py')
1980
+ import yaml
1981
+
1982
+ # Local modules
1983
+ from CHAP.runner import (
1984
+ RunConfig,
1985
+ runner,
1986
+ )
1987
+ from CHAP.common.models.map import (
1988
+ SpecScans,
1989
+ SpecConfig,
1990
+ )
1991
+
1992
+ # Get the map configuration from data
1993
+ map_config = self.get_config(
1994
+ data, 'common.models.map.MapConfig')
1995
+
1996
+ # Get the run configuration to use for the sub-pipeline
1997
+ run_config = RunConfig(config=sub_pipeline.get('config', {}))
1998
+
1999
+ # Create the sub-pipeline configuration for each processor
2000
+ spec_scans = map_config.spec_scans[0]
2001
+ scan_numbers = spec_scans.scan_numbers
2002
+ num_scan = len(scan_numbers)
2003
+ scans_per_proc = num_scan//num_proc
2004
+ n_scan = 0
2005
+ pipeline_config = []
2006
+ for n_proc in range(num_proc):
2007
+ num = scans_per_proc
2008
+ if n_proc < num_scan - scans_per_proc*num_proc:
2009
+ num += 1
2010
+ spec_config = {
2011
+ 'station': map_config.station,
2012
+ 'experiment_type': map_config.experiment_type,
2013
+ 'spec_scans': [SpecScans(
2014
+ spec_file=spec_scans.spec_file,
2015
+ scan_numbers=scan_numbers[n_scan:n_scan+num]).__dict__]}
2016
+ sub_pipeline_config = []
2017
+ for item in deepcopy(sub_pipeline['pipeline']):
2018
+ if isinstance(item, dict):
2019
+ for k, v in deepcopy(item).items():
2020
+ if k.endswith('Reader'):
2021
+ v['config'] = spec_config
2022
+ item[k] = v
2023
+ if num_proc > 1 and k.endswith('Writer'):
2024
+ r, e = os.path.splitext(v['filename'])
2025
+ v['filename'] = f'{r}_{n_proc}{e}'
2026
+ item[k] = v
2027
+ sub_pipeline_config.append(item)
2028
+ if collect_on_root and (not root_as_worker or num_proc > 1):
2029
+ sub_pipeline_config += [
2030
+ {'common.MPICollectProcessor': {
2031
+ 'root_as_worker': root_as_worker}}]
2032
+ pipeline_config.append(sub_pipeline_config)
2033
+ n_scan += num
2034
+
2035
+ # Optionally include the root node as a worker node
2036
+ if root_as_worker:
2037
+ first_proc = 1
2038
+ run_config.spawn = 1
2039
+ else:
2040
+ first_proc = 0
2041
+ run_config.spawn = -1
2042
+
2043
+ # Spawn the workers to run the sub-pipeline
2044
+ if num_proc > first_proc:
2045
+ tmp_names = []
2046
+ with NamedTemporaryFile(delete=False) as fp:
2047
+ fp_name = fp.name
2048
+ tmp_names.append(fp_name)
2049
+ with open(fp_name, 'w') as f:
2050
+ yaml.dump(
2051
+ {'config': {'spawn': run_config.spawn}}, f,
2052
+ sort_keys=False)
2053
+ for n_proc in range(first_proc, num_proc):
2054
+ f_name = f'{fp_name}_{n_proc}'
2055
+ tmp_names.append(f_name)
2056
+ with open(f_name, 'w') as f:
2057
+ yaml.dump(
2058
+ {'config': run_config.__dict__,
2059
+ 'pipeline': pipeline_config[n_proc]},
2060
+ f, sort_keys=False)
2061
+ sub_comm = MPI.COMM_SELF.Spawn(
2062
+ 'CHAP', args=[fp_name], maxprocs=num_proc-first_proc)
2063
+ common_comm = sub_comm.Merge(False)
2064
+ if run_config.spawn > 0:
2065
+ # Align with the barrier in RunConfig() on common_comm
2066
+ # called from the spawned main()
2067
+ common_comm.barrier()
2068
+ else:
2069
+ common_comm = None
2070
+
2071
+ # Run the sub-pipeline on the root node
2072
+ if root_as_worker:
2073
+ data = runner(run_config, pipeline_config[0], common_comm)
2074
+ elif collect_on_root:
2075
+ run_config.spawn = 0
2076
+ pipeline_config = [{'common.MPICollectProcessor': {
2077
+ 'root_as_worker': root_as_worker}}]
2078
+ data = runner(run_config, pipeline_config, common_comm)
2079
+ else:
2080
+ # Align with the barrier in run() on common_comm
2081
+ # called from the spawned main()
2082
+ common_comm.barrier()
2083
+ data = None
2084
+
2085
+ # Disconnect spawned workers and cleanup temporary files
2086
+ if num_proc > first_proc:
2087
+ common_comm.barrier()
2088
+ sub_comm.Disconnect()
2089
+ for tmp_name in tmp_names:
2090
+ os.remove(tmp_name)
2091
+
2092
+ return data
2093
+
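The scan-partitioning rule applied above by `MapProcessor`, `MPIMapProcessor` and `MPISpawnMapProcessor` can be summarized in isolation: the scans are split into contiguous blocks of `num_scan // num_proc`, with the first `num_scan % num_proc` workers each taking one extra scan. The helper below is a stand-alone illustration of that arithmetic; `partition_scans` is a hypothetical name, not a CHAP function.

```python
# Stand-alone illustration of the scan-partitioning arithmetic used above.
def partition_scans(scan_numbers, num_proc):
    """Return one contiguous block of scan numbers per worker."""
    num_scan = len(scan_numbers)
    scans_per_proc = num_scan // num_proc
    remainder = num_scan - scans_per_proc * num_proc
    blocks, start = [], 0
    for n_proc in range(num_proc):
        num = scans_per_proc + (1 if n_proc < remainder else 0)
        blocks.append(scan_numbers[start:start + num])
        start += num
    return blocks

print(partition_scans(list(range(1, 11)), 3))
# -> [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10]]
```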
1049
2094
 
1050
2095
  class NexusToNumpyProcessor(Processor):
1051
2096
  """A Processor to convert the default plottable data in a NeXus
@@ -1162,7 +2207,7 @@ class PrintProcessor(Processor):
1162
2207
  """
1163
2208
  print(f'{self.__name__} data :')
1164
2209
  if callable(getattr(data, '_str_tree', None)):
1165
- # If data is likely an NXobject, print its tree
2210
+ # If data is likely a NeXus NXobject, print its tree
1166
2211
  # representation (since NXobjects' str representations are
1167
2212
  # just their nxname)
1168
2213
  print(data._str_tree(attrs=True, recursive=True))
@@ -1172,6 +2217,67 @@ class PrintProcessor(Processor):
1172
2217
  return data
1173
2218
 
1174
2219
 
2220
+ class PyfaiAzimuthalIntegrationProcessor(Processor):
2221
+ """Processor to azimuthally integrate one or more frames of 2d
2222
+ detector data using the
2223
+ [pyFAI](https://pyfai.readthedocs.io/en/v2023.1/index.html)
2224
+ package.
2225
+ """
2226
+ def process(self, data, poni_file, npt, mask_file=None,
2227
+ integrate1d_kwargs=None, inputdir='.'):
2228
+ """Azimuthally integrate the detector data provided and return
2229
+ the result as a dictionary of numpy arrays containing the
2230
+ values of the radial coordinate of the result, the intensities
2231
+ along the radial direction, and the poisson errors for each
2232
+ intensity spectrum.
2233
+
2234
+ :param data: Detector data to integrate.
2235
+ :type data: Union[PipelineData, list[np.ndarray]]
2236
+ :param poni_file: Name of the [pyFAI PONI
2237
+ file](https://pyfai.readthedocs.io/en/v2023.1/glossary.html?highlight=poni%20file#poni-file)
2238
+ containing the detector properties pyFAI needs to perform
2239
+ azimuthal integration.
2240
+ :type poni_file: str
2241
+ :param npt: Number of points in the output pattern.
2242
+ :type npt: int
2243
+ :param mask_file: A file to use for masking the input data.
2244
+ :type mask_file: str
2245
+ :param integrate1d_kwargs: Optional dictionary of keyword
2246
+ arguments to use with
2247
+ [`pyFAI.azimuthalIntegrator.AzimuthalIntegrator.integrate1d`](https://pyfai.readthedocs.io/en/v2023.1/api/pyFAI.html#pyFAI.azimuthalIntegrator.AzimuthalIntegrator.integrate1d). Defaults
2248
+ to `None`.
2249
+ :type integrate1d_kwargs: Optional[dict]
2250
+ :returns: Azimuthal integration results as a dictionary of
2251
+ numpy arrays.
2252
+ """
2253
+ # Third party modules
2254
+ from pyFAI import load
2255
+
2256
+ if not os.path.isabs(poni_file):
2257
+ poni_file = os.path.join(inputdir, poni_file)
2258
+ ai = load(poni_file)
2259
+
2260
+ if mask_file is None:
2261
+ mask = None
2262
+ else:
2263
+ # Third party modules
2264
+ import fabio
2265
+ if not os.path.isabs(mask_file):
2266
+ mask_file = os.path.join(inputdir, mask_file)
2267
+ mask = fabio.open(mask_file).data
2268
+
2269
+ try:
2270
+ det_data = self.unwrap_pipelinedata(data)[0]
2271
+ except Exception:
2272
+ # Fall back to the raw input if it is not wrapped as PipelineData
+ det_data = data
2273
+
2274
+ if integrate1d_kwargs is None:
2275
+ integrate1d_kwargs = {}
2276
+ integrate1d_kwargs['mask'] = mask
2277
+
2278
+ return [ai.integrate1d(d, npt, **integrate1d_kwargs) for d in det_data]
2279
+
2280
+
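For orientation, the core pyFAI call this processor wraps looks like the sketch below; the PONI file name is a placeholder and the frame is random data standing in for a detector image whose shape matches the detector described by the PONI file.

```python
# Hedged sketch of a single azimuthal integration with pyFAI;
# 'detector.poni' is a placeholder and the frame is synthetic data.
import numpy as np
from pyFAI import load

ai = load('detector.poni')             # AzimuthalIntegrator from the PONI file
frame = np.random.random((487, 407))   # stand-in for one detector frame

# integrate1d returns a result holding the radial coordinate, the
# intensity profile, and (with an error model) the per-bin errors.
result = ai.integrate1d(frame, 1000, error_model='poisson', unit='2th_deg')
radial, intensity, sigma = result.radial, result.intensity, result.sigma
```

Passing `mask=...` through `integrate1d_kwargs`, as the processor does, excludes the masked pixels from every bin.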
1175
2281
  class RawDetectorDataMapProcessor(Processor):
1176
2282
  """A Processor to return a map of raw derector data in a
1177
2283
  NeXus NXroot object.
@@ -1200,13 +2306,14 @@ class RawDetectorDataMapProcessor(Processor):
1200
2306
  `Processor`.
1201
2307
 
1202
2308
  :param data: Result of `Reader.read` where at least one item
1203
- has the value `'MapConfig'` for the `'schema'` key.
2309
+ has the value `'common.models.map.MapConfig'` for the
2310
+ `'schema'` key.
1204
2311
  :type data: list[PipelineData]
1205
2312
  :raises Exception: If a valid map config object cannot be
1206
2313
  constructed from `data`.
1207
2314
  :return: A valid instance of the map configuration object with
1208
2315
  field values taken from `data`.
1209
- :rtype: MapConfig
2316
+ :rtype: common.models.map.MapConfig
1210
2317
  """
1211
2318
  # Local modules
1212
2319
  from CHAP.common.models.map import MapConfig
@@ -1216,7 +2323,7 @@ class RawDetectorDataMapProcessor(Processor):
1216
2323
  for item in data:
1217
2324
  if isinstance(item, dict):
1218
2325
  schema = item.get('schema')
1219
- if schema == 'MapConfig':
2326
+ if schema == 'common.models.map.MapConfig':
1220
2327
  map_config = item.get('data')
1221
2328
 
1222
2329
  if not map_config:
@@ -1230,7 +2337,7 @@ class RawDetectorDataMapProcessor(Processor):
1230
2337
  relevant metadata in the form of a NeXus structure.
1231
2338
 
1232
2339
  :param map_config: The map configuration.
1233
- :type map_config: MapConfig
2340
+ :type map_config: common.models.map.MapConfig
1234
2341
  :param detector_name: The detector prefix.
1235
2342
  :type detector_name: str
1236
2343
  :param detector_shape: The shape of detector data for a single
@@ -1344,6 +2451,504 @@ class StrainAnalysisProcessor(Processor):
1344
2451
  return strain_analysis_config
1345
2452
 
1346
2453
 
2454
+ class SetupNXdataProcessor(Processor):
2455
+ """Processor to set up and return an "empty" NeXus representation
2456
+ of a structured dataset. This representation will be an instance
2457
+ of a NeXus NXdata object that has:
2458
+ 1. A NeXus NXfield entry for every coordinate/signal specified.
2459
+ 1. `nxaxes` that are the NeXus NXfield entries for the coordinates
2460
+ and contain the values provided for each coordinate.
2461
+ 1. NeXus NXfield entries of appropriate shape, but containing all
2462
+ zeros, for every signal.
2463
+ 1. Attributes that define the axes, plus any additional attributes
2464
+ specified by the user.
2465
+
2466
+ This `Processor` is most useful as a "setup" step for
2467
+ constucting a representation of / container for a complete dataset
2468
+ that will be filled out in pieces later by
2469
+ `UpdateNXdataProcessor`.
2470
+
2471
+ Examples of use in a `Pipeline` configuration:
2472
+ - With inputs from a previous `PipelineItem` specifically written
2473
+ to provide inputs to this `Processor`:
2474
+ ```yaml
2475
+ config:
2476
+ inputdir: /rawdata/samplename
2477
+ outputdir: /reduceddata/samplename
2478
+ pipeline:
2479
+ - edd.SetupNXdataReader:
2480
+ filename: SpecInput.txt
2481
+ dataset_id: 1
2482
+ - common.SetupNXdataProcessor:
2483
+ nxname: samplename_dataset_1
2484
+ - common.NexusWriter:
2485
+ filename: data.nxs
2486
+ ```
2487
+ - With inputs provided directly though the optional arguments:
2488
+ ```yaml
2489
+ config:
2490
+ outputdir: /reduceddata/samplename
2491
+ pipeline:
2492
+ - common.SetupNXdataProcessor:
2493
+ nxname: your_dataset_name
2494
+ coords:
2495
+ - name: x
2496
+ values: [0.0, 0.5, 1.0]
2497
+ attrs:
2498
+ units: mm
2499
+ yourkey: yourvalue
2500
+ - name: temperature
2501
+ values: [200, 250, 275]
2502
+ attrs:
2503
+ units: Celsius
2504
+ yourotherkey: yourothervalue
2505
+ signals:
2506
+ - name: raw_detector_data
2507
+ shape: [407, 487]
2508
+ attrs:
2509
+ local_name: PIL11
2510
+ foo: bar
2511
+ - name: presample_intensity
2512
+ shape: []
2513
+ attrs:
2514
+ local_name: a3ic0
2515
+ zebra: fish
2516
+ attrs:
2517
+ arbitrary: metadata
2518
+ from: users
2519
+ goes: here
2520
+ - common.NexusWriter:
2521
+ filename: data.nxs
2522
+ ```
2523
+ """
2524
+ def process(self, data, nxname='data',
2525
+ coords=[], signals=[], attrs={}, data_points=[],
2526
+ extra_nxfields=[], duplicates='overwrite'):
2527
+ """Return a NeXus NXdata object that has the requisite axes
2528
+ and NeXus NXfield entries to represent a structured dataset
2529
+ with the properties provided. Properties may be provided either
2530
+ through the `data` argument (from an appropriate `PipelineItem`
2531
+ that immediately precedes this one in a `Pipeline`), or through
2532
+ the `coords`, `signals`, `attrs`, and/or `data_points`
2533
+ arguments. If any of the latter are used, their values will
2534
+ completely override any values for these parameters found from
2535
+ `data`.
2536
+
2537
+ :param data: Data from the previous item in a `Pipeline`.
2538
+ :type data: list[PipelineData]
2539
+ :param nxname: Name for the returned NeXus NXdata object.
2540
+ Defaults to `'data'`.
2541
+ :type nxname: str, optional
2542
+ :param coords: List of dictionaries defining the coordinates
2543
+ of the dataset. Each dictionary must have the keys
2544
+ `'name'` and `'values'`, whose values are the name of the
2545
+ coordinate axis (a string) and all the unique values of
2546
+ that coordinate for the structured dataset (a list of
2547
+ numbers), respectively. A third item in the dictionary is
2548
+ optional, but highly recommended: `'attrs'` may provide a
2549
+ dictionary of attributes to attach to the coordinate axis
2550
+ that assist in in interpreting the returned NeXus NXdata
2551
+ representation of the dataset. It is strongly recommended
2552
+ to provide the units of the values along an axis in the
2553
+ `attrs` dictionary. Defaults to [].
2554
+ :type coords: list[dict[str, object]], optional
2555
+ :param signals: List of dictionaries defining the signals of
2556
+ the dataset. Each dictionary must have the keys `'name'`
2557
+ and `'shape'`, whose values are the name of the signal
2558
+ field (a string) and the shape of the signal's value at
2559
+ each point in the dataset (a list of zero or more
2560
+ integers), respectively. A third item in the dictionary is
2561
+ optional, but highly recommended: `'attrs'` may provide a
2562
+ dictionary of attributes to attach to the signal field that
2563
+ assist in interpreting the returned NeXus NXdata
2564
+ representation of the dataset. It is strongly recommended
2565
+ to provide the units of the signal's values in the `attrs`
2566
+ dictionary. Defaults to [].
2567
+ :type signals: list[dict[str, object]], optional
2568
+ :param attrs: An arbitrary dictionary of attributes to assign
2569
+ to the returned NeXus NXdata object. Defaults to {}.
2570
+ :type attrs: dict[str, object], optional
2571
+ :param data_points: A list of data points to partially (or
2572
+ even entirely) fill out the "empty" signal NeXus NXfields
2573
+ before returning the NeXus NXdata object. Defaults to [].
2574
+ :type data_points: list[dict[str, object]], optional
2575
+ :param extra_nxfields: List "extra" NeXus NXfield's to include that
2576
+ can be described neither as a signal of the dataset, not a
2577
+ dedicated coordinate. This paramteter is good for
2578
+ including "alternate" values for one of the coordinate
2579
+ dimensions -- the same coordinate axis expressed in
2580
+ different units, for instance. Each item in the list
2581
+ shoulde be a dictionary of parameters for the
2582
+ `nexusformat.nexus.NXfield` constructor. Defaults to `[]`.
2583
+ :type extra_nxfields: list[dict[str, object]], optional
2584
+ :param duplicates: Behavior to use if any new data points occur
2585
+ at the same point in the dataset's coordinate space as an
2586
+ existing data point. Allowed values for `duplicates` are:
2587
+ `'overwrite'` and `'block'`. Defaults to `'overwrite'`.
2588
+ :type duplicates: Literal['overwrite', 'block']
2589
+ :returns: A NeXus NXdata object that represents the structured
2590
+ dataset as specified.
2591
+ :rtype: nexusformat.nexus.NXdata
2592
+ """
2593
+ self.nxname = nxname
2594
+
2595
+ self.coords = coords
2596
+ self.signals = signals
2597
+ self.attrs = attrs
2598
+ try:
2599
+ setup_params = self.unwrap_pipelinedata(data)[0]
2600
+ except Exception:
2601
+ setup_params = None
2602
+ if isinstance(setup_params, dict):
2603
+ for a in ('coords', 'signals', 'attrs'):
2604
+ setup_param = setup_params.get(a)
2605
+ if not getattr(self, a) and setup_param:
2606
+ self.logger.info(f'Using input data from pipeline for {a}')
2607
+ setattr(self, a, setup_param)
2608
+ else:
2609
+ self.logger.info(
2610
+ f'Ignoring input data from pipeline for {a}')
2611
+ else:
2612
+ self.logger.warning('Ignoring all input data from pipeline')
2613
+
2614
+ self.shape = tuple(len(c['values']) for c in self.coords)
2615
+
2616
+ self.extra_nxfields = extra_nxfields
2617
+ self._data_points = []
2618
+ self.duplicates = duplicates
2619
+ self.init_nxdata()
2620
+ for d in data_points:
2621
+ self.add_data_point(d)
2622
+
2623
+ return self.nxdata
2624
+
2625
+ def add_data_point(self, data_point):
2626
+ """Add a data point to this dataset.
2627
+ 1. Validate `data_point`.
2628
+ 2. Append `data_point` to `self._data_points`.
2629
+ 3. Update signal `NXfield`s in `self.nxdata`.
2630
+
2631
+ :param data_point: Data point defining a point in the
2632
+ dataset's coordinate space and the new signal values at
2633
+ that point.
2634
+ :type data_point: dict[str, object]
2635
+ :returns: None
2636
+ """
2637
+ self.logger.info(f'Adding data point no. {len(self._data_points)}')
2638
+ self.logger.debug(f'New data point: {data_point}')
2639
+ valid, msg = self.validate_data_point(data_point)
2640
+ if not valid:
2641
+ self.logger.error(f'Cannot add data point: {msg}')
2642
+ else:
2643
+ self._data_points.append(data_point)
2644
+ self.update_nxdata(data_point)
2645
+
2646
+ def validate_data_point(self, data_point):
2647
+ """Return `True` if `data_point` occurs at a valid point in
2648
+ this structured dataset's coordinate space, `False`
2649
+ otherwise. Also validate shapes of signal values and add NaN
2650
+ values for any missing signals.
2651
+
2652
+ :param data_point: Data point defining a point in the
2653
+ dataset's coordinate space and the new signal values at
2654
+ that point.
2655
+ :type data_point: dict[str, object]
2656
+ :returns: Validity of `data_point`, message
2657
+ :rtype: bool, str
2658
+ """
2659
+ # Third party modules
2660
+ import numpy as np
2661
+
2662
+ valid = True
2663
+ msg = ''
2664
+ # Convert all values to numpy types
2665
+ data_point = {k: np.asarray(v) for k, v in data_point.items()}
2666
+ # Ensure data_point defines a specific point in the dataset's
2667
+ # coordinate space
2668
+ if not all(c['name'] in data_point for c in self.coords):
2669
+ valid = False
2670
+ msg = 'Missing coordinate values'
2671
+ # Find & handle any duplicates
2672
+ for i, d in enumerate(self._data_points):
2673
+ is_duplicate = all(data_point[c['name']] == d[c['name']]
+ for c in self.coords)
2674
+ if is_duplicate:
2675
+ if self.duplicates == 'overwrite':
2676
+ self._data_points.pop(i)
2677
+ elif self.duplicates == 'block':
2678
+ valid = False
2679
+ msg = 'Duplicate point will be blocked'
2680
+ # Ensure a value is present for all signals
2681
+ for s in self.signals:
2682
+ if s['name'] not in data_point:
2683
+ data_point[s['name']] = np.full(s['shape'], 0)
2684
+ else:
2685
+ if not data_point[s['name']].shape == tuple(s['shape']):
2686
+ valid = False
2687
+ msg = f'Shape mismatch for signal {s}'
2688
+ return valid, msg
2689
+
2690
+ def init_nxdata(self):
2691
+ """Initialize an empty NeXus NXdata representing this dataset
2692
+ to `self.nxdata`; values for axes' `NXfield`s are filled out,
2693
+ values for signals' `NXfield`s are empty an can be filled out
2694
+ later. Save the empty NeXus NXdata object to the NeXus file.
2695
+ Initialise `self.nxfile` and `self.nxdata_path` with the
2696
+ `NXFile` object and actual nxpath used to save and make updates
2697
+ to the Nexus NXdata object.
2698
+
2699
+ :returns: None
2700
+ """
2701
+ # Third party modules
2702
+ from nexusformat.nexus import NXdata, NXfield
2703
+ import numpy as np
2704
+
2705
+ axes = tuple(NXfield(
2706
+ value=c['values'],
2707
+ name=c['name'],
2708
+ attrs=c.get('attrs')) for c in self.coords)
2709
+ entries = {s['name']: NXfield(
2710
+ value=np.full((*self.shape, *s['shape']), 0),
2711
+ name=s['name'],
2712
+ attrs=s.get('attrs')) for s in self.signals}
2713
+ extra_nxfields = [NXfield(**params) for params in self.extra_nxfields]
2714
+ extra_nxfields = {f.nxname: f for f in extra_nxfields}
2715
+ entries.update(extra_nxfields)
2716
+ self.nxdata = NXdata(
2717
+ name=self.nxname, axes=axes, entries=entries, attrs=self.attrs)
2718
+
2719
+ def update_nxdata(self, data_point):
2720
+ """Update `self.nxdata`'s NXfield values.
2721
+
2722
+ :param data_point: Data point defining a point in the
2723
+ dataset's coordinate space and the new signal values at
2724
+ that point.
2725
+ :type data_point: dict[str, object]
2726
+ :returns: None
2727
+ """
2728
+ index = self.get_index(data_point)
2729
+ for s in self.signals:
2730
+ if s['name'] in data_point:
2731
+ self.nxdata[s['name']][index] = data_point[s['name']]
2732
+
2733
+ def get_index(self, data_point):
2734
+ """Return a tuple representing the array index of `data_point`
2735
+ in the coordinate space of the dataset.
2736
+
2737
+ :param data_point: Data point defining a point in the
2738
+ dataset's coordinate space.
2739
+ :type data_point: dict[str, object]
2740
+ :returns: Multi-dimensional index of `data_point` in the
2741
+ dataset's coordinate space.
2742
+ :rtype: tuple
2743
+ """
2744
+ return tuple(c['values'].index(data_point[c['name']]) \
2745
+ for c in self.coords)
2746
+
2747
+
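For reference, the container this processor builds can be reproduced directly with `nexusformat`: each coordinate becomes an axis `NXfield` holding its values, and each signal becomes a zero-filled `NXfield` whose leading dimensions match the coordinate grid. A minimal sketch with invented coordinate and signal names:

```python
# Hedged sketch of the "empty" NXdata container this processor sets up;
# the coordinate and signal definitions below are illustrative only.
import numpy as np
from nexusformat.nexus import NXdata, NXfield

coords = [
    {'name': 'x', 'values': [0.0, 0.5, 1.0], 'attrs': {'units': 'mm'}},
    {'name': 'temperature', 'values': [200, 250, 275],
     'attrs': {'units': 'Celsius'}},
]
signals = [
    {'name': 'raw_detector_data', 'shape': [407, 487]},
    {'name': 'presample_intensity', 'shape': []},
]

grid_shape = tuple(len(c['values']) for c in coords)
axes = tuple(
    NXfield(value=c['values'], name=c['name'], attrs=c.get('attrs'))
    for c in coords)
entries = {
    s['name']: NXfield(value=np.zeros((*grid_shape, *s['shape'])),
                       name=s['name'])
    for s in signals}
nxdata = NXdata(name='data', axes=axes, entries=entries)

# Filling in one data point amounts to indexing a signal field at the
# grid index of the point's coordinates:
index = (1, 2)                                  # x=0.5, temperature=275
nxdata['presample_intensity'][index] = 1234.5
```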
2748
+ class UpdateNXdataProcessor(Processor):
2749
+ """Processor to fill in part(s) of a NeXus NXdata representing a
2750
+ structured dataset that's already been written to a NeXus file.
2751
+
2752
+ This Processor is most useful as an "update" step for a NeXus
2753
+ NXdata object created by `common.SetupNXdataProcessor`, and is
2754
+ easiest to use in a `Pipeline` immediately after another
2755
+ `PipelineItem` designed specifically to return a value that can
2756
+ be used as input to this `Processor`.
2757
+
2758
+ Example of use in a `Pipeline` configuration:
2759
+ ```yaml
2760
+ config:
2761
+ inputdir: /rawdata/samplename
2762
+ pipeline:
2763
+ - edd.UpdateNXdataReader:
2764
+ spec_file: spec.log
2765
+ scan_number: 1
2766
+ - common.SetupNXdataProcessor:
2767
+ nxfilename: /reduceddata/samplename/data.nxs
2768
+ nxdata_path: /entry/samplename_dataset_1
2769
+ ```
2770
+ """
2771
+
2772
+ def process(self, data, nxfilename, nxdata_path, data_points=[],
2773
+ allow_approximate_coordinates=True):
2774
+ """Write new data points to the signal fields of an existing
2775
+ NeXus NXdata object representing a structured dataset in a NeXus
2776
+ file. Return the list of data points used to update the
2777
+ dataset.
2778
+
2779
+ :param data: Data from the previous item in a `Pipeline`. May
2780
+ contain a list of data points that will extend the list of
2781
+ data points optionally provided with the `data_points`
2782
+ argument.
2783
+ :type data: list[PipelineData]
2784
+ :param nxfilename: Name of the NeXus file containing the
2785
+ NeXus NXdata object to update.
2786
+ :type nxfilename: str
2787
+ :param nxdata_path: The path to the NeXus NXdata object to
2788
+ update in the file.
2789
+ :type nxdata_path: str
2790
+ :param data_points: List of data points, each one a dictionary
2791
+ whose keys are the names of the coordinates and signals, and
2792
+ whose values are the values of each coordinate / signal at
2793
+ a single point in the dataset. Defaults to [].
2794
+ :type data_points: list[dict[str, object]]
2795
+ :param allow_approximate_coordinates: Parameter to allow the
2796
+ nearest existing match for the new data points'
2797
+ coordinates to be used if an exact match cannot be found
2798
+ (sometimes this is due simply to differences in rounding
2799
+ conventions). Defaults to True.
2800
+ :type allow_approximate_coordinates: bool, optional
2801
+ :returns: Complete list of data points used to update the dataset.
2802
+ :rtype: list[dict[str, object]]
2803
+ """
2804
+ # Third party modules
2805
+ from nexusformat.nexus import NXFile
2806
+ import numpy as np
2807
+
2808
+ _data_points = self.unwrap_pipelinedata(data)[0]
2809
+ # Copy to avoid extending the shared default argument in place
+ data_points = list(data_points)
+ if isinstance(_data_points, list):
2809
+ data_points.extend(_data_points)
2811
+ self.logger.info(f'Updating {len(data_points)} data points')
2812
+
2813
+ nxfile = NXFile(nxfilename, 'rw')
2814
+ nxdata = nxfile.readfile()[nxdata_path]
2815
+ axes_names = [a.nxname for a in nxdata.nxaxes]
2816
+
2817
+ data_points_used = []
2818
+ for i, d in enumerate(data_points):
2819
+ # Verify that the data point contains a value for all
2820
+ # coordinates in the dataset.
2821
+ if not all(a in d for a in axes_names):
2822
+ self.logger.error(
2823
+ f'Data point {i} is missing a value for at least one '
2824
+ + f'axis. Skipping. Axes are: {", ".join(axes_names)}')
2825
+ continue
2826
+ self.logger.info(
2827
+ f'Coordinates for data point {i}: '
2828
+ + ', '.join([f'{a}={d[a]}' for a in axes_names]))
2829
+ # Get the index of the data point in the dataset based on
2830
+ # its values for each coordinate.
2831
+ try:
2832
+ index = tuple(np.where(a.nxdata == d[a.nxname])[0][0] \
2833
+ for a in nxdata.nxaxes)
2834
+ except:
2835
+ if allow_approximate_coordinates:
2836
+ try:
2837
+ index = tuple(
2838
+ np.argmin(np.abs(a.nxdata - d[a.nxname])) \
2839
+ for a in nxdata.nxaxes)
2840
+ self.logger.warning(
2841
+ f'Nearest match for coordinates of data point {i}: '
2842
+ + ', '.join(
2843
+ [f'{a.nxname}={a[_i]}' \
2844
+ for _i, a in zip(index, nxdata.nxaxes)]))
2845
+ except:
2846
+ self.logger.error(
2847
+ f'Cannot get the index of data point {i}. '
2848
+ + f'Skipping.')
2849
+ continue
2850
+ else:
2851
+ self.logger.error(
2852
+ f'Cannot get the index of data point {i}. Skipping.')
2853
+ continue
2854
+ self.logger.info(f'Index of data point {i}: {index}')
2855
+ # Update the signals contained in this data point at the
2856
+ # proper index in the dataset's signal `NXfield`s
2857
+ for k, v in d.items():
2858
+ if k in axes_names:
2859
+ continue
2860
+ try:
2861
+ nxfile.writevalue(
2862
+ os.path.join(nxdata_path, k), np.asarray(v), index)
2863
+ except Exception as e:
2864
+ self.logger.error(
2865
+ f'Error updating signal {k} for new data point '
2866
+ + f'{i} (dataset index {index}): {e}')
2867
+ data_points_used.append(d)
2868
+
2869
+ nxfile.close()
2870
+
2871
+ return data_points_used
2872
+
2873
+
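The index lookup above amounts to matching each new point's coordinate values against the stored axis arrays, falling back to the nearest stored value when the exact comparison fails (e.g. because of rounding). A self-contained sketch of that matching logic with invented values:

```python
# Hedged sketch of the exact-then-nearest coordinate matching used above;
# the axis arrays and the data point below are invented.
import numpy as np

axes = {'x': np.array([0.0, 0.5, 1.0]),
        'temperature': np.array([200.0, 250.0, 275.0])}
point = {'x': 0.4999999, 'temperature': 250.0, 'intensity': 42.0}

index = []
for name, values in axes.items():
    exact = np.where(values == point[name])[0]
    if exact.size:
        index.append(int(exact[0]))
    else:
        # No exact match (rounding differences): use the nearest value.
        index.append(int(np.argmin(np.abs(values - point[name]))))
index = tuple(index)
print(index)  # (1, 1)
```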
2874
+ class NXdataToDataPointsProcessor(Processor):
2875
+ """Transform a NeXus NXdata object into a list of dictionaries.
2876
+ Each dictionary represents a single data point in the coordinate
2877
+ space of the dataset. The keys are the names of the signals and
2878
+ axes in the dataset, and the values are a single scalar value (in
2879
+ the case of axes) or the value of the signal at that point in the
2880
+ coordinate space of the dataset (in the case of signals -- this
2881
+ means that values for signals may be any shape, depending on the
2882
+ shape of the signal itself).
2883
+
2884
+ Example of use in a pipeline configuration:
2885
+ ```yaml
2886
+ config:
2887
+ inputdir: /reduceddata/samplename
2888
+ pipeline:
+ - common.NXdataReader:
2889
+ name: data
2890
+ axes_names:
2891
+ - x
2892
+ - y
2893
+ signal_name: z
2894
+ nxfield_params:
2895
+ - filename: data.nxs
2896
+ nxpath: entry/data/x
2897
+ slice_params:
2898
+ - step: 2
2899
+ - filename: data.nxs
2900
+ nxpath: entry/data/y
2901
+ slice_params:
2902
+ - step: 2
2903
+ - filename: data.nxs
2904
+ nxpath: entry/data/z
2905
+ slice_params:
2906
+ - step: 2
2907
+ - step: 2
2908
+ - common.NXdataToDataPointsProcessor
2909
+ - common.UpdateNXdataProcessor:
2910
+ nxfilename: /reduceddata/samplename/sparsedata.nxs
2911
+ nxdata_path: /entry/data
2912
+ ```
2913
+ """
2914
+ def process(self, data):
2915
+ """Return a list of dictionaries representing the coordinate
2916
+ and signal values at every point in the dataset provided.
2917
+
2918
+ :param data: Input pipeline data containing a NeXus NXdata
2919
+ object.
2920
+ :type data: list[PipelineData]
2921
+ :returns: List of all data points in the dataset.
2922
+ :rtype: list[dict[str,object]]
2923
+ """
2924
+ # Third party modules
2925
+ import numpy as np
2926
+
2927
+ nxdata = self.unwrap_pipelinedata(data)[0]
2928
+
2929
+ data_points = []
2930
+ axes_names = [a.nxname for a in nxdata.nxaxes]
2931
+ self.logger.info(f'Dataset axes: {axes_names}')
2932
+ dataset_shape = tuple([a.size for a in nxdata.nxaxes])
2933
+ self.logger.info(f'Dataset shape: {dataset_shape}')
2934
+ signal_names = [k for k, v in nxdata.entries.items() \
2935
+ if not k in axes_names \
2936
+ and v.shape[:len(dataset_shape)] == dataset_shape]
2937
+ self.logger.info(f'Dataset signals: {signal_names}')
2938
+ other_fields = [k for k, v in nxdata.entries.items() \
2939
+ if not k in axes_names + signal_names]
2940
+ if len(other_fields) > 0:
2941
+ self.logger.warning(
2942
+ 'Ignoring the following fields that cannot be interpreted as '
2943
+ + f'either dataset coordinates or signals: {other_fields}')
2944
+ for i in np.ndindex(dataset_shape):
2945
+ data_points.append({**{a: nxdata[a][_i] \
2946
+ for a, _i in zip(axes_names, i)},
2947
+ **{s: nxdata[s].nxdata[i] \
2948
+ for s in signal_names}})
2949
+ return data_points
2950
+
2951
+
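Flattening a gridded dataset into per-point dictionaries reduces to iterating `numpy.ndindex` over the coordinate grid, taking one scalar per axis and one slice per signal. A small sketch with plain arrays standing in for the NXdata fields:

```python
# Hedged sketch of flattening a 2 x 3 grid into per-point dictionaries;
# the arrays are invented stand-ins for NXdata axes and a signal.
import numpy as np

x = np.array([0.0, 1.0])            # first axis
y = np.array([10.0, 20.0, 30.0])    # second axis
z = np.arange(6).reshape(2, 3)      # signal defined on the (x, y) grid

data_points = [
    {'x': x[i[0]], 'y': y[i[1]], 'z': z[i]}
    for i in np.ndindex(z.shape)]

print(data_points[4]['x'], data_points[4]['y'], data_points[4]['z'])  # 1.0 20.0 4
```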
1347
2952
  class XarrayToNexusProcessor(Processor):
1348
2953
  """A Processor to convert the data in an `xarray` structure to a
1349
2954
  NeXus NXdata object.
@@ -1394,3 +2999,30 @@ if __name__ == '__main__':
1394
2999
  from CHAP.processor import main
1395
3000
 
1396
3001
  main()
3002
+
3003
+
3004
+ class SumProcessor(Processor):
3005
+ """A Processor to sum the data in a NeXus NXobject, given a set of
3006
+ nxpaths
3007
+ """
3008
+ def process(self, data):
3009
+ """Return the summed data array
3010
+
3011
+ :param data:
3012
+ :type data:
3013
+ :return: The summed data.
3014
+ :rtype: numpy.ndarray
3015
+ """
3016
+ from copy import deepcopy
3017
+
3018
+ nxentry, nxpaths = self.unwrap_pipelinedata(data)[-1]
3019
+ if len(nxpaths) == 1:
3020
+ return nxentry[nxpaths[0]]
3021
+ sum_data = deepcopy(nxentry[nxpaths[0]])
3022
+ for nxpath in nxpaths[1:]:
3023
+ nxdata = nxentry[nxpath]
3024
+ for entry in nxdata.entries:
3025
+ sum_data[entry] += nxdata[entry]
3026
+
3027
+ return sum_data
3028
+
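A quick sketch of the summing pattern applied above, using two small NXdata groups with matching entries (names and values invented):

```python
# Hedged sketch of summing matching entries across NXdata groups, as the
# processor does for the groups found at its nxpaths; data are invented.
from copy import deepcopy

import numpy as np
from nexusformat.nexus import NXdata, NXfield

det_a = NXdata(name='det_a', entries={
    'counts': NXfield(np.array([1, 2, 3]), name='counts')})
det_b = NXdata(name='det_b', entries={
    'counts': NXfield(np.array([10, 20, 30]), name='counts')})

total = deepcopy(det_a)
for entry in det_b.entries:
    total[entry] += det_b[entry]
print(total.counts.nxdata)  # [11 22 33]
```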