ChessAnalysisPipeline 0.0.15-py3-none-any.whl → 0.0.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of ChessAnalysisPipeline might be problematic.

CHAP/common/processor.py CHANGED
@@ -8,6 +8,9 @@ Description: Module for Processors used in multiple experiment-specific
8
8
  workflows.
9
9
  """
10
10
 
11
+ # System modules
12
+ import os
13
+
11
14
  # Third party modules
12
15
  import numpy as np
13
16
 
@@ -58,12 +61,6 @@ class AnimationProcessor(Processor):
58
61
  :return: The matplotlib animation.
59
62
  :rtype: matplotlib.animation.ArtistAnimation
60
63
  """
61
- # System modules
62
- from os.path import (
63
- isabs,
64
- join,
65
- )
66
-
67
64
  # Third party modules
68
65
  import matplotlib.animation as animation
69
66
  import matplotlib.pyplot as plt
@@ -251,15 +248,7 @@ class BinarizeProcessor(Processor):
251
248
  that of the input dataset.
252
249
  :rtype: typing.Union[numpy.ndarray, nexusformat.nexus.NXobject]
253
250
  """
254
- # System modules
255
- from os.path import join as os_join
256
- from os.path import relpath
257
-
258
- # Local modules
259
- from CHAP.utils.general import (
260
- is_int,
261
- nxcopy,
262
- )
251
+ # Third party modules
263
252
  from nexusformat.nexus import (
264
253
  NXdata,
265
254
  NXfield,
@@ -268,6 +257,12 @@ class BinarizeProcessor(Processor):
268
257
  nxsetconfig,
269
258
  )
270
259
 
260
+ # Local modules
261
+ from CHAP.utils.general import (
262
+ is_int,
263
+ nxcopy,
264
+ )
265
+
271
266
  if method not in [
272
267
  'CHAP', 'manual', 'otsu', 'yen', 'isodata', 'minimum']:
273
268
  raise ValueError(f'Invalid parameter method ({method})')
@@ -345,19 +340,21 @@ class BinarizeProcessor(Processor):
345
340
  exclude_nxpaths = []
346
341
  if nxdefault is not None:
347
342
  exclude_nxpaths.append(
348
- os_join(relpath(nxdefault.nxpath, dataset.nxpath)))
343
+ os.path.join(os.path.relpath(
344
+ nxdefault.nxpath, dataset.nxpath)))
349
345
  if remove_original_data:
350
346
  if (nxdefault is None
351
347
  or nxdefault.nxpath != nxdata.nxpath):
352
- relpath_nxdata = relpath(nxdata.nxpath, dataset.nxpath)
348
+ relpath_nxdata = os.path.relpath(
349
+ nxdata.nxpath, dataset.nxpath)
353
350
  keys = list(nxdata.keys())
354
351
  keys.remove(nxsignal.nxname)
355
352
  for axis in nxdata.axes:
356
353
  keys.remove(axis)
357
354
  if len(keys):
358
355
  raise RuntimeError('Not tested yet')
359
- exclude_nxpaths.append(os_join(
360
- relpath(nxsignal.nxpath, dataset.nxpath)))
356
+ exclude_nxpaths.append(os.path.join(
357
+ os.path.relpath(nxsignal.nxpath, dataset.nxpath)))
361
358
  elif relpath_nxdata == '.':
362
359
  exclude_nxpaths.append(nxsignal.nxname)
363
360
  if dataset.nxclass != 'NXdata':
@@ -374,11 +371,11 @@ class BinarizeProcessor(Processor):
374
371
  keys.remove(axis)
375
372
  if len(keys):
376
373
  raise RuntimeError('Not tested yet')
377
- exclude_nxpaths.append(os_join(
378
- relpath(nxsignal.nxpath, dataset.nxpath)))
374
+ exclude_nxpaths.append(os.path.join(
375
+ os.path.relpath(nxsignal.nxpath, dataset.nxpath)))
379
376
  else:
380
- exclude_nxpaths.append(os_join(
381
- relpath(nxgroup.nxpath, dataset.nxpath)))
377
+ exclude_nxpaths.append(os.path.join(
378
+ os.path.relpath(nxgroup.nxpath, dataset.nxpath)))
382
379
  nxobject = nxcopy(dataset, exclude_nxpaths=exclude_nxpaths)
383
380
 
384
381
  # Get a histogram of the data
@@ -572,7 +569,8 @@ class BinarizeProcessor(Processor):
572
569
  nxdata = nxentry[name].data
573
570
  nxentry.data = NXdata(
574
571
  NXlink(nxdata.nxsignal.nxpath),
575
- [NXlink(os_join(nxdata.nxpath, axis)) for axis in nxdata.axes])
572
+ [NXlink(os.path.join(nxdata.nxpath, axis))
573
+ for axis in nxdata.axes])
576
574
  nxentry.data.set_default()
577
575
  return nxobject
578
576
 
@@ -833,9 +831,9 @@ class ImageProcessor(Processor):
833
831
  self, data, vmin=None, vmax=None, axis=0, index=None,
834
832
  coord=None, interactive=False, save_figure=True, outputdir='.',
835
833
  filename='image.png'):
836
- """Plot and/or save an image (slice) from a NeXus NXobject object with
837
- a default data path contained in `data` and return the NeXus NXdata
838
- data object.
834
+ """Plot and/or save an image (slice) from a NeXus NXobject
835
+ object with a default data path contained in `data` and return
836
+ the NeXus NXdata data object.
839
837
 
840
838
  :param data: Input data.
841
839
  :type data: list[PipelineData]
@@ -867,12 +865,6 @@ class ImageProcessor(Processor):
867
865
  :return: The input data object.
868
866
  :rtype: nexusformat.nexus.NXdata
869
867
  """
870
- # System modules
871
- from os.path import (
872
- isabs,
873
- join,
874
- )
875
-
876
868
  # Third party modules
877
869
  import matplotlib.pyplot as plt
878
870
 
@@ -888,8 +880,8 @@ class ImageProcessor(Processor):
888
880
  raise ValueError(f'Invalid parameter outputdir ({outputdir})')
889
881
  if not isinstance(filename, str):
890
882
  raise ValueError(f'Invalid parameter filename ({filename})')
891
- if not isabs(filename):
892
- filename = join(outputdir, filename)
883
+ if not os.path.isabs(filename):
884
+ filename = os.path.join(outputdir, filename)
893
885
 
894
886
  # Get the default Nexus NXdata object
895
887
  data = self.unwrap_pipelinedata(data)[0]
@@ -1045,8 +1037,9 @@ class IntegrateMapProcessor(Processor):
1045
1037
  containing a map of the integrated detector data requested.
1046
1038
 
1047
1039
  :param data: Input data, containing at least one item
1048
- with the value `'MapConfig'` for the `'schema'` key, and at
1049
- least one item with the value `'IntegrationConfig'` for the
1040
+ with the value `'common.models.map.MapConfig'` for the
1041
+ `'schema'` key, and at least one item with the value
1042
+ `'common.models.integration.IntegrationConfig'` for the
1050
1043
  `'schema'` key.
1051
1044
  :type data: list[PipelineData]
1052
1045
  :return: Integrated data and process metadata.
@@ -1064,10 +1057,11 @@ class IntegrateMapProcessor(Processor):
1064
1057
  """Use a `MapConfig` and `IntegrationConfig` to construct a
1065
1058
  NeXus NXprocess object.
1066
1059
 
1067
- :param map_config: A valid map configuration.
1068
- :type map_config: MapConfig
1069
- :param integration_config: A valid integration configuration
1070
- :type integration_config: IntegrationConfig.
1060
+ :param map_config: A valid map configuration.
1061
+ :type map_config: common.models.map.MapConfig
1062
+ :param integration_config: A valid integration configuration.
1063
+ :type integration_config:
1064
+ common.models.integration.IntegrationConfig
1071
1065
  :return: The integrated detector data and metadata.
1072
1066
  :rtype: nexusformat.nexus.NXprocess
1073
1067
  """
@@ -1120,7 +1114,7 @@ class IntegrateMapProcessor(Processor):
1120
1114
  *map_config.dims,
1121
1115
  *integration_config.integrated_data_dims
1122
1116
  )
1123
- for i, dim in enumerate(map_config.independent_dimensions[::-1]):
1117
+ for i, dim in enumerate(map_config.independent_dimensions):
1124
1118
  nxprocess.data[dim.label] = NXfield(
1125
1119
  value=map_config.coords[dim.label],
1126
1120
  units=dim.units,
@@ -1150,7 +1144,7 @@ class IntegrateMapProcessor(Processor):
1150
1144
  value=np.empty(
1151
1145
  (*tuple(
1152
1146
  [len(coord_values) for coord_name, coord_values
1153
- in map_config.coords.items()][::-1]),
1147
+ in map_config.coords.items()]),
1154
1148
  *integration_config.integrated_data_shape)),
1155
1149
  units='a.u',
1156
1150
  attrs={'long_name':'Intensity (a.u)'})
@@ -1207,49 +1201,256 @@ class MapProcessor(Processor):
1207
1201
  NXentry object representing that map's metadata and any
1208
1202
  scalar-valued raw data requested by the supplied map configuration.
1209
1203
  """
1210
- def process(self, data, detector_names=[]):
1204
+ def process(
1205
+ self, data, config=None, detector_names=None, num_proc=1,
1206
+ comm=None, inputdir=None):
1211
1207
  """Process the output of a `Reader` that contains a map
1212
1208
  configuration and returns a NeXus NXentry object representing
1213
1209
  the map.
1214
1210
 
1215
1211
  :param data: Result of `Reader.read` where at least one item
1216
- has the value `'MapConfig'` for the `'schema'` key.
1212
+ has the value `'common.models.map.MapConfig'` for the
1213
+ `'schema'` key.
1217
1214
  :type data: list[PipelineData]
1218
- :param detector_names: Detector prefixes to include raw data
1219
- for in the returned NeXus NXentry object, defaults to `[]`.
1220
- :type detector_names: list[str], optional
1215
+ :param config: Initialization parameters for an instance of
1216
+ common.models.map.MapConfig, defaults to `None`.
1217
+ :type config: dict, optional
1218
+ :param detector_names: Detector names/prefixes to include raw
1219
+ data for in the returned NeXus NXentry object,
1220
+ defaults to `None`.
1221
+ :type detector_names: Union(int, str, list[int], list[str]),
1222
+ optional
1223
+ :param num_proc: Number of processors used to read map,
1224
+ defaults to `1`.
1225
+ :type num_proc: int, optional
1221
1226
  :return: Map data and metadata.
1222
1227
  :rtype: nexusformat.nexus.NXentry
1223
1228
  """
1229
+ # System modules
1230
+ from copy import deepcopy
1231
+ import logging
1232
+ from tempfile import NamedTemporaryFile
1233
+
1234
+ # Third party modules
1235
+ import yaml
1236
+
1224
1237
  # Local modules
1225
- from CHAP.utils.general import string_to_list
1226
- if isinstance(detector_names, str):
1238
+ from CHAP.runner import (
1239
+ RunConfig,
1240
+ runner,
1241
+ )
1242
+ from CHAP.utils.general import (
1243
+ is_str_series,
1244
+ string_to_list,
1245
+ )
1246
+
1247
+ # Get the validated map configuration
1248
+ try:
1249
+ map_config = self.get_config(
1250
+ data, 'common.models.map.MapConfig', inputdir=inputdir)
1251
+ except Exception as data_exc:
1252
+ self.logger.info('No valid Map configuration in input pipeline '
1253
+ 'data, using config parameter instead.')
1227
1254
  try:
1228
- detector_names = [
1229
- str(v) for v in string_to_list(
1230
- detector_names, raise_error=True)]
1255
+ # Local modules
1256
+ from CHAP.common.models.map import MapConfig
1257
+
1258
+ map_config = MapConfig(**config, inputdir=inputdir)
1259
+ except Exception as dict_exc:
1260
+ raise RuntimeError from dict_exc
1261
+
1262
+ # Validate the number of processors
1263
+ if not isinstance(num_proc, int):
1264
+ self.logger.warning('Ignoring invalid parameter num_proc '
1265
+ f'({num_proc}), running serially')
1266
+ num_proc = 1
1267
+ elif num_proc > 1:
1268
+ try:
1269
+ # System modules
1270
+ from os import cpu_count
1271
+
1272
+ # Third party modules
1273
+ from mpi4py import MPI
1274
+
1275
+ if num_proc > cpu_count():
1276
+ self.logger.warning(
1277
+ f'The requested number of processors ({num_proc}) '
1278
+ 'exceeds the maximum number of processors '
1279
+ f'({cpu_count()}): reset it to {cpu_count()}')
1280
+ num_proc = cpu_count()
1231
1281
  except:
1282
+ self.logger.warning('Unable to load mpi4py, running serially')
1283
+ num_proc = 1
1284
+
1285
+ # Validate the detector names/prefixes
1286
+ if map_config.experiment_type == 'EDD':
1287
+ if detector_names is None:
1288
+ detector_indices = None
1289
+ else:
1290
+ # Local modules
1291
+ from CHAP.utils.general import is_str_series
1292
+
1293
+ if isinstance(detector_names, int):
1294
+ detector_names = [str(detector_names)]
1295
+ elif isinstance(detector_names, str):
1296
+ try:
1297
+ detector_names = [
1298
+ str(v) for v in string_to_list(
1299
+ detector_names, raise_error=True)]
1300
+ except:
1301
+ raise ValueError('Invalid parameter detector_names '
1302
+ f'({detector_names})')
1303
+ else:
1304
+ detector_names = [str(v) for v in detector_names]
1305
+ detector_indices = [int(name) for name in detector_names]
1306
+ else:
1307
+ if detector_names is None:
1308
+ raise ValueError(
1309
+ 'Missing "detector_names" parameter')
1310
+ if isinstance(detector_names, str):
1311
+ detector_names = [detector_names]
1312
+ if not is_str_series(detector_names, log=False):
1232
1313
  raise ValueError(
1233
- f'Invalid parameter detector_names ({detector_names})')
1234
- map_config = self.get_config(data, 'common.models.map.MapConfig')
1235
- nxentry = self.__class__.get_nxentry(map_config, detector_names)
1314
+ f'Invalid "detector_names" parameter ({detector_names})')
1315
+
1316
+ # Create the sub-pipeline configuration for each processor
1317
+ # FIX: catered to EDD with one spec scan
1318
+ assert len(map_config.spec_scans) == 1
1319
+ spec_scans = map_config.spec_scans[0]
1320
+ scan_numbers = spec_scans.scan_numbers
1321
+ num_scan = len(scan_numbers)
1322
+ if num_scan < num_proc:
1323
+ self.logger.warning(
1324
+ f'The requested number of processors ({num_proc}) exceeds '
1325
+ f'the number of scans ({num_scan}): reset it to {num_scan}')
1326
+ num_proc = num_scan
1327
+ if num_proc == 1:
1328
+ common_comm = comm
1329
+ offsets = [0]
1330
+ else:
1331
+ scans_per_proc = num_scan//num_proc
1332
+ num = scans_per_proc
1333
+ if num_scan - scans_per_proc*num_proc > 0:
1334
+ num += 1
1335
+ spec_scans.scan_numbers = scan_numbers[:num]
1336
+ n_scan = num
1337
+ pipeline_config = []
1338
+ offsets = [0]
1339
+ for n_proc in range(1, num_proc):
1340
+ num = scans_per_proc
1341
+ if n_proc < num_scan - scans_per_proc*num_proc:
1342
+ num += 1
1343
+ config = deepcopy(map_config.dict())
1344
+ config['spec_scans'][0]['scan_numbers'] = \
1345
+ scan_numbers[n_scan:n_scan+num]
1346
+ pipeline_config.append(
1347
+ [{'common.MapProcessor': {
1348
+ 'config': config, 'detector_names': detector_names}}])
1349
+ offsets.append(n_scan)
1350
+ n_scan += num
1351
+
1352
+ # Spawn the workers to run the sub-pipeline
1353
+ run_config = RunConfig(
1354
+ config={'log_level': logging.getLevelName(self.logger.level),
1355
+ 'spawn': 1})
1356
+ tmp_names = []
1357
+ with NamedTemporaryFile(delete=False) as fp:
1358
+ fp_name = fp.name
1359
+ tmp_names.append(fp_name)
1360
+ with open(fp_name, 'w') as f:
1361
+ yaml.dump({'config': {'spawn': 1}}, f, sort_keys=False)
1362
+ for n_proc in range(1, num_proc):
1363
+ f_name = f'{fp_name}_{n_proc}'
1364
+ tmp_names.append(f_name)
1365
+ with open(f_name, 'w') as f:
1366
+ yaml.dump(
1367
+ {'config': run_config.__dict__,
1368
+ 'pipeline': pipeline_config[n_proc-1]},
1369
+ f, sort_keys=False)
1370
+ sub_comm = MPI.COMM_SELF.Spawn(
1371
+ 'CHAP', args=[fp_name], maxprocs=num_proc-1)
1372
+ common_comm = sub_comm.Merge(False)
1373
+ # Align with the barrier in RunConfig() on common_comm
1374
+ # called from the spawned main()
1375
+ common_comm.barrier()
1376
+ # Align with the barrier in run() on common_comm
1377
+ # called from the spawned main()
1378
+ common_comm.barrier()
1379
+
1380
+ if common_comm is None:
1381
+ num_proc = 1
1382
+ rank = 0
1383
+ else:
1384
+ num_proc = common_comm.Get_size()
1385
+ rank = common_comm.Get_rank()
1386
+ if num_proc == 1:
1387
+ offset = 0
1388
+ else:
1389
+ num_scan = common_comm.bcast(num_scan, root=0)
1390
+ offset = common_comm.scatter(offsets, root=0)
1391
+
1392
+ # Read the raw data
1393
+ if map_config.experiment_type == 'EDD':
1394
+ data, independent_dimensions, all_scalar_data = \
1395
+ self._read_raw_data_edd(
1396
+ map_config, detector_indices, common_comm, num_scan,
1397
+ offset)
1398
+ else:
1399
+ data, independent_dimensions, all_scalar_data = \
1400
+ self._read_raw_data(
1401
+ map_config, detector_names, common_comm, num_scan, offset)
1402
+ if not rank:
1403
+ self.logger.debug(f'Data shape: {data.shape}')
1404
+ if independent_dimensions is not None:
1405
+ self.logger.debug('Independent dimensions shape: '
1406
+ f'{independent_dimensions.shape}')
1407
+ if all_scalar_data is not None:
1408
+ self.logger.debug('Scalar data shape: '
1409
+ f'{all_scalar_data.shape}')
1410
+
1411
+ if rank:
1412
+ return None
1413
+
1414
+ if num_proc > 1:
1415
+ # Reset the scan_numbers to the original full set
1416
+ spec_scans.scan_numbers = scan_numbers
1417
+ # Disconnect spawned workers and cleanup temporary files
1418
+ common_comm.barrier()
1419
+ sub_comm.Disconnect()
1420
+ for tmp_name in tmp_names:
1421
+ os.remove(tmp_name)
1422
+
1423
+ # Construct the NeXus NXentry object
1424
+ nxentry = self._get_nxentry(
1425
+ map_config, detector_names, data, independent_dimensions,
1426
+ all_scalar_data)
1236
1427
 
1237
1428
  return nxentry
1238
1429
 
1239
- @staticmethod
1240
- def get_nxentry(map_config, detector_names=[]):
1430
+ def _get_nxentry(
1431
+ self, map_config, detector_names, data, independent_dimensions,
1432
+ all_scalar_data):
1241
1433
  """Use a `MapConfig` to construct a NeXus NXentry object.
1242
1434
 
1243
1435
  :param map_config: A valid map configuration.
1244
- :type map_config: MapConfig
1245
- :param detector_names: Detector prefixes to include raw data
1246
- for in the returned NeXus NXentry object.
1436
+ :type map_config: common.models.map.MapConfig
1437
+ :param detector_names: Detector names to include raw data
1438
+ for in the returned NeXus NXentry object,
1439
+ defaults to `None`.
1247
1440
  :type detector_names: list[str]
1441
+ :param data: The map's raw data.
1442
+ :type data: numpy.ndarray
1443
+ :param independent_dimensions: The map's independent
1444
+ coordinates.
1445
+ :type independent_dimensions: numpy.ndarray
1446
+ :param all_scalar_data: The map's scalar data.
1447
+ :type all_scalar_data: numpy.ndarray
1248
1448
  :return: The map's data and metadata contained in a NeXus
1249
1449
  structure.
1250
1450
  :rtype: nexusformat.nexus.NXentry
1251
1451
  """
1252
1452
  # System modules
1453
+ from copy import deepcopy
1253
1454
  from json import dumps
1254
1455
 
1255
1456
  # Third party modules
@@ -1261,13 +1462,16 @@ class MapProcessor(Processor):
1261
1462
  NXsample,
1262
1463
  )
1263
1464
 
1465
+ # Local modules:
1466
+ from CHAP.common.models.map import PointByPointScanData
1467
+ from CHAP.utils.general import is_int_series
1468
+
1469
+ # Set up NeXus NXentry and add misc. CHESS-specific metadata
1264
1470
  nxentry = NXentry(name=map_config.title)
1265
- nxentry.map_config = dumps(map_config.dict())
1266
- nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict())
1267
1471
  nxentry.attrs['station'] = map_config.station
1268
1472
  for key, value in map_config.attrs.items():
1269
1473
  nxentry.attrs[key] = value
1270
-
1474
+ nxentry.detector_names = detector_names
1271
1475
  nxentry.spec_scans = NXcollection()
1272
1476
  for scans in map_config.spec_scans:
1273
1477
  nxentry.spec_scans[scans.scanparsers[0].scan_name] = \
@@ -1275,60 +1479,618 @@ class MapProcessor(Processor):
1275
1479
  dtype='int8',
1276
1480
  attrs={'spec_file': str(scans.spec_file)})
1277
1481
 
1278
- nxentry.data = NXdata()
1279
- if map_config.map_type == 'structured':
1280
- nxentry.data.attrs['axes'] = map_config.dims
1281
- for i, dim in enumerate(map_config.independent_dimensions[::-1]):
1282
- nxentry.data[dim.label] = NXfield(
1283
- value=map_config.coords[dim.label],
1482
+ # Add sample metadata
1483
+ nxentry[map_config.sample.name] = NXsample(**map_config.sample.dict())
1484
+
1485
+ # Set up default NeXus NXdata group (squeeze out constant dimensions)
1486
+ constant_dim = []
1487
+ for i, dim in enumerate(map_config.independent_dimensions):
1488
+ unique = np.unique(independent_dimensions[i])
1489
+ if unique.size == 1:
1490
+ constant_dim.append(i)
1491
+ nxentry.data = NXdata(
1492
+ NXfield(data, 'detector_data'),
1493
+ tuple([
1494
+ NXfield(
1495
+ independent_dimensions[i], dim.label,
1496
+ attrs={'units': dim.units,
1497
+ 'long_name': f'{dim.label} ({dim.units})',
1498
+ 'data_type': dim.data_type,
1499
+ 'local_name': dim.name})
1500
+ for i, dim in enumerate(map_config.independent_dimensions)
1501
+ if i not in constant_dim]))
1502
+ nxentry.data.set_default()
1503
+
1504
+ # Set up auxiliary NeXus NXdata group (add the constant dimensions)
1505
+ auxiliary_signals = []
1506
+ auxiliary_data = []
1507
+ for i, dim in enumerate(map_config.all_scalar_data):
1508
+ auxiliary_signals.append(dim.label)
1509
+ auxiliary_data.append(NXfield(
1510
+ value=all_scalar_data[i],
1284
1511
  units=dim.units,
1285
1512
  attrs={'long_name': f'{dim.label} ({dim.units})',
1286
1513
  'data_type': dim.data_type,
1287
- 'local_name': dim.name})
1288
- if map_config.map_type == 'structured':
1289
- nxentry.data.attrs[f'{dim.label}_indices'] = i
1290
-
1291
- signal = False
1292
- auxilliary_signals = []
1293
- for data in map_config.all_scalar_data:
1294
- nxentry.data[data.label] = NXfield(
1295
- value=np.empty(map_config.shape),
1296
- units=data.units,
1297
- attrs={'long_name': f'{data.label} ({data.units})',
1298
- 'data_type': data.data_type,
1299
- 'local_name': data.name})
1300
- if not signal:
1301
- signal = data.label
1514
+ 'local_name': dim.name}))
1515
+ for i, dim in enumerate(deepcopy(map_config.independent_dimensions)):
1516
+ if i in constant_dim:
1517
+ auxiliary_signals.append(dim.label)
1518
+ auxiliary_data.append(NXfield(
1519
+ independent_dimensions[i], dim.label,
1520
+ attrs={'units': dim.units,
1521
+ 'long_name': f'{dim.label} ({dim.units})',
1522
+ 'data_type': dim.data_type,
1523
+ 'local_name': dim.name}))
1524
+ map_config.all_scalar_data.append(
1525
+ PointByPointScanData(**dict(dim)))
1526
+ map_config.independent_dimensions.remove(dim)
1527
+ if auxiliary_signals:
1528
+ nxentry.auxdata = NXdata()
1529
+ for label, data in zip(auxiliary_signals, auxiliary_data):
1530
+ nxentry.auxdata[label] = data
1531
+ if 'SCAN_N' in auxiliary_signals:
1532
+ nxentry.auxdata.attrs['signal'] = 'SCAN_N'
1302
1533
  else:
1303
- auxilliary_signals.append(data.label)
1304
-
1305
- if signal:
1306
- nxentry.data.attrs['signal'] = signal
1307
- nxentry.data.attrs['auxilliary_signals'] = auxilliary_signals
1308
-
1309
- # Create empty NXfields of appropriate shape for raw
1310
- # detector data
1311
- for detector_name in detector_names:
1312
- if not isinstance(detector_name, str):
1313
- detector_name = str(detector_name)
1314
- detector_data = map_config.get_detector_data(
1315
- detector_name, (0,) * len(map_config.shape))
1316
- nxentry.data[detector_name] = NXfield(value=np.zeros(
1317
- (*map_config.shape, *detector_data.shape)),
1318
- dtype=detector_data.dtype)
1534
+ nxentry.auxdata.attrs['signal'] = auxiliary_signals[0]
1535
+ auxiliary_signals.remove(nxentry.auxdata.attrs['signal'])
1536
+ nxentry.auxdata.attrs['auxiliary_signals'] = auxiliary_signals
1319
1537
 
1320
- for map_index in np.ndindex(map_config.shape):
1321
- for data in map_config.all_scalar_data:
1322
- nxentry.data[data.label][map_index] = map_config.get_value(
1323
- data, map_index)
1324
- for detector_name in detector_names:
1325
- if not isinstance(detector_name, str):
1326
- detector_name = str(detector_name)
1327
- nxentry.data[detector_name][map_index] = \
1328
- map_config.get_detector_data(detector_name, map_index)
1538
+ nxentry.map_config = dumps(map_config.dict())
1329
1539
 
1330
1540
  return nxentry
1331
1541
 
1542
+ def _read_raw_data_edd(
1543
+ self, map_config, detector_indices, comm, num_scan, offset):
1544
+ """Read the raw EDD data for a given map configuration.
1545
+
1546
+ :param map_config: A valid map configuration.
1547
+ :type map_config: common.models.map.MapConfig
1548
+ :param detector_indices: Indices to the corresponding
1549
+ detector names.
1550
+ :type detector_indices: list[int]
1551
+ :return: The map's raw data, independent dimensions and scalar
1552
+ data
1553
+ :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
1554
+ """
1555
+ # Third party modules
1556
+ try:
1557
+ from mpi4py import MPI
1558
+ from mpi4py.util import dtlib
1559
+ except:
1560
+ pass
1561
+
1562
+ # Local modules
1563
+ from CHAP.utils.general import list_to_string
1564
+
1565
+ if comm is None:
1566
+ num_proc = 1
1567
+ rank = 0
1568
+ else:
1569
+ num_proc = comm.Get_size()
1570
+ rank = comm.Get_rank()
1571
+ if not rank:
1572
+ self.logger.debug(f'Number of processors: {num_proc}')
1573
+ self.logger.debug(f'Number of scans: {num_scan}')
1574
+
1575
+ # Create the shared data buffers
1576
+ # FIX: just one spec scan at this point
1577
+ assert len(map_config.spec_scans) == 1
1578
+ scan = map_config.spec_scans[0]
1579
+ scan_numbers = scan.scan_numbers
1580
+ scanparser = scan.get_scanparser(scan_numbers[0])
1581
+ ddata = scanparser.get_detector_data(detector_indices)
1582
+ spec_scan_shape = scanparser.spec_scan_shape
1583
+ num_dim = np.prod(spec_scan_shape)
1584
+ num_id = len(map_config.independent_dimensions)
1585
+ num_sd = len(map_config.all_scalar_data)
1586
+ if num_proc == 1:
1587
+ assert num_scan == len(scan_numbers)
1588
+ data = np.empty((num_scan, *ddata.shape), dtype=ddata.dtype)
1589
+ independent_dimensions = np.empty(
1590
+ (num_id, num_scan*num_dim), dtype=np.float64)
1591
+ all_scalar_data = np.empty(
1592
+ (num_sd, num_scan*num_dim), dtype=np.float64)
1593
+ else:
1594
+ self.logger.debug(f'Scan offset on processor {rank}: {offset}')
1595
+ self.logger.debug(f'Scan numbers on processor {rank}: '
1596
+ f'{list_to_string(scan_numbers)}')
1597
+ datatype = dtlib.from_numpy_dtype(ddata.dtype)
1598
+ itemsize = datatype.Get_size()
1599
+ if not rank:
1600
+ nbytes = num_scan * np.prod(ddata.shape) * itemsize
1601
+ else:
1602
+ nbytes = 0
1603
+ win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1604
+ buf, itemsize = win.Shared_query(0)
1605
+ assert itemsize == datatype.Get_size()
1606
+ data = np.ndarray(
1607
+ buffer=buf, dtype=ddata.dtype, shape=(num_scan, *ddata.shape))
1608
+ datatype = dtlib.from_numpy_dtype(np.float64)
1609
+ itemsize = datatype.Get_size()
1610
+ if not rank:
1611
+ nbytes = num_id * num_scan * num_dim * itemsize
1612
+ win_id = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1613
+ buf_id, _ = win_id.Shared_query(0)
1614
+ independent_dimensions = np.ndarray(
1615
+ buffer=buf_id, dtype=np.float64,
1616
+ shape=(num_id, num_scan*num_dim))
1617
+ if not rank:
1618
+ nbytes = num_sd * num_scan * num_dim * itemsize
1619
+ win_sd = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1620
+ buf_sd, _ = win_sd.Shared_query(0)
1621
+ all_scalar_data = np.ndarray(
1622
+ buffer=buf_sd, dtype=np.float64,
1623
+ shape=(num_sd, num_scan*num_dim))
1624
+
1625
+ # Read the raw data
1626
+ init = True
1627
+ for scan in map_config.spec_scans:
1628
+ for scan_number in scan.scan_numbers:
1629
+ if init:
1630
+ init = False
1631
+ else:
1632
+ scanparser = scan.get_scanparser(scan_number)
1633
+ assert spec_scan_shape == scanparser.spec_scan_shape
1634
+ ddata = scanparser.get_detector_data(detector_indices)
1635
+ data[offset] = ddata
1636
+ spec_scan_motor_mnes = scanparser.spec_scan_motor_mnes
1637
+ start_dim = offset * num_dim
1638
+ end_dim = start_dim + num_dim
1639
+ if len(spec_scan_shape) == 1:
1640
+ for i, dim in enumerate(map_config.independent_dimensions):
1641
+ v = dim.get_value(
1642
+ scan, scan_number, scan_step_index=-1,
1643
+ relative=False)
1644
+ if dim.name in spec_scan_motor_mnes:
1645
+ independent_dimensions[i][start_dim:end_dim] = v
1646
+ else:
1647
+ independent_dimensions[i][start_dim:end_dim] = \
1648
+ np.repeat(v, spec_scan_shape[0])
1649
+ for i, dim in enumerate(map_config.all_scalar_data):
1650
+ v = dim.get_value(
1651
+ scan, scan_number, scan_step_index=-1,
1652
+ relative=False)
1653
+ #if dim.name in spec_scan_motor_mnes:
1654
+ if dim.data_type == 'scan_column':
1655
+ all_scalar_data[i][start_dim:end_dim] = v
1656
+ else:
1657
+ all_scalar_data[i][start_dim:end_dim] = \
1658
+ np.repeat(v, spec_scan_shape[0])
1659
+ else:
1660
+ for i, dim in enumerate(map_config.independent_dimensions):
1661
+ v = dim.get_value(
1662
+ scan, scan_number, scan_step_index=-1,
1663
+ relative=False)
1664
+ if dim.name == spec_scan_motor_mnes[0]:
1665
+ # Fast motor
1666
+ independent_dimensions[i][start_dim:end_dim] = \
1667
+ np.concatenate((v,)*spec_scan_shape[1])
1668
+ elif dim.name == spec_scan_motor_mnes[1]:
1669
+ # Slow motor
1670
+ independent_dimensions[i][start_dim:end_dim] = \
1671
+ np.repeat(v, spec_scan_shape[0])
1672
+ else:
1673
+ independent_dimensions[i][start_dim:end_dim] = v
1674
+ for i, dim in enumerate(map_config.all_scalar_data):
1675
+ v = dim.get_value(
1676
+ scan, scan_number, scan_step_index=-1,
1677
+ relative=False)
1678
+ if dim.data_type == 'scan_column':
1679
+ all_scalar_data[i][start_dim:end_dim] = v
1680
+ elif dim.data_type == 'smb_par':
1681
+ if dim.name == spec_scan_motor_mnes[0]:
1682
+ # Fast motor
1683
+ all_scalar_data[i][start_dim:end_dim] = \
1684
+ np.concatenate((v,)*spec_scan_shape[1])
1685
+ elif dim.name == spec_scan_motor_mnes[1]:
1686
+ # Slow motor
1687
+ all_scalar_data[i][start_dim:end_dim] = \
1688
+ np.repeat(v, spec_scan_shape[0])
1689
+ else:
1690
+ all_scalar_data[i][start_dim:end_dim] = v
1691
+ else:
1692
+ raise RuntimeError(
1693
+ f'{dim.data_type} in data_type not tested')
1694
+ offset += 1
1695
+
1696
+ return (
1697
+ data.reshape((np.prod(data.shape[:2]), *data.shape[2:])),
1698
+ independent_dimensions, all_scalar_data)
1699
+
1700
+ def _read_raw_data(
1701
+ self, map_config, detector_names, comm, num_scan, offset):
1702
+ """Read the raw data for a given map configuration.
1703
+
1704
+ :param map_config: A valid map configuration.
1705
+ :type map_config: common.models.map.MapConfig
1706
+ :param detector_names: Detector names to include raw data
1707
+ for in the returned NeXus NXentry object,
1708
+ defaults to `None`.
1709
+ :type detector_names: list[str]
1710
+ :return: The map's raw data, independent dimensions and scalar
1711
+ data
1712
+ :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray
1713
+ """
1714
+ # Third party modules
1715
+ try:
1716
+ from mpi4py import MPI
1717
+ from mpi4py.util import dtlib
1718
+ except:
1719
+ pass
1720
+
1721
+ # Local modules
1722
+ from CHAP.utils.general import list_to_string
1723
+
1724
+ if comm is None:
1725
+ num_proc = 1
1726
+ rank = 0
1727
+ else:
1728
+ num_proc = comm.Get_size()
1729
+ rank = comm.Get_rank()
1730
+ if not rank:
1731
+ self.logger.debug(f'Number of processors: {num_proc}')
1732
+ self.logger.debug(f'Number of scans: {num_scan}')
1733
+
1734
+ # Create the shared data buffers
1735
+ # FIX: just one spec scan and one detector at this point
1736
+ assert len(map_config.spec_scans) == 1
1737
+ assert len(detector_names) == 1
1738
+ scans = map_config.spec_scans[0]
1739
+ scan_numbers = scans.scan_numbers
1740
+ scanparser = scans.get_scanparser(scan_numbers[0])
1741
+ ddata = scanparser.get_detector_data(detector_names[0])
1742
+ num_dim = ddata.shape[0]
1743
+ num_id = len(map_config.independent_dimensions)
1744
+ num_sd = len(map_config.all_scalar_data)
1745
+ if not num_sd:
1746
+ all_scalar_data = None
1747
+ if num_proc == 1:
1748
+ assert num_scan == len(scan_numbers)
1749
+ data = np.empty((num_scan, *ddata.shape), dtype=ddata.dtype)
1750
+ independent_dimensions = np.empty(
1751
+ (num_scan, num_id, num_dim), dtype=np.float64)
1752
+ if num_sd:
1753
+ all_scalar_data = np.empty(
1754
+ (num_scan, num_sd, num_dim), dtype=np.float64)
1755
+ else:
1756
+ self.logger.debug(f'Scan offset on processor {rank}: {offset}')
1757
+ self.logger.debug(f'Scan numbers on processor {rank}: '
1758
+ f'{list_to_string(scan_numbers)}')
1759
+ datatype = dtlib.from_numpy_dtype(ddata.dtype)
1760
+ itemsize = datatype.Get_size()
1761
+ if not rank:
1762
+ nbytes = num_scan * np.prod(ddata.shape) * itemsize
1763
+ else:
1764
+ nbytes = 0
1765
+ win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1766
+ buf, _ = win.Shared_query(0)
1767
+ data = np.ndarray(
1768
+ buffer=buf, dtype=ddata.dtype, shape=(num_scan, *ddata.shape))
1769
+ datatype = dtlib.from_numpy_dtype(np.float64)
1770
+ itemsize = datatype.Get_size()
1771
+ if not rank:
1772
+ nbytes = num_scan * num_id * num_dim * itemsize
1773
+ else:
1774
+ nbytes = 0
1775
+ win_id = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1776
+ buf_id, _ = win_id.Shared_query(0)
1777
+ independent_dimensions = np.ndarray(
1778
+ buffer=buf_id, dtype=np.float64,
1779
+ shape=(num_scan, num_id, num_dim))
1780
+ if num_sd:
1781
+ if not rank:
1782
+ nbytes = num_scan * num_sd * num_dim * itemsize
1783
+ win_sd = MPI.Win.Allocate_shared(nbytes, itemsize, comm=comm)
1784
+ buf_sd, _ = win_sd.Shared_query(0)
1785
+ all_scalar_data = np.ndarray(
1786
+ buffer=buf_sd, dtype=np.float64,
1787
+ shape=(num_scan, num_sd, num_dim))
1788
+
1789
+ # Read the raw data
1790
+ init = True
1791
+ for scans in map_config.spec_scans:
1792
+ for scan_number in scans.scan_numbers:
1793
+ if init:
1794
+ init = False
1795
+ else:
1796
+ scanparser = scans.get_scanparser(scan_number)
1797
+ ddata = scanparser.get_detector_data(detector_names[0])
1798
+ data[offset] = ddata
1799
+ for i, dim in enumerate(map_config.independent_dimensions):
1800
+ if dim.data_type == 'scan_column':
1801
+ independent_dimensions[offset,i] = dim.get_value(
1802
+ #v = dim.get_value(
1803
+ scans, scan_number, scan_step_index=-1,
1804
+ relative=False)[:num_dim]
1805
+ #print(f'\ndim: {dim}\nv {np.asarray(v).shape}: {v}')
1806
+ #independent_dimensions[offset,i] = v[:num_dim]
1807
+ elif dim.data_type in ['smb_par', 'spec_motor']:
1808
+ independent_dimensions[offset,i] = dim.get_value(
1809
+ #v = dim.get_value(
1810
+ scans, scan_number, scan_step_index=-1,
1811
+ relative=False)
1812
+ #print(f'\ndim: {dim}\nv {np.asarray(v).shape}: {v}')
1813
+ #independent_dimensions[offset,i] = v
1814
+ else:
1815
+ raise RuntimeError(
1816
+ f'{dim.data_type} in data_type not tested')
1817
+ for i, dim in enumerate(map_config.all_scalar_data):
1818
+ all_scalar_data[offset,i] = dim.get_value(
1819
+ scans, scan_number, scan_step_index=-1,
1820
+ relative=False)
1821
+ offset += 1
1822
+
1823
+ if num_sd:
1824
+ return (
1825
+ data.reshape((1, np.prod(data.shape[:2]), *data.shape[2:])),
1826
+ np.stack(tuple([independent_dimensions[:,i].flatten()
1827
+ for i in range(num_id)])),
1828
+ np.stack(tuple([all_scalar_data[:,i].flatten()
1829
+ for i in range(num_sd)])))
1830
+ return (
1831
+ data.reshape((1, np.prod(data.shape[:2]), *data.shape[2:])),
1832
+ np.stack(tuple([independent_dimensions[:,i].flatten()
1833
+ for i in range(num_id)])),
1834
+ all_scalar_data)
1835
+
1836
+
1837
+ class MPITestProcessor(Processor):
1838
+ """A test MPI Processor.
1839
+ """
1840
+ def process(self, data, sub_pipeline={}):
1841
+ # Third party modules
1842
+ import mpi4py as mpi4py
1843
+ from mpi4py import MPI
1844
+
1845
+ my_rank = MPI.COMM_WORLD.Get_rank()
1846
+ size = MPI.COMM_WORLD.Get_size()
1847
+ (version, subversion) = MPI.Get_version()
1848
+
1849
+ mpi4py_version = mpi4py.__version__
1850
+
1851
+ if (my_rank == 0):
1852
+ if (size > 1):
1853
+ print('Successful first MPI test executed in parallel on '
1854
+ f'{size} processes using mpi4py version '
1855
+ f'{mpi4py_version}.')
1856
+ if int(mpi4py_version[0]) < 3:
1857
+ print('CAUTION: You are using an mpi4py version '
1858
+ 'below 3.0.0.')
1859
+ else:
1860
+ print('CAUTION: This MPI test is executed only on one MPI '
1861
+ 'process, i.e., sequentially!')
1862
+ print('Your installation supports MPI standard version '
1863
+ f'{version}.{subversion}.')
1864
+ print(f'Finished on processor {my_rank} of {size}')
1865
+
1866
+
1867
+ class MPICollectProcessor(Processor):
1868
+ """A Processor that collects the distributed worker data from
1869
+ MPIMapProcessor on the root node
1870
+ """
1871
+ def process(self, data, comm, root_as_worker=True):
1872
+ # Third party modules
1873
+ from mpi4py import MPI
1874
+
1875
+ num_proc = comm.Get_size()
1876
+ rank = comm.Get_rank()
1877
+ if root_as_worker:
1878
+ data = self.unwrap_pipelinedata(data)[-1]
1879
+ if num_proc > 1:
1880
+ data = comm.gather(data, root=0)
1881
+ else:
1882
+ for n_worker in range(1, num_proc):
1883
+ if rank == n_worker:
1884
+ comm.send(self.unwrap_pipelinedata(data)[-1], dest=0)
1885
+ data = None
1886
+ elif not rank:
1887
+ if n_worker == 1:
1888
+ data = [comm.recv(source=n_worker)]
1889
+ else:
1890
+ data.append(comm.recv(source=n_worker))
1891
+ return data
1892
+
1893
+
1894
+ class MPIMapProcessor(Processor):
1895
+ """A Processor that applies a parallel generic sub-pipeline to
1896
+ a map configuration.
1897
+ """
1898
+ def process(self, data, sub_pipeline={}):
1899
+ # System modules
1900
+ from copy import deepcopy
1901
+
1902
+ # Third party modules
1903
+ from mpi4py import MPI
1904
+
1905
+ # Local modules
1906
+ from CHAP.runner import (
1907
+ RunConfig,
1908
+ run,
1909
+ )
1910
+ from CHAP.common.models.map import (
1911
+ SpecScans,
1912
+ SpecConfig,
1913
+ )
1914
+
1915
+ comm = MPI.COMM_WORLD
1916
+ num_proc = comm.Get_size()
1917
+ rank = comm.Get_rank()
1918
+
1919
+ # Get the map configuration from data
1920
+ map_config = self.get_config(
1921
+ data, 'common.models.map.MapConfig')
1922
+
1923
+ # Create the spec reader configuration for each processor
1924
+ spec_scans = map_config.spec_scans[0]
1925
+ scan_numbers = spec_scans.scan_numbers
1926
+ num_scan = len(scan_numbers)
1927
+ scans_per_proc = num_scan//num_proc
1928
+ n_scan = 0
1929
+ for n_proc in range(num_proc):
1930
+ num = scans_per_proc
1931
+ if n_proc == rank:
1932
+ if rank < num_scan - scans_per_proc*num_proc:
1933
+ num += 1
1934
+ scan_numbers = scan_numbers[n_scan:n_scan+num]
1935
+ n_scan += num
1936
+ spec_config = {
1937
+ 'station': map_config.station,
1938
+ 'experiment_type': map_config.experiment_type,
1939
+ 'spec_scans': [SpecScans(
1940
+ spec_file=spec_scans.spec_file, scan_numbers=scan_numbers)]}
1941
+
1942
+ # Get the run configuration to use for the sub-pipeline
1943
+ run_config = RunConfig(sub_pipeline.get('config', {}), comm)
1944
+ pipeline_config = []
1945
+ for item in sub_pipeline['pipeline']:
1946
+ if isinstance(item, dict):
1947
+ for k, v in deepcopy(item).items():
1948
+ if k.endswith('Reader'):
1949
+ v['config'] = spec_config
1950
+ item[k] = v
1951
+ if num_proc > 1 and k.endswith('Writer'):
1952
+ r, e = os.path.splitext(v['filename'])
1953
+ v['filename'] = f'{r}_{rank}{e}'
1954
+ item[k] = v
1955
+ pipeline_config.append(item)
1956
+
1957
+ # Run the sub-pipeline on each processor
1958
+ return run(
1959
+ pipeline_config, inputdir=run_config.inputdir,
1960
+ outputdir=run_config.outputdir,
1961
+ interactive=run_config.interactive, comm=comm)
1962
+
1963
+
1964
+ class MPISpawnMapProcessor(Processor):
1965
+ """A Processor that applies a parallel generic sub-pipeline to
1966
+ a map configuration by spawning workers processes.
1967
+ """
1968
+ def process(
1969
+ self, data, num_proc=1, root_as_worker=True, collect_on_root=True,
1970
+ sub_pipeline={}):
1971
+ # System modules
1972
+ from copy import deepcopy
1973
+ from tempfile import NamedTemporaryFile
1974
+
1975
+ # Third party modules
1976
+ try:
1977
+ from mpi4py import MPI
1978
+ except:
1979
+ raise ImportError('Unable to import mpi4py')
1980
+ import yaml
1981
+
1982
+ # Local modules
1983
+ from CHAP.runner import (
1984
+ RunConfig,
1985
+ runner,
1986
+ )
1987
+ from CHAP.common.models.map import (
1988
+ SpecScans,
1989
+ SpecConfig,
1990
+ )
1991
+
1992
+ # Get the map configuration from data
1993
+ map_config = self.get_config(
1994
+ data, 'common.models.map.MapConfig')
1995
+
1996
+ # Get the run configuration to use for the sub-pipeline
1997
+ run_config = RunConfig(config=sub_pipeline.get('config', {}))
1998
+
1999
+ # Create the sub-pipeline configuration for each processor
2000
+ spec_scans = map_config.spec_scans[0]
2001
+ scan_numbers = spec_scans.scan_numbers
2002
+ num_scan = len(scan_numbers)
2003
+ scans_per_proc = num_scan//num_proc
2004
+ n_scan = 0
2005
+ pipeline_config = []
2006
+ for n_proc in range(num_proc):
2007
+ num = scans_per_proc
2008
+ if n_proc < num_scan - scans_per_proc*num_proc:
2009
+ num += 1
2010
+ spec_config = {
2011
+ 'station': map_config.station,
2012
+ 'experiment_type': map_config.experiment_type,
2013
+ 'spec_scans': [SpecScans(
2014
+ spec_file=spec_scans.spec_file,
2015
+ scan_numbers=scan_numbers[n_scan:n_scan+num]).__dict__]}
2016
+ sub_pipeline_config = []
2017
+ for item in deepcopy(sub_pipeline['pipeline']):
2018
+ if isinstance(item, dict):
2019
+ for k, v in deepcopy(item).items():
2020
+ if k.endswith('Reader'):
2021
+ v['config'] = spec_config
2022
+ item[k] = v
2023
+ if num_proc > 1 and k.endswith('Writer'):
2024
+ r, e = os.path.splitext(v['filename'])
2025
+ v['filename'] = f'{r}_{n_proc}{e}'
2026
+ item[k] = v
2027
+ sub_pipeline_config.append(item)
2028
+ if collect_on_root and (not root_as_worker or num_proc > 1):
2029
+ sub_pipeline_config += [
2030
+ {'common.MPICollectProcessor': {
2031
+ 'root_as_worker': root_as_worker}}]
2032
+ pipeline_config.append(sub_pipeline_config)
2033
+ n_scan += num
2034
+
2035
+ # Optionally include the root node as a worker node
2036
+ if root_as_worker:
2037
+ first_proc = 1
2038
+ run_config.spawn = 1
2039
+ else:
2040
+ first_proc = 0
2041
+ run_config.spawn = -1
2042
+
2043
+ # Spawn the workers to run the sub-pipeline
2044
+ if num_proc > first_proc:
2045
+ tmp_names = []
2046
+ with NamedTemporaryFile(delete=False) as fp:
2047
+ fp_name = fp.name
2048
+ tmp_names.append(fp_name)
2049
+ with open(fp_name, 'w') as f:
2050
+ yaml.dump(
2051
+ {'config': {'spawn': run_config.spawn}}, f,
2052
+ sort_keys=False)
2053
+ for n_proc in range(first_proc, num_proc):
2054
+ f_name = f'{fp_name}_{n_proc}'
2055
+ tmp_names.append(f_name)
2056
+ with open(f_name, 'w') as f:
2057
+ yaml.dump(
2058
+ {'config': run_config.__dict__,
2059
+ 'pipeline': pipeline_config[n_proc]},
2060
+ f, sort_keys=False)
2061
+ sub_comm = MPI.COMM_SELF.Spawn(
2062
+ 'CHAP', args=[fp_name], maxprocs=num_proc-first_proc)
2063
+ common_comm = sub_comm.Merge(False)
2064
+ if run_config.spawn > 0:
2065
+ # Align with the barrier in RunConfig() on common_comm
2066
+ # called from the spawned main()
2067
+ common_comm.barrier()
2068
+ else:
2069
+ common_comm = None
2070
+
2071
+ # Run the sub-pipeline on the root node
2072
+ if root_as_worker:
2073
+ data = runner(run_config, pipeline_config[0], common_comm)
2074
+ elif collect_on_root:
2075
+ run_config.spawn = 0
2076
+ pipeline_config = [{'common.MPICollectProcessor': {
2077
+ 'root_as_worker': root_as_worker}}]
2078
+ data = runner(run_config, pipeline_config, common_comm)
2079
+ else:
2080
+ # Align with the barrier in run() on common_comm
2081
+ # called from the spawned main()
2082
+ common_comm.barrier()
2083
+ data = None
2084
+
2085
+ # Disconnect spawned workers and cleanup temporary files
2086
+ if num_proc > first_proc:
2087
+ common_comm.barrier()
2088
+ sub_comm.Disconnect()
2089
+ for tmp_name in tmp_names:
2090
+ os.remove(tmp_name)
2091
+
2092
+ return data
2093
+
1332
2094
 
1333
2095
  class NexusToNumpyProcessor(Processor):
1334
2096
  """A Processor to convert the default plottable data in a NeXus
@@ -1445,7 +2207,7 @@ class PrintProcessor(Processor):
1445
2207
  """
1446
2208
  print(f'{self.__name__} data :')
1447
2209
  if callable(getattr(data, '_str_tree', None)):
1448
- # If data is likely an NXobject, print its tree
2210
+ # If data is likely a NeXus NXobject, print its tree
1449
2211
  # representation (since NXobjects' str representations are
1450
2212
  # just their nxname)
1451
2213
  print(data._str_tree(attrs=True, recursive=True))
@@ -1488,7 +2250,7 @@ class PyfaiAzimuthalIntegrationProcessor(Processor):
1488
2250
  :returns: Azimuthal integration results as a dictionary of
1489
2251
  numpy arrays.
1490
2252
  """
1491
- import os
2253
+ # Third party modules
1492
2254
  from pyFAI import load
1493
2255
 
1494
2256
  if not os.path.isabs(poni_file):
@@ -1498,9 +2260,10 @@ class PyfaiAzimuthalIntegrationProcessor(Processor):
1498
2260
  if mask_file is None:
1499
2261
  mask = None
1500
2262
  else:
2263
+ # Third party modules
2264
+ import fabio
1501
2265
  if not os.path.isabs(mask_file):
1502
2266
  mask_file = os.path.join(inputdir, mask_file)
1503
- import fabio
1504
2267
  mask = fabio.open(mask_file).data
1505
2268
 
1506
2269
  try:
@@ -1543,13 +2306,14 @@ class RawDetectorDataMapProcessor(Processor):
1543
2306
  `Processor`.
1544
2307
 
1545
2308
  :param data: Result of `Reader.read` where at least one item
1546
- has the value `'MapConfig'` for the `'schema'` key.
2309
+ has the value `'common.models.map.MapConfig'` for the
2310
+ `'schema'` key.
1547
2311
  :type data: list[PipelineData]
1548
2312
  :raises Exception: If a valid map config object cannot be
1549
2313
  constructed from `data`.
1550
2314
  :return: A valid instance of the map configuration object with
1551
2315
  field values taken from `data`.
1552
- :rtype: MapConfig
2316
+ :rtype: common.models.map.MapConfig
1553
2317
  """
1554
2318
  # Local modules
1555
2319
  from CHAP.common.models.map import MapConfig
@@ -1559,7 +2323,7 @@ class RawDetectorDataMapProcessor(Processor):
1559
2323
  for item in data:
1560
2324
  if isinstance(item, dict):
1561
2325
  schema = item.get('schema')
1562
- if schema == 'MapConfig':
2326
+ if schema == 'common.models.map.MapConfig':
1563
2327
  map_config = item.get('data')
1564
2328
 
1565
2329
  if not map_config:
@@ -1573,7 +2337,7 @@ class RawDetectorDataMapProcessor(Processor):
1573
2337
  relevant metadata in the form of a NeXus structure.
1574
2338
 
1575
2339
  :param map_config: The map configuration.
1576
- :type map_config: MapConfig
2340
+ :type map_config: common.models.map.MapConfig
1577
2341
  :param detector_name: The detector prefix.
1578
2342
  :type detector_name: str
1579
2343
  :param detector_shape: The shape of detector data for a single
@@ -1690,11 +2454,11 @@ class StrainAnalysisProcessor(Processor):
1690
2454
  class SetupNXdataProcessor(Processor):
1691
2455
  """Processor to set up and return an "empty" NeXus representation
1692
2456
  of a structured dataset. This representation will be an instance
1693
- of `NXdata` that has:
1694
- 1. An `NXfield` entry for every coordinate and signal specified.
1695
- 1. `nxaxes` that are the `NXfield` entries for the coordinates and
1696
- contain the values provided for each coordinate.
1697
- 1. `NXfield` entries of appropriate shape, but containing all
2457
+ of a NeXus NXdata object that has:
2458
+ 1. A NeXus NXfield entry for every coordinate/signal specified.
2459
+ 1. `nxaxes` that are the NeXus NXfield entries for the coordinates
2460
+ and contain the values provided for each coordinate.
2461
+ 1. NeXus NXfield entries of appropriate shape, but containing all
1698
2462
  zeros, for every signal.
1699
2463
  1. Attributes that define the axes, plus any additional attributes
1700
2464
  specified by the user.
@@ -1760,20 +2524,20 @@ class SetupNXdataProcessor(Processor):
1760
2524
  def process(self, data, nxname='data',
1761
2525
  coords=[], signals=[], attrs={}, data_points=[],
1762
2526
  extra_nxfields=[], duplicates='overwrite'):
1763
- """Return an `NXdata` that has the requisite axes and
1764
- `NXfield` entries to represent a structured dataset with the
1765
- properties provided. Properties may be provided either through
1766
- the `data` argument (from an appropriate `PipelineItem` that
1767
- immediately preceeds this one in a `Pipeline`), or through the
1768
- `coords`, `signals`, `attrs`, and/or `data_points`
2527
+ """Return a NeXus NXdata object that has the requisite axes
2528
+ and NeXus NXfield entries to represent a structured dataset
2529
+ with the properties provided. Properties may be provided either
2530
+ through the `data` argument (from an appropriate `PipelineItem`
2531
+ that immediately precedes this one in a `Pipeline`), or through
2532
+ the `coords`, `signals`, `attrs`, and/or `data_points`
1769
2533
  arguments. If any of the latter are used, their values will
1770
2534
  completely override any values for these parameters found from
1771
2535
  `data.`
1772
2536
 
1773
2537
  :param data: Data from the previous item in a `Pipeline`.
1774
2538
  :type data: list[PipelineData]
1775
- :param nxname: Name for the returned `NXdata` object. Defaults
1776
- to `'data'`.
2539
+ :param nxname: Name for the returned NeXus NXdata object.
2540
+ Defaults to `'data'`.
1777
2541
  :type nxname: str, optional
1778
2542
  :param coords: List of dictionaries defining the coordinates
1779
2543
  of the dataset. Each dictionary must have the keys
@@ -1783,7 +2547,7 @@ class SetupNXdataProcessor(Processor):
1783
2547
  numbers), respectively. A third item in the dictionary is
1784
2548
  optional, but highly recommended: `'attrs'` may provide a
1785
2549
  dictionary of attributes to attach to the coordinate axis
1786
- that assist in in interpreting the returned `NXdata`
2550
+ that assist in interpreting the returned NeXus NXdata
1787
2551
  representation of the dataset. It is strongly recommended
1788
2552
  to provide the units of the values along an axis in the
1789
2553
  `attrs` dictionary. Defaults to [].
@@ -1796,19 +2560,19 @@ class SetupNXdataProcessor(Processor):
1796
2560
  integers), respectively. A third item in the dictionary is
1797
2561
  optional, but highly recommended: `'attrs'` may provide a
1798
2562
  dictionary of attributes to attach to the signal fieldthat
1799
- assist in in interpreting the returned `NXdata`
2563
+ assist in interpreting the returned NeXus NXdata
1800
2564
  representation of the dataset. It is strongly recommended
1801
2565
  to provide the units of the signal's values `attrs`
1802
2566
  dictionary. Defaults to [].
1803
2567
  :type signals: list[dict[str, object]], optional
1804
2568
  :param attrs: An arbitrary dictionary of attributes to assign
1805
- to the returned `NXdata`. Defaults to {}.
2569
+ to the returned NeXus NXdata object. Defaults to {}.
1806
2570
  :type attrs: dict[str, object], optional
1807
2571
  :param data_points: A list of data points to partially (or
1808
- even entirely) fil out the "empty" signal `NXfield`s
1809
- before returning the `NXdata`. Defaults to [].
2572
+ even entirely) fill out the "empty" signal NeXus NXfields
2573
+ before returning the NeXus NXdata object. Defaults to [].
1810
2574
  :type data_points: list[dict[str, object]], optional
1811
- :param extra_nxfields: List "extra" NXfield`s to include that
2575
+ :param extra_nxfields: List of "extra" NeXus NXfields to include that
1812
2576
  can be described neither as a signal of the dataset, not a
1813
2577
  dedicated coordinate. This paramteter is good for
1814
2578
  including "alternate" values for one of the coordinate
@@ -1822,8 +2586,8 @@ class SetupNXdataProcessor(Processor):
1822
2586
  existing data point. Allowed values for `duplicates` are:
1823
2587
  `'overwrite'` and `'block'`. Defaults to `'overwrite'`.
1824
2588
  :type duplicates: Literal['overwrite', 'block']
1825
- :returns: An `NXdata` that represents the structured dataset
1826
- as specified.
2589
+ :returns: A NeXus NXdata object that represents the structured
2590
+ dataset as specified.
1827
2591
  :rtype: nexusformat.nexus.NXdata
1828
2592
  """
1829
2593
  self.nxname = nxname
@@ -1892,6 +2656,7 @@ class SetupNXdataProcessor(Processor):
1892
2656
  :returns: Validity of `data_point`, message
1893
2657
  :rtype: bool, str
1894
2658
  """
2659
+ # Third party modules
1895
2660
  import numpy as np
1896
2661
 
1897
2662
  valid = True
@@ -1923,16 +2688,17 @@ class SetupNXdataProcessor(Processor):
1923
2688
  return valid, msg
1924
2689
 
1925
2690
  def init_nxdata(self):
1926
- """Initialize an empty `NXdata` representing this dataset to
1927
- `self.nxdata`; values for axes' `NXfield`s are filled out,
2691
+ """Initialize an empty NeXus NXdata representing this dataset
2692
+ to `self.nxdata`; values for axes' `NXfield`s are filled out,
1928
2693
  values for signals' `NXfield`s are empty an can be filled out
1929
- later. Save the empty `NXdata` to the NeXus file. Initialise
1930
- `self.nxfile` and `self.nxdata_path` with the `NXFile` object
1931
- and actual nxpath used to save and make updates to the
1932
- `NXdata`.
2694
+ later. Save the empty NeXus NXdata object to the NeXus file.
2695
+ Initialise `self.nxfile` and `self.nxdata_path` with the
2696
+ `NXFile` object and actual nxpath used to save and make updates
2697
+ to the Nexus NXdata object.
1933
2698
 
1934
2699
  :returns: None
1935
2700
  """
2701
+ # Third party modules
1936
2702
  from nexusformat.nexus import NXdata, NXfield
1937
2703
  import numpy as np
1938
2704
 
@@ -1980,14 +2746,14 @@ class SetupNXdataProcessor(Processor):
1980
2746
 
1981
2747
 
1982
2748
  class UpdateNXdataProcessor(Processor):
1983
- """Processor to fill in part(s) of an `NXdata` representing a
2749
+ """Processor to fill in part(s) of a NeXus NXdata representing a
1984
2750
  structured dataset that's already been written to a NeXus file.
1985
2751
 
1986
- This Processor is most useful as an "update" step for an `NXdata`
1987
- created by `common.SetupNXdataProcessor`, and is easitest to use
1988
- in a `Pipeline` immediately after another `PipelineItem` designed
1989
- specifically to return a value that can be used as input to this
1990
- `Processor`.
2752
+ This Processor is most useful as an "update" step for a NeXus
2753
+ NXdata object created by `common.SetupNXdataProcessor`, and is
2754
+ easiest to use in a `Pipeline` immediately after another
2755
+ `PipelineItem` designed specifically to return a value that can
2756
+ be used as input to this `Processor`.
1991
2757
 
1992
2758
  Example of use in a `Pipeline` configuration:
1993
2759
  ```yaml
@@ -2006,7 +2772,7 @@ class UpdateNXdataProcessor(Processor):
2006
2772
  def process(self, data, nxfilename, nxdata_path, data_points=[],
2007
2773
  allow_approximate_coordinates=True):
2008
2774
  """Write new data points to the signal fields of an existing
2009
- `NXdata` object representing a structued dataset in a NeXus
2775
+ NeXus NXdata object representing a structured dataset in a NeXus
2010
2776
  file. Return the list of data points used to update the
2011
2777
  dataset.
2012
2778
 
@@ -2016,9 +2782,10 @@ class UpdateNXdataProcessor(Processor):
2016
2782
  argument.
2017
2783
  :type data: list[PipelineData]
2018
2784
  :param nxfilename: Name of the NeXus file containing the
2019
- `NXdata` to update.
2785
+ NeXus NXdata object to update.
2020
2786
  :type nxfilename: str
2021
- :param nxdata_path: The path to the `NXdata` to update in the file.
2787
+ :param nxdata_path: The path to the NeXus NXdata object to
2788
+ update in the file.
2022
2789
  :type nxdata_path: str
2023
2790
  :param data_points: List of data points, each one a dictionary
2024
2791
  whose keys are the names of the coordinates and axes, and
@@ -2034,9 +2801,9 @@ class UpdateNXdataProcessor(Processor):
2034
2801
  :returns: Complete list of data points used to update the dataset.
2035
2802
  :rtype: list[dict[str, object]]
2036
2803
  """
2804
+ # Third party modules
2037
2805
  from nexusformat.nexus import NXFile
2038
2806
  import numpy as np
2039
- import os
2040
2807
 
2041
2808
  _data_points = self.unwrap_pipelinedata(data)[0]
2042
2809
  if isinstance(_data_points, list):
@@ -2105,11 +2872,11 @@ class UpdateNXdataProcessor(Processor):
2105
2872
 
2106
2873
 
2107
2874
  class NXdataToDataPointsProcessor(Processor):
2108
- """Transform an `NXdata` object into a list of dictionaries. Each
2109
- dictionary represents a single data point in the coordinate space
2110
- of the dataset. The keys are the names of the signals and axes in
2111
- the dataset, and the values are a single scalar value (in the case
2112
- of axes) or the value of the signal at that point in the
2875
+ """Transform a NeXus NXdata object into a list of dictionaries.
2876
+ Each dictionary represents a single data point in the coordinate
2877
+ space of the dataset. The keys are the names of the signals and
2878
+ axes in the dataset, and the values are a single scalar value (in
2879
+ the case of axes) or the value of the signal at that point in the
2113
2880
  coordinate space of the dataset (in the case of signals -- this
2114
2881
  means that values for signals may be any shape, depending on the
2115
2882
  shape of the signal itself).
@@ -2148,11 +2915,13 @@ class NXdataToDataPointsProcessor(Processor):
2148
2915
  """Return a list of dictionaries representing the coordinate
2149
2916
  and signal values at every point in the dataset provided.
2150
2917
 
2151
- :param data: Input pipeline data containing an `NXdata`.
2918
+ :param data: Input pipeline data containing a NeXus NXdata
2919
+ object.
2152
2920
  :type data: list[PipelineData]
2153
2921
  :returns: List of all data points in the dataset.
2154
2922
  :rtype: list[dict[str,object]]
2155
2923
  """
2924
+ # Third party modules
2156
2925
  import numpy as np
2157
2926
 
2158
2927
  nxdata = self.unwrap_pipelinedata(data)[0]
@@ -2230,3 +2999,30 @@ if __name__ == '__main__':
2230
2999
  from CHAP.processor import main
2231
3000
 
2232
3001
  main()
3002
+
3003
+
3004
+ class SumProcessor(Processor):
3005
+ """A Processor to sum the data in a NeXus NXobject, given a set of
3006
+ nxpaths
3007
+ """
3008
+ def process(self, data):
3009
+ """Return the summed data array
3010
+
3011
+ :param data: Input data: a NeXus NXobject together with the nxpaths to the data to sum.
3012
+ :type data: list[PipelineData]
3013
+ :return: The summed data.
3014
+ :rtype: numpy.ndarray
3015
+ """
3016
+ from copy import deepcopy
3017
+
3018
+ nxentry, nxpaths = self.unwrap_pipelinedata(data)[-1]
3019
+ if len(nxpaths) == 1:
3020
+ return nxentry[nxpaths[0]]
3021
+ sum_data = deepcopy(nxentry[nxpaths[0]])
3022
+ for nxpath in nxpaths[1:]:
3023
+ nxdata = nxentry[nxpath]
3024
+ for entry in nxdata.entries:
3025
+ sum_data[entry] += nxdata[entry]
3026
+
3027
+ return sum_data
3028
+
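The most substantial change in this release is the reworked MapProcessor.process() signature (new config, detector_names, num_proc, comm and inputdir parameters) with optional MPI-parallel reading of SPEC scans. The sketch below is illustrative only and is not part of the package: 'map_config.yaml' is a hypothetical file holding a map configuration that must satisfy CHAP.common.models.map.MapConfig, and num_proc greater than 1 requires a working mpi4py installation.

    # Illustrative sketch, not from the package diff above. 'map_config.yaml'
    # is a hypothetical file with a valid CHAP.common.models.map.MapConfig
    # document (title, station, experiment_type, sample, spec_scans, ...).
    import yaml

    from CHAP.common.processor import MapProcessor

    with open('map_config.yaml') as f:
        map_config = yaml.safe_load(f)

    nxentry = MapProcessor().process(
        data=[],              # no upstream PipelineData, so fall back to config=
        config=map_config,
        detector_names=None,  # accepted for EDD maps; names/prefixes otherwise
        num_proc=4,           # > 1 spawns MPI workers (requires mpi4py)
    )

In a pipeline file the same call is expressed as a common.MapProcessor entry with these keys as keyword arguments, which is how the new code builds the per-worker sub-pipelines it spawns.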