essreduce-25.1.1-py3-none-any.whl → essreduce-25.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ess/reduce/__init__.py CHANGED
@@ -1,10 +1,10 @@
  # SPDX-License-Identifier: BSD-3-Clause
- # Copyright (c) 2024 Scipp contributors (https://github.com/scipp)
- # ruff: noqa: E402, F401
+ # Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
+ # ruff: noqa: E402, F401, I

  import importlib.metadata

- from . import nexus, uncertainty, time_of_flight
+ from . import nexus, time_of_flight, uncertainty

  try:
      __version__ = importlib.metadata.version("essreduce")
@@ -13,4 +13,4 @@ except importlib.metadata.PackageNotFoundError:

  del importlib

- __all__ = ["nexus", "uncertainty", "time_of_flight"]
+ __all__ = ["nexus", "time_of_flight", "uncertainty"]
ess/reduce/live/raw.py CHANGED
@@ -19,6 +19,8 @@ options:
  flatten dimensions of the data.
  """

+ from __future__ import annotations
+
  from collections.abc import Callable, Sequence
  from dataclasses import dataclass, field
  from math import ceil
@@ -38,6 +40,8 @@ from ess.reduce.nexus.types import (
  )
  from ess.reduce.nexus.workflow import GenericNeXusWorkflow

+ from . import roi
+
  CalibratedPositionWithNoisyReplicas = NewType(
      'CalibratedPositionWithNoisyReplicas', sc.Variable
  )
@@ -73,10 +77,14 @@ class Histogrammer:
          self._coords = coords
          self._edges = edges

+     @property
+     def replicas(self) -> int:
+         return self._replicas
+
      @staticmethod
      def from_coords(
          coords: ProjectedCoords, resolution: DetectorViewResolution
-     ) -> 'Histogrammer':
+     ) -> Histogrammer:
          """
          Create a histogrammer from coordinates and resolution.

@@ -102,7 +110,34 @@ class Histogrammer:
      def __call__(self, da: sc.DataArray) -> sc.DataArray:
          self._current += 1
          coords = self._coords[self._replica_dim, self._current % self._replicas]
-         return sc.DataArray(da.data, coords=coords).hist(self._edges)
+         return self._hist(da.data, coords=coords)
+
+     def _hist(self, data: sc.Variable, *, coords: sc.DataGroup) -> sc.DataArray:
+         # If input is multi-dim we need to flatten since those dims cannot be preserved.
+         return sc.DataArray(data, coords=coords).flatten(to='_').hist(self._edges)
+
+     def input_indices(self) -> sc.DataArray:
+         """Return an array with input indices corresponding to each histogram bin."""
+         dim = 'detector_number'
+         # For some projections one of the coords is a scalar, convert to flat table.
+         coords = self._coords.broadcast(sizes=self._coords.sizes).flatten(to=dim).copy()
+         ndet = sc.index(coords.sizes[dim] // self._replicas)
+         da = sc.DataArray(
+             sc.arange(dim, coords.sizes[dim], dtype='int64', unit=None) % ndet,
+             coords=coords,
+         )
+         return sc.DataArray(da.bin(self._edges).bins.data, coords=self._edges)
+
+     def apply_full(self, var: sc.Variable) -> sc.DataArray:
+         """
+         Apply the histogrammer to a variable using all replicas.
+
+         This is used for one-off operations where the full data is needed, e.g., for
+         transforming pixel weights. Compare to :py:meth:`__call__`, which applies the
+         histogrammer to a single replica for efficiency.
+         """
+         replicated = sc.concat([var] * self.replicas, dim=self._replica_dim)
+         return self._hist(replicated, coords=self._coords) / self.replicas


  @dataclass
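The new `_hist`/`apply_full` pair makes the replica handling explicit: `__call__` histograms a single noise replica of the projected coordinates per chunk, while `apply_full` histograms all replicas at once and divides by the replica count. A minimal standalone sketch of that idea (plain scipp with made-up coordinates and edges, not the package's own classes):

```python
import scipp as sc

# Two noise replicas of a projected coordinate for four pixels (values made up).
x = sc.array(
    dims=['replica', 'pixel'],
    values=[[0.1, 0.4, 0.6, 0.9], [0.12, 0.38, 0.61, 0.88]],
    unit='m',
)
counts = sc.array(dims=['pixel'], values=[1.0, 2.0, 3.0, 4.0], unit='counts')
edges = sc.linspace('x', 0.0, 1.0, num=5, unit='m')

# Like Histogrammer.__call__: histogram one replica (cheap, done per chunk).
single = sc.DataArray(counts, coords={'x': x['replica', 0]}).hist(x=edges)

# Like Histogrammer.apply_full: histogram all replicas, normalize by their count.
replicated = sc.DataArray(sc.concat([counts, counts], dim='replica'), coords={'x': x})
full = replicated.flatten(to='_').hist(x=edges) / 2
```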
@@ -149,20 +184,27 @@ class Detector:
              sc.zeros(sizes=detector_number.sizes, unit='counts', dtype='int32'),
              coords={'detector_number': detector_number},
          )
+         self._detector_number = detector_number
          self._flat_detector_number = detector_number.flatten(to='event_id')
          self._start = int(self._flat_detector_number[0].value)
          self._stop = int(self._flat_detector_number[-1].value)
          self._size = int(self._flat_detector_number.size)
-         if not sc.issorted(self._flat_detector_number, dim='event_id'):
-             raise ValueError("Detector numbers must be sorted.")
-         if self._stop - self._start + 1 != self._size:
-             raise ValueError("Detector numbers must be consecutive.")
+         self._sorted = sc.issorted(self._flat_detector_number, dim='event_id')
+         self._consecutive = self._stop - self._start + 1 == self._size
+
+     @property
+     def detector_number(self) -> sc.Variable:
+         return self._detector_number

      @property
      def data(self) -> sc.DataArray:
          return self._data

      def bincount(self, data: Sequence[int]) -> sc.DataArray:
+         if not self._sorted:
+             raise ValueError("Detector numbers must be sorted to use `bincount`.")
+         if not self._consecutive:
+             raise ValueError("Detector numbers must be consecutive to use `bincount`.")
          offset = np.asarray(data, dtype=np.int32) - self._start
          # Ignore events with detector numbers outside the range of the detector. This
          # should not happen in valid files but for now it is useful until we are sure
@@ -213,10 +255,27 @@ class RollingDetectorView(Detector):
          self._projection = projection
          self._window = window
          self._current = 0
-         self._history: sc.DataArray | None = None
-         self._cache: sc.DataArray | None = None
+         self._history: sc.DataArray
+         self._cache: sc.DataArray
+         self._cumulative: sc.DataArray
+         self.clear_counts()
+
+     @property
+     def max_window(self) -> int:
+         return self._window

-         counts = self.bincount([])
+     @property
+     def cumulative(self) -> sc.DataArray:
+         return self._cumulative
+
+     def clear_counts(self) -> None:
+         """
+         Clear counts.
+
+         Overrides Detector.clear_counts to also clear the sliding-window history and
+         cache.
+         """
+         counts = sc.zeros_like(self.data)
          if self._projection is not None:
              counts = self._projection(counts)
          self._history = (
@@ -225,13 +284,71 @@ class RollingDetectorView(Detector):
              .copy()
          )
          self._cache = self._history.sum('window')
+         self._cumulative = sc.zeros_like(self._cache)
+
+     def make_roi_filter(self) -> roi.ROIFilter:
+         """Return a ROI filter operating via the projection plane of the view."""
+         norm = 1.0
+         if isinstance(self._projection, Histogrammer):
+             indices = self._projection.input_indices()
+             norm = self._projection.replicas
+         else:
+             indices = sc.ones(sizes=self.data.sizes, dtype='int32', unit=None)
+             indices = sc.cumsum(indices, mode='exclusive')
+             if isinstance(self._projection, LogicalView):
+                 indices = self._projection(indices)
+         return roi.ROIFilter(indices=indices, norm=norm)
+
+     def transform_weights(
+         self,
+         weights: sc.Variable | sc.DataArray | None = None,
+         *,
+         threshold: float = 0.1,
+     ) -> sc.DataArray:
+         """
+         Transform raw pixel weights to the projection plane.
+
+         Parameters
+         ----------
+         weights:
+             Raw pixel weights to transform. If None, default weights of 1 are used.
+         threshold:
+             Threshold for identifying bins with a low weight. If the weight is below
+             the threshold times the median weight, the bin is marked as invalid. This
+             is relevant to avoid issues with color scales in plots, where noise in
+             bins with low weight may dominate the color scale if auto-scaling is used.
+         """
+         if weights is None:
+             weights = sc.ones(
+                 sizes=self.detector_number.sizes, dtype='float32', unit=''
+             )
+         else:
+             if weights.sizes != self.detector_number.sizes:
+                 raise sc.DimensionError(
+                     f'Invalid {weights.sizes=} for {self.detector_number.sizes=}.'
+                 )
+             if isinstance(weights, sc.DataArray):
+                 if (det_num := weights.coords.get('detector_number')) is not None:
+                     if not sc.identical(det_num, self.detector_number):
+                         raise sc.CoordError("Mismatching detector numbers in weights.")
+                 weights = weights.data
+         if isinstance(self._projection, Histogrammer):
+             xs = self._projection.apply_full(weights)  # Use all replicas
+         elif self._projection is not None:
+             xs = self._projection(weights)
+         else:
+             xs = weights.copy()
+         nonempty = xs.values[xs.values > 0]
+         mask = xs.values < threshold * np.median(nonempty)
+         xs.values[mask] = np.nan
+         return xs if isinstance(xs, sc.DataArray) else sc.DataArray(xs)

      @staticmethod
      def from_detector_and_histogrammer(
          detector: CalibratedDetector[SampleRun],
          window: RollingDetectorViewWindow,
          projection: Histogrammer,
-     ) -> 'RollingDetectorView':
+     ) -> RollingDetectorView:
          """Helper for constructing via a Sciline workflow."""
          return RollingDetectorView(
              detector_number=detector.coords['detector_number'],
@@ -244,7 +361,7 @@ class RollingDetectorView(Detector):
          detector: CalibratedDetector[SampleRun],
          window: RollingDetectorViewWindow,
          projection: LogicalView,
-     ) -> 'RollingDetectorView':
+     ) -> RollingDetectorView:
          """Helper for constructing via a Sciline workflow."""
          return RollingDetectorView(
              detector_number=detector.coords['detector_number'],
@@ -261,7 +378,7 @@ class RollingDetectorView(Detector):
          projection: Literal['xy_plane', 'cylinder_mantle_z'] | LogicalView,
          resolution: dict[str, int] | None = None,
          pixel_noise: Literal['cylindrical'] | sc.Variable | None = None,
-     ) -> 'RollingDetectorView':
+     ) -> RollingDetectorView:
          """
          Create a rolling detector view from a NeXus file using GenericNeXusWorkflow.

@@ -293,7 +410,7 @@ class RollingDetectorView(Detector):
              pixel_noise = sc.scalar(0.0, unit='m')
              noise_replica_count = 0
          else:
-             noise_replica_count = 4
+             noise_replica_count = 16
          wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[])
          wf[RollingDetectorViewWindow] = window
          if isinstance(projection, LogicalView):
@@ -329,7 +446,20 @@ class RollingDetectorView(Detector):
          wf[NeXusDetectorName] = detector_name
          return wf.compute(RollingDetectorView)

-     def get(self, window: int | None = None) -> sc.DataArray:
+     def get(self, *, window: int | None = None) -> sc.DataArray:
+         """
+         Get the sum of counts over a window of the most recent counts.
+
+         Parameters
+         ----------
+         window:
+             Size of the window to use. If None, the full history is used.
+
+         Returns
+         -------
+         :
+             Sum of counts over the window.
+         """
          if window is not None and not 0 <= window <= self._window:
              raise ValueError("Window size must be less than the history size.")
          if window is None or window == self._window:
@@ -343,13 +473,38 @@ class RollingDetectorView(Detector):
              data += self._history['window', 0 : self._current].sum('window')
          return data

+     def add_events(self, data: sc.DataArray) -> None:
+         """
+         Add counts in the form of events grouped by pixel ID.
+
+         Parameters
+         ----------
+         data:
+             Events grouped by pixel ID, given by binned data.
+         """
+         counts = data.bins.size().to(dtype='int32', copy=False)
+         counts.unit = 'counts'
+         self._add_counts(counts)
+
      def add_counts(self, data: Sequence[int]) -> None:
+         """
+         Add counts in the form of a sequence of pixel IDs.
+
+         Parameters
+         ----------
+         data:
+             List of pixel IDs.
+         """
          counts = self.bincount(data)
+         self._add_counts(counts)
+
+     def _add_counts(self, counts: sc.Variable) -> None:
          if self._projection is not None:
              counts = self._projection(counts)
          self._cache -= self._history['window', self._current]
          self._history['window', self._current] = counts
          self._cache += counts
+         self._cumulative += counts
          self._current = (self._current + 1) % self._window


@@ -455,9 +610,12 @@ def position_noise_for_cylindrical_pixel(


  def gaussian_position_noise(sigma: PositionNoiseSigma) -> PositionNoise:
+     sigma = sigma.to(unit='m', copy=False)
      size = _noise_size
      position = sc.empty(sizes={'position': size}, unit='m', dtype=sc.DType.vector3)
-     position.values = np.random.default_rng().normal(0, sigma.value, size=(size, 3))
+     position.values = np.random.default_rng(seed=1234).normal(
+         0, sigma.value, size=(size, 3)
+     )
      return PositionNoise(position)

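Taken together, the additions let a live-data consumer push chunks and read windowed, cumulative, and ROI-filtered images from the same view. A rough usage sketch (hypothetical detector numbers; assumes the constructor accepts `detector_number` and `window` as used by the workflow helpers above):

```python
import scipp as sc
from ess.reduce.live.raw import RollingDetectorView

# Hypothetical 2x2 detector with sorted, consecutive detector numbers.
detector_number = sc.array(dims=['y', 'x'], values=[[1, 2], [3, 4]], unit=None)
view = RollingDetectorView(detector_number=detector_number, window=10)

view.add_counts([1, 2, 2, 4])        # pixel IDs from one chunk
view.add_counts([3, 3])              # pixel IDs from the next chunk
latest = view.get(window=1)          # note: `window` is now keyword-only
total = view.cumulative              # everything since the last clear_counts()
roi_filter = view.make_roi_filter()  # ROI selection via the (identity) projection
```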
ess/reduce/live/roi.py ADDED
@@ -0,0 +1,115 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
+ """Utilities for region of interest (ROI) selection."""
+
+ from __future__ import annotations
+
+ from typing import TypeVar
+
+ import numpy as np
+ import scipp as sc
+
+
+ def select_indices_in_intervals(
+     intervals: sc.DataGroup[tuple[int, int] | tuple[sc.Variable, sc.Variable]],
+     indices: sc.Variable | sc.DataArray,
+ ) -> sc.Variable:
+     """
+     Return the subset of indices that fall within the intervals.
+
+     Parameters
+     ----------
+     intervals:
+         DataGroup with dimension names as keys and tuples of low and high values. This
+         can be used to define a band or a rectangle to select. When low and high are
+         scipp.Variable, the selection is done using label-based indexing. In this case
+         `indices` must be a DataArray with corresponding coordinates.
+     indices:
+         Variable or DataArray with indices to select from. For binned data the
+         selected indices are returned concatenated into a dense array.
+     """
+     out_dim = 'index'
+     for dim, bounds in intervals.items():
+         low, high = sorted(bounds)
+         indices = indices[dim, low:high]
+     indices = indices.flatten(to=out_dim)
+     if indices.bins is None:
+         return indices
+     indices = indices.bins.concat().value
+     return indices.rename_dims({indices.dim: out_dim})
+
+
+ T = TypeVar('T', sc.DataArray, sc.Variable)
+
+
+ def apply_selection(
+     data: T, *, selection: sc.Variable, norm: float = 1.0
+ ) -> tuple[T, sc.Variable]:
+     """
+     Apply selection to data.
+
+     Parameters
+     ----------
+     data:
+         Data to filter.
+     selection:
+         Variable with indices to select.
+     norm:
+         Normalization factor to apply to the selected data. This is used for cases
+         where indices may be selected multiple times.
+
+     Returns
+     -------
+     :
+         Filtered data and scale factor.
+     """
+     indices, counts = np.unique(selection.values, return_counts=True)
+     if data.ndim != 1:
+         data = data.flatten(to='detector_number')
+     scale = sc.array(dims=[data.dim], values=counts) / norm
+     return data[indices], scale
+
+
+ class ROIFilter:
+     """Filter for selecting a region of interest (ROI)."""
+
+     def __init__(self, indices: sc.Variable | sc.DataArray, norm: float = 1.0) -> None:
+         """
+         Create a new ROI filter.
+
+         Parameters
+         ----------
+         indices:
+             Variable with indices to filter. The indices facilitate selecting a 2-D
+             ROI in a projection of a 3-D dataset. Typically the indices are given by a
+             2-D array. Each element in the array may correspond to a single index
+             (when there is no projection) or a list of indices that were projected
+             into an output pixel.
+         """
+         self._indices = indices
+         self._selection = sc.array(dims=['index'], values=[])
+         self._norm = norm
+
+     def set_roi_from_intervals(self, intervals: sc.DataGroup) -> None:
+         """Set the ROI from (typically 1 or 2) intervals."""
+         self._selection = select_indices_in_intervals(intervals, self._indices)
+
+     def apply(self, data: T) -> tuple[T, sc.Variable]:
+         """
+         Apply the ROI filter to data.
+
+         The returned scale factor can be used to handle filtering via a projection,
+         taking into account that fractions of a source data point may contribute to a
+         data point in the projection.
+
+         Parameters
+         ----------
+         data:
+             Data to filter.
+
+         Returns
+         -------
+         :
+             Filtered data and scale factor.
+         """
+         return apply_selection(data, selection=self._selection, norm=self._norm)
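A rough sketch of how the new module composes, with a hypothetical 3x3 view where each output pixel maps to exactly one input index (no projection, so all scale factors are 1):

```python
import scipp as sc
from ess.reduce.live import roi

indices = sc.arange('_', 9, unit=None).fold(dim='_', sizes={'y': 3, 'x': 3})
f = roi.ROIFilter(indices=indices)
# Rectangle in the projection plane: rows 0..1, columns 1..2 (positional bounds).
f.set_roi_from_intervals(sc.DataGroup({'y': (0, 2), 'x': (1, 3)}))

data = sc.DataArray(sc.ones(sizes={'y': 3, 'x': 3}, unit='counts'))
selected, scale = f.apply(data)  # four selected pixels, each with scale 1
```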
ess/reduce/nexus/__init__.py CHANGED
@@ -13,23 +13,23 @@ The submodule :mod:`types` defines all domain types.
  """

  from . import types
- from .workflow import GenericNeXusWorkflow
  from ._nexus_loader import (
-     load_data,
-     group_event_data,
-     load_component,
-     load_all_components,
      compute_component_position,
      extract_signal_data_array,
+     group_event_data,
+     load_all_components,
+     load_component,
+     load_data,
  )
+ from .workflow import GenericNeXusWorkflow

  __all__ = [
-     'types',
+     'GenericNeXusWorkflow',
+     'compute_component_position',
+     'extract_signal_data_array',
      'group_event_data',
      'load_all_components',
-     'load_data',
      'load_component',
-     'compute_component_position',
-     'extract_signal_data_array',
-     'GenericNeXusWorkflow',
+     'load_data',
+     'types',
  ]
ess/reduce/nexus/_nexus_loader.py CHANGED
@@ -30,6 +30,14 @@ class NoNewDefinitionsType: ...
  NoNewDefinitions = NoNewDefinitionsType()


+ class NoLockingIfNeededType:
+     def __repr__(self) -> str:
+         return "NoLockingIfNeeded"
+
+
+ NoLockingIfNeeded = NoLockingIfNeededType()
+
+
  def load_component(
      location: NeXusLocationSpec,
      *,
@@ -93,7 +101,7 @@ def _open_nexus_file(
      file_path: FilePath | NeXusFile | NeXusGroup,
      definitions: Mapping | None | NoNewDefinitionsType = NoNewDefinitions,
      *,
-     locking: bool | None = None,
+     locking: bool | str | None | NoLockingIfNeededType = NoLockingIfNeeded,
  ) -> AbstractContextManager[snx.Group]:
      if isinstance(file_path, getattr(NeXusGroup, '__supertype__', type(None))):
          if (
@@ -106,32 +114,63 @@ def _open_nexus_file(
              return nullcontext(file_path)

      try:
-         return _open_nexus_file_from_path(file_path, definitions, locking=locking)
+         return _open_nexus_file_from_path(
+             file_path,
+             definitions,
+             locking=None if locking is NoLockingIfNeeded else locking,
+         )
      except OSError as err:
-         if err.errno == errno.EROFS:
-             # Failed to open because the filesystem is read-only.
-             # (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
-             # this error code is universal.)
-             #
-             # On ESS machines, this happens for network filesystems of data that was
-             # ingested into SciCat, including raw data.
-             # In this case, it is safe to open the file without locking because:
-             # - For raw files, they were written on a separate machine and are synced
-             #   with the one running reduction software. So there cannot be concurrent
-             #   write and read accesses to the same file on the same filesystem.
-             #   The ground truth on the filesystem used by the file writer is protected
-             #   and cannot be corrupted by our reader.
-             # - For processed data, the file was copied to the read-only filesystem.
-             #   So the copy we are opening was not written by HDF5 directly and thus
-             #   locking has no effect anyway.
-             #
-             # When running on user machines, disabling locking can potentially corrupt
-             # files. But the risk is minimal because very few users will have read-only
-             # filesystems and do concurrent reads and writes.
+         if _attempt_to_open_without_locking(err, locking):
              return _open_nexus_file_from_path(file_path, definitions, locking=False)
          raise


+ # On ESS machines, some network filesystems are read-only, e.g., for data that was
+ # ingested into SciCat, including raw data.
+ # HDF5 fails to open such files because it cannot lock them.
+ # In this case, it is safe(*) to open the file without locking because:
+ #
+ # - Raw files were written on a separate machine and are synced with the one
+ #   running reduction software. So there cannot be concurrent write and read
+ #   accesses to the same file on the same filesystem. The ground truth on the
+ #   filesystem used by the file writer is protected and cannot be corrupted by
+ #   our reader.
+ # - For processed data, the file was copied to the read-only filesystem. So the
+ #   copy we are opening was not written by HDF5 directly and thus locking has no
+ #   effect anyway.
+ #
+ # When running on user machines, disabling locking can potentially corrupt files.
+ # But the risk is minimal because very few users will have read-only filesystems
+ # and do concurrent reads and writes.
+ #
+ # (*) Files on the read-only filesystem may still change while a file is open for
+ # reading if they get updated from the original file, e.g., when reading a file
+ # that is currently being written to. This can crash the reader. But our code is
+ # not set up to deal with changing files anyway, so the added risk is not
+ # significant.
+ #
+ # See https://github.com/HDFGroup/hdf5/blob/e9ab45f0f4d7240937d5f88055f6c217da80f0d4/doxygen/dox/file-locking.dox
+ # for details on HDF5 file locking.
+ def _attempt_to_open_without_locking(
+     err: OSError, locking: bool | str | None | NoLockingIfNeededType
+ ) -> bool:
+     if locking is not NoLockingIfNeeded:
+         return False  # Respect the user's choice.
+     if err.errno == errno.EROFS:
+         # Read-only filesystem.
+         # (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
+         # this error code is universal.)
+         return True
+
+     # HDF5 tracks file locking flags internally within a single process.
+     # If the same file is opened multiple times, we can get a flag mismatch.
+     # We can try opening without locking, maybe this matches the original flags.
+     if "file locking flag values don't match" in err.args[0]:
+         return True
+     if "file locking 'ignore disabled locks' flag values don't match" in err.args[0]:
+         return True
+     return False


  def _open_nexus_file_from_path(
      file_path: FilePath,
      definitions: Mapping | None | NoNewDefinitionsType,
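For review purposes, the behavioral difference introduced by the sentinel, sketched against the private helper (hypothetical path; an explicit `locking` value is passed through unchanged):

```python
from ess.reduce.nexus._nexus_loader import NoLockingIfNeeded, _open_nexus_file

# Default (NoLockingIfNeeded): opens with locking=None; on EROFS or an HDF5
# locking-flag mismatch the open is retried once with locking=False.
with _open_nexus_file('/data/raw/run_12345.nxs') as group:  # hypothetical path
    ...

# Explicit value (True/False/None or a string): respected as-is, no fallback retry.
with _open_nexus_file('/data/raw/run_12345.nxs', locking=False) as group:
    ...
```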
ess/reduce/streaming.py CHANGED
@@ -4,6 +4,7 @@

  from abc import ABC, abstractmethod
  from collections.abc import Callable
+ from copy import deepcopy
  from typing import Any, Generic, TypeVar

  import networkx as nx
@@ -29,6 +30,8 @@ def maybe_hist(value: T) -> T:
      :
          Histogram.
      """
+     if not isinstance(value, sc.Variable | sc.DataArray):
+         return value
      return value if value.bins is None else value.hist()


@@ -90,11 +93,11 @@ class EternalAccumulator(Accumulator[T]):

      @property
      def value(self) -> T:
-         return self._value.copy()
+         return deepcopy(self._value)

      def _do_push(self, value: T) -> None:
          if self._value is None:
-             self._value = value.copy()
+             self._value = deepcopy(value)
          else:
              self._value += value

@@ -146,6 +149,7 @@ class StreamProcessor:
          target_keys: tuple[sciline.typing.Key, ...],
          accumulators: dict[sciline.typing.Key, Accumulator | Callable[..., Accumulator]]
          | tuple[sciline.typing.Key, ...],
+         allow_bypass: bool = False,
      ) -> None:
          """
          Create a stream processor.
@@ -163,6 +167,12 @@ class StreamProcessor:
              passed, :py:class:`EternalAccumulator` is used for all keys. Otherwise, a
              dict mapping keys to accumulator instances can be passed. If a dict value is
              a callable, base_workflow.bind_and_call(value) is used to make an instance.
+         allow_bypass:
+             If True, allow bypassing accumulators for keys that are not in the
+             accumulators dict. This is useful for dynamic keys that are not
+             "terminated" in any accumulator. USE WITH CARE! This will lead to
+             incorrect results unless the values for these keys are valid for all
+             chunks contained in the final accumulators at the point where
+             :py:meth:`finalize` is called.
          """
          workflow = sciline.Pipeline()
          for key in target_keys:
@@ -201,19 +211,59 @@ class StreamProcessor:
              for key, value in self._accumulators.items()
          }
          self._target_keys = target_keys
+         self._allow_bypass = allow_bypass

      def add_chunk(
          self, chunks: dict[sciline.typing.Key, Any]
      ) -> dict[sciline.typing.Key, Any]:
+         """
+         Legacy interface for accumulating values from chunks and finalizing the result.
+
+         It is recommended to use :py:meth:`accumulate` and :py:meth:`finalize` instead.
+
+         Parameters
+         ----------
+         chunks:
+             Chunks to be processed.
+
+         Returns
+         -------
+         :
+             Finalized result.
+         """
+         self.accumulate(chunks)
+         return self.finalize()
+
+     def accumulate(self, chunks: dict[sciline.typing.Key, Any]) -> None:
+         """
+         Accumulate values from chunks without finalizing the result.
+
+         Parameters
+         ----------
+         chunks:
+             Chunks to be processed.
+         """
          for key, value in chunks.items():
              self._process_chunk_workflow[key] = value
              # There can be dynamic keys that do not "terminate" in any accumulator. In
              # that case, we need to make sure they can be and are used when computing
              # the target keys.
-             self._finalize_workflow[key] = value
+             if self._allow_bypass:
+                 self._finalize_workflow[key] = value
          to_accumulate = self._process_chunk_workflow.compute(self._accumulators)
          for key, processed in to_accumulate.items():
              self._accumulators[key].push(processed)
+
+     def finalize(self) -> dict[sciline.typing.Key, Any]:
+         """
+         Get the final result by computing the target keys based on accumulated values.
+
+         Returns
+         -------
+         :
+             Finalized result.
+         """
+         for key in self._accumulators:
              self._finalize_workflow[key] = self._accumulators[key].value
          return self._finalize_workflow.compute(self._target_keys)
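The `accumulate`/`finalize` split lets callers push many chunks cheaply and compute targets only when needed, with `add_chunk` kept as a thin legacy wrapper. A toy sketch with a made-up one-step workflow (assumes the `base_workflow` and `dynamic_keys` constructor parameters of this class):

```python
from typing import NewType

import sciline
import scipp as sc
from ess.reduce.streaming import StreamProcessor

Chunk = NewType('Chunk', sc.Variable)
Squared = NewType('Squared', sc.Variable)

def square(x: Chunk) -> Squared:
    return Squared(x * x)

proc = StreamProcessor(
    sciline.Pipeline([square]),
    dynamic_keys=(Chunk,),
    target_keys=(Squared,),
    accumulators=(Squared,),  # EternalAccumulator: sums pushed values
)
for v in (1.0, 2.0, 3.0):
    proc.accumulate({Chunk: sc.scalar(v)})  # only pushes into accumulators
result = proc.finalize()  # result[Squared] == sc.scalar(14.0)
```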