essreduce 25.1.1__py3-none-any.whl → 25.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ess/reduce/__init__.py +4 -4
- ess/reduce/live/raw.py +173 -15
- ess/reduce/live/roi.py +115 -0
- ess/reduce/nexus/__init__.py +10 -10
- ess/reduce/nexus/_nexus_loader.py +61 -22
- ess/reduce/streaming.py +53 -3
- ess/reduce/time_of_flight/__init__.py +4 -18
- ess/reduce/time_of_flight/fakes.py +29 -47
- ess/reduce/time_of_flight/simulation.py +6 -3
- ess/reduce/time_of_flight/to_events.py +12 -7
- ess/reduce/time_of_flight/toa_to_tof.py +361 -353
- ess/reduce/time_of_flight/types.py +13 -53
- ess/reduce/widgets/__init__.py +9 -10
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/METADATA +1 -1
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/RECORD +19 -18
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/LICENSE +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/WHEEL +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/entry_points.txt +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.1.dist-info}/top_level.txt +0 -0
ess/reduce/__init__.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# ruff: noqa: E402, F401
|
|
2
|
+
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
|
|
3
|
+
# ruff: noqa: E402, F401, I
|
|
4
4
|
|
|
5
5
|
import importlib.metadata
|
|
6
6
|
|
|
7
|
-
from . import nexus,
|
|
7
|
+
from . import nexus, time_of_flight, uncertainty
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
__version__ = importlib.metadata.version("essreduce")
|
|
@@ -13,4 +13,4 @@ except importlib.metadata.PackageNotFoundError:
|
|
|
13
13
|
|
|
14
14
|
del importlib
|
|
15
15
|
|
|
16
|
-
__all__ = ["nexus", "
|
|
16
|
+
__all__ = ["nexus", "time_of_flight", "uncertainty"]
|
ess/reduce/live/raw.py
CHANGED
|
@@ -19,6 +19,8 @@ options:
|
|
|
19
19
|
flatten dimensions of the data.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
22
24
|
from collections.abc import Callable, Sequence
|
|
23
25
|
from dataclasses import dataclass, field
|
|
24
26
|
from math import ceil
|
|
@@ -38,6 +40,8 @@ from ess.reduce.nexus.types import (
|
|
|
38
40
|
)
|
|
39
41
|
from ess.reduce.nexus.workflow import GenericNeXusWorkflow
|
|
40
42
|
|
|
43
|
+
from . import roi
|
|
44
|
+
|
|
41
45
|
CalibratedPositionWithNoisyReplicas = NewType(
|
|
42
46
|
'CalibratedPositionWithNoisyReplicas', sc.Variable
|
|
43
47
|
)
|
|
@@ -73,10 +77,14 @@ class Histogrammer:
|
|
|
73
77
|
self._coords = coords
|
|
74
78
|
self._edges = edges
|
|
75
79
|
|
|
80
|
+
@property
def replicas(self) -> int:
    """Return the replica count stored on this instance (``self._replicas``)."""
    count = self._replicas
    return count
|
|
83
|
+
|
|
76
84
|
@staticmethod
|
|
77
85
|
def from_coords(
|
|
78
86
|
coords: ProjectedCoords, resolution: DetectorViewResolution
|
|
79
|
-
) ->
|
|
87
|
+
) -> Histogrammer:
|
|
80
88
|
"""
|
|
81
89
|
Create a histogrammer from coordinates and resolution.
|
|
82
90
|
|
|
@@ -102,7 +110,34 @@ class Histogrammer:
|
|
|
102
110
|
def __call__(self, da: sc.DataArray) -> sc.DataArray:
    """
    Histogram the data of ``da`` using one replica of the stored coordinates.

    Every call advances an internal counter; the replica used is the counter
    modulo the number of replicas, so successive calls cycle through the replicas.
    """
    self._current += 1
    replica = self._current % self._replicas
    replica_coords = self._coords[self._replica_dim, replica]
    return self._hist(da.data, coords=replica_coords)
|
|
114
|
+
|
|
115
|
+
def _hist(self, data: sc.Variable, *, coords: sc.DataGroup) -> sc.DataArray:
    """Histogram ``data`` with the given ``coords`` into the edges of this instance."""
    # Dimensions of multi-dimensional input cannot be preserved, so everything is
    # collapsed into a single throwaway dimension before histogramming.
    flat = sc.DataArray(data, coords=coords).flatten(to='_')
    return flat.hist(self._edges)
|
|
118
|
+
|
|
119
|
+
def input_indices(self) -> sc.DataArray:
    """
    Return an array with input indices corresponding to each histogram bin.

    The stored coordinates (including all replicas) are flattened into a table, each
    row is labelled with its index modulo the number of rows per replica, and the
    table is binned into the edges of this instance.
    """
    dim = 'detector_number'
    # Some projections have a scalar coord; broadcasting gives every coord the
    # same shape so that the group can be flattened into a table.
    broadcast = self._coords.broadcast(sizes=self._coords.sizes)
    table_coords = broadcast.flatten(to=dim).copy()
    n_rows = table_coords.sizes[dim]
    ndet = sc.index(n_rows // self._replicas)
    row_index = sc.arange(dim, n_rows, dtype='int64', unit=None) % ndet
    table = sc.DataArray(row_index, coords=table_coords)
    binned = table.bin(self._edges)
    return sc.DataArray(binned.bins.data, coords=self._edges)
|
|
130
|
+
|
|
131
|
+
def apply_full(self, var: sc.Variable) -> sc.DataArray:
    """
    Histogram a variable using every replica at once.

    The variable is repeated once per replica along the replica dimension,
    histogrammed with the full set of coordinates, and the result is divided by the
    number of replicas. Intended for one-off operations that need the full data,
    e.g., transforming pixel weights. In contrast, :py:meth:`__call__` uses only a
    single replica per call for efficiency.
    """
    n_replicas = self.replicas
    stacked = sc.concat([var] * n_replicas, dim=self._replica_dim)
    summed = self._hist(stacked, coords=self._coords)
    return summed / self.replicas
|
|
106
141
|
|
|
107
142
|
|
|
108
143
|
@dataclass
|
|
@@ -149,20 +184,27 @@ class Detector:
|
|
|
149
184
|
sc.zeros(sizes=detector_number.sizes, unit='counts', dtype='int32'),
|
|
150
185
|
coords={'detector_number': detector_number},
|
|
151
186
|
)
|
|
187
|
+
self._detector_number = detector_number
|
|
152
188
|
self._flat_detector_number = detector_number.flatten(to='event_id')
|
|
153
189
|
self._start = int(self._flat_detector_number[0].value)
|
|
154
190
|
self._stop = int(self._flat_detector_number[-1].value)
|
|
155
191
|
self._size = int(self._flat_detector_number.size)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
192
|
+
self._sorted = sc.issorted(self._flat_detector_number, dim='event_id')
|
|
193
|
+
self._consecutive = self._stop - self._start + 1 == self._size
|
|
194
|
+
|
|
195
|
+
@property
def detector_number(self) -> sc.Variable:
    """Return the detector-number variable held in ``self._detector_number``."""
    numbers = self._detector_number
    return numbers
|
|
160
198
|
|
|
161
199
|
@property
def data(self) -> sc.DataArray:
    """Return the data array held in ``self._data``."""
    current = self._data
    return current
|
|
164
202
|
|
|
165
203
|
def bincount(self, data: Sequence[int]) -> sc.DataArray:
|
|
204
|
+
if not self._sorted:
|
|
205
|
+
raise ValueError("Detector numbers must be sorted to use `bincount`.")
|
|
206
|
+
if not self._consecutive:
|
|
207
|
+
raise ValueError("Detector numbers must be consecutive to use `bincount`.")
|
|
166
208
|
offset = np.asarray(data, dtype=np.int32) - self._start
|
|
167
209
|
# Ignore events with detector numbers outside the range of the detector. This
|
|
168
210
|
# should not happen in valid files but for now it is useful until we are sure
|
|
@@ -213,10 +255,27 @@ class RollingDetectorView(Detector):
|
|
|
213
255
|
self._projection = projection
|
|
214
256
|
self._window = window
|
|
215
257
|
self._current = 0
|
|
216
|
-
self._history: sc.DataArray
|
|
217
|
-
self._cache: sc.DataArray
|
|
258
|
+
self._history: sc.DataArray
|
|
259
|
+
self._cache: sc.DataArray
|
|
260
|
+
self._cumulative: sc.DataArray
|
|
261
|
+
self.clear_counts()
|
|
262
|
+
|
|
263
|
+
@property
def max_window(self) -> int:
    """Return the window size this view was created with (``self._window``)."""
    window = self._window
    return window
|
|
218
266
|
|
|
219
|
-
|
|
267
|
+
@property
def cumulative(self) -> sc.DataArray:
    """Return the running total held in ``self._cumulative``."""
    total = self._cumulative
    return total
|
|
270
|
+
|
|
271
|
+
def clear_counts(self) -> None:
|
|
272
|
+
"""
|
|
273
|
+
Clear counts.
|
|
274
|
+
|
|
275
|
+
Overrides Detector.clear_counts, to properly clear sliding window history and
|
|
276
|
+
cache.
|
|
277
|
+
"""
|
|
278
|
+
counts = sc.zeros_like(self.data)
|
|
220
279
|
if self._projection is not None:
|
|
221
280
|
counts = self._projection(counts)
|
|
222
281
|
self._history = (
|
|
@@ -225,13 +284,71 @@ class RollingDetectorView(Detector):
|
|
|
225
284
|
.copy()
|
|
226
285
|
)
|
|
227
286
|
self._cache = self._history.sum('window')
|
|
287
|
+
self._cumulative = sc.zeros_like(self._cache)
|
|
288
|
+
|
|
289
|
+
def make_roi_filter(self) -> roi.ROIFilter:
    """
    Return a ROI filter operating via the projection plane of the view.

    With a :py:class:`Histogrammer` projection the filter uses the histogrammer's
    input indices and is normalized by its replica count. Otherwise a running index
    over the detector data is used (passed through the projection if it is a
    :py:class:`LogicalView`) with a normalization of 1.
    """
    projection = self._projection
    if isinstance(projection, Histogrammer):
        return roi.ROIFilter(
            indices=projection.input_indices(), norm=projection.replicas
        )
    # Exclusive cumulative sum of ones: 0, 1, 2, ...
    ones = sc.ones(sizes=self.data.sizes, dtype='int32', unit=None)
    indices = sc.cumsum(ones, mode='exclusive')
    if isinstance(projection, LogicalView):
        indices = projection(indices)
    return roi.ROIFilter(indices=indices, norm=1.0)
|
|
301
|
+
|
|
302
|
+
def transform_weights(
    self,
    weights: sc.Variable | sc.DataArray | None = None,
    *,
    threshold: float = 0.1,
) -> sc.DataArray:
    """
    Transform raw pixel weights to the projection plane.

    Parameters
    ----------
    weights:
        Raw pixel weights to transform. If None, default weights of 1 are used.
        Must have the same sizes as the detector numbers of the view. If given as
        a data array with a 'detector_number' coordinate, that coordinate must be
        identical to the detector numbers of the view.
    threshold:
        Threshold for identifying bins with a low weight. Bins whose weight is
        below the threshold times the median of all positive weights are set to
        NaN. This avoids issues with color scales in plots, where noise in bins
        with low weight may dominate the color scale if auto-scaling is used.

    Returns
    -------
    :
        Transformed weights, with low-weight bins replaced by NaN.

    Raises
    ------
    scipp.DimensionError
        If the sizes of the weights do not match the detector numbers.
    scipp.CoordError
        If the weights carry mismatching detector numbers.
    """
    expected = self.detector_number
    if weights is None:
        weights = sc.ones(sizes=expected.sizes, dtype='float32', unit='')
    else:
        if weights.sizes != expected.sizes:
            raise sc.DimensionError(
                f'Invalid {weights.sizes=} for {self.detector_number.sizes=}.'
            )
        if isinstance(weights, sc.DataArray):
            det_num = weights.coords.get('detector_number')
            if det_num is not None and not sc.identical(det_num, expected):
                raise sc.CoordError("Mismatching detector numbers in weights.")
            weights = weights.data

    projection = self._projection
    if isinstance(projection, Histogrammer):
        xs = projection.apply_full(weights)  # Use all replicas
    elif projection is None:
        xs = weights.copy()
    else:
        xs = projection(weights)

    positive = xs.values[xs.values > 0]
    cutoff = threshold * np.median(positive)
    too_low = xs.values < cutoff
    xs.values[too_low] = np.nan
    if isinstance(xs, sc.DataArray):
        return xs
    return sc.DataArray(xs)
|
|
228
345
|
|
|
229
346
|
@staticmethod
|
|
230
347
|
def from_detector_and_histogrammer(
|
|
231
348
|
detector: CalibratedDetector[SampleRun],
|
|
232
349
|
window: RollingDetectorViewWindow,
|
|
233
350
|
projection: Histogrammer,
|
|
234
|
-
) ->
|
|
351
|
+
) -> RollingDetectorView:
|
|
235
352
|
"""Helper for constructing via a Sciline workflow."""
|
|
236
353
|
return RollingDetectorView(
|
|
237
354
|
detector_number=detector.coords['detector_number'],
|
|
@@ -244,7 +361,7 @@ class RollingDetectorView(Detector):
|
|
|
244
361
|
detector: CalibratedDetector[SampleRun],
|
|
245
362
|
window: RollingDetectorViewWindow,
|
|
246
363
|
projection: LogicalView,
|
|
247
|
-
) ->
|
|
364
|
+
) -> RollingDetectorView:
|
|
248
365
|
"""Helper for constructing via a Sciline workflow."""
|
|
249
366
|
return RollingDetectorView(
|
|
250
367
|
detector_number=detector.coords['detector_number'],
|
|
@@ -261,7 +378,7 @@ class RollingDetectorView(Detector):
|
|
|
261
378
|
projection: Literal['xy_plane', 'cylinder_mantle_z'] | LogicalView,
|
|
262
379
|
resolution: dict[str, int] | None = None,
|
|
263
380
|
pixel_noise: Literal['cylindrical'] | sc.Variable | None = None,
|
|
264
|
-
) ->
|
|
381
|
+
) -> RollingDetectorView:
|
|
265
382
|
"""
|
|
266
383
|
Create a rolling detector view from a NeXus file using GenericNeXusWorkflow.
|
|
267
384
|
|
|
@@ -293,7 +410,7 @@ class RollingDetectorView(Detector):
|
|
|
293
410
|
pixel_noise = sc.scalar(0.0, unit='m')
|
|
294
411
|
noise_replica_count = 0
|
|
295
412
|
else:
|
|
296
|
-
noise_replica_count =
|
|
413
|
+
noise_replica_count = 16
|
|
297
414
|
wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[])
|
|
298
415
|
wf[RollingDetectorViewWindow] = window
|
|
299
416
|
if isinstance(projection, LogicalView):
|
|
@@ -329,7 +446,20 @@ class RollingDetectorView(Detector):
|
|
|
329
446
|
wf[NeXusDetectorName] = detector_name
|
|
330
447
|
return wf.compute(RollingDetectorView)
|
|
331
448
|
|
|
332
|
-
def get(self, window: int | None = None) -> sc.DataArray:
|
|
449
|
+
def get(self, *, window: int | None = None) -> sc.DataArray:
|
|
450
|
+
"""
|
|
451
|
+
Get the sum of counts over a window of the most recent counts.
|
|
452
|
+
|
|
453
|
+
Parameters
|
|
454
|
+
----------
|
|
455
|
+
window:
|
|
456
|
+
Size of the window to use. If None, the full history is used.
|
|
457
|
+
|
|
458
|
+
Returns
|
|
459
|
+
-------
|
|
460
|
+
:
|
|
461
|
+
Sum of counts over the window.
|
|
462
|
+
"""
|
|
333
463
|
if window is not None and not 0 <= window <= self._window:
|
|
334
464
|
raise ValueError("Window size must be less than the history size.")
|
|
335
465
|
if window is None or window == self._window:
|
|
@@ -343,13 +473,38 @@ class RollingDetectorView(Detector):
|
|
|
343
473
|
data += self._history['window', 0 : self._current].sum('window')
|
|
344
474
|
return data
|
|
345
475
|
|
|
476
|
+
def add_events(self, data: sc.DataArray) -> None:
    """
    Add counts in the form of events grouped by pixel ID.

    The number of events in each bin is taken as the count for that pixel.

    Parameters
    ----------
    data:
        Events grouped by pixel ID, given by binned data.
    """
    events_per_pixel = data.bins.size()
    counts = events_per_pixel.to(dtype='int32', copy=False)
    counts.unit = 'counts'
    self._add_counts(counts)
|
|
488
|
+
|
|
346
489
|
def add_counts(self, data: Sequence[int]) -> None:
    """
    Add counts in the form of a sequence of pixel IDs.

    The IDs are converted to per-pixel counts with :py:meth:`bincount` before being
    added to the view.

    Parameters
    ----------
    data:
        List of pixel IDs.
    """
    self._add_counts(self.bincount(data))
|
|
500
|
+
|
|
501
|
+
def _add_counts(self, counts: sc.Variable) -> None:
    """
    Store one frame of counts in the rolling history and update the running sums.

    The counts are first passed through the projection, if there is one. The frame
    currently stored in the active history slot is subtracted from the cached window
    sum before the slot is overwritten.
    """
    projection = self._projection
    frame = counts if projection is None else projection(counts)
    slot = self._current
    # Remove the contribution of the frame that is about to be replaced.
    self._cache -= self._history['window', slot]
    self._history['window', slot] = frame
    self._cache += frame
    self._cumulative += frame
    # Advance to the next slot, wrapping around at the window size.
    self._current = (slot + 1) % self._window
|
|
354
509
|
|
|
355
510
|
|
|
@@ -455,9 +610,12 @@ def position_noise_for_cylindrical_pixel(
|
|
|
455
610
|
|
|
456
611
|
|
|
457
612
|
def gaussian_position_noise(sigma: PositionNoiseSigma) -> PositionNoise:
    """
    Create normally distributed position noise with standard deviation ``sigma``.

    ``sigma`` is converted to meters. The random generator is created with a fixed
    seed (1234), so repeated calls with the same ``sigma`` give the same noise.
    """
    sigma_m = sigma.to(unit='m', copy=False)
    count = _noise_size
    rng = np.random.default_rng(seed=1234)
    noise = sc.empty(sizes={'position': count}, unit='m', dtype=sc.DType.vector3)
    noise.values = rng.normal(0, sigma_m.value, size=(count, 3))
    return PositionNoise(noise)
|
|
462
620
|
|
|
463
621
|
|
ess/reduce/live/roi.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
|
2
|
+
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
|
|
3
|
+
"""Utilities for region of interest (ROI) selection."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import TypeVar
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import scipp as sc
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def select_indices_in_intervals(
    intervals: sc.DataGroup[tuple[int, int] | tuple[sc.Variable, sc.Variable]],
    indices: sc.Variable | sc.DataArray,
) -> sc.Variable:
    """
    Return the subset of indices that fall within the intervals.

    Parameters
    ----------
    intervals:
        DataGroup with dimension names as keys and tuples of low and high values.
        This can be used to define a band or a rectangle to select. The two bounds
        may be given in either order. When low and high are scipp.Variable, the
        selection is done using label-based indexing. In this case `indices` must
        be a DataArray with corresponding coordinates.
    indices:
        Variable or DataArray with indices to select from. If this is binned data,
        the selected indices are returned concatenated into a dense array.

    Returns
    -------
    :
        Selected indices, flattened to a single dimension named 'index'.
    """
    out_dim = 'index'
    selected = indices
    for dim, bounds in intervals.items():
        low, high = sorted(bounds)
        selected = selected[dim, low:high]
    flat = selected.flatten(to=out_dim)
    if flat.bins is not None:
        # Binned input: merge the contents of all selected bins into one array.
        merged = flat.bins.concat().value
        return merged.rename_dims({merged.dim: out_dim})
    return flat
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Type variable constrained to DataArray or Variable, used by functions in this module
# whose output type matches the type of the data they were given.
T = TypeVar('T', sc.DataArray, sc.Variable)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def apply_selection(
    data: T, *, selection: sc.Variable, norm: float = 1.0
) -> tuple[T, sc.Variable]:
    """
    Apply a selection of indices to data.

    Parameters
    ----------
    data:
        Data to filter. Data that is not 1-D is flattened to a single
        'detector_number' dimension before indexing.
    selection:
        Variable with indices to select. Indices may occur more than once.
    norm:
        Normalization factor to apply to the selected data. This is used for cases
        where indices may be selected multiple times.

    Returns
    -------
    :
        Filtered data (one element per unique selected index) and scale factor
        (number of occurrences of each unique index, divided by ``norm``).
    """
    unique_indices, multiplicity = np.unique(selection.values, return_counts=True)
    flat = data if data.ndim == 1 else data.flatten(to='detector_number')
    scale = sc.array(dims=[flat.dim], values=multiplicity) / norm
    return flat[unique_indices], scale
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ROIFilter:
    """Filter for selecting a region of interest (ROI)."""

    def __init__(self, indices: sc.Variable | sc.DataArray, norm: float = 1.0) -> None:
        """
        Create a new ROI filter.

        Parameters
        ----------
        indices:
            Variable with indices to filter. The indices facilitate selecting a 2-D
            ROI in a projection of a 3-D dataset. Typically the indices are given by
            a 2-D array. Each element in the array may correspond to a single index
            (when there is no projection) or a list of indices that were projected
            into an output pixel.
        norm:
            Normalization factor passed on to :py:func:`apply_selection` when the
            filter is applied.
        """
        self._norm = norm
        self._indices = indices
        # Nothing is selected until an ROI has been set.
        self._selection = sc.array(dims=['index'], values=[])

    def set_roi_from_intervals(self, intervals: sc.DataGroup) -> None:
        """Set the ROI from (typically 1 or 2) intervals."""
        selection = select_indices_in_intervals(
            intervals=intervals, indices=self._indices
        )
        self._selection = selection

    def apply(self, data: T) -> tuple[T, sc.Variable]:
        """
        Apply the ROI filter to data.

        The returned scale factor can be used to handle filtering via a projection,
        to take into account that fractions of source data points contribute to a
        data point in the projection.

        Parameters
        ----------
        data:
            Data to filter.

        Returns
        -------
        :
            Filtered data and scale factor.
        """
        selection = self._selection
        return apply_selection(data, selection=selection, norm=self._norm)
|
ess/reduce/nexus/__init__.py
CHANGED
|
@@ -13,23 +13,23 @@ The submodule :mod:`types` defines all domain types.
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
from . import types
|
|
16
|
-
from .workflow import GenericNeXusWorkflow
|
|
17
16
|
from ._nexus_loader import (
|
|
18
|
-
load_data,
|
|
19
|
-
group_event_data,
|
|
20
|
-
load_component,
|
|
21
|
-
load_all_components,
|
|
22
17
|
compute_component_position,
|
|
23
18
|
extract_signal_data_array,
|
|
19
|
+
group_event_data,
|
|
20
|
+
load_all_components,
|
|
21
|
+
load_component,
|
|
22
|
+
load_data,
|
|
24
23
|
)
|
|
24
|
+
from .workflow import GenericNeXusWorkflow
|
|
25
25
|
|
|
26
26
|
__all__ = [
|
|
27
|
-
'
|
|
27
|
+
'GenericNeXusWorkflow',
|
|
28
|
+
'compute_component_position',
|
|
29
|
+
'extract_signal_data_array',
|
|
28
30
|
'group_event_data',
|
|
29
31
|
'load_all_components',
|
|
30
|
-
'load_data',
|
|
31
32
|
'load_component',
|
|
32
|
-
'
|
|
33
|
-
'
|
|
34
|
-
'GenericNeXusWorkflow',
|
|
33
|
+
'load_data',
|
|
34
|
+
'types',
|
|
35
35
|
]
|
|
@@ -30,6 +30,14 @@ class NoNewDefinitionsType: ...
|
|
|
30
30
|
NoNewDefinitions = NoNewDefinitionsType()
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
class NoLockingIfNeededType:
    """Type of the ``NoLockingIfNeeded`` sentinel defined below."""

    def __repr__(self) -> str:
        # Show the name of the sentinel instead of the default object repr.
        name = "NoLockingIfNeeded"
        return name


# Sentinel instance; code in this module compares against it by identity.
NoLockingIfNeeded = NoLockingIfNeededType()
|
|
39
|
+
|
|
40
|
+
|
|
33
41
|
def load_component(
|
|
34
42
|
location: NeXusLocationSpec,
|
|
35
43
|
*,
|
|
@@ -93,7 +101,7 @@ def _open_nexus_file(
|
|
|
93
101
|
file_path: FilePath | NeXusFile | NeXusGroup,
|
|
94
102
|
definitions: Mapping | None | NoNewDefinitionsType = NoNewDefinitions,
|
|
95
103
|
*,
|
|
96
|
-
locking: bool | None =
|
|
104
|
+
locking: bool | str | None | NoLockingIfNeededType = NoLockingIfNeeded,
|
|
97
105
|
) -> AbstractContextManager[snx.Group]:
|
|
98
106
|
if isinstance(file_path, getattr(NeXusGroup, '__supertype__', type(None))):
|
|
99
107
|
if (
|
|
@@ -106,32 +114,63 @@ def _open_nexus_file(
|
|
|
106
114
|
return nullcontext(file_path)
|
|
107
115
|
|
|
108
116
|
try:
|
|
109
|
-
return _open_nexus_file_from_path(
|
|
117
|
+
return _open_nexus_file_from_path(
|
|
118
|
+
file_path,
|
|
119
|
+
definitions,
|
|
120
|
+
locking=None if locking is NoLockingIfNeeded else locking,
|
|
121
|
+
)
|
|
110
122
|
except OSError as err:
|
|
111
|
-
if err
|
|
112
|
-
# Failed to open because the filesystem is read-only.
|
|
113
|
-
# (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
|
|
114
|
-
# this error code is universal.)
|
|
115
|
-
#
|
|
116
|
-
# On ESS machines, this happens for network filesystems of data that was
|
|
117
|
-
# ingested into SciCat, including raw data.
|
|
118
|
-
# In this case, it is safe to open the file without locking because:
|
|
119
|
-
# - For raw files, they were written on a separate machine and are synced
|
|
120
|
-
# with the one running reduction software. So there cannot be concurrent
|
|
121
|
-
# write and read accesses to the same file on the same filesystem.
|
|
122
|
-
# The ground truth on the filesystem used by the file writer is protected
|
|
123
|
-
# and cannot be corrupted by our reader.
|
|
124
|
-
# - For processed data, the file was copied to the read-only filesystem.
|
|
125
|
-
# So the copy we are opening was not written by HDF5 directly and thus
|
|
126
|
-
# locking has no effect anyway.
|
|
127
|
-
#
|
|
128
|
-
# When running on user machines, disabling locking can potentially corrupt
|
|
129
|
-
# files. But the risk is minimal because very few users will have read-only
|
|
130
|
-
# filesystems and do concurrent reads and writes.
|
|
123
|
+
if _attempt_to_open_without_locking(err, locking):
|
|
131
124
|
return _open_nexus_file_from_path(file_path, definitions, locking=False)
|
|
132
125
|
raise
|
|
133
126
|
|
|
134
127
|
|
|
128
|
+
# On ESS machines, some network filesystems are read-only.
|
|
129
|
+
# E.g., data that was ingested into SciCat, including raw data.
|
|
130
|
+
# HDF5 fails to open such files because it cannot lock the files.
|
|
131
|
+
# In this case, it is safe(*) to open the file without locking because:
|
|
132
|
+
#
|
|
133
|
+
# - For raw files, they were written on a separate machine and are synced
|
|
134
|
+
# with the one running reduction software. So there cannot be concurrent
|
|
135
|
+
# write and read accesses to the same file on the same filesystem.
|
|
136
|
+
# The ground truth on the filesystem used by the file writer is protected
|
|
137
|
+
# and cannot be corrupted by our reader.
|
|
138
|
+
# - For processed data, the file was copied to the read-only filesystem.
|
|
139
|
+
# So the copy we are opening was not written by HDF5 directly and thus
|
|
140
|
+
# locking has no effect anyway.
|
|
141
|
+
#
|
|
142
|
+
# When running on user machines, disabling locking can potentially corrupt
|
|
143
|
+
# files. But the risk is minimal because very few users will have read-only
|
|
144
|
+
# filesystems and do concurrent reads and writes.
|
|
145
|
+
#
|
|
146
|
+
# (*) Files on the read-only filesystem may still change while a file is open for
|
|
147
|
+
# reading if they get updated from the original file. E.g., when reading a file that is
|
|
148
|
+
# currently being written to. This can crash the reader. But our code is anyway not set
|
|
149
|
+
# up to deal with changing files, so the added risk is not significant.
|
|
150
|
+
#
|
|
151
|
+
# See https://github.com/HDFGroup/hdf5/blob/e9ab45f0f4d7240937d5f88055f6c217da80f0d4/doxygen/dox/file-locking.dox
|
|
152
|
+
# about HDF5 file locking.
|
|
153
|
+
def _attempt_to_open_without_locking(
    err: OSError, locking: bool | str | None | NoLockingIfNeededType
) -> bool:
    """
    Decide whether a failed attempt to open a file should be retried without locking.

    Parameters
    ----------
    err:
        The error raised by the failed attempt to open the file.
    locking:
        The locking argument that was requested. Anything other than the
        ``NoLockingIfNeeded`` sentinel is an explicit choice and is never overridden.

    Returns
    -------
    :
        True if the file should be opened again with locking disabled.
    """
    if locking is not NoLockingIfNeeded:
        return False  # Respect user's choice.
    if err.errno == errno.EROFS:
        # Read-only filesystem.
        # (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
        # this error code is universal.)
        return True

    # HDF5 tracks file locking flags internally within a single process.
    # If the same file is opened multiple times, we can get a flag mismatch.
    # We can try opening without locking, maybe this matches the original flags.
    #
    # The message is taken from str(err) rather than err.args[0]: for an OSError
    # constructed with an errno, args is (errno, strerror), so args[0] is an int and
    # a substring test on it would raise TypeError and hide the original error.
    # args may also be empty. str(err) contains the message text in all these cases.
    message = str(err)
    return (
        "file locking flag values don't match" in message
        or "file locking 'ignore disabled locks' flag values don't match" in message
    )
|
|
172
|
+
|
|
173
|
+
|
|
135
174
|
def _open_nexus_file_from_path(
|
|
136
175
|
file_path: FilePath,
|
|
137
176
|
definitions: Mapping | None | NoNewDefinitionsType,
|
ess/reduce/streaming.py
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
6
|
from collections.abc import Callable
|
|
7
|
+
from copy import deepcopy
|
|
7
8
|
from typing import Any, Generic, TypeVar
|
|
8
9
|
|
|
9
10
|
import networkx as nx
|
|
@@ -29,6 +30,8 @@ def maybe_hist(value: T) -> T:
|
|
|
29
30
|
:
|
|
30
31
|
Histogram.
|
|
31
32
|
"""
|
|
33
|
+
if not isinstance(value, sc.Variable | sc.DataArray):
|
|
34
|
+
return value
|
|
32
35
|
return value if value.bins is None else value.hist()
|
|
33
36
|
|
|
34
37
|
|
|
@@ -90,11 +93,11 @@ class EternalAccumulator(Accumulator[T]):
|
|
|
90
93
|
|
|
91
94
|
@property
def value(self) -> T:
    """
    Return a deep copy of the accumulated value.

    Handing out a copy means that callers cannot modify the accumulator's internal
    state through the returned object.
    """
    snapshot = deepcopy(self._value)
    return snapshot
|
|
94
97
|
|
|
95
98
|
def _do_push(self, value: T) -> None:
    """
    Add ``value`` to the accumulated value.

    The first pushed value is stored as a deep copy, so the later in-place additions
    do not modify the object that the caller passed in.
    """
    if self._value is not None:
        self._value += value
        return
    self._value = deepcopy(value)
|
|
100
103
|
|
|
@@ -146,6 +149,7 @@ class StreamProcessor:
|
|
|
146
149
|
target_keys: tuple[sciline.typing.Key, ...],
|
|
147
150
|
accumulators: dict[sciline.typing.Key, Accumulator, Callable[..., Accumulator]]
|
|
148
151
|
| tuple[sciline.typing.Key, ...],
|
|
152
|
+
allow_bypass: bool = False,
|
|
149
153
|
) -> None:
|
|
150
154
|
"""
|
|
151
155
|
Create a stream processor.
|
|
@@ -163,6 +167,12 @@ class StreamProcessor:
|
|
|
163
167
|
passed, :py:class:`EternalAccumulator` is used for all keys. Otherwise, a
|
|
164
168
|
dict mapping keys to accumulator instances can be passed. If a dict value is
|
|
165
169
|
a callable, base_workflow.bind_and_call(value) is used to make an instance.
|
|
170
|
+
allow_bypass:
|
|
171
|
+
If True, allow bypassing accumulators for keys that are not in the
|
|
172
|
+
accumulators dict. This is useful for dynamic keys that are not "terminated"
|
|
173
|
+
in any accumulator. USE WITH CARE! This will lead to incorrect results
|
|
174
|
+
unless the values for these keys are valid for all chunks comprised in the
|
|
175
|
+
final accumulators at the point where :py:meth:`finalize` is called.
|
|
166
176
|
"""
|
|
167
177
|
workflow = sciline.Pipeline()
|
|
168
178
|
for key in target_keys:
|
|
@@ -201,19 +211,59 @@ class StreamProcessor:
|
|
|
201
211
|
for key, value in self._accumulators.items()
|
|
202
212
|
}
|
|
203
213
|
self._target_keys = target_keys
|
|
214
|
+
self._allow_bypass = allow_bypass
|
|
204
215
|
|
|
205
216
|
def add_chunk(
    self, chunks: dict[sciline.typing.Key, Any]
) -> dict[sciline.typing.Key, Any]:
    """
    Accumulate values from chunks and return the finalized result in one step.

    This is the legacy interface. It is recommended to call :py:meth:`accumulate`
    and :py:meth:`finalize` separately instead.

    Parameters
    ----------
    chunks:
        Chunks to be processed.

    Returns
    -------
    :
        Finalized result.
    """
    self.accumulate(chunks)
    result = self.finalize()
    return result
|
|
236
|
+
|
|
237
|
+
def accumulate(self, chunks: dict[sciline.typing.Key, Any]) -> None:
    """
    Accumulate values from chunks without finalizing the result.

    Each chunk value is set on the chunk-processing workflow. The workflow is then
    computed for all accumulator keys and each result is pushed into its
    accumulator.

    Parameters
    ----------
    chunks:
        Chunks to be processed.
    """
    process = self._process_chunk_workflow
    for key, value in chunks.items():
        process[key] = value
        # Dynamic keys may not "terminate" in any accumulator. When bypassing is
        # allowed, such values are also set on the finalize workflow so that they
        # can be used when computing the target keys.
        if self._allow_bypass:
            self._finalize_workflow[key] = value
    processed_by_key = process.compute(self._accumulators)
    for key, processed in processed_by_key.items():
        self._accumulators[key].push(processed)
|
|
256
|
+
|
|
257
|
+
def finalize(self) -> dict[sciline.typing.Key, Any]:
    """
    Get the final result by computing the target keys based on accumulated values.

    The current value of every accumulator is set on the finalize workflow before
    the target keys are computed.

    Returns
    -------
    :
        Finalized result.
    """
    workflow = self._finalize_workflow
    for key, accumulator in self._accumulators.items():
        workflow[key] = accumulator.value
    return workflow.compute(self._target_keys)
|
|
219
269
|
|