essreduce-25.1.1-py3-none-any.whl → essreduce-25.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ess/reduce/live/raw.py +160 -13
- ess/reduce/live/roi.py +115 -0
- ess/reduce/nexus/_nexus_loader.py +61 -22
- ess/reduce/streaming.py +53 -3
- ess/reduce/time_of_flight/__init__.py +4 -17
- ess/reduce/time_of_flight/fakes.py +29 -47
- ess/reduce/time_of_flight/simulation.py +6 -3
- ess/reduce/time_of_flight/to_events.py +12 -7
- ess/reduce/time_of_flight/toa_to_tof.py +361 -353
- ess/reduce/time_of_flight/types.py +13 -53
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/METADATA +1 -1
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/RECORD +16 -15
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/LICENSE +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/WHEEL +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/entry_points.txt +0 -0
- {essreduce-25.1.1.dist-info → essreduce-25.2.0.dist-info}/top_level.txt +0 -0
ess/reduce/live/raw.py
CHANGED
@@ -19,6 +19,8 @@ options:
 flatten dimensions of the data.
 """
 
+from __future__ import annotations
+
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass, field
 from math import ceil

@@ -38,6 +40,8 @@ from ess.reduce.nexus.types import (
 )
 from ess.reduce.nexus.workflow import GenericNeXusWorkflow
 
+from . import roi
+
 CalibratedPositionWithNoisyReplicas = NewType(
     'CalibratedPositionWithNoisyReplicas', sc.Variable
 )

@@ -73,10 +77,14 @@ class Histogrammer:
         self._coords = coords
         self._edges = edges
 
+    @property
+    def replicas(self) -> int:
+        return self._replicas
+
     @staticmethod
     def from_coords(
         coords: ProjectedCoords, resolution: DetectorViewResolution
-    ) ->
+    ) -> Histogrammer:
         """
         Create a histogrammer from coordinates and resolution.
 

@@ -102,7 +110,34 @@ class Histogrammer:
     def __call__(self, da: sc.DataArray) -> sc.DataArray:
         self._current += 1
         coords = self._coords[self._replica_dim, self._current % self._replicas]
-        return
+        return self._hist(da.data, coords=coords)
+
+    def _hist(self, data: sc.Variable, *, coords: sc.DataGroup) -> sc.DataArray:
+        # If input is multi-dim we need to flatten since those dims cannot be preserved.
+        return sc.DataArray(data, coords=coords).flatten(to='_').hist(self._edges)
+
+    def input_indices(self) -> sc.DataArray:
+        """Return an array with input indices corresponding to each histogram bin."""
+        dim = 'detector_number'
+        # For some projections one of the coords is a scalar, convert to flat table.
+        coords = self._coords.broadcast(sizes=self._coords.sizes).flatten(to=dim).copy()
+        ndet = sc.index(coords.sizes[dim] // self._replicas)
+        da = sc.DataArray(
+            sc.arange(dim, coords.sizes[dim], dtype='int64', unit=None) % ndet,
+            coords=coords,
+        )
+        return sc.DataArray(da.bin(self._edges).bins.data, coords=self._edges)
+
+    def apply_full(self, var: sc.Variable) -> sc.DataArray:
+        """
+        Apply the histogrammer to a variable using all replicas.
+
+        This is used for one-off operations where the full data is needed, e.g., for
+        transforming pixel weights. Compare to :py:meth:`__call__`, which applies the
+        histogrammer to a single replica for efficiency.
+        """
+        replicated = sc.concat([var] * self.replicas, dim=self._replica_dim)
+        return self._hist(replicated, coords=self._coords) / self.replicas
 
 
 @dataclass

@@ -149,20 +184,27 @@ class Detector:
             sc.zeros(sizes=detector_number.sizes, unit='counts', dtype='int32'),
             coords={'detector_number': detector_number},
         )
+        self._detector_number = detector_number
         self._flat_detector_number = detector_number.flatten(to='event_id')
         self._start = int(self._flat_detector_number[0].value)
         self._stop = int(self._flat_detector_number[-1].value)
         self._size = int(self._flat_detector_number.size)
-
-
-
-
+        self._sorted = sc.issorted(self._flat_detector_number, dim='event_id')
+        self._consecutive = self._stop - self._start + 1 == self._size
+
+    @property
+    def detector_number(self) -> sc.Variable:
+        return self._detector_number
 
     @property
     def data(self) -> sc.DataArray:
         return self._data
 
     def bincount(self, data: Sequence[int]) -> sc.DataArray:
+        if not self._sorted:
+            raise ValueError("Detector numbers must be sorted to use `bincount`.")
+        if not self._consecutive:
+            raise ValueError("Detector numbers must be consecutive to use `bincount`.")
         offset = np.asarray(data, dtype=np.int32) - self._start
         # Ignore events with detector numbers outside the range of the detector. This
         # should not happen in valid files but for now it is useful until we are sure

@@ -215,8 +257,16 @@ class RollingDetectorView(Detector):
         self._current = 0
         self._history: sc.DataArray | None = None
         self._cache: sc.DataArray | None = None
+        self.clear_counts()
+
+    def clear_counts(self) -> None:
+        """
+        Clear counts.
 
-
+        Overrides Detector.clear_counts, to properly clear sliding window history and
+        cache.
+        """
+        counts = sc.zeros_like(self.data)
         if self._projection is not None:
             counts = self._projection(counts)
         self._history = (

@@ -226,12 +276,69 @@ class RollingDetectorView(Detector):
         )
         self._cache = self._history.sum('window')
 
+    def make_roi_filter(self) -> roi.ROIFilter:
+        """Return a ROI filter operating via the projection plane of the view."""
+        norm = 1.0
+        if isinstance(self._projection, Histogrammer):
+            indices = self._projection.input_indices()
+            norm = self._projection.replicas
+        else:
+            indices = sc.ones(sizes=self.data.sizes, dtype='int32', unit=None)
+            indices = sc.cumsum(indices, mode='exclusive')
+            if isinstance(self._projection, LogicalView):
+                indices = self._projection(indices)
+        return roi.ROIFilter(indices=indices, norm=norm)
+
+    def transform_weights(
+        self,
+        weights: sc.Variable | sc.DataArray | None = None,
+        *,
+        threshold: float = 0.1,
+    ) -> sc.DataArray:
+        """
+        Transform raw pixel weights to the projection plane.
+
+        Parameters
+        ----------
+        weights:
+            Raw pixel weights to transform. If None, default weights of 1 are used.
+        threshold:
+            Threshold for identifying bins with a low weight. If the weight is below the
+            threshold times the median weight, the bin is marked as invalid. This is
+            relevant to avoid issues with color scales in plots, where noise in bins
+            with low weight may dominate the color scale if auto-scaling is used.
+        """
+        if weights is None:
+            weights = sc.ones(
+                sizes=self.detector_number.sizes, dtype='float32', unit=''
+            )
+        else:
+            if weights.sizes != self.detector_number.sizes:
+                raise sc.DimensionError(
+                    f'Invalid {weights.sizes=} for {self.detector_number.sizes=}.'
+                )
+            if isinstance(weights, sc.DataArray):
+                if (det_num := weights.coords.get('detector_number')) is not None:
+                    if not sc.identical(det_num, self.detector_number):
+                        raise sc.CoordError("Mismatching detector numbers in weights.")
+                weights = weights.data
+        if isinstance(self._projection, Histogrammer):
+            xs = self._projection.apply_full(weights)  # Use all replicas
+        elif self._projection is not None:
+            xs = self._projection(weights)
+        else:
+            xs = weights.copy()
+        nonempty = xs.values[xs.values > 0]
+        mask = xs.values < threshold * np.median(nonempty)
+        xs.values[mask] = np.nan
+        return xs if isinstance(xs, sc.DataArray) else sc.DataArray(xs)
+
     @staticmethod
     def from_detector_and_histogrammer(
         detector: CalibratedDetector[SampleRun],
         window: RollingDetectorViewWindow,
         projection: Histogrammer,
-    ) ->
+    ) -> RollingDetectorView:
         """Helper for constructing via a Sciline workflow."""
         return RollingDetectorView(
             detector_number=detector.coords['detector_number'],

@@ -244,7 +351,7 @@ class RollingDetectorView(Detector):
         detector: CalibratedDetector[SampleRun],
         window: RollingDetectorViewWindow,
         projection: LogicalView,
-    ) ->
+    ) -> RollingDetectorView:
         """Helper for constructing via a Sciline workflow."""
         return RollingDetectorView(
             detector_number=detector.coords['detector_number'],

@@ -261,7 +368,7 @@ class RollingDetectorView(Detector):
         projection: Literal['xy_plane', 'cylinder_mantle_z'] | LogicalView,
         resolution: dict[str, int] | None = None,
         pixel_noise: Literal['cylindrical'] | sc.Variable | None = None,
-    ) ->
+    ) -> RollingDetectorView:
         """
         Create a rolling detector view from a NeXus file using GenericNeXusWorkflow.
 

@@ -293,7 +400,7 @@ class RollingDetectorView(Detector):
             pixel_noise = sc.scalar(0.0, unit='m')
             noise_replica_count = 0
         else:
-            noise_replica_count =
+            noise_replica_count = 16
         wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[])
         wf[RollingDetectorViewWindow] = window
         if isinstance(projection, LogicalView):

@@ -329,7 +436,20 @@ class RollingDetectorView(Detector):
         wf[NeXusDetectorName] = detector_name
         return wf.compute(RollingDetectorView)
 
-    def get(self, window: int | None = None) -> sc.DataArray:
+    def get(self, *, window: int | None = None) -> sc.DataArray:
+        """
+        Get the sum of counts over a window of the most recent counts.
+
+        Parameters
+        ----------
+        window:
+            Size of the window to use. If None, the full history is used.
+
+        Returns
+        -------
+        :
+            Sum of counts over the window.
+        """
         if window is not None and not 0 <= window <= self._window:
             raise ValueError("Window size must be less than the history size.")
         if window is None or window == self._window:

@@ -343,8 +463,32 @@ class RollingDetectorView(Detector):
             data += self._history['window', 0 : self._current].sum('window')
         return data
 
+    def add_events(self, data: sc.DataArray) -> None:
+        """
+        Add counts in the form of events grouped by pixel ID.
+
+        Parameters
+        ----------
+        data:
+            Events grouped by pixel ID, given by binned data.
+        """
+        counts = data.bins.size().to(dtype='int32', copy=False)
+        counts.unit = 'counts'
+        self._add_counts(counts)
+
     def add_counts(self, data: Sequence[int]) -> None:
+        """
+        Add counts in the form of a sequence of pixel IDs.
+
+        Parameters
+        ----------
+        data:
+            List of pixel IDs.
+        """
         counts = self.bincount(data)
+        self._add_counts(counts)
+
+    def _add_counts(self, counts: sc.Variable) -> None:
         if self._projection is not None:
             counts = self._projection(counts)
         self._cache -= self._history['window', self._current]

@@ -455,9 +599,12 @@ def position_noise_for_cylindrical_pixel(
 
 
 def gaussian_position_noise(sigma: PositionNoiseSigma) -> PositionNoise:
+    sigma = sigma.to(unit='m', copy=False)
     size = _noise_size
     position = sc.empty(sizes={'position': size}, unit='m', dtype=sc.DType.vector3)
-    position.values = np.random.default_rng().normal(
+    position.values = np.random.default_rng(seed=1234).normal(
+        0, sigma.value, size=(size, 3)
+    )
     return PositionNoise(position)
 
 
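
A minimal sketch of how the new RollingDetectorView API added above might be driven in a live-view loop. The method names (add_counts, add_events, get, make_roi_filter, transform_weights) are taken from the diff; the constructor keywords other than detector_number, the pixel layout, and the interval dimension are assumptions made for illustration only.

    import scipp as sc
    from ess.reduce.live.raw import RollingDetectorView

    # Hypothetical 1-D bank of 100 consecutive, sorted pixel IDs.
    detector_number = sc.arange('pixel', 1, 101, unit=None)
    view = RollingDetectorView(detector_number=detector_number, window=10)  # window kwarg assumed

    view.add_counts([3, 3, 7, 42])   # pixel IDs from one chunk of the stream
    recent = view.get(window=2)      # sum over the two most recent chunks
    total = view.get()               # sum over the full history

    # ROI selection via the projection plane of the view (identity here).
    roi_filter = view.make_roi_filter()
    roi_filter.set_roi_from_intervals(sc.DataGroup({'pixel': (10, 20)}))
    selected, scale = roi_filter.apply(total)

    # Pixel weights mapped onto the projection plane; low-weight bins become NaN.
    weights = view.transform_weights(threshold=0.1)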
ess/reduce/live/roi.py
ADDED
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
+"""Utilities for region of interest (ROI) selection."""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+import numpy as np
+import scipp as sc
+
+
+def select_indices_in_intervals(
+    intervals: sc.DataGroup[tuple[int, int] | tuple[sc.Variable, sc.Variable]],
+    indices: sc.Variable | sc.DataArray,
+) -> sc.Variable:
+    """
+    Return subset of indices that fall within the intervals.
+
+    Parameters
+    ----------
+    intervals:
+        DataGroup with dimension names as keys and tuples of low and high values. This
+        can be used to define a band or a rectangle to selected. When low and high are
+        scipp.Variable, the selection is done using label-based indexing. In this case
+        `indices` must be a DataArray with corresponding coordinates.
+    indices:
+        Variable or DataArray with indices to select from. If binned data the selected
+        indices will be returned concatenated into a dense array.
+    """
+    out_dim = 'index'
+    for dim, bounds in intervals.items():
+        low, high = sorted(bounds)
+        indices = indices[dim, low:high]
+    indices = indices.flatten(to=out_dim)
+    if indices.bins is None:
+        return indices
+    indices = indices.bins.concat().value
+    return indices.rename_dims({indices.dim: out_dim})
+
+
+T = TypeVar('T', sc.DataArray, sc.Variable)
+
+
+def apply_selection(
+    data: T, *, selection: sc.Variable, norm: float = 1.0
+) -> tuple[T, sc.Variable]:
+    """
+    Apply selection to data.
+
+    Parameters
+    ----------
+    data:
+        Data to filter.
+    selection:
+        Variable with indices to select.
+    norm:
+        Normalization factor to apply to the selected data. This is used for cases where
+        indices may be selected multiple times.
+
+    Returns
+    -------
+    :
+        Filtered data and scale factor.
+    """
+    indices, counts = np.unique(selection.values, return_counts=True)
+    if data.ndim != 1:
+        data = data.flatten(to='detector_number')
+    scale = sc.array(dims=[data.dim], values=counts) / norm
+    return data[indices], scale
+
+
+class ROIFilter:
+    """Filter for selecting a region of interest (ROI)."""
+
+    def __init__(self, indices: sc.Variable | sc.DataArray, norm: float = 1.0) -> None:
+        """
+        Create a new ROI filter.
+
+        Parameters
+        ----------
+        indices:
+            Variable with indices to filter. The indices facilitate selecting a 2-D
+            ROI in a projection of a 3-D dataset. Typically the indices are given by a
+            2-D array. Each element in the array may correspond to a single index (when
+            there is no projection) or a list of indices that were projected into an
+            output pixel.
+        """
+        self._indices = indices
+        self._selection = sc.array(dims=['index'], values=[])
+        self._norm = norm
+
+    def set_roi_from_intervals(self, intervals: sc.DataGroup) -> None:
+        """Set the ROI from (typically 1 or 2) intervals."""
+        self._selection = select_indices_in_intervals(intervals, self._indices)
+
+    def apply(self, data: T) -> tuple[T, sc.Variable]:
+        """
+        Apply the ROI filter to data.
+
+        The returned scale factor can be used to handle filtering via a projection, to
+        take into account that fractions of source data point contribute to a data point
+        in the projection.
+
+        Parameters
+        ----------
+        data:
+            Data to filter.
+
+        Returns
+        -------
+        :
+            Filtered data and scale factor.
+        """
+        return apply_selection(data, selection=self._selection, norm=self._norm)
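
A small sketch exercising the new ess.reduce.live.roi module directly, outside of any detector view. The class and function names are from the file added above; the 4x4 image, dims, and interval values are made up for illustration.

    import scipp as sc
    from ess.reduce.live import roi

    # One index per pixel of a hypothetical 4x4 image (no projection, so norm=1).
    indices = sc.arange('y', 16, unit=None).fold(dim='y', sizes={'y': 4, 'x': 4})
    f = roi.ROIFilter(indices=indices)
    f.set_roi_from_intervals(sc.DataGroup({'y': (1, 3), 'x': (0, 2)}))  # rows 1-2, cols 0-1

    image = sc.DataArray(
        sc.arange('y', 16.0, unit='counts').fold(dim='y', sizes={'y': 4, 'x': 4})
    )
    selected, scale = f.apply(image)  # four selected values plus a per-element scale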

ess/reduce/nexus/_nexus_loader.py
CHANGED
@@ -30,6 +30,14 @@ class NoNewDefinitionsType: ...
 NoNewDefinitions = NoNewDefinitionsType()
 
 
+class NoLockingIfNeededType:
+    def __repr__(self) -> str:
+        return "NoLockingIfNeeded"
+
+
+NoLockingIfNeeded = NoLockingIfNeededType()
+
+
 def load_component(
     location: NeXusLocationSpec,
     *,

@@ -93,7 +101,7 @@ def _open_nexus_file(
     file_path: FilePath | NeXusFile | NeXusGroup,
     definitions: Mapping | None | NoNewDefinitionsType = NoNewDefinitions,
     *,
-    locking: bool | None =
+    locking: bool | str | None | NoLockingIfNeededType = NoLockingIfNeeded,
 ) -> AbstractContextManager[snx.Group]:
     if isinstance(file_path, getattr(NeXusGroup, '__supertype__', type(None))):
         if (

@@ -106,32 +114,63 @@ def _open_nexus_file(
         return nullcontext(file_path)
 
     try:
-        return _open_nexus_file_from_path(
+        return _open_nexus_file_from_path(
+            file_path,
+            definitions,
+            locking=None if locking is NoLockingIfNeeded else locking,
+        )
     except OSError as err:
-        if err
-            # Failed to open because the filesystem is read-only.
-            # (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
-            # this error code is universal.)
-            #
-            # On ESS machines, this happens for network filesystems of data that was
-            # ingested into SciCat, including raw data.
-            # In this case, it is safe to open the file without locking because:
-            # - For raw files, they were written on a separate machine and are synced
-            #   with the one running reduction software. So there cannot be concurrent
-            #   write and read accesses to the same file on the same filesystem.
-            #   The ground truth on the filesystem used by the file writer is protected
-            #   and cannot be corrupted by our reader.
-            # - For processed data, the file was copied to the read-only filesystem.
-            #   So the copy we are opening was not written by HDF5 directly and thus
-            #   locking has no effect anyway.
-            #
-            # When running on user machines, disabling locking can potentially corrupt
-            # files. But the risk is minimal because very few users will have read-only
-            # filesystems and do concurrent reads and writes.
+        if _attempt_to_open_without_locking(err, locking):
             return _open_nexus_file_from_path(file_path, definitions, locking=False)
         raise
 
 
+# On ESS machines, some network filesystems are read-only.
+# E.g., data that was ingested into SciCat, including raw data.
+# HDF5 fails to open such files because it cannot lock the files.
+# In this case, it is safe(*) to open the file without locking because:
+#
+# - For raw files, they were written on a separate machine and are synced
+#   with the one running reduction software. So there cannot be concurrent
+#   write and read accesses to the same file on the same filesystem.
+#   The ground truth on the filesystem used by the file writer is protected
+#   and cannot be corrupted by our reader.
+# - For processed data, the file was copied to the read-only filesystem.
+#   So the copy we are opening was not written by HDF5 directly and thus
+#   locking has no effect anyway.
+#
+# When running on user machines, disabling locking can potentially corrupt
+# files. But the risk is minimal because very few users will have read-only
+# filesystems and do concurrent reads and writes.
+#
+# (*) Files on the read-only filesystem may still change while a file is open for
+# reading if they get updated from the original file. E.g., when reading a file that is
+# currently being written to. This can crash the reader. But our code is anyway not set
+# up to deal with changing files, so the added risk is not significant.
+#
+# See https://github.com/HDFGroup/hdf5/blob/e9ab45f0f4d7240937d5f88055f6c217da80f0d4/doxygen/dox/file-locking.dox
+# about HDF5 file locking.
+def _attempt_to_open_without_locking(
+    err: OSError, locking: bool | str | None | NoLockingIfNeededType
+) -> bool:
+    if locking is not NoLockingIfNeeded:
+        return False  # Respect user's choice.
+    if err.errno == errno.EROFS:
+        # Read-only filesystem.
+        # (According to https://www.ioplex.com/%7Emiallen/errcmpp.html
+        # this error code is universal.)
+        return True
+
+    # HDF5 tracks file locking flags internally within a single process.
+    # If the same file is opened multiple times, we can get a flag mismatch.
+    # We can try opening without locking, maybe this matches the original flags.
+    if "file locking flag values don't match" in err.args[0]:
+        return True
+    if "file locking 'ignore disabled locks' flag values don't match" in err.args[0]:
+        return True
+    return False
+
+
 def _open_nexus_file_from_path(
     file_path: FilePath,
     definitions: Mapping | None | NoNewDefinitionsType,
ess/reduce/streaming.py
CHANGED
@@ -4,6 +4,7 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable
+from copy import deepcopy
 from typing import Any, Generic, TypeVar
 
 import networkx as nx

@@ -29,6 +30,8 @@ def maybe_hist(value: T) -> T:
     :
         Histogram.
     """
+    if not isinstance(value, sc.Variable | sc.DataArray):
+        return value
     return value if value.bins is None else value.hist()
 
 

@@ -90,11 +93,11 @@ class EternalAccumulator(Accumulator[T]):
 
     @property
     def value(self) -> T:
-        return self._value
+        return deepcopy(self._value)
 
     def _do_push(self, value: T) -> None:
         if self._value is None:
-            self._value = value
+            self._value = deepcopy(value)
         else:
             self._value += value
 

@@ -146,6 +149,7 @@ class StreamProcessor:
         target_keys: tuple[sciline.typing.Key, ...],
         accumulators: dict[sciline.typing.Key, Accumulator, Callable[..., Accumulator]]
         | tuple[sciline.typing.Key, ...],
+        allow_bypass: bool = False,
     ) -> None:
         """
         Create a stream processor.

@@ -163,6 +167,12 @@ class StreamProcessor:
            passed, :py:class:`EternalAccumulator` is used for all keys. Otherwise, a
            dict mapping keys to accumulator instances can be passed. If a dict value is
            a callable, base_workflow.bind_and_call(value) is used to make an instance.
+        allow_bypass:
+            If True, allow bypassing accumulators for keys that are not in the
+            accumulators dict. This is useful for dynamic keys that are not "terminated"
+            in any accumulator. USE WITH CARE! This will lead to incorrect results
+            unless the values for these keys are valid for all chunks comprised in the
+            final accumulators at the point where :py:meth:`finalize` is called.
         """
         workflow = sciline.Pipeline()
         for key in target_keys:

@@ -201,19 +211,59 @@ class StreamProcessor:
             for key, value in self._accumulators.items()
         }
         self._target_keys = target_keys
+        self._allow_bypass = allow_bypass
 
     def add_chunk(
         self, chunks: dict[sciline.typing.Key, Any]
     ) -> dict[sciline.typing.Key, Any]:
+        """
+        Legacy interface for accumulating values from chunks and finalizing the result.
+
+        It is recommended to use :py:meth:`accumulate` and :py:meth:`finalize` instead.
+
+        Parameters
+        ----------
+        chunks:
+            Chunks to be processed.
+
+        Returns
+        -------
+        :
+            Finalized result.
+        """
+        self.accumulate(chunks)
+        return self.finalize()
+
+    def accumulate(self, chunks: dict[sciline.typing.Key, Any]) -> None:
+        """
+        Accumulate values from chunks without finalizing the result.
+
+        Parameters
+        ----------
+        chunks:
+            Chunks to be processed.
+        """
         for key, value in chunks.items():
             self._process_chunk_workflow[key] = value
             # There can be dynamic keys that do not "terminate" in any accumulator. In
             # that case, we need to make sure they can be and are used when computing
             # the target keys.
-            self.
+            if self._allow_bypass:
+                self._finalize_workflow[key] = value
         to_accumulate = self._process_chunk_workflow.compute(self._accumulators)
         for key, processed in to_accumulate.items():
             self._accumulators[key].push(processed)
+
+    def finalize(self) -> dict[sciline.typing.Key, Any]:
+        """
+        Get the final result by computing the target keys based on accumulated values.
+
+        Returns
+        -------
+        :
+            Finalized result.
+        """
+        for key in self._accumulators:
             self._finalize_workflow[key] = self._accumulators[key].value
         return self._finalize_workflow.compute(self._target_keys)
 
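
StreamProcessor now splits chunk handling into accumulate() and finalize(), keeping add_chunk() as a legacy shortcut and adding an opt-in allow_bypass flag for dynamic keys that have no accumulator. A minimal sketch of the two-step usage follows; target_keys, accumulators, accumulate, finalize, and add_chunk appear in the diff above, while the dynamic_keys keyword and the toy workflow are assumptions for illustration.

    from typing import NewType

    import sciline
    import scipp as sc

    from ess.reduce.streaming import StreamProcessor

    Chunk = NewType('Chunk', sc.Variable)  # hypothetical dynamic key
    Total = NewType('Total', sc.Variable)  # hypothetical target key


    def to_total(chunk: Chunk) -> Total:
        return Total(chunk)


    base = sciline.Pipeline([to_total])
    proc = StreamProcessor(
        base,
        dynamic_keys=(Chunk,),  # assumed keyword; not visible in this diff
        target_keys=(Total,),
        accumulators=(Total,),  # defaults to EternalAccumulator (now returns a deep copy)
    )

    for x in (1.0, 2.0, 3.0):
        proc.accumulate({Chunk: sc.scalar(x)})  # per-chunk work only, no final compute
    results = proc.finalize()  # target keys computed from accumulated values (sum is 6.0)
    # add_chunk() is retained as a legacy shortcut for accumulate() followed by finalize().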

ess/reduce/time_of_flight/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: BSD-3-Clause
-# Copyright (c)
+# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
 
 """
 Utilities for computing real neutron time-of-flight from chopper settings and

@@ -7,52 +7,39 @@ neutron time-of-arrival at the detectors.
 """
 
 from .simulation import simulate_beamline
-from .toa_to_tof import default_parameters, resample_tof_data, providers
+from .toa_to_tof import default_parameters, resample_tof_data, providers
 from .to_events import to_events
 from .types import (
     DistanceResolution,
-    FrameFoldedTimeOfArrival,
-    FramePeriod,
     LookupTableRelativeErrorThreshold,
     Ltotal,
     LtotalRange,
-    MaskedTimeOfFlightLookupTable,
-    PivotTimeAtDetector,
     PulsePeriod,
     PulseStride,
     PulseStrideOffset,
     RawData,
     ResampledTofData,
     SimulationResults,
-    TimeOfArrivalMinusPivotTimeModuloPeriod,
     TimeOfFlightLookupTable,
+    TimeResolution,
     TofData,
-    UnwrappedTimeOfArrival,
-    UnwrappedTimeOfArrivalMinusPivotTime,
 )
 
 
 __all__ = [
     "DistanceResolution",
-    "FrameFoldedTimeOfArrival",
-    "FramePeriod",
     "LookupTableRelativeErrorThreshold",
     "Ltotal",
     "LtotalRange",
-    "MaskedTimeOfFlightLookupTable",
-    "PivotTimeAtDetector",
     "PulsePeriod",
     "PulseStride",
     "PulseStrideOffset",
     "RawData",
     "ResampledTofData",
     "SimulationResults",
-    "TimeOfArrivalMinusPivotTimeModuloPeriod",
     "TimeOfFlightLookupTable",
+    "TimeResolution",
     "TofData",
-    "TofWorkflow",
-    "UnwrappedTimeOfArrival",
-    "UnwrappedTimeOfArrivalMinusPivotTime",
     "default_parameters",
     "providers",
     "resample_tof_data",