ewoksid02-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. ewoksid02/__init__.py +0 -0
  2. ewoksid02/ocl/__init__.py +0 -0
  3. ewoksid02/resources/__init__.py +8 -0
  4. ewoksid02/resources/saxs_loop.json +96 -0
  5. ewoksid02/resources/template_saxs.yaml +37 -0
  6. ewoksid02/scripts/__init__.py +0 -0
  7. ewoksid02/scripts/__main__.py +70 -0
  8. ewoksid02/scripts/parsers.py +224 -0
  9. ewoksid02/scripts/saxs/__init__.py +0 -0
  10. ewoksid02/scripts/saxs/main.py +255 -0
  11. ewoksid02/scripts/saxs/slurm_python_post_script.py +3 -0
  12. ewoksid02/scripts/saxs/slurm_python_pre_script.py +5 -0
  13. ewoksid02/scripts/utils.py +21 -0
  14. ewoksid02/scripts/xpcs/__init__.py +0 -0
  15. ewoksid02/scripts/xpcs/__main__.py +3 -0
  16. ewoksid02/tasks/__init__.py +7 -0
  17. ewoksid02/tasks/averagetask.py +179 -0
  18. ewoksid02/tasks/azimuthaltask.py +272 -0
  19. ewoksid02/tasks/cavingtask.py +170 -0
  20. ewoksid02/tasks/dahuprocessingtask.py +71 -0
  21. ewoksid02/tasks/end.py +35 -0
  22. ewoksid02/tasks/id02processingtask.py +2582 -0
  23. ewoksid02/tasks/looptask.py +672 -0
  24. ewoksid02/tasks/metadatatask.py +879 -0
  25. ewoksid02/tasks/normalizationtask.py +204 -0
  26. ewoksid02/tasks/scalerstask.py +46 -0
  27. ewoksid02/tasks/secondaryscatteringtask.py +159 -0
  28. ewoksid02/tasks/sumtask.py +45 -0
  29. ewoksid02/tests/__init__.py +3 -0
  30. ewoksid02/tests/conftest.py +639 -0
  31. ewoksid02/tests/debug.py +64 -0
  32. ewoksid02/tests/test_2scat_node.py +119 -0
  33. ewoksid02/tests/test_ave_node.py +106 -0
  34. ewoksid02/tests/test_azim_node.py +89 -0
  35. ewoksid02/tests/test_cave_node.py +118 -0
  36. ewoksid02/tests/test_norm_node.py +190 -0
  37. ewoksid02/tests/test_saxs.py +69 -0
  38. ewoksid02/tests/test_sumtask.py +10 -0
  39. ewoksid02/tests/utils.py +514 -0
  40. ewoksid02/utils/__init__.py +22 -0
  41. ewoksid02/utils/average.py +158 -0
  42. ewoksid02/utils/blissdata.py +1157 -0
  43. ewoksid02/utils/caving.py +851 -0
  44. ewoksid02/utils/cupyutils.py +42 -0
  45. ewoksid02/utils/io.py +722 -0
  46. ewoksid02/utils/normalization.py +804 -0
  47. ewoksid02/utils/pyfai.py +424 -0
  48. ewoksid02/utils/secondaryscattering.py +597 -0
  49. ewoksid02-0.1.0.dist-info/METADATA +76 -0
  50. ewoksid02-0.1.0.dist-info/RECORD +54 -0
  51. ewoksid02-0.1.0.dist-info/WHEEL +5 -0
  52. ewoksid02-0.1.0.dist-info/entry_points.txt +5 -0
  53. ewoksid02-0.1.0.dist-info/licenses/LICENSE.md +20 -0
  54. ewoksid02-0.1.0.dist-info/top_level.txt +1 -0
ewoksid02/tasks/looptask.py
@@ -0,0 +1,672 @@
+ import gc
+ import logging
+ import os
+
+ import numpy
+ import psutil
+ from ewokscore import Task
+ from silx.io.h5py_utils import open_item as open_item_silx
+
+ from ewoksid02.utils.blissdata import (
+     LIMA_URL_TEMPLATE_ID02,
+     continue_pipeline_bliss,
+     continue_pipeline_offline,
+     load_scan,
+     read_dataset_offline,
+     _slice_dataset_online,
+     _slice_dataset_offline,
+ )
+
+ logger = logging.getLogger(__name__)
+ PYFAI_PROCESSES = ["norm", "gaps", "2scat", "cave", "azim", "ave", "caving"]
+ TRUSAXS_PROCESSES = ["scalers", "dispatch"]
+ ALL_PROCESSES = PYFAI_PROCESSES + TRUSAXS_PROCESSES
+
+
+ class ID02ProcessingTask(
+     Task,
+     input_names=[
+         "detector_name",
+     ],
+     optional_input_names=[
+         "scan_memory_url",
+         "beacon_host",
+         "reading_node",
+         "filename_data",  # Bliss master file for a dataset
+         "filename_lima",
+         "scan_nb",
+         "subscan",
+         "headers",
+         "slow_counters",
+         "max_slice_size",
+         "dataset_signal",
+         "dataset_variance",
+         "dataset_sigma",
+         "datatype",
+         "lima_url_template",
+         "lima_url_template_args",
+         "log_level",
+         "processing_filename",
+         "processing_subtitle",
+         "subtitle",
+         "do_process",
+         "do_save",
+         "save_variance",
+         "save_sigma",
+         "force_saving",
+         "save_metadata",
+         "last_index_read",
+         "range_index_read",
+         "loop_nb",
+         "info",
+         "info_history",
+     ],
+     output_names=[
+         "last_index_read",
+         "loop_nb",
+         "dataset_signal",
+         "dataset_variance",
+         "dataset_sigma",
+         "continue_pipeline",
+         "info_history",
+     ],
+ ):
+     """The `ID02ProcessingTask` class is a base task designed to handle iterative data processing in the ID02 SAXS pipeline.
+     It provides functionality for reading datasets, managing processing loops, and controlling the pipeline flow.
+     This class is intended to be extended by more specific processing tasks (normalization, azimuthal integration, averaging, ...).
+     It can also be seen as a reading node.
+
+     Inputs:
+     - detector_name (str): Name of the detector used for data acquisition. This is the only mandatory input.
+     Optional Inputs:
+     - scan_memory_url (str): URL for accessing scan memory in online processing.
+     - filename_data (str): Path to the dataset file (master file, Nexus writer) for offline processing.
+     - scan_nb (int): Scan number identifying the dataset.
+     - subscan (int): Subscan number for processing. Default is `1`.
+     - max_slice_size (int): Maximum number of frames to process in one iteration. Default is `50`.
+     - last_index_read (int): Index of the last frame read in the dataset. Default is `0`.
+     - range_index_read (list): Range of indices to read from the dataset. This parameter is not propagated to the next task.
+     - loop_nb (int): Current loop iteration number. Default is `0`.
+     - dataset_signal (numpy.ndarray): Signal dataset to be processed.
+     - dataset_variance (numpy.ndarray): Variance dataset to be processed.
+     - dataset_sigma (numpy.ndarray): Sigma dataset to be processed.
+     - reading_node (bool): Flag indicating whether this task acts as the reading node. Default is `False`.
+     - lima_url_template (str): Format string locating the Lima file and the path to the data inside that file.
+     - lima_url_template_args (dict): Dictionary used to format `lima_url_template`.
+     - beacon_host (str): Host and port to connect blissdata to the correct Beacon server. Only for online processing.
+     - log_level (str): Logging level for the task. Default is `"warning"`.
+     - info (dict): Additional metadata to save.
+     - info_history (dict): Additional metadata to propagate and save, building a history of the processing.
+     Outputs:
+     - last_index_read (int): Updated index of the last frame read.
+     - loop_nb (int): Updated loop iteration number.
+     - dataset_signal (numpy.ndarray): Processed signal dataset.
+     - dataset_variance (numpy.ndarray): Processed variance dataset.
+     - dataset_sigma (numpy.ndarray): Processed sigma dataset.
+     - continue_pipeline (bool): Flag indicating whether the pipeline should continue.
+     - info_history (dict): Additional metadata to propagate and save, building a history of the processing.
+
+     Usage:
+     ------
+     This class is intended to be used as part of a larger pipeline for SAXS data processing.
+     It handles the reading and propagation of data:
+     - If `reading_node` is set to True, it tries to get data from Blissdata (online processing) or from a static file (offline processing).
+     - If `reading_node` is set to False (default) and the task receives a dataset, it simply propagates the dataset to the next task.
+     Since the SAXS pipeline is a loop, setting `reading_node` to True turns the task into a kind of entry-exit valve for the pipeline.
+     """
+
+     def run(self, processing_type):
+         self.pid = os.getpid()
+         self._process = psutil.Process()
+         self.detector_name = self.inputs.detector_name
+         self.scan_memory_url = self.get_input_value("scan_memory_url", None)
+         self.filename_data = self.get_input_value("filename_data", None)
+         self.filename_lima = self.get_input_value("filename_lima", None)
+         self.scan_nb = self.get_input_value("scan_nb", None)
+         self.subscan = self.get_input_value("subscan", 1)
+         self.max_slice_size = self.get_input_value("max_slice_size", 50)
+         self.last_index_read = self.get_input_value("last_index_read", 0)
+         self.range_index_read = self.get_input_value("range_index_read", None)
+         self.loop_nb = self.get_input_value("loop_nb", 0)
+         self.beacon_host = self.get_input_value(
+             "beacon_host", os.environ.get("BEACON_HOST")
+         )
+         self.processing_type = processing_type
+         self.links = {"source": {}, "destination": {}}
+
+         self.outputs.last_index_read = self.last_index_read
+         self.outputs.loop_nb = self.loop_nb
+         self.outputs.dataset_signal = self.get_input_value("dataset_signal", None)
+         self.outputs.dataset_variance = self.get_input_value("dataset_variance", None)
+         self.outputs.dataset_sigma = self.get_input_value("dataset_sigma", None)
+         self.outputs.continue_pipeline = True
+
+         self.set_log_level(log_level=self.get_input_value("log_level", "warning"))
+
+         self._set_input_variables()
+
+         gc.collect()
+         self.log_allocated_memory()
+         self._get_datasets()
+
+     def _get_datasets(self) -> None:
+         already_processed_frames = self.last_index_read
+         if (
+             self.get_input_value("reading_node", False)
+             or self.get_input_value("dataset_signal", None) is None
+         ):
+             if (
+                 self.range_index_read
+                 and self.last_index_read >= self.range_index_read[1]
+             ):
+                 self.log_error(
+                     f"Requested range_index {self.range_index_read} has already been read! (Last index: {self.last_index_read}). Canceling the pipeline."
+                 )
+                 self.outputs.continue_pipeline = False
+                 return
+
+             # These incoming datasets cannot be None, they will always be numpy arrays (maybe empty)
+             dataset_signal, dataset_variance, dataset_sigma = self.get_new_datasets()
+             nb_read_frames = len(dataset_signal)
+             if nb_read_frames > 0:
+                 new_last_index = self.last_index_read + nb_read_frames
+                 if self.range_index_read is None:
+                     self.range_index_read = [
+                         self.last_index_read,
+                         new_last_index,
+                     ]
+                 self.last_index_read = new_last_index
+
+                 logger.info(
+                     f"Already processed frames: {already_processed_frames}, "
+                     f"new dataset sliced with {nb_read_frames} frames, "
+                     f"index: {already_processed_frames} -> {self.last_index_read}"
+                 )
+             else:
+                 if self.scan_memory_url:
+                     self.outputs.continue_pipeline = continue_pipeline_bliss(
+                         scan=self.scan,
+                         detector_name=self.detector_name,
+                         last_index_read=self.last_index_read,
+                         subscan=self.subscan,
+                         lima_url_template=self.get_input_value(
+                             "lima_url_template", LIMA_URL_TEMPLATE_ID02
+                         ),
+                         lima_url_template_args=self.get_input_value(
+                             "lima_url_template_args", {}
+                         ),
+                     )
+                 else:
+                     filename_data, h5path_datasignal = self.links["source"][
+                         "datasignal"
+                     ].split("::")
+                     self.outputs.continue_pipeline = continue_pipeline_offline(
+                         filename_data=filename_data,
+                         last_index_read=self.last_index_read,
+                         path_to_data_signal=h5path_datasignal,
+                     )
+             self.loop_nb += 1
+             self.outputs.loop_nb = self.loop_nb
+             self.outputs.last_index_read = self.last_index_read
+             self.outputs.dataset_signal = dataset_signal
+             self.outputs.dataset_variance = dataset_variance
+             self.outputs.dataset_sigma = dataset_sigma
+
+         self.dataset_signal = self.outputs.dataset_signal
+         self.dataset_variance = self.outputs.dataset_variance
+         self.dataset_sigma = self.outputs.dataset_sigma
+         self.range_index_read = self.range_index_read or [
+             self.last_index_read - len(self.dataset_signal),
+             self.last_index_read,
+         ]
+
+     def set_log_level(self, log_level="warning"):
+         if not isinstance(log_level, str):
+             return
+         levels = {
+             "debug": logging.DEBUG,
+             "info": logging.INFO,
+             "warning": logging.WARNING,
+             "error": logging.ERROR,
+         }
+         level = levels.get(log_level.lower())
+         if level is not None:
+             logger.setLevel(level)
+
+     def log_debug(self, msg):
+         self._log(level="debug", msg=msg)
+
+     def log_info(self, msg):
+         self._log(level="info", msg=msg)
+
+     def log_warning(self, msg):
+         self._log(level="warning", msg=msg)
+
+     def log_error(self, msg):
+         self._log(level="error", msg=msg)
+
+     def _log(self, level, msg):
+         msg = f"Loop #{self.loop_nb}: {self.__class__.__name__}: (PID: {self.pid}): {msg}. "
+         getattr(logger, level)(msg)
+
+     def log_allocated_memory(self):
+         mem_usage_GB = self._process.memory_info().rss / 1e9
+         total_mem_GB = psutil.virtual_memory().total / 1e9
+         # used_mem_GB = psutil.virtual_memory().used / 1e9
+         available_mem_GB = psutil.virtual_memory().available / 1e9
+
+         if available_mem_GB / total_mem_GB < 0.1:
+             mem_message = "Low memory available"
+             color_prefix = "\033[91m"
+         elif available_mem_GB / total_mem_GB < 0.3:
+             mem_message = "Medium memory available"
+             color_prefix = "\033[93m"
+         else:
+             mem_message = "Sufficient memory available"
+             color_prefix = "\033[92m"
+         color_suffix = "\033[0m"
+
+         logger.info(
+             f"{color_prefix}Loop #{self.loop_nb}: {self.__class__.__name__}: (PID: {self.pid}): "
+             f"Memory: {mem_usage_GB:.2f} GB used; {available_mem_GB:.2f} GB available. {mem_message}{color_suffix}"
+         )
+
+     def log_benchmark(self, bench):
+         self.log_info(
+             f"Benchmark. Total ({bench.nb_frames}). {bench.benchmark_name}: {bench.bench_total_s:.2f} s. Per frame: {bench.bench_per_frame_ms:.2f} ms"
+         )
+
+     def _set_input_variables(self):
+         if self.scan_memory_url:
+             if not self.beacon_host:
+                 raise ValueError("Online processing requires a beacon_host")
+
+             self.scan = self.get_scan()
+             self.filename_data = self.scan.info["filename"]
+             self.filename_lima = self.filename_lima or (
+                 f"{self.scan.info['images_path'].format(img_acq_device='eiger2')}00.h5"
+             )
+             self.scan_nb = self.scan.info["scan_nb"]
+             self.links["source"].update(
+                 {
+                     # "detector": f"{self.filename_data}::{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}",
+                     "datasignal": f"{self.filename_data}::{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}/data",
+                     "datavariance": None,
+                     "datasigma": None,
+                     "metadata_detector": (
+                         f"{self.filename_lima}::entry_0000/ESRF-ID02/{self.detector_name}",
+                         f"{self.filename_lima}::entry_0000/instrument/{self.detector_name}",
+                     ),
+                     "metadata_headers": (
+                         # explicit "headers" input takes precedence over the Lima header links
+                         self.get_input_value("headers", None),
+                         f"{self.filename_lima}::entry_0000/ESRF-ID02/{self.detector_name}/header",
+                         f"{self.filename_lima}::entry_0000/instrument/{self.detector_name}/header",
+                     ),
+                     "metadata_mcs": None,
+                     "metadata_tfg": None,
+                     "metadata_titleextension": None,
+                     "metadata_raw": None,
+                 }
+             )
+
+             # self.filename_metadata_detector_input = self.filename_lima
+             # self.path_to_metadata_detector_input = (
+             #     f"{ENTRY_NAME}/ESRF-ID02/{self.detector_name}",
+             #     f"{ENTRY_NAME}/instrument/{self.detector_name}",
+             # )
+             # self.filename_metadata_headers_input = self.filename_lima
+             # self.path_to_metadata_headers_input = (
+             #     f"{ENTRY_NAME}/ESRF-ID02/{self.detector_name}/header",
+             #     f"{ENTRY_NAME}/instrument/{self.detector_name}/header",
+             # )
+             # self.filename_metadata_mcs_input = None
+             # self.path_to_metadata_mcs_input = None
+             # self.filename_metadata_tfg_input = None
+             # self.path_to_metadata_tfg_input = None
+             # self.filename_metadata_titleextension_input = None
+             # self.path_to_metadata_titleextension_input = None
+             # self.filename_metadata_raw_input = None
+             # self.path_to_metadata_raw_input = None
+
+             # self.link_to_headers = (
+             #     f"{self.filename_lima}::{ENTRY_NAME}/ESRF-ID02/{self.detector_name}/header",
+             #     f"{self.filename_lima}::{ENTRY_NAME}/instrument/{self.detector_name}/header",
+             # )
+             # self.link_to_counters = (
+             #     f"{self.filename_data}::{self.scan_nb}.1/measurement"
+             # )
+
+         else:
+             self.scan = None
+             if not self.filename_data or not os.path.exists(self.filename_data):
+                 raise ValueError(f"filename_data {self.filename_data} does not exist")
+
+             with open_item_silx(self.filename_data, "/", "r") as file_input:
+                 if f"{self.scan_nb}.{self.subscan}" in file_input:
+                     # We trust it's an offline processing from a RAW file
+                     # if self.processing_type in PYFAI_PROCESSES:
+                     #     nxprocess = "PyFAI"
+                     # elif self.processing_type in TRUSAXS_PROCESSES:
+                     #     nxprocess = "TRUSAXS"
+                     # else:
+                     #     nxprocess = "other"
+
+                     self.links["source"].update(
+                         {
+                             # "detector": f"{self.filename_data}::{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}",
+                             "datasignal": f"{self.filename_data}::{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}/data",
+                             "datavariance": None,
+                             "datasigma": None,
+                             "metadata_detector": (
+                                 f"{self.filename_lima}::entry_0000/ESRF-ID02/{self.detector_name}",
+                                 f"{self.filename_lima}::entry_0000/instrument/{self.detector_name}",
+                             ),
+                             "metadata_headers": (
+                                 f"{self.filename_lima}::entry_0000/ESRF-ID02/{self.detector_name}/header",
+                                 f"{self.filename_lima}::entry_0000/instrument/{self.detector_name}/header",
+                             ),
+                             "metadata_counters": f"{self.filename_data}::{self.scan_nb}.1/measurement",
+                             # "metadata_tfg": f"{self.filename_lima}::entry_0000/{nxprocess}/TFG",
+                             # "metadata_titleextension": None,
+                             # "metadata_raw": None,
+                         }
+                     )
+
+                     # self.filename_metadata_detector_input = self.filename_lima
+                     # self.path_to_metadata_detector_input = (
+                     #     f"{ENTRY_NAME}/ESRF-ID02/{self.detector_name}",
+                     #     f"{ENTRY_NAME}/instrument/{self.detector_name}",
+                     # )
+                     # self.filename_metadata_headers_input = self.filename_lima
+                     # self.path_to_metadata_headers_input = (
+                     #     f"{ENTRY_NAME}/ESRF-ID02/{self.detector_name}/header",
+                     #     f"{ENTRY_NAME}/instrument/{self.detector_name}/header",
+                     # )
+                     # self.filename_metadata_mcs_input = self.filename_metadata
+                     # self.path_to_metadata_mcs_input = (
+                     #     f"{ENTRY_NAME}/{NXPROCESS_NAME_TRUSAXS}/MCS"
+                     # )
+                     # self.filename_metadata_tfg_input = self.filename_metadata
+                     # self.path_to_metadata_tfg_input = (
+                     #     f"{ENTRY_NAME}/{NXPROCESS_NAME_TRUSAXS}/TFG"
+                     # )
+                     # self.filename_metadata_titleextension_input = self.filename_metadata
+                     # self.path_to_metadata_titleextension_input = (
+                     #     f"{ENTRY_NAME}/{NXPROCESS_NAME_TRUSAXS}/parameters"
+                     # )
+                     # self.filename_metadata_raw_input = self.filename_data
+                     # self.path_to_metadata_raw_input = f"{self.scan}.{self.subscan}/measurement"
+
+                     # self.link_to_headers = (
+                     #     f"{self.filename_lima}::{ENTRY_NAME}/ESRF-ID02/{self.detector_name}/header",
+                     #     f"{self.filename_lima}::{ENTRY_NAME}/instrument/{self.detector_name}/header",
+                     # )
+                     # self.link_to_counters = (
+                     #     f"{self.filename_data}::{self.scan_nb}.1/measurement"
+                     # )
+
+                 elif "entry_0000/PyFAI" in file_input:
+                     # We trust it's an offline processing from an already PROCESSED file (like norm)
+                     if self.processing_type in PYFAI_PROCESSES:
+                         nxprocess = "PyFAI"
+                     elif self.processing_type in TRUSAXS_PROCESSES:
+                         nxprocess = "TRUSAXS"
+                     else:
+                         nxprocess = "other"
+                     self.links["source"].update(
+                         {
+                             # "detector": f"entry_0000/PyFAI/{self.detector_name}",
+                             "datasignal": f"{self.filename_data}::entry_0000/PyFAI/result_{self.processing_type}/data",
+                             "datavariance": f"{self.filename_data}::entry_0000/PyFAI/result_{self.processing_type}/data_variance",
+                             "datasigma": f"{self.filename_data}::entry_0000/PyFAI/result_{self.processing_type}/data_errors",
+                             "metadata_detector": f"{self.filename_data}::entry_0000/{nxprocess}/{self.detector_name}",
+                             "metadata_headers": f"{self.filename_data}::entry_0000/{nxprocess}/parameters",
+                             "metadata_mcs": f"{self.filename_data}::entry_0000/{nxprocess}/MCS",
+                             "metadata_tfg": f"{self.filename_data}::entry_0000/{nxprocess}/TFG",
+                             "metadata_counters": f"{self.filename_data}::{self.scan_nb}.1/measurement",
+                         }
+                     )
+
+                 else:
+                     raise ValueError(
+                         f"filename_data {self.filename_data} is not a valid BLISS dataset file or ewoks/dahu processed file"
+                     )
+
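The entries of self.links are "filename::h5path" strings, following the same "::" separator convention as silx's DataUrl. A minimal sketch of how such a link is resolved (the file name and path are illustrative; open_item_silx is the silx.io.h5py_utils.open_item import used above):

    link = "/data/id02/sample_master.h5::1.1/instrument/eiger2/data"
    filename, h5path = link.split("::")
    with open_item_silx(filename, h5path, "r") as dataset:
        print(dataset.shape)  # e.g. (n_frames, ny, nx)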
+     def get_scan(self):
+         if self.scan_memory_url:
+             return load_scan(
+                 scan_memory_url=self.scan_memory_url, beacon_host=self.beacon_host
+             )
+
+     def generate_streams(self):
+         if self.scan:
+             for stream_name, stream in self.scan.streams.items():
+                 yield stream_name, stream
+
+     def get_new_datasets(self):
+         if self.scan_memory_url:
+             dataset_signal, dataset_variance, dataset_sigma = (
+                 self.get_datasets_from_bliss()
+             )
+         else:
+             dataset_signal, dataset_variance, dataset_sigma = (
+                 self.get_datasets_from_static_file()
+             )
+         if dataset_signal is None or len(dataset_signal) == 0:
+             return numpy.array([]), numpy.array([]), numpy.array([])
+         else:
+             return dataset_signal, dataset_variance, dataset_sigma
+
+     def get_datasets_from_bliss(self):
+         """Get the dataset from the bliss scan memory."""
+         # Always return a 3-tuple so that get_new_datasets() can unpack it
+         if not self.scan_memory_url:
+             self.log_error("scan_memory_url is mandatory to get dataset from bliss")
+             return None, None, None
+
+         if not os.environ.get("BEACON_HOST"):
+             self.log_error(
+                 "Online processing requires a BEACON_HOST environment variable"
+             )
+             return None, None, None
+         dataset_signal = _slice_dataset_online(
+             scan=self.scan,
+             detector_name=self.detector_name,
+             lima_url_template=self.get_input_value(
+                 "lima_url_template", LIMA_URL_TEMPLATE_ID02
+             ),
+             lima_url_template_args=self.get_input_value("lima_url_template_args", {}),
+             subscan=self.subscan,
+             last_index_read=self.last_index_read,
+             max_slice_size=self.max_slice_size,
+             range_index_read=self.range_index_read,
+         )
+         dataset_variance = numpy.array([])
+         dataset_sigma = numpy.array([])
+
+         return dataset_signal, dataset_variance, dataset_sigma
+
+     def get_datasets_from_static_file(self):
+         """Get the dataset from the static file."""
+         if not self.filename_data:
+             self.log_error("filename_data is mandatory to get dataset from static file")
+             return None, None, None
+
+         source = self.links["source"]
+         filename_data, h5path_datasignal = source["datasignal"].split("::")
+         # The variance/sigma links are None when reading from a RAW Bliss file
+         h5path_datavariance = (
+             source["datavariance"].split("::")[1] if source["datavariance"] else None
+         )
+         h5path_datasigma = (
+             source["datasigma"].split("::")[1] if source["datasigma"] else None
+         )
+
+         dataset_signal, dataset_variance, dataset_sigma = _slice_dataset_offline(
+             filename_data=filename_data,
+             path_to_data_signal=h5path_datasignal,
+             path_to_data_variance=h5path_datavariance,
+             path_to_data_sigma=h5path_datasigma,
+             last_index_read=self.last_index_read,
+             max_slice_size=self.max_slice_size,
+             range_index_read=self.range_index_read,
+         )
+
+         return dataset_signal, dataset_variance, dataset_sigma
+
+
+ class LoopTask(
+     Task,
+     optional_input_names=[
+         "detector_name",
+         "scan_memory_url",
+         "filename_data",
+         "scan_nb",
+         "subscan",
+         "max_slice_size",
+         "last_index_read",
+         "range_index_read",
+         "loop_nb",
+         "dataset",
+         "reading_node",
+         "lima_url_template",
+         "lima_url_template_args",
+         "beacon_host",
+     ],
+     output_names=[
+         "last_index_read",
+         "loop_nb",
+         "dataset",
+         "continue_pipeline",
+     ],
+ ):
+     """The `LoopTask` class is a base task designed to handle iterative data processing.
+     It provides functionality for reading datasets, managing processing loops, and controlling the pipeline flow.
+     It can also be seen as a reading node.
+
+     Optional Inputs:
+     - detector_name (str): Name of the detector used for data acquisition. Required whenever the task has to read new data.
+     - scan_memory_url (str): URL for accessing scan memory in online processing.
+     - filename_data (str): Path to the dataset file (master file, Nexus writer) for offline processing.
+     - scan_nb (int): Scan number identifying the dataset.
+     - subscan (int): Subscan number for processing. Default is `1`.
+     - max_slice_size (int): Maximum number of frames to process in one iteration. Default is `20`.
+     - last_index_read (int): Index of the last frame read in the dataset. Default is `0`.
+     - range_index_read (list): Range of indices to read from the dataset. This parameter is not propagated to the next task.
+     - loop_nb (int): Current loop iteration number. Default is `0`.
+     - dataset (numpy.ndarray): Signal dataset to be processed.
+     - reading_node (bool): Flag indicating whether this task acts as the reading node. Default is `False`.
+     - lima_url_template (str): Format string locating the Lima file and the path to the data inside that file.
+     - lima_url_template_args (dict): Dictionary used to format `lima_url_template`.
+     - beacon_host (str): Host and port to connect blissdata to the correct Beacon server. Only for online processing.
+     Outputs:
+     - last_index_read (int): Updated index of the last frame read.
+     - loop_nb (int): Updated loop iteration number.
+     - dataset (numpy.ndarray): Processed signal dataset.
+     - continue_pipeline (bool): Flag indicating whether the pipeline should continue.
+     """
+
+     def get_data(self):
+         last_index_read = self.get_input_value("last_index_read", 0)
+         range_index_read = self.get_input_value("range_index_read", None)
+         loop_nb = self.get_input_value("loop_nb", 0)
+         dataset = self.get_input_value("dataset", None)
+
+         self.outputs.last_index_read = last_index_read
+         self.outputs.dataset = dataset
+         self.outputs.loop_nb = loop_nb
+         self.outputs.continue_pipeline = True
+
+         if self.get_input_value("reading_node", False) or dataset is None:
+             if self.get_input_value("detector_name", None) is None:
+                 raise ValueError("detector_name is mandatory to read new data")
+
+             if range_index_read and last_index_read >= range_index_read[1]:
+                 raise ValueError(
+                     f"Requested range_index {range_index_read} has already been read! (Last index: {last_index_read}). Canceling the pipeline."
+                 )
+
+             new_read_dataset = self.get_new_dataset()
+             if new_read_dataset is None:
+                 new_read_dataset = numpy.array([])
+
+             nb_read_frames = len(new_read_dataset)
+             if nb_read_frames > 0:
+                 new_last_index = last_index_read + nb_read_frames
+                 new_range_index = [
+                     last_index_read,
+                     new_last_index,
+                 ]
+                 logger.info(
+                     f"Already processed frames: {last_index_read}, "
+                     f"new dataset sliced with {nb_read_frames} frames, "
+                     f"new range_index_read: {new_range_index}, "
+                     f"new last index: {new_last_index}"
+                 )
+                 self.outputs.last_index_read = new_last_index
+             else:
+                 scan_memory_url = self.get_input_value("scan_memory_url", None)
+                 filename_data = self.get_input_value("filename_data", None)
+                 scan_nb = self.get_input_value("scan_nb", 1)
+                 subscan = self.get_input_value("subscan", 1)
+
+                 if scan_memory_url:
+                     self.outputs.continue_pipeline = continue_pipeline_bliss(
+                         scan=self.scan,
+                         detector_name=self.inputs.detector_name,
+                         last_index_read=last_index_read,
+                         subscan=subscan,
+                         lima_url_template=self.get_input_value(
+                             "lima_url_template", None
+                         ),
+                         lima_url_template_args=self.get_input_value(
+                             "lima_url_template_args", {}
+                         ),
+                     )
+                 elif filename_data:
+                     self.outputs.continue_pipeline = continue_pipeline_offline(
+                         filename_data=filename_data,
+                         last_index_read=last_index_read,
+                         path_to_data_signal=f"/{scan_nb}.{subscan}/instrument/{self.inputs.detector_name}/data",
+                     )
+
+             self.outputs.dataset = new_read_dataset
+             self.outputs.loop_nb = loop_nb + 1
+         self.last_index_read = self.outputs.last_index_read
+         self.range_index_read = [
+             self.last_index_read - len(self.outputs.dataset),
+             self.last_index_read,
+         ]
+         return self.outputs.dataset
+
+     def get_new_dataset(self):
+         if self.get_input_value("scan_memory_url", None):
+             return self._get_datasets_from_bliss()
+         else:
+             return self._get_datasets_from_static_file()
+
+     def _get_datasets_from_bliss(self):
+         """Get the dataset from the bliss scan memory."""
+         if not os.environ.get("BEACON_HOST"):
+             logger.error(
+                 "Online processing requires a BEACON_HOST environment variable"
+             )
+             return
+
+         if getattr(self, "scan", None) is None:
+             # Unlike ID02ProcessingTask, LoopTask has no _set_input_variables
+             # step, so attach the scan lazily before slicing it
+             self.scan = load_scan(
+                 scan_memory_url=self.inputs.scan_memory_url,
+                 beacon_host=self.get_input_value(
+                     "beacon_host", os.environ.get("BEACON_HOST")
+                 ),
+             )
+
+         return _slice_dataset_online(
+             scan=self.scan,
+             detector_name=self.inputs.detector_name,
+             lima_url_template=self.get_input_value("lima_url_template", None),
+             lima_url_template_args=self.get_input_value("lima_url_template_args", {}),
+             subscan=self.get_input_value("subscan", 1),
+             last_index_read=self.get_input_value("last_index_read", 0),
+             max_slice_size=self.get_input_value("max_slice_size", 20),
+             range_index_read=self.get_input_value("range_index_read", None),
+         )
+
+     def _get_datasets_from_static_file(self):
+         """Get the dataset from the static file."""
+         return read_dataset_offline(
+             filename_data=self.inputs.filename_data,
+             detector_name=self.inputs.detector_name,
+             scan_nb=self.get_input_value("scan_nb", 1),
+             last_index_read=self.get_input_value("last_index_read", 0),
+             max_slice_size=self.get_input_value("max_slice_size", 20),
+             range_index_read=self.get_input_value("range_index_read", None),
+         )
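For the lighter LoopTask, offline reading could be wired up roughly as follows. A minimal sketch, assuming a hypothetical subclass that exposes get_data() as the task body; the detector name and file path are illustrative.

    class ReadLoopTask(LoopTask):
        """Hypothetical subclass: read one slice per execution."""

        def run(self):
            dataset = self.get_data()
            logger.info(f"Loop #{self.outputs.loop_nb}: {len(dataset)} new frames")

    task = ReadLoopTask(
        inputs={
            "reading_node": True,
            "detector_name": "eiger2",                       # illustrative
            "filename_data": "/data/id02/sample_master.h5",  # illustrative
            "scan_nb": 1,
            "subscan": 1,
            "max_slice_size": 20,
        }
    )
    task.execute()
    # Feed the outputs (last_index_read, loop_nb) back in as inputs
    # to read the next slice on the following iteration.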