ewoksid02 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. ewoksid02/__init__.py +0 -0
  2. ewoksid02/ocl/__init__.py +0 -0
  3. ewoksid02/resources/__init__.py +8 -0
  4. ewoksid02/resources/saxs_loop.json +96 -0
  5. ewoksid02/resources/template_saxs.yaml +37 -0
  6. ewoksid02/scripts/__init__.py +0 -0
  7. ewoksid02/scripts/__main__.py +70 -0
  8. ewoksid02/scripts/parsers.py +224 -0
  9. ewoksid02/scripts/saxs/__init__.py +0 -0
  10. ewoksid02/scripts/saxs/main.py +255 -0
  11. ewoksid02/scripts/saxs/slurm_python_post_script.py +3 -0
  12. ewoksid02/scripts/saxs/slurm_python_pre_script.py +5 -0
  13. ewoksid02/scripts/utils.py +21 -0
  14. ewoksid02/scripts/xpcs/__init__.py +0 -0
  15. ewoksid02/scripts/xpcs/__main__.py +3 -0
  16. ewoksid02/tasks/__init__.py +7 -0
  17. ewoksid02/tasks/averagetask.py +179 -0
  18. ewoksid02/tasks/azimuthaltask.py +272 -0
  19. ewoksid02/tasks/cavingtask.py +170 -0
  20. ewoksid02/tasks/dahuprocessingtask.py +71 -0
  21. ewoksid02/tasks/end.py +35 -0
  22. ewoksid02/tasks/id02processingtask.py +2582 -0
  23. ewoksid02/tasks/looptask.py +672 -0
  24. ewoksid02/tasks/metadatatask.py +879 -0
  25. ewoksid02/tasks/normalizationtask.py +204 -0
  26. ewoksid02/tasks/scalerstask.py +46 -0
  27. ewoksid02/tasks/secondaryscatteringtask.py +159 -0
  28. ewoksid02/tasks/sumtask.py +45 -0
  29. ewoksid02/tests/__init__.py +3 -0
  30. ewoksid02/tests/conftest.py +639 -0
  31. ewoksid02/tests/debug.py +64 -0
  32. ewoksid02/tests/test_2scat_node.py +119 -0
  33. ewoksid02/tests/test_ave_node.py +106 -0
  34. ewoksid02/tests/test_azim_node.py +89 -0
  35. ewoksid02/tests/test_cave_node.py +118 -0
  36. ewoksid02/tests/test_norm_node.py +190 -0
  37. ewoksid02/tests/test_saxs.py +69 -0
  38. ewoksid02/tests/test_sumtask.py +10 -0
  39. ewoksid02/tests/utils.py +514 -0
  40. ewoksid02/utils/__init__.py +22 -0
  41. ewoksid02/utils/average.py +158 -0
  42. ewoksid02/utils/blissdata.py +1157 -0
  43. ewoksid02/utils/caving.py +851 -0
  44. ewoksid02/utils/cupyutils.py +42 -0
  45. ewoksid02/utils/io.py +722 -0
  46. ewoksid02/utils/normalization.py +804 -0
  47. ewoksid02/utils/pyfai.py +424 -0
  48. ewoksid02/utils/secondaryscattering.py +597 -0
  49. ewoksid02-0.1.0.dist-info/METADATA +76 -0
  50. ewoksid02-0.1.0.dist-info/RECORD +54 -0
  51. ewoksid02-0.1.0.dist-info/WHEEL +5 -0
  52. ewoksid02-0.1.0.dist-info/entry_points.txt +5 -0
  53. ewoksid02-0.1.0.dist-info/licenses/LICENSE.md +20 -0
  54. ewoksid02-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2582 @@
1
+ import json
2
+ import os
3
+ import gc
4
+ import threading
5
+ import socket
6
+ import time
7
+ from importlib.metadata import version
8
+ from contextlib import ExitStack
9
+ from ewokscore import Task
10
+ import psutil
11
+ from pathlib import Path
12
+ import h5py
13
+ import hdf5plugin
14
+ import numpy
15
+ import logging
16
+ from ewokscore import missing_data
17
+ import matplotlib.image
18
+ from pyFAI import version as pyFAIVersion
19
+ from silx.io.h5py_utils import open_item as open_item_silx
20
+ from ewoksid02.utils.blissdata import (
21
+ copy_group_excluding_dataset,
22
+ )
23
+ from ewoksid02.utils.io import (
24
+ KEY_BEAMSTOP_MASK_FILE,
25
+ KEY_BEAMSTOP_MASK_FOLDER,
26
+ KEY_DARK_FILE,
27
+ KEY_DARK_FOLDER,
28
+ KEY_DETECTOR_MASK_FILE,
29
+ KEY_DETECTOR_MASK_FOLDER,
30
+ KEY_FLAT_FILE,
31
+ KEY_FLAT_FOLDER,
32
+ KEY_WINDOW_FILE,
33
+ KEY_WINDOW_FOLDER,
34
+ get_isotime,
35
+ refactor_stream_name_raw,
36
+ refactor_stream_name_interpreted,
37
+ parse_titleextension_template,
38
+ serialize_h5py_task,
39
+ deserialize_h5py_task,
40
+ )
41
+
42
+ from ewoksid02.utils.blissdata import (
43
+ LIMA_URL_TEMPLATE_ID02,
44
+ # do_continue_pipeline,
45
+ load_scan,
46
+ _slice_dataset_online,
47
+ _slice_dataset_offline,
48
+ does_scan_contain_subscan2,
49
+ # get_streams_subscan1,
50
+ # _get_streams_subscan2,
51
+ _get_new_slice_limits,
52
+ _get_stream_names_from_acquisition_chain,
53
+ )
54
+
55
+ lock = threading.Lock()
56
+
57
+ PYFAI_PROCESSES = ["norm", "gaps", "2scat", "cave", "azim", "ave", "caving"]
58
+ TRUSAXS_PROCESSES = ["scalers", "dispatch", "debug"]
59
+ ALL_PROCESSES = PYFAI_PROCESSES + TRUSAXS_PROCESSES
60
+
61
+ PROCESSING_TYPE_TASK = {
62
+ "norm": "ewoksid02.tasks.normalizationtask.NormalizationTask",
63
+ "gaps": "ewoksid02.tasks.cavingtask.CavingGapsTask",
64
+ "2scat": "ewoksid02.tasks.secondaryscatteringtask.SecondaryScatteringTask",
65
+ "cave": "ewoksid02.tasks.cavingtask.CavingBeamstopTask",
66
+ "azim": "ewoksid02.tasks.azimuthaltask.AzimuthalTask",
67
+ "ave": "ewoksid02.tasks.averagetask.AverageTask",
68
+ "scalers": "ewoksid02.tasks.scalerstask.ScalersTask",
69
+ }
70
+
71
+ KEYS_FLOAT = [
72
+ "Center_1",
73
+ "Center_2",
74
+ "Dummy",
75
+ "DDummy",
76
+ "PSize_1",
77
+ "PSize_2",
78
+ "SampleDistance",
79
+ "WaveLength",
80
+ ]
81
+
82
+ KEYS_INT = [
83
+ "BSize_1",
84
+ "BSize_2",
85
+ "Offset_1",
86
+ "Offset_2",
87
+ "RasterOrientation",
88
+ ]
89
+
90
+ HEADERS_KEY_EXPOSURE_TIME = "HSTime"
91
+ HEADERS_KEY_MONITOR_0 = "HSI0" # Monitor for beam intensity before the sample
92
+ HEADERS_KEY_MONITOR_0_FACTOR = "HSI0Factor"
93
+ HEADERS_KEY_MONITOR_1 = "HSI1" # Monitor for beam intensity after the sample
94
+ HEADERS_KEY_MONITOR_1_FACTOR = "HSI1Factor"
95
+ HEADERS_KEY_SOT = "ShutterOpeningTime"
96
+ HEADERS_KEY_SCT = "ShutterClosingTime"
97
+ MAP_DETECTORS_LIMA = {
98
+ "eiger2": "ESRF-ID02",
99
+ "waxs": "instrument",
100
+ "default": "ESRF-ID02",
101
+ "eiger500k": "ESRF-ID02", # TODO ???
102
+ }
103
+ DETECTOR_LIMA_DEFAULT = MAP_DETECTORS_LIMA.get("default")
104
+
105
+ SLOW_COUNTER_TIMER = "sampling_timer:epoch"
106
+
107
+
108
+ CHUNK_SIZE_3D = (1, 200, 200)
109
+
110
+ INFO_COMMON = {"h5path": "entry_0000"}
111
+
112
+ MAX_SLICE_SIZE = 50
113
+ LOG_LEVEL_DEFAULT = "warning"
114
+ LIMA_INDEX_NUMBER_FORMAT_ID02 = "%02d"
115
+ MEM_USAGE_START = None
116
+
117
+ # Global logger at ewoksid02.tasks.id02processingtask
118
+ logger = logging.getLogger("ewoksid02")
119
+ logger.propagate = True
120
+
121
+
122
+ class ID02ProcessingTask(
123
+ Task,
124
+ optional_input_names=[
125
+ "detector_name",
126
+ "scan_memory_url",
127
+ "beacon_host",
128
+ "reading_node",
129
+ "filename_data", # Bliss master file for a dataset
130
+ "filename_lima",
131
+ "scan_nb",
132
+ "subscan",
133
+ "headers",
134
+ "dataset_signal",
135
+ "dataset_variance",
136
+ "dataset_sigma",
137
+ "datatype",
138
+ "lima_url_template",
139
+ "lima_url_template_args",
140
+ "log_level",
141
+ "processing_filename",
142
+ "processing_subtitle",
143
+ "subtitle",
144
+ "do_process",
145
+ "do_save",
146
+ "save_variance",
147
+ "save_sigma",
148
+ "save_metadata",
149
+ "index_range", # Global range, do not propagate
150
+ "index_range_last", # Dynamic range, propagate and change every loop
151
+ "max_slice_size",
152
+ "loop_nb",
153
+ "info",
154
+ "info_history",
155
+ "gc_collect",
156
+ "lima_index_number_format",
157
+ "save_in_gallery",
158
+ ],
159
+ output_names=[
160
+ "index_range_last",
161
+ "loop_nb",
162
+ "dataset_signal",
163
+ "dataset_variance",
164
+ "dataset_sigma",
165
+ "continue_pipeline",
166
+ "info_history",
167
+ ],
168
+ ):
169
+ """This class contains processing support methods and saving methods in the ID02 SAXS pipeline.
170
+ It extends the `ID02LoopTask` class and provides additional functionality for handling metadata, processing flags,
171
+ and saving processed data to HDF5 files.This class is designed to be used as part of the ID02 pipeline.It does not contain a process method, that has to be implemented in the child class.
172
+
173
+ Optional Inputs:
174
+ - detector_name (str): Name of the detector used for data acquisition. Although listed as optional, it is required for online processing and for offline processing from RAW_DATA.
175
+ - scan_memory_url (str): URL for accessing scan memory in online processing.
176
+ - beacon_host (str): Host and port to plug blissdata to the correct beacon server. Only for online processing.
177
+ - reading_node (bool): Flag to indicate if the task should read data from the node.
178
+ - filename_data (str): Path to the dataset file (Master file, Nexus writer) for offline processing.
179
+ - filename_lima (str): Path to the first Lima file, the only place where some detector metadata can be found.
180
+ - scan_nb (int): Scan number for identifying the dataset.
181
+ - subscan (int): Subscan number for processing. Default is `1`.
182
+ - headers (dict): Dictionary containing header information (only for online processing).
183
+ - max_slice_size (int): Maximum number of frames to process in one iteration. Default is `50`.
184
+ - dataset_signal (numpy.ndarray): Signal dataset to be processed.
185
+ - dataset_variance (numpy.ndarray): Variance dataset to be processed.
186
+ - dataset_sigma (numpy.ndarray): Sigma dataset to be processed.
187
+ - datatype (str): Datatype to be used to save the 2D data. Default and recommended is float32.
188
+ - lima_url_template (str): Format string to locate the Lima file and the path to the data inside that file.
189
+ - lima_url_template_args (dict): Dictionary to format the lima_url_template.
190
+ - log_level (str): Logging level for the task. Default is `"warning"`.
191
+ - processing_filename (str): Full path to the (new) output file.
192
+ - processing_subtitle (str): Additional subtitle for the processing task.
193
+ - subtitle (str): Subtitle for the processing task to be added to the output filename.
194
+ - do_process (bool): Flag to enable or disable processing. Default is `True`.
195
+ - do_save (bool): Flag to enable or disable saving of processed data. Default is `True`.
196
+ - save_variance (bool): Flag to enable or disable saving of variance dataset. Default is `False`.
197
+ - save_sigma (bool): Flag to enable or disable saving of sigma dataset. Default is `True`.
198
+ - save_metadata (bool): Flag to enable or disable saving of metadata. Default is `True`.
199
+ - index_range (list): Global range of frame indices to read from the dataset. This parameter is not propagated to the next task.
200
+ - index_range_last (list): Dynamic range of the last frames read; it is propagated and updated on every loop.
201
+ - loop_nb (int): Current loop iteration number. Default is `0`.
202
+ - info (dict): Additional metadata to save.
203
+ - info_history (dict): Additional metadata to propagate and save, creating a history of processing.
204
+ - gc_collect (bool): Manually collect garbage at the end of every task.
205
+ - lima_index_number_format (str): Format string used to locate the first Lima file (`%02d` by default).
+ - save_in_gallery (bool): Flag to enable saving results in the gallery. Default is `False`.
206
+ Outputs:
207
+ - index_range_last (list): Updated range of the last frames read.
208
+ - loop_nb (int): Updated loop iteration number.
209
+ - dataset_signal (numpy.ndarray): Processed signal dataset.
210
+ - dataset_variance (numpy.ndarray): Processed variance dataset.
211
+ - dataset_sigma (numpy.ndarray): Processed sigma dataset.
212
+ - continue_pipeline (bool): Flag to indicate whether the pipeline should continue.
213
+ - info_history (dict): Additional metadata to propagate and save, creating a history of processing.
214
+ """
215
+
216
+ class Benchmark:
217
+ """A context manager for benchmarking."""
218
+
219
+ def __init__(self, nb_frames, benchmark_name="processing"):
220
+ self.nb_frames = nb_frames
221
+ self.benchmark_name = benchmark_name
222
+ self.bench_total_s = 0.0
223
+ self.bench_per_frame_ms = 0.0
224
+
225
+ def __enter__(self):
226
+ self.start = time.perf_counter()
227
+ return self
228
+
229
+ def __exit__(self, exc_type, exc_value, traceback):
230
+ self.end = time.perf_counter()
231
+ self.bench_total_s = self.end - self.start
232
+ if self.nb_frames > 0:
233
+ self.bench_per_frame_ms = self.bench_total_s / self.nb_frames * 1000
234
+ else:
235
+ self.bench_per_frame_ms = 0
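+ # Example (sketch of how this context manager is used further below, e.g. in save()):
+ #
+ #     bench = self.Benchmark(nb_frames=len(frames), benchmark_name="saving")
+ #     with bench:
+ #         ...  # timed work on the frames
+ #     self._log_benchmark(bench)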
236
+
237
+ def run(self, processing_type: str = "debug"):
238
+ self.processing_type = processing_type
239
+ self._pid = os.getpid()
240
+ self._process = psutil.Process()
241
+ self._set_log_level(
242
+ log_level=self.get_input_value("log_level", LOG_LEVEL_DEFAULT)
243
+ )
244
+ self.detector_name = self.get_input_value("detector_name", None)
245
+ self.scan_memory_url = self.get_input_value("scan_memory_url", None)
246
+ self.beacon_host = self.get_input_value(
247
+ "beacon_host", os.environ.get("BEACON_HOST")
248
+ )
249
+ self.filename_data = self.get_input_value("filename_data", None)
250
+ self.filename_lima = self.get_input_value("filename_lima", None)
251
+ self.subscan = self.get_input_value("subscan", 1)
252
+ self.max_slice_size = self.get_input_value("max_slice_size", MAX_SLICE_SIZE)
253
+ self.loop_nb = self.get_input_value("loop_nb", 0)
254
+ self.headers = self.get_input_value("headers", None)
255
+ self.index_range = self.get_input_value("index_range", None)
256
+ self.index_range_last = self.get_input_value("index_range_last", None)
257
+ self.links = {"source": {}, "destination": {}}
258
+
259
+ self._set_input_parameters()
260
+ self._load_headers()
261
+ self._load_streams()
262
+ self._log_allocated_memory()
263
+ self._set_datasets()
264
+
265
+ # Check-point to avoid data overwriting (only for offline processing)
266
+ self.processing_filename = self.get_input_value("processing_filename", None)
267
+ if not self.scan_memory_url and self.processing_filename:
268
+ if self.loop_nb == 1 and os.path.exists(self.processing_filename):
269
+ self.log_error(
270
+ f"Offline processing, processing file {self.processing_filename} already exist. Data will not be saved. Choose another name! Workflow is canceled!"
271
+ )
272
+ self.outputs.continue_pipeline = False
273
+
274
+ if self.outputs.continue_pipeline is False:
275
+ return False
276
+ if self.processing_type == "debug":
277
+ return
278
+
279
+ self.do_process = self.get_input_value("do_process", True)
280
+ self.do_save = self.get_input_value("do_save", True)
281
+ if self.do_process:
282
+ self.process()
283
+ if self.do_save and self.processing_filename:
284
+ self.save()
285
+ elif self.do_save and not self.processing_filename:
286
+ raise ValueError(
287
+ f"Processing filename for {self.processing_type} is not set."
288
+ )
289
+ else:
290
+ self.log_warning("Save flag was set to False, data will not be saved")
291
+ else:
292
+ self.log_warning(msg=f"Processing {self.processing_type} will be skipped.")
293
+ self.outputs.info_history = self.get_input_value("info_history", [])
294
+
295
+ if self.get_input_value("save_in_gallery", False):
296
+ self._save_in_gallery()
297
+
298
+ if self.get_input_value("gc_collect", True):
299
+ gc.collect()
300
+
301
+ def _set_log_level(self, log_level="warning"):
302
+ if not isinstance(log_level, str):
303
+ return
304
+ if log_level.lower() == "info":
305
+ logger.setLevel(logging.INFO)
306
+ elif log_level.lower() == "warning":
307
+ logger.setLevel(logging.WARNING)
308
+ elif log_level.lower() == "error":
309
+ logger.setLevel(logging.ERROR)
310
+ elif log_level.lower() == "debug":
311
+ logger.setLevel(logging.DEBUG)
312
+
313
+ def log_debug(self, msg):
314
+ self._log(level="debug", msg=msg)
315
+
316
+ def log_info(self, msg):
317
+ self._log(level="info", msg=msg)
318
+
319
+ def log_warning(self, msg):
320
+ self._log(level="warning", msg=msg)
321
+
322
+ def log_error(self, msg):
323
+ self._log(level="error", msg=msg)
324
+
325
+ def _log(self, level, msg):
326
+ msg = f"Loop #{self.loop_nb}: {self.__class__.__name__}: (PID: {self._pid}): {msg}"
327
+ logger.__getattribute__(level)(msg)
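+ # Example (sketch, made-up values): a warning emitted from loop 3 of a
+ # NormalizationTask running as PID 12345 would be formatted as:
+ #   "Loop #3: NormalizationTask: (PID: 12345): Save flag was set to False, data will not be saved"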
328
+
329
+ def _log_allocated_memory(self):
330
+ memory_info = self.get_memory_info()
331
+ mem_usage_GB = memory_info["used"]
332
+ total_mem_GB = memory_info["total"]
333
+ available_mem_GB = memory_info["available"]
334
+
335
+ if available_mem_GB / total_mem_GB < 0.1:
336
+ mem_message = "Low memory available"
337
+ color_prefix = "\033[91m"
338
+ elif available_mem_GB / total_mem_GB < 0.3:
339
+ mem_message = "Medium memory available"
340
+ color_prefix = "\033[93m"
341
+ else:
342
+ mem_message = "Sufficient memory available"
343
+ color_prefix = "\033[92m"
344
+ color_suffix = "\033[0m"
345
+
346
+ global MEM_USAGE_START
347
+ if MEM_USAGE_START is None:
348
+ memory_delta = 0.0
349
+ MEM_USAGE_START = mem_usage_GB
350
+ else:
351
+ memory_delta = mem_usage_GB - MEM_USAGE_START
352
+
353
+ logger.info(
354
+ f"{color_prefix}Loop #{self.loop_nb}: {self.__class__.__name__}: (PID: {self._pid}): Memory: {mem_usage_GB:.2f}GB used, increased by {memory_delta:.2f}GB; {available_mem_GB:.2f}GB available. {mem_message}{color_suffix}"
355
+ )
356
+
357
+ def get_memory_info(self):
358
+ # Return memory info in GBs
359
+ return {
360
+ "used": self._process.memory_info().rss / 1e9,
361
+ "total": psutil.virtual_memory().total / 1e9,
362
+ "available": psutil.virtual_memory().available / 1e9,
363
+ }
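+ # Example return value (sketch, made-up numbers, all in GB):
+ #   {"used": 2.1, "total": 64.0, "available": 48.3}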
364
+
365
+ def _save_in_gallery(self): ...
366
+
367
+ def _log_benchmark(self, bench):
368
+ self.log_info(
369
+ f"Benchmark. Total ({bench.nb_frames}). {bench.benchmark_name}: {bench.bench_total_s:.2f} s. Per frame: {bench.bench_per_frame_ms:.2f} ms"
370
+ )
371
+
372
+ def _set_input_parameters(self):
373
+ # self.outputs.last_index_read = self.last_index_read
374
+ # self.outputs.loop_nb = self.loop_nb
375
+
376
+ if self.scan_memory_url:
377
+ if self.scan_memory_url.startswith("esrf:scan"):
378
+ # We trust this is an online processing
379
+ if self.detector_name is None:
380
+ raise ValueError("Online processing requires a detector_name")
381
+ if not self.beacon_host:
382
+ raise ValueError("Online processing requires a beacon_host")
383
+ self.scan = load_scan(
384
+ scan_memory_url=self.scan_memory_url, beacon_host=self.beacon_host
385
+ )
386
+ self.scan_info = self.scan.info
387
+
388
+ elif Path(self.scan_memory_url).is_file():
389
+ # We trust this is a fake online processing, recreating streams from a file
390
+ if self.detector_name is None:
391
+ raise ValueError("Fake online processing requires a detector_name")
392
+ with h5py.File(self.scan_memory_url, "r") as f:
393
+ self.scan_info = json.loads(f["scan_info"][()].decode())
394
+
395
+ self.filename_data = self.filename_data or self.scan_info["filename"]
396
+ if self.filename_lima is None:
397
+ lima_index_number_format = self.get_input_value(
398
+ "lima_index_number_format", LIMA_INDEX_NUMBER_FORMAT_ID02
399
+ )
400
+ self.filename_lima = f"{self.scan_info['images_path'].format(img_acq_device=self.detector_name)}{lima_index_number_format % 0}.h5"
401
+
402
+ self.scan_nb = self.scan_info["scan_nb"]
403
+ self.links["source"].update(
404
+ {
405
+ "datasignal": f"{self.filename_data}::{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}/data",
406
+ "datavariance": None,
407
+ "datasigma": None,
408
+ "metadata_counters": f"{self.filename_data}::/{self.scan_nb}.1/measurement",
409
+ "metadata_counters_subscan2": f"{self.filename_data}::/{self.scan_nb}.2/measurement",
410
+ "metadata_detector": f"{self.filename_lima}::entry_0000/{MAP_DETECTORS_LIMA.get(self.detector_name, DETECTOR_LIMA_DEFAULT)}/{self.detector_name}",
411
+ "metadata_headers": f"{self.filename_lima}::entry_0000/{MAP_DETECTORS_LIMA.get(self.detector_name, DETECTOR_LIMA_DEFAULT)}/{self.detector_name}/header",
412
+ "metadata_titleextension": None,
413
+ "ewoks": None,
414
+ }
415
+ )
416
+
417
+ elif self.filename_data:
418
+ # This is offline processing, but there are two types
419
+ self.scan_nb = self.get_input_value("scan_nb", None)
420
+ self.subscan = self.get_input_value("subscan", 1)
421
+ self.scan = None
422
+ self.scan_memory_url = None
423
+ self.beacon_host = None
424
+ with open_item_silx(self.filename_data, "/", "r") as file_input:
425
+ if "entry_0000/PyFAI" in file_input:
426
+ # We trust this is an offline processing from an already processed file
427
+ self.log_info(
428
+ "Looks like an offline processing from PROCESSED data"
429
+ )
430
+ self.detector_name = file_input["entry_0000/detector_name"][
431
+ ()
432
+ ].decode()
433
+ self.filename_lima = None
434
+ self.links["source"].update(
435
+ {
436
+ "datasignal": f"{self.filename_data}::/entry_0000/PyFAI/result_{self.processing_type}/data",
437
+ "datavariance": f"{self.filename_data}::/entry_0000/PyFAI/result_{self.processing_type}/data_variance",
438
+ "datasigma": f"{self.filename_data}::/entry_0000/PyFAI/result_{self.processing_type}/data_errors",
439
+ "metadata_counters": f"{self.filename_data}::/entry_0000/PyFAI/MCS/raw/subscan_1",
440
+ "metadata_counters_subscan2": f"{self.filename_data}::/entry_0000/PyFAI/MCS/raw/subscan_2",
441
+ "metadata_detector": f"{self.filename_data}::/entry_0000/PyFAI/{self.detector_name}",
442
+ "metadata_headers": f"{self.filename_data}::/entry_0000/PyFAI/parameters",
443
+ "metadata_titleextension": f"{self.filename_data}::/entry_0000/PyFAI/parameters/TitleExtension",
444
+ "ewoks": f"{self.filename_data}::/entry_0000/ewoks",
445
+ }
446
+ )
447
+ elif (
448
+ self.scan_nb is not None
449
+ and f"{self.scan_nb}.{self.subscan}" in file_input
450
+ ):
451
+ # We trust this is an offline processing from a RAW_DATA file
452
+ if self.detector_name is None:
453
+ raise ValueError(
454
+ "Offline processing from RAW_DATA requires a detector_name"
455
+ )
456
+
457
+ collection_name = file_input[
458
+ f"{self.scan_nb}.{self.subscan}/sample/name"
459
+ ][()].decode()
460
+ self.filename_lima = (
461
+ Path(self.filename_data).parent
462
+ / f"{collection_name}_{self.detector_name}_{self.scan_nb:05}_00.h5"
463
+ )
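+ # Example (sketch, hypothetical names): collection "mysample", detector
+ # "eiger2" and scan_nb 12 resolve to "mysample_eiger2_00012_00.h5"
+ # next to the master file.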
464
+ if not self.filename_lima.exists():
465
+ raise FileNotFoundError(
466
+ f"Lima file could not be found: {self.filename_lima}"
467
+ )
468
+ self.filename_lima = str(self.filename_lima)
469
+ self.links["source"].update(
470
+ {
471
+ "datasignal": f"{self.filename_data}::/{self.scan_nb}.{self.subscan}/instrument/{self.detector_name}/data",
472
+ "datavariance": None,
473
+ "datasigma": None,
474
+ "metadata_counters": f"{self.filename_data}::/{self.scan_nb}.1/measurement",
475
+ "metadata_counters_subscan2": f"{self.filename_data}::/{self.scan_nb}.2/measurement",
476
+ "metadata_detector": f"{self.filename_lima}::/entry_0000/{MAP_DETECTORS_LIMA.get(self.detector_name, DETECTOR_LIMA_DEFAULT)}/{self.detector_name}",
477
+ "metadata_headers": f"{self.filename_lima}::/entry_0000/{MAP_DETECTORS_LIMA.get(self.detector_name, DETECTOR_LIMA_DEFAULT)}/{self.detector_name}/header",
478
+ "metadata_titleextension": None,
479
+ "ewoks": None,
480
+ }
481
+ )
482
+ elif self.scan_nb is None:
483
+ raise ValueError(
484
+ "Offline processing requires a scan number: scan_nb"
485
+ )
486
+ else:
487
+ raise ValueError(
488
+ f"The format in {self.filename_data} is not valid for offline processing."
489
+ )
490
+
491
+ def _load_headers(self) -> None:
492
+ if self.headers is not None:
493
+ return
494
+
495
+ self.headers = {}
496
+ file_headers, h5path_headers = self.links["source"]["metadata_headers"].split(
497
+ "::"
498
+ )
499
+ with open_item_silx(file_headers, h5path_headers, "r") as headers_group:
500
+ for key_header in headers_group:
501
+ value_header = headers_group[key_header][()]
502
+ if isinstance(value_header, bytes):
503
+ value_header = value_header.decode()
504
+ self.headers[key_header] = str(value_header)
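+ # Note (sketch, hypothetical path): a "source" link such as
+ # "/path/to/lima_file.h5::entry_0000/ESRF-ID02/eiger2/header" is split on "::"
+ # into a filename and an HDF5 path before the group is opened for reading.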
505
+
506
+ def _load_streams(self) -> None:
507
+ self.streams_subscan1 = []
508
+ self.streams_subscan2 = []
509
+
510
+ if self.scan:
511
+ acquisition_chains = self.scan.info.get("acquisition_chain")
512
+ if does_scan_contain_subscan2(scan=self.scan):
513
+ acquisition_chain_subscan1 = acquisition_chains.get("mcs")
514
+ acquisition_chain_subscan2 = acquisition_chains.get("sampling_timer")
515
+
516
+ # Subscan1
517
+ if acquisition_chain_subscan1:
518
+ stream_names_subscan1 = [
519
+ s
520
+ for s in _get_stream_names_from_acquisition_chain(
521
+ acq_chain=acquisition_chain_subscan1, include_images=False
522
+ )
523
+ if s in self.scan.streams
524
+ ]
525
+ else:
526
+ stream_names_subscan1 = []
527
+
528
+ stream_names_subscan1_raw = [
529
+ refactor_stream_name_raw(stream_name=s, cut_name=True)
530
+ for s in stream_names_subscan1
531
+ ]
532
+ stream_names_subscan1_interpreted = [
533
+ refactor_stream_name_interpreted(stream_name=s)
534
+ for s in stream_names_subscan1
535
+ ]
536
+
537
+ for stream_name, stream_name_raw_, stream_name_interpreted in zip(
538
+ stream_names_subscan1,
539
+ stream_names_subscan1_raw,
540
+ stream_names_subscan1_interpreted,
541
+ ):
542
+ if stream_names_subscan1_raw.count(stream_name_raw_) > 1:
543
+ stream_name_raw = refactor_stream_name_raw(
544
+ stream_name=stream_name,
545
+ detector_name=self.detector_name,
546
+ cut_name=False,
547
+ )
548
+ else:
549
+ stream_name_raw = stream_name_raw_
550
+
551
+ stream_subscan1_info = {
552
+ "stream": self.scan.streams[stream_name],
553
+ "name": stream_name,
554
+ "name_raw": stream_name_raw,
555
+ "name_interpreted": stream_name_interpreted,
556
+ }
557
+ self.streams_subscan1.append(stream_subscan1_info)
558
+
559
+ # Subscan2
560
+ if acquisition_chain_subscan2:
561
+ stream_names_subscan2 = [
562
+ s
563
+ for s in _get_stream_names_from_acquisition_chain(
564
+ acq_chain=acquisition_chain_subscan2, include_images=False
565
+ )
566
+ if s in self.scan.streams
567
+ ]
568
+ else:
569
+ stream_names_subscan2 = []
570
+
571
+ stream_names_subscan2_raw = [
572
+ refactor_stream_name_raw(stream_name=s, cut_name=True)
573
+ for s in stream_names_subscan2
574
+ ]
575
+ stream_names_subscan2_interpreted = [
576
+ refactor_stream_name_interpreted(stream_name=s)
577
+ for s in stream_names_subscan2
578
+ ]
579
+
580
+ for stream_name, stream_name_raw_, stream_name_interpreted in zip(
581
+ stream_names_subscan2,
582
+ stream_names_subscan2_raw,
583
+ stream_names_subscan2_interpreted,
584
+ ):
585
+ if stream_names_subscan2_raw.count(stream_name_raw_) > 1:
586
+ stream_name_raw = refactor_stream_name_raw(
587
+ stream_name=stream_name, cut_name=False
588
+ )
589
+ else:
590
+ stream_name_raw = stream_name_raw_
591
+
592
+ stream_subscan2_info = {
593
+ "stream": self.scan.streams[stream_name],
594
+ "name": stream_name,
595
+ "name_raw": stream_name_raw,
596
+ "name_interpreted": stream_name_interpreted,
597
+ }
598
+ self.streams_subscan2.append(stream_subscan2_info)
599
+ else:
600
+ # One subscan1, no subscan2
601
+ acquisition_chain_subscan1 = next(iter(acquisition_chains.values()))
602
+ acquisition_chain_subscan2 = None
603
+
604
+ stream_names_subscan1 = [
605
+ s
606
+ for s in _get_stream_names_from_acquisition_chain(
607
+ acq_chain=acquisition_chain_subscan1, include_images=False
608
+ )
609
+ if s in self.scan.streams
610
+ ]
611
+ stream_names_subscan1_raw = [
612
+ refactor_stream_name_raw(stream_name=s, cut_name=True)
613
+ for s in stream_names_subscan1
614
+ ]
615
+ stream_names_subscan1_interpreted = [
616
+ refactor_stream_name_interpreted(stream_name=s)
617
+ for s in stream_names_subscan1
618
+ ]
619
+
620
+ for stream_name, stream_name_raw_, stream_name_interpreted in zip(
621
+ stream_names_subscan1,
622
+ stream_names_subscan1_raw,
623
+ stream_names_subscan1_interpreted,
624
+ ):
625
+ if stream_names_subscan1_raw.count(stream_name_raw_) > 1:
626
+ stream_name_raw = refactor_stream_name_raw(
627
+ stream_name=stream_name,
628
+ cut_name=False,
629
+ )
630
+ else:
631
+ stream_name_raw = stream_name_raw_
632
+
633
+ stream_subscan1_info = {
634
+ "stream": self.scan.streams[stream_name],
635
+ "name": stream_name,
636
+ "name_raw": stream_name_raw,
637
+ "name_interpreted": stream_name_interpreted,
638
+ }
639
+ self.streams_subscan1.append(stream_subscan1_info)
640
+
641
+ elif self.scan_memory_url and Path(self.scan_memory_url).is_file():
642
+ ...
643
+ # with h5py.File(self.scan_memory_url, "r") as f:
644
+ # self.scan_info = json.loads(f["scan_info"][()].decode())
645
+ # nb_points = self.scan_info["npoints"]
646
+ # for stream_name in f["streams"]:
647
+ # stream = f["streams"][stream_name]
648
+ # if len(stream) == nb_points:
649
+ # self.streams[stream_name] = stream[:]
650
+ # else:
651
+ # self.streams_subscan2[stream_name] = stream[:]
652
+ else:
653
+ # The available counters are either in the RAW master file or in the HS32 arrays in the PROCESSED file
654
+ link_counters = self.links["source"]["metadata_counters"]
655
+ if link_counters is not None:
656
+ filename, h5path_subscan1 = link_counters.split("::")
657
+ _, h5path_subscan2 = self.links["source"][
658
+ "metadata_counters_subscan2"
659
+ ].split("::")
660
+ params = {
661
+ "filename": filename,
662
+ "name": "/",
663
+ "retry_timeout": 0.1,
664
+ }
665
+
666
+ with open_item_silx(**params) as root:
667
+ if h5path_subscan1 in root:
668
+ group_subscan1 = root[h5path_subscan1]
669
+ for dset_name in group_subscan1:
670
+ if group_subscan1[dset_name].ndim == 3:
671
+ # Skip the detector
672
+ continue
673
+
674
+ stream_info = {
675
+ "stream": group_subscan1[dset_name][:],
676
+ "name": dset_name,
677
+ "name_raw": dset_name,
678
+ "name_interpreted": dset_name,
679
+ }
680
+ self.streams_subscan1.append(stream_info)
681
+
682
+ if h5path_subscan2 in root:
683
+ group_subscan2 = root[h5path_subscan2]
684
+ for dset_name in group_subscan2:
685
+ stream_info = {
686
+ "stream": group_subscan2[dset_name][:],
687
+ "name": dset_name,
688
+ "name_raw": dset_name,
689
+ "name_interpreted": dset_name,
690
+ }
691
+ self.streams_subscan2.append(stream_info)
692
+
693
+ def _get_new_datasets(self) -> dict:
694
+ filename_data, h5path_datasignal = self.links["source"]["datasignal"].split(
695
+ "::"
696
+ )
697
+
698
+ out = {
699
+ "dataset_signal": numpy.array([]),
700
+ "dataset_variance": numpy.array([]),
701
+ "dataset_sigma": numpy.array([]),
702
+ "index_range": None,
703
+ }
704
+
705
+ if self.scan_memory_url and self.scan_memory_url.startswith("esrf:scan"):
706
+ # Reading online
707
+ # - dataset_signal comes from the blissdata stream
708
+ # - dataset_variance = empty
709
+ # - dataset_sigma = empty
710
+
711
+ if self.processing_type in PYFAI_PROCESSES:
712
+ stream_name = f"{self.detector_name}:image"
713
+ else:
714
+ stream_name = "mcs:epoch"
715
+
716
+ index_range_new = _get_new_slice_limits(
717
+ stream_name=stream_name,
718
+ scan=self.scan,
719
+ scan_memory_url=self.scan_memory_url,
720
+ beacon_host=self.beacon_host,
721
+ index_range=self.index_range,
722
+ index_range_last=self.index_range_last,
723
+ max_slice_size=self.max_slice_size,
724
+ )
725
+ if index_range_new is None:
726
+ return out
727
+
728
+ ptdata_signal = _slice_dataset_online(
729
+ stream_name=stream_name,
730
+ detector_name=self.detector_name,
731
+ scan=self.scan,
732
+ scan_memory_url=self.scan_memory_url,
733
+ beacon_host=self.beacon_host,
734
+ lima_url_template=self.get_input_value(
735
+ "lima_url_template", LIMA_URL_TEMPLATE_ID02
736
+ ),
737
+ lima_url_template_args=self.get_input_value(
738
+ "lima_url_template_args", {}
739
+ ),
740
+ subscan=self.subscan,
741
+ index_range=index_range_new,
742
+ start_from_memory=True,
743
+ )
744
+
745
+ if ptdata_signal["dataset"] is not None:
746
+ out["dataset_signal"] = ptdata_signal["dataset"]
747
+ out["index_range"] = ptdata_signal["index_range"]
748
+
749
+ else:
750
+ # Reading offline
751
+ # - dataset_signal comes from the RAW_DATA or PROCESSED_DATA
752
+ # - dataset_variance = comes only from PROCESSED_DATA
753
+ # - dataset_sigma = comes only from PROCESSED_DATA
754
+
755
+ filename_data, h5path_datasignal = self.links["source"]["datasignal"].split(
756
+ "::"
757
+ )
758
+ index_range_new = _get_new_slice_limits(
759
+ filename_data=self.filename_data,
760
+ h5path=h5path_datasignal,
761
+ index_range=self.index_range,
762
+ index_range_last=self.index_range_last,
763
+ max_slice_size=self.max_slice_size,
764
+ )
765
+ if index_range_new is None:
766
+ return out
767
+
768
+ ptdata_signal = _slice_dataset_offline(
769
+ filename_data=filename_data,
770
+ h5path_to_data=h5path_datasignal,
771
+ index_range=index_range_new,
772
+ )
773
+ if ptdata_signal["dataset"] is not None:
774
+ out["dataset_signal"] = ptdata_signal["dataset"]
775
+ out["index_range"] = ptdata_signal["index_range"]
776
+
777
+ if self.links["source"]["datavariance"] is not None:
778
+ filename_data, h5path_datavariance = self.links["source"][
779
+ "datavariance"
780
+ ].split("::")
781
+ ptdata_variance = _slice_dataset_offline(
782
+ filename_data=filename_data,
783
+ h5path_to_data=h5path_datavariance,
784
+ index_range=index_range_new,
785
+ )
786
+ if ptdata_variance["dataset"] is not None:
787
+ out["dataset_variance"] = ptdata_variance["dataset"]
788
+
789
+ if self.links["source"]["datasigma"] is not None:
790
+ filename_data, h5path_datasigma = self.links["source"][
791
+ "datasigma"
792
+ ].split("::")
793
+ ptdata_sigma = _slice_dataset_offline(
794
+ filename_data=filename_data,
795
+ h5path_to_data=h5path_datasigma,
796
+ index_range=index_range_new,
797
+ )
798
+ if ptdata_sigma["dataset"] is not None:
799
+ out["dataset_sigma"] = ptdata_sigma["dataset"]
800
+
801
+ nb_frames_read = len(out["dataset_signal"])
802
+ index_range_sliced = out["index_range"]
803
+ if nb_frames_read > 0:
804
+ self.log_info(f"""
805
+ \n\tIncoming ({nb_frames_read} frames) in the range {index_range_sliced[0]} -> {index_range_sliced[-1]}
806
+ """)
807
+ else:
808
+ self.log_info("""
809
+ \n\tNo more data to read. End of the workflow.
810
+ """)
811
+
812
+ return out
813
+
814
+ def _set_datasets(self) -> None:
815
+ reading_node = self.get_input_value("reading_node", False)
816
+ dataset_signal = self.get_input_value("dataset_signal", None)
817
+ dataset_variance = self.get_input_value("dataset_variance", None)
818
+ dataset_sigma = self.get_input_value("dataset_sigma", None)
819
+
820
+ self.outputs.continue_pipeline = True
821
+ if reading_node or dataset_signal is None:
822
+ # Get new incoming data
823
+ ptdata = self._get_new_datasets()
824
+ dataset_signal = ptdata["dataset_signal"]
825
+ dataset_variance = ptdata["dataset_variance"]
826
+ dataset_sigma = ptdata["dataset_sigma"]
827
+ index_range_new = ptdata["index_range"]
828
+
829
+ if len(dataset_signal) == 0:
830
+ self.outputs.continue_pipeline = False
831
+ self.outputs.loop_nb = self.loop_nb
832
+ self.outputs.index_range_last = self.index_range_last
833
+ return
834
+ self.loop_nb += 1
835
+
836
+ # We define here the actual index limits because we read the data from streams
837
+ self.index_range_last = index_range_new
838
+
839
+ if (
840
+ dataset_signal is not None
841
+ and len(dataset_signal) > 0
842
+ and self.index_range_last is None
843
+ ):
844
+ # Only possible if the data was sent as inputs without any index_range_last
845
+ self.index_range_last = [0, len(dataset_signal)]
846
+
847
+ self.outputs.loop_nb = self.loop_nb
848
+ self.outputs.index_range_last = self.index_range_last
849
+ self.outputs.dataset_signal = dataset_signal
850
+ self.outputs.dataset_variance = dataset_variance
851
+ self.outputs.dataset_sigma = dataset_sigma
852
+ self.dataset_signal = dataset_signal
853
+ self.dataset_variance = dataset_variance
854
+ self.dataset_sigma = dataset_sigma
855
+
856
+ def _new_loop(self):
857
+ self.loop_nb += 1
858
+
859
+ memory_used = self.get_memory_info()["used"]
860
+ global MEM_USAGE_START
861
+ if self.loop_nb == 1:
862
+ memory_delta = 0.0
863
+ MEM_USAGE_START = memory_used
864
+ else:
865
+ memory_delta = memory_used - MEM_USAGE_START
866
+
867
+ if memory_delta == 0.0:
868
+ color_prefix = "\033[93m"
869
+ elif memory_delta < 0.0:
870
+ color_prefix = "\033[92m"
871
+ else:
872
+ color_prefix = "\033[91m"
873
+ color_suffix = "\033[0m"
874
+
875
+ logger.info(
876
+ f"{color_prefix}Loop #{self.loop_nb}: {self.__class__.__name__}: (PID: {self._pid}): \
877
+ Memory Delta: {memory_delta:.2f}GB{color_suffix}"
878
+ )
879
+
880
+ def process(self):
881
+ self.bench_process = None
882
+ self.bench_saving = None
883
+ self.processing_params = {}
884
+ if self.dataset_signal.size == 0:
885
+ self.log_warning(
886
+ f"Skipping processing {self.processing_type} due to empty array."
887
+ )
888
+ self.do_save = False
889
+ return False
890
+ return True
891
+
892
+ def _set_output_variables(self):
893
+ # Write some h5 paths
894
+ if not self.processing_filename:
895
+ return
896
+
897
+ if self.processing_type in PYFAI_PROCESSES:
898
+ nxprocess = "PyFAI"
899
+ elif self.processing_type in TRUSAXS_PROCESSES:
900
+ nxprocess = "TRUSAXS"
901
+ else:
902
+ nxprocess = "other"
903
+
904
+ self.links["destination"].update(
905
+ {
906
+ "nxdata": f"{self.processing_filename}::/entry_0000/{nxprocess}/result_{self.processing_type}",
907
+ "datasignal": f"{self.processing_filename}::/entry_0000/{nxprocess}/result_{self.processing_type}/data",
908
+ "datavariance": f"{self.processing_filename}::/entry_0000/{nxprocess}/result_{self.processing_type}/data_variance",
909
+ "datasigma": f"{self.processing_filename}::/entry_0000/{nxprocess}/result_{self.processing_type}/data_errors",
910
+ "metadata_detector": f"{self.processing_filename}::/entry_0000/{nxprocess}/{self.detector_name}",
911
+ "metadata_headers": f"{self.processing_filename}::/entry_0000/{nxprocess}/parameters",
912
+ "metadata_mcs": f"{self.processing_filename}::/entry_0000/{nxprocess}/MCS",
913
+ "metadata_tfg": f"{self.processing_filename}::/entry_0000/{nxprocess}/TFG",
914
+ "metadata_counters": None,
915
+ "ewoks": f"{self.processing_filename}::/entry_0000/ewoks",
916
+ }
917
+ )
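+ # Example (sketch, hypothetical output file): for processing_type "azim" the
+ # result frames are written under
+ # "/path/to/output.h5::/entry_0000/PyFAI/result_azim/data".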
918
+
919
+ def get_parameter(self, key: str, to_integer: bool = False, default=None):
920
+ value = self.get_input_value(key=key)
921
+ if value == missing_data.MISSING_DATA:
922
+ # Try to get it from header
923
+ value = self.get_from_headers(
924
+ key=key,
925
+ to_integer=to_integer,
926
+ default=default,
927
+ )
928
+ return value
929
+
930
+ def get_headers_filename(self, folder_key: str, file_key: str):
931
+ folder = self.get_from_headers(
932
+ key=folder_key,
933
+ )
934
+ file_name = self.get_from_headers(
935
+ key=file_key,
936
+ )
937
+ if folder is None or file_name is None:
938
+ return
939
+
940
+ filename = os.path.join(folder, file_name)
941
+ if not os.path.exists(filename):
942
+ self.log_warning(f"{filename} from headers could not be found.")
943
+ return
944
+ return filename
945
+
946
+ def get_mask_beamstop_filename(self):
947
+ return self.get_headers_filename(
948
+ folder_key=KEY_BEAMSTOP_MASK_FOLDER,
949
+ file_key=KEY_BEAMSTOP_MASK_FILE,
950
+ )
951
+
952
+ def get_mask_gaps_filename(self):
953
+ return self.get_headers_filename(
954
+ folder_key=KEY_DETECTOR_MASK_FOLDER,
955
+ file_key=KEY_DETECTOR_MASK_FILE,
956
+ )
957
+
958
+ def get_flat_filename(self):
959
+ return self.get_headers_filename(
960
+ folder_key=KEY_FLAT_FOLDER,
961
+ file_key=KEY_FLAT_FILE,
962
+ )
963
+
964
+ def get_dark_filename(self):
965
+ return self.get_headers_filename(
966
+ folder_key=KEY_DARK_FOLDER,
967
+ file_key=KEY_DARK_FILE,
968
+ )
969
+
970
+ def get_mask_window(self):
971
+ return self.get_headers_filename(
972
+ folder_key=KEY_WINDOW_FOLDER,
973
+ file_key=KEY_WINDOW_FILE,
974
+ )
975
+
976
+ def save(self):
977
+ self.log_debug("Saving processed data...")
978
+ self._set_output_variables()
979
+ with ExitStack() as stack:
980
+ if (
981
+ not self.processing_filename
982
+ or not self.do_save
983
+ or self.dataset_signal.size == 0
984
+ ):
985
+ return False
986
+
987
+ # Create the file and root groups
988
+ self._create_processing_file()
989
+
990
+ # Append data to the nexus data group
991
+ stack.enter_context(lock)
992
+ self.bench_saving = self.Benchmark(
993
+ nb_frames=len(self.dataset_signal), benchmark_name="saving"
994
+ )
995
+ stack.enter_context(self.bench_saving)
996
+ self._update_id02_data(
997
+ stack=stack,
998
+ )
999
+ if self.get_input_value("save_metadata", True):
1000
+ self._update_id02_metadata(stack)
1001
+
1002
+ self._log_benchmark(self.bench_saving)
1003
+ self._save_benchmark(self.bench_process)
1004
+ self._save_benchmark(self.bench_saving)
1005
+ return True
1006
+
1007
+ def _create_processing_file(self):
1008
+ if self.scan_memory_url:
1009
+ # With online processing, loop_nb=1 does not have to be the beginning of the processing.
1010
+ # Processing files are created only if they do not exist (never overwritten) and only if there is self.dataset_signal.
1011
+ if os.path.exists(self.processing_filename):
1012
+ return
1013
+
1014
+ if self.dataset_signal is None:
1015
+ return
1016
+ else:
1017
+ # With offline processing, it will create the file in the first loop, since there will always be dataset_signal
1018
+ # Comment for dispatching
1019
+ if self.loop_nb == 1 and os.path.exists(self.processing_filename):
1020
+ self.log_error(
1021
+ f"Offline processing, processing file {self.processing_filename} already exist. Data will not be saved. Choose another name!"
1022
+ )
1023
+ self.outputs.continue_pipeline = False
1024
+ return
1025
+
1026
+ if os.path.exists(self.processing_filename):
1027
+ return
1028
+
1029
+ if self.loop_nb > 1:
1030
+ return
1031
+
1032
+ # Create directories if needed
1033
+ os.makedirs(os.path.dirname(self.processing_filename), exist_ok=True)
1034
+
1035
+ self.log_info(f"Creating file: {self.processing_filename}")
1036
+ with ExitStack() as stack:
1037
+ root_group = stack.enter_context(
1038
+ open_item_silx(filename=self.processing_filename, name="/", mode="w")
1039
+ )
1040
+ root_group = self.write_root_group(root_group=root_group)
1041
+
1042
+ if self.processing_type in PYFAI_PROCESSES:
1043
+ title = self.links["destination"]["nxdata"]
1044
+ elif self.processing_type in TRUSAXS_PROCESSES:
1045
+ title = "TFG metadata collection"
1046
+ else:
1047
+ title = ""
1048
+
1049
+ # Entry group
1050
+ entry_group = self.create_h5_group(
1051
+ h5_parent_group=root_group,
1052
+ h5_group_name="entry_0000",
1053
+ title=title,
1054
+ NX_class="NXentry",
1055
+ )
1056
+ if self.processing_type in PYFAI_PROCESSES:
1057
+ entry_group.attrs["default"] = self.links["destination"][
1058
+ "nxdata"
1059
+ ].split("::")[-1]
1060
+
1061
+ entry_group["start_time"] = str(get_isotime())
1062
+ if self.processing_type in PYFAI_PROCESSES:
1063
+ entry_group["detector_name"] = self.detector_name
1064
+
1065
+ # Configuration group
1066
+ configuration_group = self.create_h5_group(
1067
+ h5_parent_group=entry_group,
1068
+ h5_group_name="configuration",
1069
+ NX_class="NXnote",
1070
+ )
1071
+ configuration_group["type"] = "text/json"
1072
+ configuration_group["data"] = json.dumps(
1073
+ self.get_headers(), indent=2, separators=(",\r\n", ": ")
1074
+ )
1075
+
1076
+ # PyFAI Nexus group
1077
+ if self.processing_type in PYFAI_PROCESSES:
1078
+ process_group = self.create_h5_group(
1079
+ h5_parent_group=entry_group,
1080
+ h5_group_name="PyFAI",
1081
+ NX_class="NXprocess",
1082
+ default=self.links["destination"]["nxdata"].split("::")[-1],
1083
+ )
1084
+ process_group["date"] = str(get_isotime())
1085
+ process_group["processing_type"] = self.processing_type
1086
+ process_group["program"] = "pyFAI"
1087
+ process_group["version"] = pyFAIVersion
1088
+ elif self.processing_type in TRUSAXS_PROCESSES:
1089
+ process_group = self.create_h5_group(
1090
+ h5_parent_group=entry_group,
1091
+ h5_group_name="TRUSAXS",
1092
+ NX_class="NXinstrument",
1093
+ )
1094
+ process_group["date"] = str(get_isotime())
1095
+ process_group["processing_type"] = self.processing_type
1096
+ process_group["program"] = "TruSAXS"
1097
+ else:
1098
+ return
1099
+
1100
+ # MCS group
1101
+ metadata_mcs_output = self.create_h5_group(
1102
+ h5_parent_group=process_group,
1103
+ h5_group_name="MCS",
1104
+ NX_class="NXcollection",
1105
+ )
1106
+ metadata_mcs_output["device"] = "bliss"
1107
+
1108
+ # HS32 N(name), Z(zero), F(factor) arrays
1109
+ nb_pins = self.get_HS32_number_pins()
1110
+ HS32N_array = numpy.array(
1111
+ [
1112
+ self.get_pin_name_from_index(index_pin=index_pin)
1113
+ for index_pin in range(nb_pins)
1114
+ ],
1115
+ dtype=h5py.string_dtype(encoding="utf-8"),
1116
+ )
1117
+ HS32Z_array = numpy.array(
1118
+ [
1119
+ self.get_pin_zerovalue_from_index(index_pin=index_pin)
1120
+ for index_pin in range(nb_pins)
1121
+ ]
1122
+ )
1123
+ HS32F_array = numpy.array(
1124
+ [
1125
+ self.get_pin_factorvalue_from_index(index_pin=index_pin)
1126
+ for index_pin in range(nb_pins)
1127
+ ]
1128
+ )
1129
+ metadata_mcs_output.create_dataset(
1130
+ name="HS32N",
1131
+ data=HS32N_array,
1132
+ dtype=h5py.string_dtype(encoding="utf-8"),
1133
+ )
1134
+
1135
+ metadata_mcs_output.create_dataset(
1136
+ name="HS32Z",
1137
+ data=HS32Z_array,
1138
+ dtype="float64",
1139
+ )
1140
+ metadata_mcs_output.create_dataset(
1141
+ name="HS32F",
1142
+ data=HS32F_array,
1143
+ dtype="float64",
1144
+ )
1145
+
1146
+ # HSI0Factor, HSI1Factor
1147
+ for key in ["HSI0Factor", "HSI1Factor"]:
1148
+ value = self.get_from_headers(
1149
+ key=key,
1150
+ )
1151
+ if value is not None:
1152
+ metadata_mcs_output.create_dataset(
1153
+ name=key, data=value, dtype="float64"
1154
+ )
1155
+
1156
+ # HSI0, HSI1, HSTime
1157
+ for key in ["HSI0", "HSI1", "HSTime"]:
1158
+ pin_name = self.get_from_headers(key=key)
1159
+ for index_pin in range(nb_pins):
1160
+ if self.get_pin_name_from_index(index_pin=index_pin) == pin_name:
1161
+ metadata_mcs_output.create_dataset(
1162
+ name=key, data=index_pin + 1, dtype="int64"
1163
+ )
1164
+ break
1165
+
1166
+ # ShutterTime
1167
+ for key in ["ShutterOpeningTime", "ShutterClosingTime"]:
1168
+ value = self.get_from_headers(key=key)
1169
+ if value is None:
1170
+ value = 0.0
1171
+ metadata_mcs_output.create_dataset(
1172
+ name=key, data=value, dtype="float64"
1173
+ )
1174
+
1175
+ # Parameters (header) group
1176
+ metadata_headers_output = self.create_h5_group(
1177
+ h5_parent_group=process_group,
1178
+ h5_group_name="parameters",
1179
+ NX_class="NXcollection",
1180
+ )
1181
+ headers = self.get_headers()
1182
+ for key, value in headers.items():
1183
+ if key in KEYS_FLOAT:
1184
+ metadata_headers_output.create_dataset(
1185
+ name=key, data=value, dtype="float64"
1186
+ )
1187
+ elif key in KEYS_INT:
1188
+ metadata_headers_output.create_dataset(
1189
+ name=key, data=value, dtype="int64"
1190
+ )
1191
+ elif key == "TitleExtension":
1192
+ continue
1193
+ else:
1194
+ value = str(value)
1195
+ metadata_headers_output.create_dataset(
1196
+ name=key,
1197
+ data=value,
1198
+ dtype=h5py.string_dtype(encoding="utf-8"),
1199
+ )
1200
+
1201
+ # TFG group
1202
+ metadata_tfg_output = self.create_h5_group(
1203
+ h5_parent_group=process_group,
1204
+ h5_group_name="TFG",
1205
+ NX_class="NXcollection",
1206
+ )
1207
+ metadata_tfg_output["device"] = "bliss"
1208
+
1209
+ for key in ["HMStartEpoch", "HMStartTime"]:
1210
+ if key in headers:
1211
+ value = str(headers[key])
1212
+ metadata_tfg_output.create_dataset(
1213
+ name=key, data=value, dtype=h5py.string_dtype(encoding="utf-8")
1214
+ )
1215
+ else:
1216
+ self.log_warning(f"Key {key} not found in headers")
1217
+
1218
+ # Type some static information
1219
+ # info_list = [{**INFO_COMMON}]
1220
+ # info_list += self.processing_info()
1221
+ # for info_item in info_list:
1222
+ # h5group = info_item.get("h5path")
1223
+ # if h5group in root_group:
1224
+ # root_group[h5group][info_item["name"]] = str(info_item["value"])
1225
+
1226
+ if self.processing_type in PYFAI_PROCESSES:
1227
+ self._link_ewoks_history(
1228
+ root_group_destination=root_group,
1229
+ stack=stack,
1230
+ )
1231
+
1232
+ def get_HS32_number_pins(self):
1233
+ """
1234
+ Get the number of HS32 pins, which will determine the shape of metadata arrays
1235
+ """
1236
+ return self.get_from_headers(key="HS32Len", to_integer=True)
1237
+
1238
+ def get_pin_name_from_index(self, index_pin: int):
1239
+ """
1240
+ Retrieve the name of the pin that is going to be saved in the
1241
+ HS32V array at position [:, index_pin].
1242
+ There is no need to use a scalers_keys dictionary.
1243
+ index_pin goes from 0 to (self.get_HS32_number_pins() - 1).
1244
+ """
1245
+ return self.get_from_headers(key=f"HS32N{(index_pin + 1):02}")
1246
+
1247
+ def get_pin_zerovalue_from_index(self, index_pin: int):
1248
+ return self.get_from_headers(key=f"HS32Z{(index_pin + 1):02}")
1249
+
1250
+ def get_pin_factorvalue_from_index(self, index_pin: int):
1251
+ return self.get_from_headers(key=f"HS32F{(index_pin + 1):02}")
1252
+
1253
+ def _update_id02_data(
1254
+ self,
1255
+ stack: ExitStack,
1256
+ ):
1257
+ file = stack.enter_context(
1258
+ open_item_silx(filename=self.processing_filename, name="/", mode="a")
1259
+ )
1260
+ # Three datasets: data, data_variance, data_errors
1261
+ _, path_nxdata_output = self.links["destination"]["nxdata"].split("::")
1262
+ _, path_data_output = self.links["destination"]["datasignal"].split("::")
1263
+ if path_nxdata_output not in file:
1264
+ nexus_data_grp = self.create_h5_group(
1265
+ h5_parent_group=file,
1266
+ h5_group_name=path_nxdata_output,
1267
+ NX_class="NXdata",
1268
+ default=path_data_output,
1269
+ signal="data",
1270
+ )
1271
+ else:
1272
+ nexus_data_grp = file[path_nxdata_output]
1273
+ self._update_dataset(
1274
+ added_dataset=self.outputs.dataset_signal,
1275
+ h5_group=nexus_data_grp,
1276
+ h5_dataset_name="data",
1277
+ index_read=self.index_range_last,
1278
+ )
1279
+ if self.get_input_value("save_variance", False):
1280
+ self._update_dataset(
1281
+ added_dataset=self.outputs.dataset_variance,
1282
+ h5_group=nexus_data_grp,
1283
+ h5_dataset_name="data_variance",
1284
+ index_read=self.index_range_last,
1285
+ )
1286
+ if self.get_input_value("save_sigma", True):
1287
+ self._update_dataset(
1288
+ added_dataset=self.outputs.dataset_sigma,
1289
+ h5_group=nexus_data_grp,
1290
+ h5_dataset_name="data_errors",
1291
+ index_read=self.index_range_last,
1292
+ )
1293
+
1294
+ def _update_id02_metadata(self, stack: ExitStack):
1295
+ # Update MCS group
1296
+ if self.processing_type not in ALL_PROCESSES:
1297
+ self.log_error(f"Processing type {self.processing_type} not valid")
1298
+ return
1299
+
1300
+ root_group_destination = stack.enter_context(
1301
+ open_item_silx(filename=self.processing_filename, name="/", mode="a")
1302
+ )
1303
+
1304
+ _, h5path_mcs_output = self.links["destination"]["metadata_mcs"].split("::")
1305
+ _, h5path_tfg_output = self.links["destination"]["metadata_tfg"].split("::")
1306
+ _, h5path_headers_output = self.links["destination"]["metadata_headers"].split(
1307
+ "::"
1308
+ )
1309
+ _, h5path_detector = self.links["destination"]["metadata_detector"].split("::")
1310
+ _, h5path_nxdata_output = self.links["destination"]["nxdata"].split("::")
1311
+ _, h5path_ewoks = self.links["destination"]["ewoks"].split("::")
1312
+
1313
+ metadata_mcs_output = root_group_destination.require_group(h5path_mcs_output)
1314
+ metadata_tfg_output = root_group_destination.require_group(h5path_tfg_output)
1315
+ metadata_headers_output = root_group_destination.require_group(
1316
+ h5path_headers_output
1317
+ )
1318
+ nexus_data_grp_destination = None
1319
+ if self.processing_type in PYFAI_PROCESSES:
1320
+ nexus_data_grp_destination = root_group_destination.require_group(
1321
+ h5path_nxdata_output
1322
+ )
1323
+
1324
+ self._update_subscan1(metadata_mcs_output)
1325
+ self._update_subscan2(metadata_mcs_output)
1326
+ # self._copy_streams(metadata_mcs_output)
1327
+ self._update_HS32C(metadata_mcs_output)
1328
+ self._update_HS32V(metadata_mcs_output)
1329
+ self._update_exposuretime(metadata_mcs_output)
1330
+ self._update_deltatime(metadata_tfg_output, nexus_data_grp_destination)
1331
+ self._update_intensities(metadata_mcs_output)
1332
+
1333
+ # Save the processing params (only once except for normalization values)
1334
+ if self.processing_type in PYFAI_PROCESSES:
1335
+ self._save_processing_params(root_group=root_group_destination)
1336
+
1337
+ # Update TitleExtension
1338
+ self.update_title_extension(
1339
+ parameters_group_destination=metadata_headers_output,
1340
+ )
1341
+
1342
+ # Update NexusDetector with metadata from the RAW_DATA file (has to be accessible). To be done only once
1343
+ if (
1344
+ self.processing_type in PYFAI_PROCESSES
1345
+ and h5path_detector not in root_group_destination
1346
+ ):
1347
+ filename_input, h5path_metadata_detector_input = self.links["source"][
1348
+ "metadata_detector"
1349
+ ].split("::")
1350
+
1351
+ if not os.path.exists(filename_input):
1352
+ return
1353
+
1354
+ try:
1355
+ file_input = stack.enter_context(h5py.File(filename_input, "r"))
1356
+ except Exception:
1357
+ return
1358
+
1359
+ if h5path_metadata_detector_input in file_input:
1360
+ metadata_detector_input = file_input[h5path_metadata_detector_input]
1361
+ if (
1362
+ metadata_detector_input is not None
1363
+ and len(metadata_detector_input) > 0
1364
+ ):
1365
+ metadata_detector_output = self.create_h5_group(
1366
+ h5_parent_group=root_group_destination,
1367
+ h5_group_name=h5path_detector,
1368
+ NX_class="NXdetector",
1369
+ )
1370
+
1371
+ self.update_nexus_detector_group(
1372
+ nxdetector_group_destination=metadata_detector_output,
1373
+ nxdetector_group_source=metadata_detector_input,
1374
+ )
1375
+
1376
+ def _update_HS32C(self, metadata_mcs_output: h5py.Group):
1377
+ HS32C_array = self.get_HS32C_array()
1378
+ self._update_dataset(
1379
+ added_dataset=HS32C_array,
1380
+ h5_group=metadata_mcs_output,
1381
+ h5_dataset_name="HS32C",
1382
+ index_read=self.index_range_last,
1383
+ )
1384
+
1385
+ def _update_HS32V(self, metadata_mcs_output: h5py.Group):
1386
+ HS32V_array = self.get_HS32V_array()
1387
+ self._update_dataset(
1388
+ added_dataset=HS32V_array,
1389
+ h5_group=metadata_mcs_output,
1390
+ h5_dataset_name="HS32V",
1391
+ index_read=self.index_range_last,
1392
+ )
1393
+
1394
+ def _update_exposuretime(self, metadata_mcs_output: h5py.Group):
1395
+ exposuretime_values, slice_init, slice_end = self._get_exposuretime_values()
1396
+ self._update_dataset(
1397
+ added_dataset=exposuretime_values,
1398
+ h5_group=metadata_mcs_output,
1399
+ h5_dataset_name="ExposureTime",
1400
+ index_read=[slice_init, slice_end],
1401
+ )
1402
+
1403
+ def _update_deltatime(
1404
+ self, metadata_tfg_output: h5py.Group, nexus_data_grp_destination: h5py.Group
1405
+ ):
1406
+ stream_deltatime = self.get_stream_deltatime()
1407
+ deltatime, slice_init, slice_end = self._read_from_stream(
1408
+ stream_object=stream_deltatime.get("stream"),
1409
+ slice_init=self.index_range_last[0],
1410
+ slice_end=self.index_range_last[-1],
1411
+ )
1412
+
1413
+ self._update_dataset(
1414
+ added_dataset=deltatime,
1415
+ h5_group=metadata_tfg_output,
1416
+ h5_dataset_name="delta_time",
1417
+ index_read=[slice_init, slice_end],
1418
+ )
1419
+
1420
+ if self.processing_type in PYFAI_PROCESSES:
1421
+ self._update_dataset(
1422
+ added_dataset=deltatime,
1423
+ h5_group=nexus_data_grp_destination,
1424
+ h5_dataset_name="t",
1425
+ unit="s",
1426
+ index_read=[slice_init, slice_end],
1427
+ )
1428
+
1429
+ def _update_intensities(self, metadata_mcs_output: h5py.Group):
1430
+ exposuretime_values, slice_init, slice_end = self._get_exposuretime_values()
1431
+ stream_intensity0 = self.get_stream_monitor_0()
1432
+
1433
+ intensity0_values, monitor0_slice_init, monitor0_slice_end = (
1434
+ self._read_from_stream(
1435
+ stream_object=stream_intensity0.get("stream"),
1436
+ slice_init=slice_init,
1437
+ slice_end=slice_end,
1438
+ )
1439
+ )
1440
+
1441
+ intensity0_factor = self.get_from_headers(key=HEADERS_KEY_MONITOR_0_FACTOR)
1442
+ intensity0uncor = intensity0_values * intensity0_factor
1443
+ sot = self.get_from_headers(key=HEADERS_KEY_SOT, default=0.0)
1444
+ sct = self.get_from_headers(key=HEADERS_KEY_SCT, default=0.0)
1445
+ intensity0shutcor = (
1446
+ intensity0uncor
1447
+ * (exposuretime_values - sot + sct)
1448
+ / (exposuretime_values - sot)
1449
+ )
1450
+
1451
+ self._update_dataset(
1452
+ added_dataset=intensity0shutcor,
1453
+ h5_group=metadata_mcs_output,
1454
+ h5_dataset_name="Intensity0ShutCor",
1455
+ index_read=[monitor0_slice_init, monitor0_slice_end],
1456
+ )
1457
+ self._update_dataset(
1458
+ added_dataset=intensity0uncor,
1459
+ h5_group=metadata_mcs_output,
1460
+ h5_dataset_name="Intensity0UnCor",
1461
+ index_read=[monitor0_slice_init, monitor0_slice_end],
1462
+ )
1463
+
1464
+ stream_intensity1 = self.get_stream_monitor_1()
1465
+ intensity1_values, monitor1_slice_init, monitor1_slice_end = (
1466
+ self._read_from_stream(
1467
+ stream_object=stream_intensity1.get("stream"),
1468
+ slice_init=slice_init,
1469
+ slice_end=slice_end,
1470
+ )
1471
+ )
1472
+ intensity1_factor = self.get_from_headers(key=HEADERS_KEY_MONITOR_1_FACTOR)
1473
+ intensity1uncor = intensity1_values * intensity1_factor
1474
+ intensity1shutcor = (
1475
+ intensity1uncor
1476
+ * (exposuretime_values - sot + sct)
1477
+ / (exposuretime_values - sot)
1478
+ )
1479
+
1480
+ self._update_dataset(
1481
+ added_dataset=intensity1shutcor,
1482
+ h5_group=metadata_mcs_output,
1483
+ h5_dataset_name="Intensity1ShutCor",
1484
+ index_read=[monitor1_slice_init, monitor1_slice_end],
1485
+ )
1486
+
1487
+ self._update_dataset(
1488
+ added_dataset=intensity1uncor,
1489
+ h5_group=metadata_mcs_output,
1490
+ h5_dataset_name="Intensity1UnCor",
1491
+ index_read=[monitor1_slice_init, monitor1_slice_end],
1492
+ )
1493
+
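# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# _update_intensities above applies a shutter correction to the monitor counts:
# the uncorrected intensity is rescaled by (t - sot + sct) / (t - sot), where t
# is the per-frame exposure time and sot / sct are the shutter opening / closing
# times read from the headers (defaulting to 0.0). A minimal standalone version
# of that arithmetic, with hypothetical names:

import numpy

def shutter_correct(uncorrected, exposure_s, sot=0.0, sct=0.0):
    """Rescale monitor counts for the time the shutter is actually open."""
    return uncorrected * (exposure_s - sot + sct) / (exposure_s - sot)

# Example: 1 s exposures with a 10 ms opening time and a 5 ms closing time.
counts = numpy.array([1.0e6, 1.1e6])
times = numpy.array([1.0, 1.0])
corrected = shutter_correct(counts, times, sot=0.010, sct=0.005)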
1494
+ def update_nexus_detector_group(
1495
+ self,
1496
+ nxdetector_group_destination: h5py.Group = None,
1497
+ nxdetector_group_source: h5py.Group = None,
1498
+ ):
1499
+ for name, item in nxdetector_group_source.items():
1500
+ if isinstance(item, h5py.Group):
1501
+ # Recursively copy subgroups
1502
+ new_subgroup = nxdetector_group_destination.create_group(name)
1503
+ copy_group_excluding_dataset(item, new_subgroup, "data")
1504
+ elif isinstance(item, h5py.Dataset):
1505
+ if name != "data": # Skip the excluded dataset
1506
+ nxdetector_group_source.copy(
1507
+ name, nxdetector_group_destination, name=name
1508
+ )
1509
+
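# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# update_nexus_detector_group above mirrors an NXdetector group while skipping
# the (large) "data" dataset: subgroups are recreated recursively and every
# other dataset is copied with h5py's Group.copy. A reduced standalone version
# (writes a small scratch file in the working directory):

import h5py
import numpy

def copy_without(src, dst, excluded="data"):
    """Recursively copy src into dst, skipping datasets named `excluded`."""
    for name, item in src.items():
        if isinstance(item, h5py.Group):
            copy_without(item, dst.create_group(name), excluded)
        elif name != excluded:
            src.copy(name, dst, name=name)

with h5py.File("example_copy.h5", "w") as h5f:
    source = h5f.create_group("detector")
    source["data"] = numpy.zeros((2, 4, 4))   # large stack we want to skip
    source["pixel_size"] = 75e-6
    copy_without(source, h5f.create_group("detector_meta"))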
1510
+ def get_headers(self):
1511
+ if not self.headers:
1512
+ self.log_error(
1513
+ "Headers was not initialized! Maybe after using .run() method"
1514
+ )
1515
+ return self.headers
1516
+
1517
+ def get_from_headers(self, key: str, to_integer: bool = False, default=None):
1518
+ if not self.headers:
1519
+ self.log_error(
1520
+ "Headers was not initialized! Maybe after using .run() method"
1521
+ )
1522
+ return
1523
+
1524
+ value = self.headers.get(key, default)
1525
+ if value is None:
1526
+ return
1527
+
1528
+ if isinstance(value, bytes):
1529
+ value = value.decode("UTF-8")
1530
+ try:
1531
+ value = float(value)
1532
+ if to_integer:
1533
+ return int(value)
1534
+ return value
1535
+ except Exception:
1536
+ return value
1537
+
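# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# get_from_headers above decodes byte strings and opportunistically converts
# header values to float (or int when to_integer=True), falling back to the
# original string when the conversion fails. The coercion logic on its own:

def coerce_header_value(value, to_integer=False):
    if isinstance(value, bytes):
        value = value.decode("UTF-8")
    try:
        number = float(value)
        return int(number) if to_integer else number
    except (TypeError, ValueError):
        return value

coerce_header_value(b"1.5")                 # -> 1.5
coerce_header_value("3", to_integer=True)   # -> 3
coerce_header_value("time")                 # -> 'time'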
1538
+ def get_streams_HS32_scalers(self) -> list:
1539
+ hs32_names = self.get_HS32_names()
1540
+ streams_scalers = []
1541
+
1542
+ for key_name in hs32_names.values():
1543
+ if self.scan_memory_url:
1544
+ # In blissdata streams, they appear as scalers:{name}
1545
+ stream_name = f"scalers:{key_name}"
1546
+ else:
1547
+ # In the file, they are saved with the raw refactored name (without scalers)
1548
+ stream_name = key_name
1549
+ stream_info = self.get_stream_info(
1550
+ name=stream_name,
1551
+ )
1552
+ streams_scalers.append(stream_info)
1553
+ return streams_scalers
1554
+
1555
+ def get_streams_HS32_raw(self) -> list:
1556
+ hs32_names = self.get_HS32_names()
1557
+ streams_raw = []
1558
+
1559
+ for key_name in hs32_names.values():
1560
+ if self.scan_memory_url:
1561
+ # In blissdata streams, they appear as mcs:{name}_raw
1562
+ stream_name = f"mcs:{key_name}_raw"
1563
+ else:
1564
+ # In the file, they are saved with the raw refactored name (without mcs)
1565
+ stream_name = f"{key_name}_raw"
1566
+ stream_info = self.get_stream_info(
1567
+ name=stream_name,
1568
+ )
1569
+ streams_raw.append(stream_info)
1570
+ return streams_raw
1571
+
1572
+ def get_HS32_factors(self) -> dict:
1573
+ nb_pins = self.get_HS32_number_pins()
1574
+ factors = {}
1575
+ for index_pin in range(nb_pins):
1576
+ factors[index_pin] = self.get_from_headers(key=f"HS32F{(index_pin + 1):02}")
1577
+ return factors
1578
+
1579
+ def get_HS32_zeros(self) -> dict:
1580
+ nb_pins = self.get_HS32_number_pins()
1581
+ zeros = {}
1582
+ for index_pin in range(nb_pins):
1583
+ zeros[index_pin] = self.get_from_headers(key=f"HS32Z{(index_pin + 1):02}")
1584
+ return zeros
1585
+
1586
+ def get_HS32_names(self) -> dict:
1587
+ nb_pins = self.get_HS32_number_pins()
1588
+ names = {}
1589
+ for index_pin in range(nb_pins):
1590
+ names[index_pin] = self.get_from_headers(key=f"HS32N{(index_pin + 1):02}")
1591
+ return names
1592
+
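# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# get_HS32_factors / get_HS32_zeros / get_HS32_names above all read the same
# family of 1-based, zero-padded header keys (HS32F01.., HS32Z01.., HS32N01..)
# and return {pin_index: value} dicts. A minimal sketch of that lookup against
# a plain headers dict instead of self.get_from_headers():

def hs32_table(headers, prefix, nb_pins):
    """Collect per-pin header values, e.g. prefix='HS32N' -> {0: 'time', ...}."""
    return {pin: headers.get(f"{prefix}{pin + 1:02}") for pin in range(nb_pins)}

# Example with two pins:
headers = {"HS32N01": "time", "HS32N02": "mon", "HS32F01": 1e-6, "HS32F02": 1.0}
names = hs32_table(headers, "HS32N", 2)     # {0: 'time', 1: 'mon'}
factors = hs32_table(headers, "HS32F", 2)   # {0: 1e-06, 1: 1.0}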
1593
+ def get_stream_slow_timer(self) -> dict:
1594
+ for stream_info in self.streams_subscan2:
1595
+ if "epoch" in stream_info["name"]:
1596
+ return stream_info
1597
+
1598
+ def get_stream_fast_timer(self) -> dict:
1599
+ return self.get_stream_info(name="epoch", prefix="mcs")
1600
+
1601
+ def get_stream_exposuretime(self) -> dict:
1602
+ """
1603
+ In blissdata, this stream is (normally) called scalers:time (in subscan1)
1604
+ In the header, the key "HSTime" points to the string 'time'
1605
+ """
1606
+ key_name = self.get_from_headers(key=HEADERS_KEY_EXPOSURE_TIME)
1607
+ stream_time_name = f"scalers:{key_name}"
1608
+ return self.get_stream_info(
1609
+ name=stream_time_name,
1610
+ subscan_2=False,
1611
+ )
1612
+
1613
+ def get_stream_exposuretime_raw(self) -> dict:
1614
+ """
1615
+ In blissdata, this stream is (normally) called mcs:time_raw (in subscan1)
1616
+ In the header, the key "HSTime" points to the string 'time'
1617
+ """
1618
+ key_name = self.get_from_headers(key=HEADERS_KEY_EXPOSURE_TIME)
1619
+ stream_time_name = f"mcs:{key_name}_raw"
1620
+ return self.get_stream_info(
1621
+ name=stream_time_name,
1622
+ subscan_2=False,
1623
+ )
1624
+
1625
+ def get_stream_deltatime(self) -> dict:
1626
+ if self.scan_memory_url:
1627
+ stream_name = "mcs:elapsed_time"
1628
+ else:
1629
+ stream_name = "elapsed_time"
1630
+
1631
+ stream_info = self.get_stream_info(
1632
+ name=stream_name,
1633
+ )
1634
+ if not stream_info:
1635
+ stream_name = "mcs_elapsed_time"
1636
+ stream_info = self.get_stream_info(
1637
+ name=stream_name,
1638
+ )
1639
+ return stream_info
1640
+
1641
+ def get_stream_info(
1642
+ self,
1643
+ index_pin: int = None,
1644
+ name: str = None,
1645
+ header_key_pin: str = None,
1646
+ prefix: str = "",
1647
+ suffix: str = "",
1648
+ subscan_2: bool = False,
1649
+ ) -> dict:
1650
+ if name is None:
1651
+ if header_key_pin is not None:
1652
+ name = self.get_from_headers(key=header_key_pin)
1653
+ elif index_pin is not None:
1654
+ name = self.get_pin_name_from_index(index_pin=index_pin)
1655
+ if name is None:
1656
+ return
1657
+
1658
+ if prefix:
1659
+ name = f"{prefix}:{name}"
1660
+ if suffix:
1661
+ name = f"{name}_{suffix}"
1662
+
1663
+ name_raw = refactor_stream_name_raw(stream_name=name, cut_name=False)
1664
+ name_raw_cut = refactor_stream_name_raw(stream_name=name, cut_name=True)
1665
+ name_interpreted = refactor_stream_name_interpreted(stream_name=name)
1666
+
1667
+ if subscan_2:
1668
+ streams = self.streams_subscan2
1669
+ else:
1670
+ streams = self.streams_subscan1
1671
+
1672
+ for stream_info in streams:
1673
+ if name == stream_info["name"]:
1674
+ return stream_info
1675
+
1676
+ for stream_info in streams:
1677
+ if name_raw == stream_info["name_raw"]:
1678
+ return stream_info
1679
+
1680
+ for stream_info in streams:
1681
+ if name_raw_cut == stream_info["name_raw"]:
1682
+ return stream_info
1683
+
1684
+ for stream_info in streams:
1685
+ if name_interpreted == stream_info["name_interpreted"]:
1686
+ return stream_info
1687
+
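# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# get_stream_info above resolves a counter name against the cached stream lists
# by trying progressively looser matches: the exact name, then the raw variant,
# then the raw variant with its prefix cut, then the interpreted variant.
# A minimal sketch of that priority search over hypothetical stream dicts:

def find_stream(streams, candidates):
    """Return the first stream matching any (key, value) candidate, in order."""
    for key, candidate in candidates:
        for stream_info in streams:
            if stream_info.get(key) == candidate:
                return stream_info
    return None

streams = [{"name": "scalers:mon", "name_raw": "mon_raw", "name_interpreted": "mon"}]
hit = find_stream(
    streams,
    [("name", "scalers:mon"), ("name_raw", "mon_raw"), ("name_interpreted", "mon")],
)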
1688
+ def _read_from_stream(
1689
+ self,
1690
+ stream_object,
1691
+ slice_init: int,
1692
+ slice_end: int,
1693
+ datatype: str = "float32",
1694
+ ) -> tuple:
1695
+ nb_available_frames = len(stream_object)
1696
+ if slice_init > nb_available_frames:
1697
+ self.log_error(
1698
+ f"Not enough frames in {stream_object} ({nb_available_frames}). Requested init: {slice_init}"
1699
+ )
1700
+ return None, None, None
1701
+ if slice_end > nb_available_frames:
1702
+ self.log_warning(
1703
+ f"Not enough frames in {stream_object} ({nb_available_frames}). Requested end: {slice_end}"
1704
+ )
1705
+ slice_end = nb_available_frames
1706
+
1707
+ try:
1708
+ return stream_object[slice_init:slice_end].astype(datatype), slice_init, slice_end
1709
+ except Exception as e:
1710
+ self.log_error(f"{stream_object} could not be sliced: {e}")
1711
+ return None, None, None
1712
+
1713
+ def read_from_stream_interpolate(
1714
+ self,
1715
+ stream_object,
1716
+ stream_slice=None,
1717
+ datatype: str = "float32",
1718
+ ) -> numpy.ndarray:
1719
+ """
1720
+ stream_object is a sliceable object (numpy.ndarray or blissdata stream object)
1721
+ Valid for all streams, from subscan1 and subscan2
1722
+ """
1723
+ if stream_slice is None:
1724
+ slice_init = 0
1725
+ slice_end = len(stream_object)
1726
+ else:
1727
+ slice_init, slice_end = stream_slice
1728
+
1729
+ stream_values = stream_object[slice_init:slice_end]
1730
+ stream_values = stream_values.astype(datatype)
1731
+
1732
+ # 2) Read all the values in the slow epoch counter
1733
+ slow_epoch_stream = self.get_stream_slow_timer()
1734
+ if slow_epoch_stream is None:
1735
+ self.log_debug("Slow epoch stream could not be found.")
1736
+ return stream_values
1737
+
1738
+ slow_epoch_stream = slow_epoch_stream.get("stream")
1739
+
1740
+ # Both epoch must be read with double precision
1741
+ slow_epoch_values, slice_init, slice_end = self._read_from_stream(
1742
+ stream_object=slow_epoch_stream,
1743
+ slice_init=0,
1744
+ slice_end=len(slow_epoch_stream),
1745
+ datatype="float64",
1746
+ )
1747
+ if slow_epoch_values is None:
1748
+ self.log_error(f"Stream {slow_epoch_stream} could not be read.")
1749
+ return stream_values
1750
+
1751
+ # 3) Match the slow streams
1752
+ if len(slow_epoch_values) != len(stream_values):
1753
+ nb_slow_frames = min(len(slow_epoch_values), len(stream_values))
1754
+ slow_epoch_values = slow_epoch_values[0:nb_slow_frames]
1755
+ stream_values = stream_values[0:nb_slow_frames]
1756
+
1757
+ # 4) Read the fast epoch counter
1758
+ fast_epoch_stream = self.get_stream_fast_timer().get("stream")
1759
+ if fast_epoch_stream is None:
1760
+ self.log_error("Fast epoch stream could not be found.")
1761
+ return stream_values
1762
+
1763
+ fast_epoch_values, slice_init, slice_end = self._read_from_stream(
1764
+ stream_object=fast_epoch_stream,
1765
+ slice_init=0,
1766
+ slice_end=self.index_range_last[-1],
1767
+ datatype="float64",
1768
+ )
1769
+
1770
+ if fast_epoch_values is None:
1771
+ self.log_error(f"Stream {fast_epoch_stream} could not be read.")
1772
+ elif fast_epoch_values.size == 0:
1773
+ self.log_error(f"Stream {fast_epoch_stream} is empty.")
1774
+ return
1775
+
1776
+ # 5) Interpolate data, len(interpdata) = len(fast_epoch_values)
1777
+ try:
1778
+ return numpy.interp(fast_epoch_values, slow_epoch_values, stream_values)
1779
+ except Exception as e:
1780
+ self.log_error(f"Error during numpy interpolation: {e}")
1781
+ return
1782
+
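# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# read_from_stream_interpolate above resamples a slowly acquired counter onto
# the fast MCS timebase: both epoch counters are read as float64 and
# numpy.interp(fast_epoch, slow_epoch, slow_values) yields one value per fast
# frame. A self-contained illustration with made-up timestamps:

import numpy

slow_epoch = numpy.array([0.0, 1.0, 2.0])       # timestamps of a 1 Hz counter
slow_values = numpy.array([10.0, 20.0, 30.0])   # e.g. a temperature readback
fast_epoch = numpy.linspace(0.0, 2.0, 9)        # 9 fast frames over the same span

per_frame = numpy.interp(fast_epoch, slow_epoch, slow_values)
# per_frame now has one entry per fast frame: [10., 12.5, 15., ..., 30.]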
1783
+ def get_HS32C_array(self):
1784
+ nb_hs32_pins = self.get_HS32_number_pins()
1785
+ new_HS32C_array = numpy.full(
1786
+ (self.index_range_last[1] - self.index_range_last[0], nb_hs32_pins),
1787
+ fill_value=-1,
1788
+ dtype="float64",
1789
+ )
1790
+
1791
+ streams_raw = self.get_streams_HS32_raw()
1792
+ for index_pin, stream_info in enumerate(streams_raw):
1793
+ if not stream_info:
1794
+ continue
1795
+ stream_values, slice_init, slice_end = self._read_from_stream(
1796
+ stream_object=stream_info.get("stream"),
1797
+ slice_init=self.index_range_last[0],
1798
+ slice_end=self.index_range_last[-1],
1799
+ )
1800
+ if stream_values is not None:
1801
+ new_HS32C_array[0 : len(stream_values), index_pin] = stream_values
1802
+
1803
+ return new_HS32C_array
1804
+
1805
+ def get_HS32V_array(
1806
+ self,
1807
+ ):
1808
+ nb_hs32_pins = self.get_HS32_number_pins()
1809
+ new_HS32V_array = numpy.zeros(
1810
+ (self.index_range_last[1] - self.index_range_last[0], nb_hs32_pins),
1811
+ dtype="float64",
1812
+ )
1813
+
1814
+ streams_scalers = self.get_streams_HS32_scalers()
1815
+ for index_pin, stream_info in enumerate(streams_scalers):
1816
+ if not stream_info:
1817
+ continue
1818
+ stream_values, slice_init, slice_end = self._read_from_stream(
1819
+ stream_object=stream_info.get("stream"),
1820
+ slice_init=self.index_range_last[0],
1821
+ slice_end=self.index_range_last[-1],
1822
+ )
1823
+ if stream_values is not None:
1824
+ new_HS32V_array[0 : len(stream_values), index_pin] = stream_values
1825
+
1826
+ return new_HS32V_array
1827
+
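# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# get_HS32C_array / get_HS32V_array above build a (nb_frames, nb_pins) matrix
# and fill one column per scaler stream, leaving missing pins at the fill value
# (-1 for raw counts, 0 for scaler values) and tolerating streams shorter than
# the requested frame range. A reduced sketch of that column-wise fill:

import numpy

nb_frames, nb_pins = 5, 3
columns = {0: numpy.arange(5.0), 2: numpy.arange(3.0)}  # pin 1 missing, pin 2 short

table = numpy.full((nb_frames, nb_pins), fill_value=-1, dtype="float64")
for pin, values in columns.items():
    table[0:len(values), pin] = values   # shorter streams only fill the top rows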
1828
+ def _get_exposuretime_values(
1829
+ self,
1830
+ ) -> tuple:
1831
+ # Go first for the already normalized time values
1832
+ stream_exposuretime = self.get_stream_exposuretime()
1833
+ exposuretime, slice_init, slice_end = self._read_from_stream(
1834
+ stream_object=stream_exposuretime.get("stream"),
1835
+ slice_init=self.index_range_last[0],
1836
+ slice_end=self.index_range_last[-1],
1837
+ )
1838
+
1839
+ if exposuretime is None:
1840
+ # Try to go to the raw time
1841
+ stream_exposuretime_raw = self.get_stream_exposuretime_raw()
1842
+ exposuretime, slice_init, slice_end = self._read_from_stream(
1843
+ stream_object=stream_exposuretime_raw.get("stream"),
1844
+ slice_init=self.index_range_last[0],
1845
+ slice_end=self.index_range_last[-1],
1846
+ )
1847
+
1848
+ if exposuretime is None:
1849
+ self.log_error("Exposure time could not be read.")
1850
+ return None, None, None
1851
+
1852
+ factor_exposuretime = self.get_factor_exposuretime()
1853
+ if factor_exposuretime is None:
1854
+ self.log_error("Default exposure time factor 1.0 will be used")
1855
+ factor_exposuretime = 1.0
1856
+ exposuretime *= factor_exposuretime
1857
+ return exposuretime, slice_init, slice_end
1858
+
1859
+ # def _get_deltatime_values(self) -> tuple:
1860
+ # stream_deltatime = self.get_stream_deltatime()
1861
+ # deltatime = self._read_from_stream(
1862
+ # stream_object=stream_deltatime.get("stream"),
1863
+ # slice_init=self.index_range_last[-1],
1864
+ # slice_end=self.index_range_last[-1],
1865
+ # )
1866
+ # return deltatime
1867
+
1868
+ def get_stream_monitor_0(self) -> dict:
1869
+ key_name = self.get_from_headers(key=HEADERS_KEY_MONITOR_0)
1870
+ stream_name = f"scalers:{key_name}"
1871
+ return self.get_stream_info(name=stream_name)
1872
+
1873
+ def get_stream_monitor_1(self) -> dict:
1874
+ key_name = self.get_from_headers(key=HEADERS_KEY_MONITOR_1)
1875
+ stream_name = f"scalers:{key_name}"
1876
+ return self.get_stream_info(name=stream_name)
1877
+
1878
+ def get_factor(
1879
+ self, index_pin: int = None, pin_name: str = None, header_key_pin: str = None
1880
+ ):
1881
+ if index_pin is not None:
1882
+ return self.get_from_headers(key=f"HS32F{(index_pin + 1):02}")
1883
+ if pin_name is None:
1884
+ nb_pins = self.get_HS32_number_pins()
1885
+ pin_name = self.get_from_headers(key=header_key_pin)
1886
+ for index_pin in range(nb_pins):
1887
+ if self.get_pin_name_from_index(index_pin=index_pin) == pin_name:
1888
+ return self.get_from_headers(key=f"HS32F{(index_pin + 1):02}")
1889
+
1890
+ def get_factor_exposuretime(self):
1891
+ return self.get_factor(header_key_pin=HEADERS_KEY_EXPOSURE_TIME)
1892
+
1893
+ def update_title_extension(
1894
+ self,
1895
+ parameters_group_destination: h5py.Group,
1896
+ ):
1897
+ titleextension_template = self.get_from_headers(key="TitleExtensionTemplate")
1898
+ if not titleextension_template:
1899
+ self.log_warning("There is no TitleExtensionTemplate in the header.")
1900
+ return
1901
+
1902
+ title_extension_parsed, title_extension_formats = parse_titleextension_template(
1903
+ titleextension_template
1904
+ )
1905
+ title_extension_values = {}
1906
+
1907
+ # We want to slice all the streams at once, and then build the array of strings
1908
+ for titleextension_dict in title_extension_formats:
1909
+ stream_name = titleextension_dict["stream_name"]
1910
+ stream_values = None
1911
+ title_extension_values[stream_name] = numpy.full(
1912
+ shape=(self.index_range_last[1],), fill_value=numpy.nan
1913
+ )
1914
+
1915
+ # Try first from subscan2
1916
+ stream_info = self.get_stream_info(name=stream_name, subscan_2=True)
1917
+ if stream_info:
1918
+ stream_values = self.read_from_stream_interpolate(
1919
+ stream_object=stream_info.get("stream"),
1920
+ stream_slice=None,
1921
+ )
1922
+ else:
1923
+ stream_info = self.get_stream_info(
1924
+ name=stream_name, subscan_2=False
1925
+ )
1926
+ if stream_info is None:
1927
+ self.log_error(
1928
+ f"{stream_name} stream from TitleExtension could not be found"
1929
+ )
1930
+ continue
1931
+ stream_values, slice_init, slice_end = self._read_from_stream(
1932
+ stream_object=stream_info.get("stream"),
1933
+ slice_init=0,
1934
+ slice_end=self.index_range_last[-1],
1935
+ )
1936
+ if stream_values is None:
1937
+ self.log_error(f"Stream {stream_info} could not be read")
1938
+ continue
1939
+
1940
+ title_extension_values[stream_name] = stream_values
1941
+
1942
+ # Now we go index by index, building the string cells
1943
+ # The title extensions will always be the size of last index (from 0 -> last_frame)
1944
+ new_title_extensions = numpy.full(
1945
+ shape=(self.index_range_last[1],),
1946
+ fill_value="",
1947
+ dtype=h5py.string_dtype(encoding="utf-8"),
1948
+ )
1949
+ for index in range(len(new_title_extensions)):
1950
+ format_index = {
1951
+ stream_name: title_extension_values[stream_name][index]
1952
+ for stream_name in title_extension_values
1953
+ }
1954
+
1955
+ new_title_extensions[index] = title_extension_parsed.format(**format_index)
1956
+
1957
+ self._update_dataset(
1958
+ added_dataset=new_title_extensions,
1959
+ h5_group=parameters_group_destination,
1960
+ h5_dataset_name="TitleExtension",
1961
+ index_read=[0, self.index_range_last[1]],
1962
+ )
1963
+
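# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# update_title_extension above turns the TitleExtensionTemplate header entry
# into one string per frame: every referenced stream is sliced (or interpolated
# from subscan2) and str.format() is applied frame by frame. A minimal sketch,
# assuming a hypothetical template already parsed to a plain format string:

import numpy

template = "mon={mon:.1f} T={temperature:.2f}K"
values = {
    "mon": numpy.array([1.0, 2.0, 3.0]),
    "temperature": numpy.array([300.0, 300.5, 301.0]),
}

titles = [
    template.format(**{name: values[name][index] for name in values})
    for index in range(3)
]
# titles[0] == 'mon=1.0 T=300.00K'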
1964
+ def _update_subscan1(
1965
+ self,
1966
+ mcs_group_destination: h5py.Group,
1967
+ ):
1968
+ """
1969
+ Updates both the raw/subscan_1 and the interpreted groups with the values of subscan_1 (they hold equivalent data)
1970
+ """
1971
+ raw_group = mcs_group_destination.require_group("raw")
1972
+ subscan1 = raw_group.require_group("subscan_1")
1973
+ interpreted_grp = mcs_group_destination.require_group("interpreted")
1974
+
1975
+ for stream_info in self.streams_subscan1:
1976
+ stream_name_raw = stream_info.get("name_raw")
1977
+ stream_name_interpreted = stream_info.get("name_interpreted")
1978
+ stream = stream_info.get("stream")
1979
+
1980
+ stream_values, slice_init, slice_end = self._read_from_stream(
1981
+ stream_object=stream,
1982
+ slice_init=self.index_range_last[0],
1983
+ slice_end=self.index_range_last[-1],
1984
+ )
1985
+
1986
+ self._update_dataset(
1987
+ added_dataset=stream_values,
1988
+ h5_group=subscan1,
1989
+ h5_dataset_name=stream_name_raw,
1990
+ index_read=[slice_init, slice_end],
1991
+ )
1992
+ self._update_dataset(
1993
+ added_dataset=stream_values,
1994
+ h5_group=interpreted_grp,
1995
+ h5_dataset_name=stream_name_interpreted,
1996
+ index_read=[slice_init, slice_end],
1997
+ )
1998
+
1999
+ def _update_subscan2(
2000
+ self,
2001
+ mcs_group: h5py.Group,
2002
+ ):
2003
+ if not self.streams_subscan2:
2004
+ return
2005
+
2006
+ interpreted_grp = mcs_group.require_group("interpreted")
2007
+ raw_grp = mcs_group.require_group("raw")
2008
+
2009
+ for stream_info in self.streams_subscan2:
2010
+ stream_name_raw = stream_info.get("name_raw")
2011
+ stream_name_interpreted = stream_info.get("name_interpreted")
2012
+ stream = stream_info.get("stream")
2013
+
2014
+ # Read without any interpolation to send to raw-subscan2
2015
+ stream_values, slice_init, slice_end = self._read_from_stream(
2016
+ stream_object=stream,
2017
+ slice_init=0,
2018
+ slice_end=len(stream),
2019
+ )
2020
+ if stream_values is None:
2021
+ continue
2022
+
2023
+ subscan2_grp = raw_grp.require_group("subscan_2")
2024
+ if stream_name_raw not in subscan2_grp:
2025
+ dset_raw_subscan2 = subscan2_grp.create_dataset(
2026
+ name=stream_name_raw,
2027
+ shape=stream_values.shape,
2028
+ maxshape=(None,),
2029
+ dtype=stream_values.dtype,
2030
+ )
2031
+ else:
2032
+ dset_raw_subscan2 = subscan2_grp[stream_name_raw]
2033
+ dset_raw_subscan2.resize((len(stream_values),))
2034
+ dset_raw_subscan2[:] = stream_values
2035
+
2036
+ # Send the interpolated values to interpreted group
2037
+ interpolated_values = self.read_from_stream_interpolate(
2038
+ stream_object=stream,
2039
+ stream_slice=None,
2040
+ )
2041
+ if interpolated_values is None:
2042
+ continue
2043
+
2044
+ if stream_name_interpreted not in interpreted_grp:
2045
+ dset_interpreted = interpreted_grp.create_dataset(
2046
+ name=stream_name_interpreted,
2047
+ shape=(0,),
2048
+ maxshape=(None,),
2049
+ dtype=interpolated_values.dtype,
2050
+ )
2051
+ else:
2052
+ dset_interpreted = interpreted_grp[stream_name_interpreted]
2053
+ dset_interpreted.resize((len(interpolated_values),))
2054
+ dset_interpreted[:] = interpolated_values
2055
+
2056
+ def create_h5_group(
2057
+ self,
2058
+ h5_parent_group: h5py.Group,
2059
+ h5_group_name: str,
2060
+ title: str = None,
2061
+ **kwargs,
2062
+ ) -> h5py.Group:
2063
+ """
2064
+ Unified method to create a group in an HDF5 file with additional attributes.
2065
+
2066
+ h5_parent_group: h5py.Group - Parent group where the new group will be created
2067
+ h5_group_name: str - Name of the new group
2068
+ title: str - Title of the group
2069
+ kwargs: dict - Additional arguments to add as attributes in the group
2070
+ """
2071
+ if h5_group_name in h5_parent_group:
2072
+ self.log_warning(
2073
+ f"Group {h5_group_name} already exists in {h5_parent_group.name}"
2074
+ )
2075
+ return h5_parent_group[h5_group_name]
2076
+
2077
+ h5_group = h5_parent_group.create_group(h5_group_name)
2078
+ if title:
2079
+ h5_group["title"] = title
2080
+ for key, value in kwargs.items():
2081
+ h5_group.attrs[key] = value
2082
+ return h5_group
2083
+
2084
+ def _update_dataset(
2085
+ self,
2086
+ added_dataset: numpy.ndarray,
2087
+ h5_group: h5py.Group,
2088
+ h5_dataset_name: str,
2089
+ index_read: tuple = None,
2090
+ **kwargs,
2091
+ ) -> None:
2092
+ """
2093
+ Update a dataset in an HDF5 file with new data.
2094
+ It will create the dataset if it does not exist.
2095
+
2096
+ added_dataset: numpy.ndarray - Array with the new data; its first axis is the frame axis (one entry per frame)
2097
+ h5_group: h5py.Group - Group in the HDF5 file where the dataset is located
2098
+ h5_dataset_name: str - Name of the dataset in h5_group
2099
+ kwargs: dict - Additional arguments to add as attributes in the dataset
2100
+ """
2101
+ if added_dataset is None:
2102
+ return
2103
+
2104
+ if not isinstance(added_dataset, numpy.ndarray):
2105
+ self.log_error(f"Added dataset is not a numpy array. {type(added_dataset)}")
2106
+ return
2107
+
2108
+ nb_new_frames = len(added_dataset)
2109
+ if nb_new_frames == 0:
2110
+ return
2111
+
2112
+ if index_read is None:
2113
+ slice_init = 0
2114
+ slice_end = nb_new_frames
2115
+ else:
2116
+ slice_init, slice_end = index_read
2117
+
2118
+ self.log_debug(
2119
+ f"Updating dataset {h5_dataset_name} with {nb_new_frames} new frames"
2120
+ )
2121
+
2122
+ ndim = added_dataset.ndim
2123
+ if h5_dataset_name not in h5_group:
2124
+ if ndim == 3:
2125
+ interpretation = "image"
2126
+ dtype = self.get_input_value("datatype", "float32")
2127
+ compression = hdf5plugin.Bitshuffle(cname="lz4")
2128
+ chunks = CHUNK_SIZE_3D
2129
+ elif ndim == 2:
2130
+ interpretation = "spectrum"
2131
+ dtype = "float64"
2132
+ compression = None
2133
+ chunks = None
2134
+ elif ndim == 1:
2135
+ interpretation = "scalar"
2136
+ dtype = added_dataset.dtype
2137
+ compression = None
2138
+ chunks = None
2139
+ if added_dataset.dtype.kind == "U":
2140
+ dtype = h5py.string_dtype(encoding="utf-8")
2141
+ added_dataset = added_dataset.astype(
2142
+ h5py.string_dtype(encoding="utf-8")
2143
+ )
2144
+ dset = h5_group.create_dataset(
2145
+ name=h5_dataset_name,
2146
+ shape=(0,) + added_dataset.shape[1:ndim],
2147
+ maxshape=(None,) + added_dataset.shape[1:ndim],
2148
+ chunks=chunks,
2149
+ dtype=dtype,
2150
+ compression=compression,
2151
+ )
2152
+ dset.attrs["interpretation"] = interpretation
2153
+ for key, value in kwargs.items():
2154
+ dset.attrs[key] = value
2155
+ else:
2156
+ dset = h5_group[h5_dataset_name]
2157
+
2158
+ # The added dataset can be:
2159
+ # - New frames to append between index_range_last limits (normal loop procedure)
2160
+ # - A rewrite of the whole dataset, possibly with more frames (subscan2 interpolation)
2161
+ dset_current_nb_frames = len(dset)
2162
+ if slice_end <= dset_current_nb_frames:
2163
+ # No resize is needed
2164
+ ...
2165
+ else:
2166
+ dset_new_shape = (slice_end, *dset.shape[1:ndim])
2167
+ dset.resize(dset_new_shape)
2168
+
2169
+ try:
2170
+ dset[slice_init:slice_end] = added_dataset
2171
+ self.log_debug(
2172
+ f"Dataset {h5_dataset_name} updated with {len(added_dataset)} frames"
2173
+ )
2174
+ except Exception as e:
2175
+ self.log_error(f"""{e}: Failed while saving {h5_dataset_name}. \
2176
+ Incoming dataset shape: {added_dataset.shape}, \
2177
+ hdf5 dset shape={dset.shape}, {added_dataset.shape=} \
2178
+ limits: {slice_init} -> {slice_end}
2179
+ """)
2180
+
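# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# _update_dataset above follows the usual h5py pattern for growable datasets:
# create the dataset once with shape (0, ...) and maxshape (None, ...), then
# resize along the frame axis only when an incoming slice extends past the
# current length. A minimal standalone example of that pattern (writes a small
# scratch file in the working directory, no compression):

import h5py
import numpy

with h5py.File("example_growable.h5", "w") as h5f:
    dset = h5f.create_dataset(
        "frames", shape=(0, 4), maxshape=(None, 4), dtype="float32"
    )
    for start in (0, 3):                  # two append passes of 3 frames each
        block = numpy.random.random((3, 4)).astype("float32")
        end = start + len(block)
        if end > dset.shape[0]:
            dset.resize((end, 4))         # grow only when needed
        dset[start:end] = block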
2181
+ def write_root_group(self, root_group: h5py.Group) -> h5py.Group:
2182
+ self.log_debug("Creating root group")
2183
+ root_group.attrs.update(
2184
+ {
2185
+ "HDF5_Version": h5py.version.hdf5_version,
2186
+ "NX_class": "NXroot",
2187
+ "creator": "ewoksid02",
2188
+ "file_name": str(self.processing_filename),
2189
+ "file_time": get_isotime(),
2190
+ "default": "entry_0000",
2191
+ }
2192
+ )
2193
+ return root_group
2194
+
2195
+ def _save_benchmark(
2196
+ self,
2197
+ bench,
2198
+ ):
2199
+ if not self.do_save:
2200
+ return
2201
+
2202
+ benchmark_name = bench.benchmark_name
2203
+ total_time = bench.bench_total_s
2204
+ time_per_frame = bench.bench_per_frame_ms
2205
+ nb_frames = bench.nb_frames
2206
+
2207
+ with ExitStack() as stack:
2208
+ if not self.processing_filename or not self.do_save:
2209
+ return
2210
+
2211
+ # Append data to the nexus data group
2212
+ stack.enter_context(lock)
2213
+ root_group_destination = stack.enter_context(
2214
+ open_item_silx(filename=self.processing_filename, name="/", mode="a")
2215
+ )
2216
+
2217
+ _, h5path_ewoks_destination = self.links["destination"]["ewoks"].split("::")
2218
+ ewoks_group = root_group_destination[h5path_ewoks_destination]
2219
+
2220
+ if "last_process" not in ewoks_group:
2221
+ return
2222
+
2223
+ title = ewoks_group["last_process/title"][()].decode()
2224
+ h5path_ewoks_benchmark = (
2225
+ f"{h5path_ewoks_destination}/history/{title}/benchmark"
2226
+ )
2227
+
2228
+ if h5path_ewoks_benchmark not in root_group_destination:
2229
+ bench_grp = self.create_h5_group(
2230
+ h5_parent_group=root_group_destination,
2231
+ h5_group_name=h5path_ewoks_benchmark,
2232
+ title="Benchmark",
2233
+ NX_class="NXdata",
2234
+ default=h5path_ewoks_benchmark,
2235
+ signal="data",
2236
+ # axes=["steps"],
2237
+ axes=["index"],
2238
+ )
2239
+ steps = bench_grp.create_dataset(
2240
+ name="steps",
2241
+ shape=(0,),
2242
+ maxshape=(None,),
2243
+ dtype=h5py.string_dtype(encoding="utf-8"),
2244
+ )
2245
+ indexes = bench_grp.create_dataset(
2246
+ name="index",
2247
+ shape=(0,),
2248
+ maxshape=(None,),
2249
+ )
2250
+ data = bench_grp.create_dataset(
2251
+ name="data",
2252
+ shape=(0,),
2253
+ maxshape=(None,),
2254
+ )
2255
+ data.attrs.update(
2256
+ {
2257
+ "interpretation": "spectrum",
2258
+ }
2259
+ )
2260
+ data_errors = bench_grp.create_dataset(
2261
+ name="data_errors",
2262
+ shape=(0,),
2263
+ maxshape=(None,),
2264
+ )
2265
+ data_errors.attrs.update(
2266
+ {
2267
+ "interpretation": "spectrum",
2268
+ }
2269
+ )
2270
+ else:
2271
+ bench_grp = root_group_destination[h5path_ewoks_benchmark]
2272
+ steps = bench_grp["steps"]
2273
+ indexes = bench_grp["index"]
2274
+ data = bench_grp["data"]
2275
+ data_errors = bench_grp["data_errors"]
2276
+
2277
+ h5path_ewoks_benchmark_step = (
2278
+ f"{h5path_ewoks_destination}/history/{title}/benchmark/{benchmark_name}"
2279
+ )
2280
+ if h5path_ewoks_benchmark_step not in root_group_destination:
2281
+ bench_grp_step = self.create_h5_group(
2282
+ h5_parent_group=root_group_destination,
2283
+ h5_group_name=h5path_ewoks_benchmark_step,
2284
+ title=benchmark_name,
2285
+ NX_class="NXdata",
2286
+ default=h5path_ewoks_benchmark,
2287
+ )
2288
+ else:
2289
+ bench_grp_step = root_group_destination[h5path_ewoks_benchmark_step]
2290
+
2291
+ if f"{benchmark_name}_loop_nb" not in bench_grp_step:
2292
+ loop_dset = bench_grp_step.create_dataset(
2293
+ name=f"{benchmark_name}_loop_nb",
2294
+ dtype="int32",
2295
+ shape=(0,),
2296
+ maxshape=(None,),
2297
+ )
2298
+ perframe_dset = bench_grp_step.create_dataset(
2299
+ name=f"{benchmark_name}_per_frame",
2300
+ dtype="float32",
2301
+ shape=(0,),
2302
+ maxshape=(None,),
2303
+ )
2304
+ perframe_mean_dset = bench_grp_step.create_dataset(
2305
+ name=f"{benchmark_name}_per_frame_mean",
2306
+ dtype="float32",
2307
+ data=time_per_frame,
2308
+ )
2309
+ perframe_std_dset = bench_grp_step.create_dataset(
2310
+ name=f"{benchmark_name}_per_frame_std",
2311
+ dtype="float32",
2312
+ data=0.0,
2313
+ )
2314
+ nbframes_dset = bench_grp_step.create_dataset(
2315
+ name=f"{benchmark_name}_nb_frames",
2316
+ dtype="int32",
2317
+ shape=(0,),
2318
+ maxshape=(None,),
2319
+ )
2320
+ total_dset = bench_grp_step.create_dataset(
2321
+ name=f"{benchmark_name}_total",
2322
+ dtype="float32",
2323
+ shape=(0,),
2324
+ maxshape=(None,),
2325
+ )
2326
+ accumulated_dset = bench_grp_step.create_dataset(
2327
+ name=f"{benchmark_name}_accumulated",
2328
+ dtype="float32",
2329
+ shape=(0,),
2330
+ maxshape=(None,),
2331
+ )
2332
+ steps.resize((steps.shape[0] + 1,))
2333
+ steps[-1] = benchmark_name
2334
+ indexes.resize((indexes.shape[0] + 1,))
2335
+ indexes[-1] = len(indexes)
2336
+ data.resize((data.shape[0] + 1,))
2337
+ data_errors.resize((data_errors.shape[0] + 1,))
2338
+ else:
2339
+ loop_dset = bench_grp_step[f"{benchmark_name}_loop_nb"]
2340
+ perframe_dset = bench_grp_step[f"{benchmark_name}_per_frame"]
2341
+ perframe_mean_dset = bench_grp_step[f"{benchmark_name}_per_frame_mean"]
2342
+ perframe_std_dset = bench_grp_step[f"{benchmark_name}_per_frame_std"]
2343
+ nbframes_dset = bench_grp_step[f"{benchmark_name}_nb_frames"]
2344
+ total_dset = bench_grp_step[f"{benchmark_name}_total"]
2345
+ accumulated_dset = bench_grp_step[f"{benchmark_name}_accumulated"]
2346
+
2347
+ # Append new data
2348
+ loop_dset.resize((loop_dset.shape[0] + 1,))
2349
+ perframe_dset.resize((perframe_dset.shape[0] + 1,))
2350
+ nbframes_dset.resize((nbframes_dset.shape[0] + 1,))
2351
+ total_dset.resize((total_dset.shape[0] + 1,))
2352
+ accumulated_dset.resize((accumulated_dset.shape[0] + 1,))
2353
+
2354
+ loop_dset[-1] = self.loop_nb
2355
+ perframe_dset[-1] = time_per_frame
2356
+ nbframes_dset[-1] = nb_frames
2357
+ total_dset[-1] = total_time
2358
+ if len(accumulated_dset) == 1:
2359
+ accumulated_dset[-1] = total_time
2360
+ else:
2361
+ accumulated_dset[-1] = accumulated_dset[-2] + total_time
2362
+
2363
+ mean_value = perframe_dset[:].mean()
2364
+ std_value = perframe_dset[:].std()
2365
+ perframe_mean_dset[()] = mean_value
2366
+ perframe_std_dset[()] = std_value
2367
+
2368
+ step_names = [s.decode() for s in steps]
2369
+ index_benchmark = next(
2370
+ (i for i, s in enumerate(step_names) if s == benchmark_name), None
2371
+ )
2372
+ if index_benchmark is not None:
2373
+ data[index_benchmark] = mean_value
2374
+ data_errors[index_benchmark] = std_value
2375
+
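# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# _save_benchmark above appends one timing entry per processing loop and then
# recomputes the per-frame mean/std over all recorded loops, together with a
# running accumulated total. Stripped of the HDF5 layout, the bookkeeping
# reduces to something like:

per_frame_ms = []    # one entry per loop
accumulated_s = []   # running total of the loop times

for loop_time_s, nb_frames in [(1.2, 100), (1.0, 100), (1.4, 100)]:
    per_frame_ms.append(1000.0 * loop_time_s / nb_frames)
    previous = accumulated_s[-1] if accumulated_s else 0.0
    accumulated_s.append(previous + loop_time_s)

mean_ms = sum(per_frame_ms) / len(per_frame_ms)
std_ms = (sum((x - mean_ms) ** 2 for x in per_frame_ms) / len(per_frame_ms)) ** 0.5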
2376
+ def _save_processing_params(self, root_group: h5py.Group):
2377
+ # Do it once for non-numpy arrays (live parameters)
2378
+ _, h5path_ewoks_destination = self.links["destination"]["ewoks"].split("::")
2379
+ ewoks_group = root_group[h5path_ewoks_destination]
2380
+
2381
+ if "last_process" not in ewoks_group:
2382
+ return
2383
+
2384
+ title = ewoks_group["last_process/title"][()].decode()
2385
+ h5path_ewoks_info = f"{h5path_ewoks_destination}/history/{title}"
2386
+
2387
+ ewoks_info_group = root_group[h5path_ewoks_info]
2388
+ params_group_name = "parameters"
2389
+ if params_group_name in ewoks_info_group:
2390
+ return
2391
+
2392
+ info_history_complete = self.outputs.info_history
2393
+ if info_history_complete:
2394
+ info_history = self.outputs.info_history[-1]
2395
+ else:
2396
+ info_history = None
2397
+
2398
+ if h5path_ewoks_info not in root_group:
2399
+ self.log_error(f"{h5path_ewoks_info} not found in {root_group}")
2400
+ return
2401
+
2402
+ params_group = self.create_h5_group(
2403
+ h5_parent_group=ewoks_info_group,
2404
+ h5_group_name=params_group_name,
2405
+ NX_class="NXcollection",
2406
+ )
2407
+
2408
+ params_info = {}
2409
+ for key, value in self.processing_params.items():
2410
+ if isinstance(value, numpy.ndarray):
2411
+ continue
2412
+ incoming_length = value.shape[0]
2413
+ if key not in params_group:
2414
+ dset = params_group.create_dataset(
2415
+ name=key, shape=(incoming_length,), maxshape=(None,), data=value
2416
+ )
2417
+ else:
2418
+ dset = params_group[key]
2419
+ current_length = len(dset)
2420
+ new_length = current_length + incoming_length
2421
+ dset.resize((new_length,))
2422
+ dset[current_length:new_length] = value
2423
+ params_info[key] = dset[()]
2424
+ continue
2425
+
2426
+ if isinstance(value, dict):
2427
+ params_info[key] = json.dumps(value)
2428
+ for k, v in value.items():
2429
+ params_info[k] = str(v)
2430
+ else:
2431
+ try:
2432
+ params_info[key] = str(value)
2433
+ except Exception:
2434
+ params_info[key] = json.dumps(value.as_dict())
2435
+ for k, v in value.items():
2436
+ params_info[k] = v
2437
+
2438
+ for key, value in params_info.items():
2439
+ params_group[key] = value
2440
+
2441
+ info_history["parameters"] = params_info
2442
+ info_history_complete = self.outputs.info_history[:-1]
2443
+ info_history_complete.append(info_history)
2444
+ self.outputs.info_history = info_history_complete
2445
+
2446
+ def _link_ewoks_history(
2447
+ self, root_group_destination: h5py.Group, stack: ExitStack = None
2448
+ ):
2449
+ _, h5path_ewoks_history_destination = self.links["destination"]["ewoks"].split(
2450
+ "::"
2451
+ )
2452
+ if self.links["source"].get("ewoks") is not None:
2453
+ filename_source, h5path_ewoks_source = self.links["source"]["ewoks"].split(
2454
+ "::"
2455
+ )
2456
+ else:
2457
+ filename_source = None
2458
+ h5path_ewoks_source = None
2459
+
2460
+ ewoks_destination = self.create_h5_group(
2461
+ h5_parent_group=root_group_destination,
2462
+ h5_group_name=h5path_ewoks_history_destination,
2463
+ NX_class="NXcollection",
2464
+ )
2465
+ ewoks_history = self.create_h5_group(
2466
+ h5_parent_group=ewoks_destination,
2467
+ h5_group_name="history",
2468
+ NX_class="NXcollection",
2469
+ )
2470
+
2471
+ info_history_complete = self.get_input_value("info_history", []).copy()
2472
+ info_history_previous = None
2473
+ if info_history_complete:
2474
+ info_history_previous = info_history_complete[-1]
2475
+
2476
+ title_format = "{index:02} - {class_name}"
2477
+ info_history = {
2478
+ "index": 0,
2479
+ "task_identifier": f"{self.__module__}.{self.__class__.__name__}",
2480
+ "class": self.__class__.__name__,
2481
+ "processing_type": self.processing_type,
2482
+ "datetime": str(get_isotime()),
2483
+ "version": version("ewoksid02"),
2484
+ "host": socket.gethostname(),
2485
+ }
2486
+ info_history["title"] = title_format.format(
2487
+ index=info_history["index"], class_name=info_history["class"]
2488
+ )
2489
+ if info_history_previous is not None:
2490
+ # The previous node sends the most recent historic information
2491
+ info_history["index"] = info_history_previous["index"] + 1
2492
+ info_history["title"] = title_format.format(
2493
+ index=info_history["index"], class_name=info_history["class"]
2494
+ )
2495
+ else:
2496
+ # No previous node with historic information
2497
+ if filename_source and h5path_ewoks_source:
2498
+ # This is an ewoks PROCESSED file
2499
+ root_group_source = stack.enter_context(
2500
+ open_item_silx(filename=filename_source, name="/", mode="r")
2501
+ )
2502
+ if h5path_ewoks_source not in root_group_source:
2503
+ # The ewoks group is only written from ewoksid02 version 1.0 onwards
2504
+ self.log_warning(
2505
+ f"{filename_source} is an ewoks PROCESSED_DATA file without ewoks group. Historical index will be set to 0"
2506
+ )
2507
+ else:
2508
+ ewoks_source = root_group_source[h5path_ewoks_source]
2509
+ if "last_process" not in ewoks_source:
2510
+ self.log_warning(
2511
+ "No index could be found in ewoks group. Historical index will be set to 0"
2512
+ )
2513
+ else:
2514
+ # Collect all the previous history and send to the next task
2515
+ history_group = ewoks_source["history"]
2516
+ for historic_task in history_group:
2517
+ historic_task_serialized = serialize_h5py_task(
2518
+ h5py_group=history_group[historic_task]
2519
+ )
2520
+ info_history_complete.append(historic_task_serialized)
2521
+ info_history["index"] = info_history_complete[-1]["index"] + 1
2522
+ info_history["title"] = title_format.format(
2523
+ index=info_history["index"],
2524
+ class_name=info_history["class"],
2525
+ )
2526
+ else:
2527
+ # This is not an ewoks PROCESSED file (no historic information)
2528
+ ...
2529
+
2530
+ # Add a new process into history group
2531
+ info_history_complete.append(info_history)
2532
+
2533
+ for historic_task in info_history_complete:
2534
+ historic_task_group = self.create_h5_group(
2535
+ h5_parent_group=ewoks_history,
2536
+ h5_group_name=historic_task["title"],
2537
+ NX_class="NXcollection",
2538
+ )
2539
+ deserialize_h5py_task(
2540
+ h5dict=historic_task,
2541
+ h5py_parent=historic_task_group,
2542
+ )
2543
+
2544
+ # Link to last_process group
2545
+ title = info_history["title"]
2546
+ ewoks_destination["last_process"] = h5py.SoftLink(f"history/{title}")
2547
+
2548
+ # Send to the next node
2549
+ self.outputs.info_history = info_history_complete
2550
+
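# --- Editor's note: illustrative sketch, not part of the ewoksid02 package ---
# _link_ewoks_history above chains processing steps by carrying a list of
# history entries between tasks: each new task takes the previous index + 1 and
# a title rendered as "{index:02} - {class_name}", and "last_process" is left
# as a soft link to the newest entry. With hypothetical task names, the
# index/title bookkeeping reduces to:

TITLE_FORMAT = "{index:02} - {class_name}"

def next_history_entry(history, class_name):
    """Derive the next index/title from the existing history list."""
    index = history[-1]["index"] + 1 if history else 0
    return {
        "index": index,
        "class": class_name,
        "title": TITLE_FORMAT.format(index=index, class_name=class_name),
    }

history = [{"index": 0, "class": "MetadataTask", "title": "00 - MetadataTask"}]
history.append(next_history_entry(history, "AzimuthalTask"))  # '01 - AzimuthalTask'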
2551
+ def processing_info(self) -> list:
2552
+ return []
2553
+
2554
+ def _get_filename_gallery(self):
2555
+ processed_directory = Path(self.processing_filename).parent
2556
+ gallery_directory = processed_directory / "gallery"
2557
+ gallery_directory.mkdir(exist_ok=True)
2558
+ filename_png = (
2559
+ gallery_directory / Path(self.processing_filename).with_suffix(".png").name
2560
+ )
2561
+ return str(filename_png)
2562
+
2563
+ def _save_png_to_gallery(self, array: numpy.ndarray) -> str:
2564
+ processed_directory = Path(self.processing_filename).parent
2565
+ gallery_directory = processed_directory / "gallery"
2566
+ gallery_directory.mkdir(exist_ok=True)
2567
+ filename_png = (
2568
+ gallery_directory / Path(self.processing_filename).with_suffix(".png").name
2569
+ )
2570
+ filename_png = str(filename_png)
2571
+ try:
2572
+ matplotlib.image.imsave(
2573
+ fname=filename_png,
2574
+ arr=array,
2575
+ cmap="viridis",
2576
+ dpi=100,
2577
+ origin="lower",
2578
+ )
2579
+ return filename_png
2580
+ except Exception as e:
2581
+ self.log_error(f"Failed to save image: {e}")
2582
+ return ""