legend-dataflow-scripts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. legend_dataflow_scripts-0.1.0.dist-info/METADATA +57 -0
  2. legend_dataflow_scripts-0.1.0.dist-info/RECORD +36 -0
  3. legend_dataflow_scripts-0.1.0.dist-info/WHEEL +5 -0
  4. legend_dataflow_scripts-0.1.0.dist-info/entry_points.txt +18 -0
  5. legend_dataflow_scripts-0.1.0.dist-info/top_level.txt +1 -0
  6. legenddataflowscripts/__init__.py +17 -0
  7. legenddataflowscripts/_version.py +21 -0
  8. legenddataflowscripts/par/__init__.py +0 -0
  9. legenddataflowscripts/par/geds/__init__.py +0 -0
  10. legenddataflowscripts/par/geds/dsp/__init__.py +0 -0
  11. legenddataflowscripts/par/geds/dsp/dplms.py +145 -0
  12. legenddataflowscripts/par/geds/dsp/eopt.py +398 -0
  13. legenddataflowscripts/par/geds/dsp/evtsel.py +400 -0
  14. legenddataflowscripts/par/geds/dsp/nopt.py +120 -0
  15. legenddataflowscripts/par/geds/dsp/pz.py +217 -0
  16. legenddataflowscripts/par/geds/dsp/svm.py +28 -0
  17. legenddataflowscripts/par/geds/dsp/svm_build.py +69 -0
  18. legenddataflowscripts/par/geds/hit/__init__.py +0 -0
  19. legenddataflowscripts/par/geds/hit/aoe.py +245 -0
  20. legenddataflowscripts/par/geds/hit/ecal.py +778 -0
  21. legenddataflowscripts/par/geds/hit/lq.py +213 -0
  22. legenddataflowscripts/par/geds/hit/qc.py +326 -0
  23. legenddataflowscripts/tier/__init__.py +0 -0
  24. legenddataflowscripts/tier/dsp.py +263 -0
  25. legenddataflowscripts/tier/hit.py +148 -0
  26. legenddataflowscripts/utils/__init__.py +15 -0
  27. legenddataflowscripts/utils/alias_table.py +28 -0
  28. legenddataflowscripts/utils/cfgtools.py +14 -0
  29. legenddataflowscripts/utils/convert_np.py +31 -0
  30. legenddataflowscripts/utils/log.py +77 -0
  31. legenddataflowscripts/utils/pulser_removal.py +16 -0
  32. legenddataflowscripts/workflow/__init__.py +20 -0
  33. legenddataflowscripts/workflow/execenv.py +327 -0
  34. legenddataflowscripts/workflow/filedb.py +107 -0
  35. legenddataflowscripts/workflow/pre_compile_catalog.py +24 -0
  36. legenddataflowscripts/workflow/utils.py +113 -0
@@ -0,0 +1,398 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import pickle as pkl
5
+ import time
6
+ import warnings
7
+ from pathlib import Path
8
+
9
+ import numpy as np
10
+ import pygama.pargen.energy_optimisation as om # noqa: F401
11
+ import sklearn.gaussian_process.kernels as ker
12
+ from dbetto import TextDB
13
+ from dbetto.catalog import Props
14
+ from dspeed.units import unit_registry as ureg
15
+ from lgdo import lh5
16
+ from pygama.math.distributions import hpge_peak
17
+ from pygama.pargen.dsp_optimize import (
18
+ BayesianOptimizer,
19
+ run_bayesian_optimisation,
20
+ run_one_dsp,
21
+ )
22
+
23
+ from ....utils import build_log
24
+
25
+ warnings.filterwarnings(action="ignore", category=RuntimeWarning)
26
+ warnings.filterwarnings(action="ignore", category=np.exceptions.RankWarning)
27
+
28
+
29
def par_geds_dsp_eopt() -> None:
    """CLI entry point: optimise HPGe energy-filter parameters for one channel.

    Runs a Bayesian optimisation over the shaping parameters of the cusp, zac
    and trapezoidal energy filters (sigma/rise time), using calibration-peak
    events from ``--peak-file``. Results are merged into the decay-constant
    database dict and written to ``--final-dsp-pars``; the optimiser objects
    are pickled to ``--qbb-grid-path`` and diagnostic plots (optionally merged
    with ``--inplots``) to ``--plot-path``.

    All inputs/outputs are passed as command-line arguments; the function
    returns nothing.
    """
    argparser = argparse.ArgumentParser()

    # Input data and upstream results.
    argparser.add_argument("--peak-file", help="tcm_filelist", type=str, required=True)
    argparser.add_argument("--decay-const", help="decay_const", type=str, required=True)
    argparser.add_argument("--inplots", help="in_plot_path", type=str)

    # Logging and configuration database.
    argparser.add_argument("--log", help="log_file", type=str)
    argparser.add_argument("--configs", help="configs", type=str, required=True)

    # Data-taking context used to select the right config snapshot.
    argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
    argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
    argparser.add_argument("--channel", help="Channel", type=str, required=True)
    argparser.add_argument(
        "--raw-table-name", help="raw table name", type=str, required=True
    )

    # Output locations.
    argparser.add_argument(
        "--final-dsp-pars", help="final_dsp_pars", type=str, required=True
    )
    argparser.add_argument("--qbb-grid-path", help="qbb_grid_path", type=str)
    argparser.add_argument("--plot-path", help="plot_path", type=str)

    argparser.add_argument(
        "--plot-save-path", help="plot_save_path", type=str, required=False
    )
    args = argparser.parse_args()

    # Resolve the validity-ranged config for this timestamp/datatype and pull
    # the rule-specific section for this script.
    configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)
    config_dict = configs["snakemake_rules"]["pars_dsp_eopt"]

    log = build_log(config_dict, args.log)

    t0 = time.time()

    # Per-channel processing chain and optimiser settings.
    dsp_config = config_dict["inputs"]["processing_chain"][args.channel]
    opt_json = config_dict["inputs"]["optimiser_config"][args.channel]

    opt_dict = Props.read_from(opt_json)
    # Seed the DSP parameter database with the previously fitted decay constants.
    db_dict = Props.read_from(args.decay_const)

    # "run_eopt" is popped so it does not leak into downstream kwargs.
    if opt_dict.pop("run_eopt") is True:
        peaks_kev = np.array(opt_dict["peaks"])
        kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]]

        # Build one figure-of-merit kwargs dict per calibration peak for each
        # of the three energy estimators.
        kwarg_dicts_cusp = []
        kwarg_dicts_trap = []
        kwarg_dicts_zac = []
        for peak in peaks_kev:
            peak_idx = np.where(peaks_kev == peak)[0][0]
            kev_width = kev_widths[peak_idx]

            kwarg_dicts_cusp.append(
                {
                    "parameter": "cuspEmax",
                    "func": hpge_peak,
                    "peak": peak,
                    "kev_width": kev_width,
                    "bin_width": 5,
                }
            )
            kwarg_dicts_zac.append(
                {
                    "parameter": "zacEmax",
                    "func": hpge_peak,
                    "peak": peak,
                    "kev_width": kev_width,
                    "bin_width": 5,
                }
            )
            kwarg_dicts_trap.append(
                {
                    "parameter": "trapEmax",
                    "func": hpge_peak,
                    "peak": peak,
                    "kev_width": kev_width,
                    "bin_width": 5,
                }
            )

        # Select only events tagged with one of the configured peaks; the peak
        # labels in the file are integer-rounded keV values.
        peaks_rounded = [int(peak) for peak in peaks_kev]
        peaks = lh5.read_as(f"{args.raw_table_name}/peak", args.peak_file, library="np")
        ids = np.isin(peaks, peaks_rounded)
        peaks = peaks[ids]
        # Per-peak index lists into the selected subset (used by the FOM).
        idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded]

        tb_data = lh5.read(args.raw_table_name, args.peak_file, idx=ids)

        t1 = time.time()
        msg = f"Data Loaded in {(t1 - t0) / 60} minutes"
        log.info(msg)

        # The processing chain may be given as a path (or list of paths)
        # instead of an inline dict.
        if isinstance(dsp_config, str | list):
            dsp_config = Props.read_from(dsp_config)

        # First pass: compute only the timing outputs needed to choose the
        # filter flat-top length.
        dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"]

        init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)
        # Drift-time proxy for the highest-energy peak (last idx_list entry).
        full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]]
        # Flat-top chosen to cover 110% of the 99th-percentile drift time;
        # the /100 then /10 scaling presumably converts the timing values to
        # microseconds with 0.1 us granularity — TODO confirm units.
        flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10

        # Clamp the flat-top to [1, 4] us.
        if flat_val < 1.0:
            flat_val = 1.0
        elif flat_val > 4:
            flat_val = 4
        flat_val = f"{flat_val}*us"

        # Same flat-top for all three filters.
        db_dict["cusp"] = {"flat": flat_val}
        db_dict["zac"] = {"flat": flat_val}
        db_dict["etrap"] = {"flat": flat_val}

        # Freeze dt_eff from the first pass: attach it as a data column and
        # drop its processor so later passes reuse the cached values.
        tb_data.add_column("dt_eff", init_data["dt_eff"])

        dsp_config["processors"].pop("dt_eff")

        # Second-pass outputs: the three energy estimators plus drift time.
        dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"]

        # FOM kwargs, ordered cusp, zac, trap — this order is relied on below
        # and must match the optimisers list.
        kwarg_dict = [
            {
                "peak_dicts": kwarg_dicts_cusp,
                "ctc_param": "dt_eff",
                "idx_list": idx_list,
                "peaks_kev": peaks_kev,
            },
            {
                "peak_dicts": kwarg_dicts_zac,
                "ctc_param": "dt_eff",
                "idx_list": idx_list,
                "peaks_kev": peaks_kev,
            },
            {
                "peak_dicts": kwarg_dicts_trap,
                "ctc_param": "dt_eff",
                "idx_list": idx_list,
                "peaks_kev": peaks_kev,
            },
        ]

        # NOTE(review): the figure-of-merit callable is built with eval() on a
        # config string (expected to resolve against the imported modules,
        # e.g. pygama.pargen.energy_optimisation). The config must be trusted;
        # eval on untrusted input is arbitrary code execution.
        fom = eval(opt_dict["fom"])
        out_field = opt_dict["fom_field"]
        out_err_field = opt_dict["fom_err_field"]
        sample_x = np.array(opt_dict["initial_samples"])

        # Per-filter accumulators for the initial (pre-BO) sample sweep.
        results_cusp = []
        results_zac = []
        results_trap = []

        sample_y_cusp = []
        sample_y_zac = []
        sample_y_trap = []

        err_y_cusp = []
        err_y_zac = []
        err_y_trap = []

        # Evaluate the FOM at each configured initial sample point; the same
        # x value sets cusp/zac sigma and etrap rise simultaneously.
        for i, x in enumerate(sample_x):
            db_dict["cusp"]["sigma"] = f"{x[0]}*us"
            db_dict["zac"]["sigma"] = f"{x[0]}*us"
            db_dict["etrap"]["rise"] = f"{x[0]}*us"

            msg = f"Initialising values {i + 1} : {db_dict}"
            log.info(msg)

            tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)

            res = fom(tb_out, kwarg_dict[0])
            results_cusp.append(res)
            sample_y_cusp.append(res[out_field])
            err_y_cusp.append(res[out_err_field])

            res = fom(tb_out, kwarg_dict[1])
            results_zac.append(res)
            sample_y_zac.append(res[out_field])
            err_y_zac.append(res[out_err_field])

            res = fom(tb_out, kwarg_dict[2])
            results_trap.append(res)
            sample_y_trap.append(res[out_field])
            err_y_trap.append(res[out_err_field])

            msg = f"{i + 1} Finished"
            log.info(msg)

        # Replacement value for failed (NaN) FOM evaluations: twice the worst
        # observed value, or a configured default if every sample failed.
        if np.isnan(sample_y_cusp).all():
            max_cusp = opt_dict["nan_default"]
        else:
            max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2)
        if np.isnan(sample_y_zac).all():
            max_zac = opt_dict["nan_default"]
        else:
            max_zac = np.ceil(np.nanmax(sample_y_zac) * 2)
        if np.isnan(sample_y_trap).all():
            max_trap = opt_dict["nan_default"]
        else:
            max_trap = np.ceil(np.nanmax(sample_y_trap) * 2)

        nan_vals = [max_cusp, max_zac, max_trap]

        # Patch NaNs in place so the Gaussian process gets finite targets.
        for i in range(len(sample_x)):
            if np.isnan(sample_y_cusp[i]):
                results_cusp[i]["y_val"] = max_cusp
                sample_y_cusp[i] = max_cusp

            if np.isnan(sample_y_zac[i]):
                results_zac[i]["y_val"] = max_zac
                sample_y_zac[i] = max_zac

            if np.isnan(sample_y_trap[i]):
                results_trap[i]["y_val"] = max_trap
                sample_y_trap[i] = max_trap

        # GP kernel shared by all three optimisers: fixed constant offset +
        # RBF with bounded length scale + white noise.
        kernel = (
            ker.ConstantKernel(2.0, constant_value_bounds="fixed")
            + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5])
            + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1))
        )

        lambda_param = 5
        # Waveform sampling period as a pint quantity (value * declared unit).
        sampling_rate = tb_data["waveform_presummed"]["dt"][0]
        sampling_unit = ureg.Quantity(
            tb_data["waveform_presummed"]["dt"].attrs["units"]
        )
        waveform_sampling = sampling_rate * sampling_unit

        # One optimiser per filter; each searches a single dimension
        # (sigma for cusp/zac, rise for etrap) in microseconds.
        bopt_cusp = BayesianOptimizer(
            acq_func=opt_dict["acq_func"],
            batch_size=opt_dict["batch_size"],
            kernel=kernel,
            sampling_rate=waveform_sampling,
            fom_value=out_field,
            fom_error=out_err_field,
        )
        bopt_cusp.lambda_param = lambda_param
        bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us")

        bopt_zac = BayesianOptimizer(
            acq_func=opt_dict["acq_func"],
            batch_size=opt_dict["batch_size"],
            kernel=kernel,
            sampling_rate=waveform_sampling,
            fom_value=out_field,
            fom_error=out_err_field,
        )
        bopt_zac.lambda_param = lambda_param
        bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us")

        bopt_trap = BayesianOptimizer(
            acq_func=opt_dict["acq_func"],
            batch_size=opt_dict["batch_size"],
            kernel=kernel,
            sampling_rate=waveform_sampling,
            fom_value=out_field,
            fom_error=out_err_field,
        )
        bopt_trap.lambda_param = lambda_param
        bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us")

        # Seed each optimiser with the initial sweep results.
        bopt_cusp.add_initial_values(
            x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp
        )
        bopt_zac.add_initial_values(
            x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac
        )
        bopt_trap.add_initial_values(
            x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap
        )

        # Record the best initial sample (minimisation) as the starting
        # optimum for each filter.
        best_idx = np.nanargmin(sample_y_cusp)
        bopt_cusp.optimal_results = results_cusp[best_idx]
        bopt_cusp.optimal_x = sample_x[best_idx]

        best_idx = np.nanargmin(sample_y_zac)
        bopt_zac.optimal_results = results_zac[best_idx]
        bopt_zac.optimal_x = sample_x[best_idx]

        best_idx = np.nanargmin(sample_y_trap)
        bopt_trap.optimal_results = results_trap[best_idx]
        bopt_trap.optimal_x = sample_x[best_idx]

        # Order must match kwarg_dict and nan_vals (cusp, zac, trap).
        optimisers = [bopt_cusp, bopt_zac, bopt_trap]

        out_param_dict, out_results_list = run_bayesian_optimisation(
            tb_data,
            dsp_config,
            [fom],
            optimisers,
            fom_kwargs=kwarg_dict,
            db_dict=db_dict,
            nan_val=nan_vals,
            n_iter=opt_dict["n_iter"],
        )

        # Merge the optimised filter parameters into the parameter database.
        Props.add_to(db_dict, out_param_dict)

        t2 = time.time()
        msg = f"Optimiser finished in {(t2 - t1) / 60} minutes"
        log.info(msg)

        # Charge-trapping-correction expressions: each estimator gets a
        # drift-time-dependent correction with the fitted alpha (rounded to
        # 9 decimal places) from its optimiser.
        out_alpha_dict = {}
        out_alpha_dict["cuspEmax_ctc"] = {
            "expression": "cuspEmax*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))},
        }

        out_alpha_dict["cuspEftp_ctc"] = {
            "expression": "cuspEftp*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))},
        }

        out_alpha_dict["zacEmax_ctc"] = {
            "expression": "zacEmax*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))},
        }

        out_alpha_dict["zacEftp_ctc"] = {
            "expression": "zacEftp*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))},
        }

        out_alpha_dict["trapEmax_ctc"] = {
            "expression": "trapEmax*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))},
        }

        out_alpha_dict["trapEftp_ctc"] = {
            "expression": "trapEftp*(1+dt_eff*a)",
            "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))},
        }
        if "ctc_params" in db_dict:
            db_dict["ctc_params"].update(out_alpha_dict)
        else:
            db_dict.update({"ctc_params": out_alpha_dict})

        # Persist the optimiser objects for downstream Qbb-grid use.
        Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True)
        with Path(args.qbb_grid_path).open("wb") as f:
            pkl.dump(optimisers, f)

    else:
        # Optimisation disabled: still create the expected output file so the
        # workflow's file-based dependency tracking is satisfied.
        Path(args.qbb_grid_path).touch()

    # Always write the (possibly unmodified) DSP parameter database.
    Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True)
    Props.write_to(args.final_dsp_pars, db_dict)

    # NOTE(review): this branch references bopt_trap/bopt_cusp/bopt_zac and
    # sample_x, which are only bound when run_eopt was true — with run_eopt
    # false and --plot-path set this raises NameError. Verify callers never
    # combine those options.
    if args.plot_path:
        if args.inplots:
            # Merge into existing upstream plot dictionary if provided.
            with Path(args.inplots).open("rb") as r:
                plot_dict = pkl.load(r)
        else:
            plot_dict = {}

        plot_dict["trap_optimisation"] = {
            "kernel_space": bopt_trap.plot(init_samples=sample_x),
            "acq_space": bopt_trap.plot_acq(init_samples=sample_x),
        }

        plot_dict["cusp_optimisation"] = {
            "kernel_space": bopt_cusp.plot(init_samples=sample_x),
            "acq_space": bopt_cusp.plot_acq(init_samples=sample_x),
        }

        plot_dict["zac_optimisation"] = {
            "kernel_space": bopt_zac.plot(init_samples=sample_x),
            "acq_space": bopt_zac.plot_acq(init_samples=sample_x),
        }

        Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True)
        with Path(args.plot_path).open("wb") as w:
            pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL)