legend-dataflow-scripts 0.2.4__py3-none-any.whl → 0.3.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: legend-dataflow-scripts
3
- Version: 0.2.4
3
+ Version: 0.3.0a2
4
4
  Summary: Python package for the processing scripts for LEGEND-200 data
5
5
  Author-email: George Marshall <ggmarsh@uw.edu>, Luigi Pertoldi <gipert@pm.me>
6
6
  Maintainer: The LEGEND Collaboration
@@ -19,10 +19,10 @@ Requires-Python: >=3.11
19
19
  Description-Content-Type: text/markdown
20
20
  Requires-Dist: colorlog
21
21
  Requires-Dist: dbetto>=1.2.3
22
- Requires-Dist: pygama>=2.1
23
- Requires-Dist: dspeed>=1.6
22
+ Requires-Dist: pygama>=2.3.0a1
23
+ Requires-Dist: dspeed>=2.0
24
24
  Requires-Dist: pylegendmeta>=1.2.5
25
- Requires-Dist: legend-pydataobj>=1.11
25
+ Requires-Dist: legend-pydataobj>=1.16
26
26
  Requires-Dist: pip
27
27
  Provides-Extra: test
28
28
  Requires-Dist: legend-dataflow-scripts; extra == "test"
@@ -1,13 +1,13 @@
1
1
  legenddataflowscripts/__init__.py,sha256=hlpvTxSBjOyXlZUyOyYx3VwT5LS6zNzhAZnTmfT3NjU,303
2
- legenddataflowscripts/_version.py,sha256=NRw4Jle4n9v_DD2wtplRqflGCvX8OU5eAjycYY0vY3Y,704
2
+ legenddataflowscripts/_version.py,sha256=pJ82jXLrlPbvRihDFN5QbVZSY5eoLHiYmzrB4vIro6o,712
3
3
  legenddataflowscripts/par/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  legenddataflowscripts/par/geds/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  legenddataflowscripts/par/geds/dsp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  legenddataflowscripts/par/geds/dsp/dplms.py,sha256=GIMQo_qiI-ga5zsdf3oDnxjDzw5T6gqVCQob-Mf2Pvw,5376
7
- legenddataflowscripts/par/geds/dsp/eopt.py,sha256=epnkSddyzI0sXwPyA8gJxoRumGubv3tQpa2J_uRusV8,13615
8
- legenddataflowscripts/par/geds/dsp/evtsel.py,sha256=tyF5sCovm22w0Rdd9vD8NyzM2ym8AGO8IR9oKQFYAWA,17296
9
- legenddataflowscripts/par/geds/dsp/nopt.py,sha256=l3Z5RqPM4my6xSTG78zmjQIorCwGLMP-ipTbNOauWFY,3977
10
- legenddataflowscripts/par/geds/dsp/pz.py,sha256=l9nc3MmSjghmYOR3LBbbGZMnHc52MFy5F7EOnYAPqvc,8062
7
+ legenddataflowscripts/par/geds/dsp/eopt.py,sha256=yTxzt7gNNRxYz8OvULaLPx59UgTV3NTCiGgstF3myes,13635
8
+ legenddataflowscripts/par/geds/dsp/evtsel.py,sha256=VLkrLGedQUSjFptred7cWZ9L_qNdIU5V4BXF_uA37Gs,17658
9
+ legenddataflowscripts/par/geds/dsp/nopt.py,sha256=uJlQreX7F6vHH69GCMQL_mGyGd-BL376UknX83X8_8M,3971
10
+ legenddataflowscripts/par/geds/dsp/pz.py,sha256=PrXrYv5OO-CMrm6Np9Y-jrXaIIZoiKWvGPqsEgGSTSc,8056
11
11
  legenddataflowscripts/par/geds/dsp/svm.py,sha256=eDneRB_PQZp8Q4n2VheTX3kbu4ufZQ-jnuCCjvtwFpk,826
12
12
  legenddataflowscripts/par/geds/dsp/svm_build.py,sha256=w5-vT6rXmpl7V9rdkfc7_g6GTzn86i41tHkIT-3f5YI,1931
13
13
  legenddataflowscripts/par/geds/hit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -16,13 +16,13 @@ legenddataflowscripts/par/geds/hit/ecal.py,sha256=Ki1YGzFYMcQhh2S2VgNs1ApkMjJjGV
16
16
  legenddataflowscripts/par/geds/hit/lq.py,sha256=aY2tRwbTI9U7b5bxwDohQitY-Tuhe1tXdAA5dqeaznc,11264
17
17
  legenddataflowscripts/par/geds/hit/qc.py,sha256=j3oWN28zfGrR_WMMZMGWJ46dpegKf38Fq8J3zH815sU,12606
18
18
  legenddataflowscripts/tier/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- legenddataflowscripts/tier/dsp.py,sha256=J_ABEqbQurZHJOg8LV2aporNjZQpPg2bDZG6DkmuAL4,8633
19
+ legenddataflowscripts/tier/dsp.py,sha256=SM5OVYunITFWQSkMGp5ydDnQhDtCBVTQp7jRN9gjXjY,8910
20
20
  legenddataflowscripts/tier/hit.py,sha256=-LCfsqYAYPAuN2OKbPuh4G2P5B4uA4YmQGcqUtbZxis,4989
21
21
  legenddataflowscripts/utils/__init__.py,sha256=NPpxqapio8CeQIbG8gYnz-OPoUbOlDT6tjXV-lJrKWc,426
22
22
  legenddataflowscripts/utils/alias_table.py,sha256=Im1jYnyt5pWEh8jvF_PxSi6Ur3c0E7nM62MMC-c01zE,1620
23
23
  legenddataflowscripts/utils/cfgtools.py,sha256=_1yxw_eJ08AihONhJ9sWet5HQZpOagj8Yg8y9LS3zX4,381
24
24
  legenddataflowscripts/utils/convert_np.py,sha256=qw-pX9345lhzLAwERLjEJtnikzAWWIeD9lyDre2n9P0,853
25
- legenddataflowscripts/utils/log.py,sha256=RnwUSgSxH7uK_I26rARwnIE_2R0vEvix8Nv_WjmxOGA,2437
25
+ legenddataflowscripts/utils/log.py,sha256=gHG-0K0I1p-5iB6JSvCkvM8_M0a91GsKW7XfSy4bmho,4028
26
26
  legenddataflowscripts/utils/plot_dict.py,sha256=6f2ZB8J1GNAGfldQjgl1gkKMDcqPo1W7Y2Zhxm9NaGI,472
27
27
  legenddataflowscripts/utils/pulser_removal.py,sha256=kuARdp1jf-lsUWcb0_KRDp-ZXzkHNrDCXUc3h7TJm7Q,424
28
28
  legenddataflowscripts/workflow/__init__.py,sha256=JhudKYhBT8bXtX4LCqxQCHzUiITpugAtFxePWEtphC4,474
@@ -30,8 +30,8 @@ legenddataflowscripts/workflow/execenv.py,sha256=qTG4N9ovEPxA0QtqG0wWUIuK50BZIcY
30
30
  legenddataflowscripts/workflow/filedb.py,sha256=6scz8DjdvbXs8OXOy2-6BJWVSALQzmy5cLLn-OsdXsU,3656
31
31
  legenddataflowscripts/workflow/pre_compile_catalog.py,sha256=cEK0KXh-ClSE2Bo9MK471o79XG22bMY5r-2tIihtCfk,790
32
32
  legenddataflowscripts/workflow/utils.py,sha256=VVCsj7wNaqV6sw2Xnk_xykhVv3BKTX4hqQtKE4UUayg,3170
33
- legend_dataflow_scripts-0.2.4.dist-info/METADATA,sha256=vQ9uxbFCQUTNbkVqZlpOVWKOnKxnKj8Ew-b8e6Zr4O0,3085
34
- legend_dataflow_scripts-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- legend_dataflow_scripts-0.2.4.dist-info/entry_points.txt,sha256=B197waSm-orA_ZS-9rkxNDsmOHdCn8CbWodnlqXQKRg,1313
36
- legend_dataflow_scripts-0.2.4.dist-info/top_level.txt,sha256=s8E2chjJNYUbrN6whFG_VCsJKySFp1IOXLcUefA7DB0,22
37
- legend_dataflow_scripts-0.2.4.dist-info/RECORD,,
33
+ legend_dataflow_scripts-0.3.0a2.dist-info/METADATA,sha256=JsHhX3p6jO9vLRUTG-4v4tXtYrxRljBTH2LHVCjrZjc,3091
34
+ legend_dataflow_scripts-0.3.0a2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ legend_dataflow_scripts-0.3.0a2.dist-info/entry_points.txt,sha256=B197waSm-orA_ZS-9rkxNDsmOHdCn8CbWodnlqXQKRg,1313
36
+ legend_dataflow_scripts-0.3.0a2.dist-info/top_level.txt,sha256=s8E2chjJNYUbrN6whFG_VCsJKySFp1IOXLcUefA7DB0,22
37
+ legend_dataflow_scripts-0.3.0a2.dist-info/RECORD,,
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.4'
32
- __version_tuple__ = version_tuple = (0, 2, 4)
31
+ __version__ = version = '0.3.0a2'
32
+ __version_tuple__ = version_tuple = (0, 3, 0, 'a2')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -10,13 +10,13 @@ import numpy as np
10
10
  import pygama.pargen.energy_optimisation as om # noqa: F401
11
11
  import sklearn.gaussian_process.kernels as ker
12
12
  from dbetto.catalog import Props
13
+ from dspeed import build_dsp
13
14
  from dspeed.units import unit_registry as ureg
14
15
  from lgdo import lh5
15
16
  from pygama.math.distributions import hpge_peak
16
17
  from pygama.pargen.dsp_optimize import (
17
18
  BayesianOptimizer,
18
19
  run_bayesian_optimisation,
19
- run_one_dsp,
20
20
  )
21
21
 
22
22
  from ....utils import build_log
@@ -127,7 +127,7 @@ def par_geds_dsp_eopt() -> None:
127
127
 
128
128
  dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"]
129
129
 
130
- init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)
130
+ init_data = build_dsp(raw_in=tb_data, dsp_config=dsp_config, database=db_dict)
131
131
  full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]]
132
132
  flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10
133
133
 
@@ -193,7 +193,7 @@ def par_geds_dsp_eopt() -> None:
193
193
  msg = f"Initialising values {i + 1} : {db_dict}"
194
194
  log.info(msg)
195
195
 
196
- tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)
196
+ tb_out = build_dsp(raw_in=tb_data, dsp_config=dsp_config, database=db_dict)
197
197
 
198
198
  res = fom(tb_out, kwarg_dict[0])
199
199
  results_cusp.append(res)
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import argparse
4
4
  import json
5
+ import sys
5
6
  import time
6
7
  import warnings
7
8
  from bisect import bisect_left
@@ -12,9 +13,9 @@ import numpy as np
12
13
  import pygama.math.histogram as pgh
13
14
  import pygama.pargen.energy_cal as pgc
14
15
  from dbetto.catalog import Props
16
+ from dspeed import build_dsp
15
17
  from lgdo import lh5
16
18
  from pygama.pargen.data_cleaning import generate_cuts, get_keys
17
- from pygama.pargen.dsp_optimize import run_one_dsp
18
19
 
19
20
  from ....utils import build_log, get_pulser_mask
20
21
 
@@ -99,6 +100,12 @@ def par_geds_dsp_evtsel() -> None:
99
100
  required=False,
100
101
  )
101
102
 
103
+ argparser.add_argument(
104
+ "--channel",
105
+ type=str,
106
+ help="Channel to process; required if --raw-cal-curve is set",
107
+ required="--raw-cal-curve" in sys.argv,
108
+ )
102
109
  argparser.add_argument("--log", help="log_file", type=str)
103
110
 
104
111
  argparser.add_argument(
@@ -247,7 +254,7 @@ def par_geds_dsp_evtsel() -> None:
247
254
  ]
248
255
 
249
256
  log.debug("Processing data")
250
- tb_data = run_one_dsp(input_data, dsp_config, db_dict=db_dict)
257
+ tb_data = build_dsp(raw_in=input_data, dsp_config=dsp_config, database=db_dict)
251
258
 
252
259
  if cut_parameters is not None:
253
260
  cut_dict = generate_cuts(tb_data, cut_parameters)
@@ -297,8 +304,10 @@ def par_geds_dsp_evtsel() -> None:
297
304
  peak_dict["obj_buf_start"] += n_rows_read_i
298
305
  if peak_dict["n_rows_read"] >= 10000 or file == raw_files[-1]:
299
306
  if "e_lower_lim" not in peak_dict:
300
- tb_out = run_one_dsp(
301
- peak_dict["obj_buf"], dsp_config, db_dict=db_dict
307
+ tb_out = build_dsp(
308
+ raw_in=peak_dict["obj_buf"],
309
+ dsp_config=dsp_config,
310
+ database=db_dict,
302
311
  )
303
312
  energy = tb_out[energy_parameter].nda
304
313
 
@@ -396,8 +405,10 @@ def par_geds_dsp_evtsel() -> None:
396
405
  peak_dict["obj_buf"] is not None
397
406
  and len(peak_dict["obj_buf"]) > 0
398
407
  ):
399
- tb_out = run_one_dsp(
400
- peak_dict["obj_buf"], dsp_config, db_dict=db_dict
408
+ tb_out = build_dsp(
409
+ raw_in=peak_dict["obj_buf"],
410
+ dsp_config=dsp_config,
411
+ database=db_dict,
401
412
  )
402
413
  out_tbl, n_wfs = get_out_data(
403
414
  peak_dict["obj_buf"],
@@ -8,9 +8,9 @@ from pathlib import Path
8
8
  import numpy as np
9
9
  import pygama.pargen.noise_optimization as pno
10
10
  from dbetto.catalog import Props
11
+ from dspeed import build_dsp
11
12
  from lgdo import lh5
12
13
  from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes
13
- from pygama.pargen.dsp_optimize import run_one_dsp
14
14
 
15
15
  from ....utils import build_log
16
16
 
@@ -73,7 +73,7 @@ def par_geds_dsp_nopt() -> None:
73
73
 
74
74
  msg = f"Select baselines {len(tb_data)}"
75
75
  log.info(msg)
76
- dsp_data = run_one_dsp(tb_data, dsp_config)
76
+ dsp_data = build_dsp(raw_in=tb_data, dsp_config=dsp_config)
77
77
  cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars"))
78
78
  cut_idxs = get_cut_indexes(dsp_data, cut_dict)
79
79
  tb_data = lh5.read(
@@ -7,9 +7,9 @@ from pathlib import Path
7
7
 
8
8
  import numpy as np
9
9
  from dbetto.catalog import Props
10
+ from dspeed import build_dsp
10
11
  from lgdo import lh5
11
12
  from pygama.pargen.data_cleaning import get_cut_indexes
12
- from pygama.pargen.dsp_optimize import run_one_dsp
13
13
  from pygama.pargen.pz_correct import PZCorrect
14
14
 
15
15
  from ....utils import (
@@ -134,7 +134,7 @@ def par_geds_dsp_pz() -> None:
134
134
  if "frac" in dsp_config["outputs"]:
135
135
  dsp_config_optimise_removed["outputs"].remove("frac")
136
136
 
137
- tb_out = run_one_dsp(tb_data, dsp_config_optimise_removed)
137
+ tb_out = build_dsp(raw_in=tb_data, dsp_config=dsp_config_optimise_removed)
138
138
  log.debug("Processed Data")
139
139
  cut_parameters = kwarg_dict.get("cut_parameters", None)
140
140
  if cut_parameters is not None:
@@ -95,6 +95,8 @@ def build_tier_dsp() -> None:
95
95
  # check if the raw tables are all existing
96
96
  if len(lh5.ls(args.input, input_tbl_name)) > 0:
97
97
  dsp_cfg_tbl_dict[input_tbl_name] = Props.read_from(file)
98
+ msg = f"found table {input_tbl_name} in {args.input}"
99
+ log.debug(msg)
98
100
  else:
99
101
  msg = f"table {input_tbl_name} not found in {args.input} skipping"
100
102
  log.info(msg)
@@ -145,8 +147,10 @@ def build_tier_dsp() -> None:
145
147
  process_kwargs_list = []
146
148
  for i, config in enumerate(chan_configs):
147
149
  kwargs = {
148
- "f_raw": args.input,
149
- "f_dsp": dsp_files[i],
150
+ "raw_in": args.input,
151
+ "dsp_out": dsp_files[i],
152
+ "lh5_tables": list(config.keys()),
153
+ "base_group": "",
150
154
  "chan_config": config,
151
155
  "database": database_dict,
152
156
  "write_mode": "r",
@@ -171,8 +175,10 @@ def build_tier_dsp() -> None:
171
175
 
172
176
  else:
173
177
  build_dsp(
174
- args.input,
175
- args.output,
178
+ raw_in=args.input,
179
+ dsp_out=args.output,
180
+ lh5_tables=list(dsp_cfg_tbl_dict.keys()),
181
+ base_group="",
176
182
  database=database_dict,
177
183
  chan_config=dsp_cfg_tbl_dict,
178
184
  write_mode="r",
@@ -26,7 +26,7 @@ class StreamToLogger:
26
26
 
27
27
 
28
28
  def build_log(
29
- config_dict: dict, log_file: str | None = None, fallback: str = "prod"
29
+ config_dict: dict | str, log_file: str | None = None, fallback: str = "prod"
30
30
  ) -> logging.Logger:
31
31
  """Build a logger from a configuration dictionary.
32
32
 
@@ -39,10 +39,22 @@ def build_log(
39
39
  log_file
40
40
  The path to the log file.
41
41
  """
42
- if isinstance(config_dict, str | dict):
42
+ # Accept either:
43
+ # - a str pointing to a logging properties file
44
+ # - a plain logging dict (handlers/formatters/etc.)
45
+ # - a dict already containing "options" -> {"logging": ...}
46
+ # If a dict is provided and it already contains an "options" key, assume
47
+ # caller set options explicitly (so we must not wrap it).
48
+ if isinstance(config_dict, str) or (
49
+ isinstance(config_dict, dict) and "options" not in config_dict
50
+ ):
43
51
  config_dict = {"options": {"logging": config_dict}}
44
52
 
45
- if "logging" in config_dict["options"]:
53
+ if (
54
+ isinstance(config_dict, dict)
55
+ and "options" in config_dict
56
+ and "logging" in config_dict["options"]
57
+ ):
46
58
  log_config = config_dict["options"]["logging"]
47
59
  # if it's a str, interpret it as a path to a file
48
60
  if isinstance(log_config, str):
@@ -50,7 +62,29 @@ def build_log(
50
62
 
51
63
  if log_file is not None:
52
64
  Path(log_file).parent.mkdir(parents=True, exist_ok=True)
53
- log_config["handlers"]["dataflow"]["filename"] = log_file
65
+ # Ensure the logging config has a handlers->dataflow entry; create
66
+ # minimal structure if needed so we can set the filename.
67
+ if isinstance(log_config, dict):
68
+ handlers = log_config.setdefault("handlers", {})
69
+ dataflow = handlers.setdefault("dataflow", {})
70
+ # Set the filename for the dataflow handler
71
+ dataflow["filename"] = log_file
72
+ dataflow.setdefault("class", "logging.FileHandler")
73
+ dataflow.setdefault("level", "INFO")
74
+ log_config.setdefault("version", 1)
75
+ if (
76
+ "handlers" in log_config
77
+ and "dataflow" in log_config["handlers"]
78
+ and "root" not in log_config
79
+ and "loggers" not in log_config
80
+ ):
81
+ dataflow_level = log_config["handlers"]["dataflow"].get(
82
+ "level", "INFO"
83
+ )
84
+ log_config["root"] = {
85
+ "level": dataflow_level,
86
+ "handlers": ["dataflow"],
87
+ }
54
88
 
55
89
  dictConfig(log_config)
56
90
  log = logging.getLogger(config_dict["options"].get("logger", "prod"))