legend-dataflow-scripts 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: legend-dataflow-scripts
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Python package for the processing scripts for LEGEND-200 data
5
5
  Author-email: George Marshall <ggmarsh@uw.edu>, Luigi Pertoldi <gipert@pm.me>
6
6
  Maintainer: The LEGEND Collaboration
@@ -23,7 +23,6 @@ Requires-Dist: pygama>=2.1
23
23
  Requires-Dist: dspeed>=1.6
24
24
  Requires-Dist: pylegendmeta>=1.2.5
25
25
  Requires-Dist: legend-pydataobj>=1.11
26
- Requires-Dist: legend-daq2lh5>=1.6.1
27
26
  Requires-Dist: pip
28
27
  Provides-Extra: test
29
28
  Requires-Dist: legend-dataflow-scripts; extra == "test"
@@ -1,18 +1,18 @@
1
1
  legenddataflowscripts/__init__.py,sha256=hlpvTxSBjOyXlZUyOyYx3VwT5LS6zNzhAZnTmfT3NjU,303
2
- legenddataflowscripts/_version.py,sha256=Dg8AmJomLVpjKL6prJylOONZAPRtB86LOce7dorQS_A,704
2
+ legenddataflowscripts/_version.py,sha256=o3ZTescp-19Z9cvBGq9dQnbppljgzdUYUf98Nov0spY,704
3
3
  legenddataflowscripts/par/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  legenddataflowscripts/par/geds/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  legenddataflowscripts/par/geds/dsp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- legenddataflowscripts/par/geds/dsp/dplms.py,sha256=GjOPuBuLPtzBEkfmLMSkxSSeBBsY7-wy0rY3jLjAJv0,5298
7
- legenddataflowscripts/par/geds/dsp/eopt.py,sha256=MP_prgRL4Aqj_iNIpxHks2v4NY6B_iWh3SgcxQFhixk,13506
8
- legenddataflowscripts/par/geds/dsp/evtsel.py,sha256=E0jJIuqaywIidr4K-PPMFlctZ9oumuSmiX5zKDPHVlA,17144
9
- legenddataflowscripts/par/geds/dsp/nopt.py,sha256=vHwlHbmBlozVHO1u89FwAx8c_kazTsxMqZGpbyjN-1M,3980
10
- legenddataflowscripts/par/geds/dsp/pz.py,sha256=epYHSAqz13QLjxfnm38zH1Caaih5efuwNlgCipHD5Do,8148
6
+ legenddataflowscripts/par/geds/dsp/dplms.py,sha256=GIMQo_qiI-ga5zsdf3oDnxjDzw5T6gqVCQob-Mf2Pvw,5376
7
+ legenddataflowscripts/par/geds/dsp/eopt.py,sha256=epnkSddyzI0sXwPyA8gJxoRumGubv3tQpa2J_uRusV8,13615
8
+ legenddataflowscripts/par/geds/dsp/evtsel.py,sha256=tyF5sCovm22w0Rdd9vD8NyzM2ym8AGO8IR9oKQFYAWA,17296
9
+ legenddataflowscripts/par/geds/dsp/nopt.py,sha256=l3Z5RqPM4my6xSTG78zmjQIorCwGLMP-ipTbNOauWFY,3977
10
+ legenddataflowscripts/par/geds/dsp/pz.py,sha256=l9nc3MmSjghmYOR3LBbbGZMnHc52MFy5F7EOnYAPqvc,8062
11
11
  legenddataflowscripts/par/geds/dsp/svm.py,sha256=eDneRB_PQZp8Q4n2VheTX3kbu4ufZQ-jnuCCjvtwFpk,826
12
12
  legenddataflowscripts/par/geds/dsp/svm_build.py,sha256=w5-vT6rXmpl7V9rdkfc7_g6GTzn86i41tHkIT-3f5YI,1931
13
13
  legenddataflowscripts/par/geds/hit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  legenddataflowscripts/par/geds/hit/aoe.py,sha256=jAH0Rh3JCnV67vhv9xUZtPdGB8ADJXgG9Lo8t8YQjqs,10841
15
- legenddataflowscripts/par/geds/hit/ecal.py,sha256=wg8TGgdYt1Pvh-hpvNX4qliwPvxgSlHBsReQ86rQ3aY,27032
15
+ legenddataflowscripts/par/geds/hit/ecal.py,sha256=j8Z90r9UP5Hn-cawf2lvDL8tF2Uvfp4rDjlM4sGbM-M,27009
16
16
  legenddataflowscripts/par/geds/hit/lq.py,sha256=vXgK83RlJJ4UUjQQJWfmFGIbT0AEP3EaLALM5LhvZ6s,11133
17
17
  legenddataflowscripts/par/geds/hit/qc.py,sha256=vr6j5sRTvjmDmErW2uUteCPsj5qfBrpn7ssvLKzov4M,12408
18
18
  legenddataflowscripts/tier/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,11 +27,11 @@ legenddataflowscripts/utils/plot_dict.py,sha256=6f2ZB8J1GNAGfldQjgl1gkKMDcqPo1W7
27
27
  legenddataflowscripts/utils/pulser_removal.py,sha256=kuARdp1jf-lsUWcb0_KRDp-ZXzkHNrDCXUc3h7TJm7Q,424
28
28
  legenddataflowscripts/workflow/__init__.py,sha256=JhudKYhBT8bXtX4LCqxQCHzUiITpugAtFxePWEtphC4,474
29
29
  legenddataflowscripts/workflow/execenv.py,sha256=qTG4N9ovEPxA0QtqG0wWUIuK50BZIcYvpVlpy-XgxPw,9257
30
- legenddataflowscripts/workflow/filedb.py,sha256=rbvOcXUxLbHz177QuDIDAL3aysz-bZDjHOiMsRHssZo,3434
30
+ legenddataflowscripts/workflow/filedb.py,sha256=6scz8DjdvbXs8OXOy2-6BJWVSALQzmy5cLLn-OsdXsU,3656
31
31
  legenddataflowscripts/workflow/pre_compile_catalog.py,sha256=cEK0KXh-ClSE2Bo9MK471o79XG22bMY5r-2tIihtCfk,790
32
- legenddataflowscripts/workflow/utils.py,sha256=eKE8KIG2ffynZt9fTbI1SVQV85i3aW9GFGh1Nio1iDo,3118
33
- legend_dataflow_scripts-0.2.0.dist-info/METADATA,sha256=lTH14CoB_S_IUgAPggbXmI1mN35E0hvEjdK_YGWUpLw,3122
34
- legend_dataflow_scripts-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- legend_dataflow_scripts-0.2.0.dist-info/entry_points.txt,sha256=B197waSm-orA_ZS-9rkxNDsmOHdCn8CbWodnlqXQKRg,1313
36
- legend_dataflow_scripts-0.2.0.dist-info/top_level.txt,sha256=s8E2chjJNYUbrN6whFG_VCsJKySFp1IOXLcUefA7DB0,22
37
- legend_dataflow_scripts-0.2.0.dist-info/RECORD,,
32
+ legenddataflowscripts/workflow/utils.py,sha256=VVCsj7wNaqV6sw2Xnk_xykhVv3BKTX4hqQtKE4UUayg,3170
33
+ legend_dataflow_scripts-0.2.2.dist-info/METADATA,sha256=9n2aNwe2pg_qGG5v3e7ly_yQNWELnCpLRe3PC2ygsEA,3085
34
+ legend_dataflow_scripts-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ legend_dataflow_scripts-0.2.2.dist-info/entry_points.txt,sha256=B197waSm-orA_ZS-9rkxNDsmOHdCn8CbWodnlqXQKRg,1313
36
+ legend_dataflow_scripts-0.2.2.dist-info/top_level.txt,sha256=s8E2chjJNYUbrN6whFG_VCsJKySFp1IOXLcUefA7DB0,22
37
+ legend_dataflow_scripts-0.2.2.dist-info/RECORD,,
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.0'
32
- __version_tuple__ = version_tuple = (0, 2, 0)
31
+ __version__ = version = '0.2.2'
32
+ __version_tuple__ = version_tuple = (0, 2, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -37,6 +37,7 @@ def par_geds_dsp_dplms() -> None:
37
37
  "--config-file", help="Config file", type=str, nargs="*", required=True
38
38
  )
39
39
 
40
+ argparser.add_argument("--channel", help="channel", type=str, required=True)
40
41
  argparser.add_argument(
41
42
  "--raw-table-name", help="raw table name", type=str, required=True
42
43
  )
@@ -53,7 +54,7 @@ def par_geds_dsp_dplms() -> None:
53
54
  t0 = time.time()
54
55
 
55
56
  dplms_dict = Props.read_from(args.config_file)
56
- db_dict = Props.read_from(args.decay_const)
57
+ db_dict = Props.read_from(args.database)
57
58
 
58
59
  if dplms_dict["run_dplms"] is True:
59
60
  with Path(args.fft_raw_filelist).open() as f:
@@ -131,10 +131,10 @@ def par_geds_dsp_eopt() -> None:
131
131
  full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]]
132
132
  flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10
133
133
 
134
- if flat_val < 1.0:
135
- flat_val = 1.0
136
- elif flat_val > 4:
137
- flat_val = 4
134
+ if flat_val < opt_dict.get("min_flat_value", 1):
135
+ flat_val = opt_dict.get("min_flat_value", 1)
136
+ elif flat_val > opt_dict.get("max_flat_value", 4):
137
+ flat_val = opt_dict.get("max_flat_value", 4)
138
138
  flat_val = f"{flat_val}*us"
139
139
 
140
140
  db_dict["cusp"] = {"flat": flat_val}
@@ -311,7 +311,7 @@ def par_geds_dsp_eopt() -> None:
311
311
 
312
312
  optimisers = [bopt_cusp, bopt_zac, bopt_trap]
313
313
 
314
- out_param_dict, out_results_list = run_bayesian_optimisation(
314
+ out_param_dict, _ = run_bayesian_optimisation(
315
315
  tb_data,
316
316
  dsp_config,
317
317
  [fom],
@@ -82,7 +82,7 @@ def get_out_data(
82
82
 
83
83
  def par_geds_dsp_evtsel() -> None:
84
84
  argparser = argparse.ArgumentParser()
85
- argparser.add_argument("--raw-filelist", help="raw_filelist", type=str)
85
+ argparser.add_argument("--raw-filelist", help="raw_filelist", type=str, nargs="*")
86
86
  argparser.add_argument(
87
87
  "--pulser-file", help="pulser-file", type=str, required=False
88
88
  )
@@ -133,14 +133,19 @@ def par_geds_dsp_evtsel() -> None:
133
133
  db_dict = Props.read_from(args.decay_const)
134
134
 
135
135
  Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True)
136
- rng = np.random.default_rng()
137
- rand_num = f"{rng.integers(0, 99999):05d}"
138
- temp_output = f"{args.peak_file}.{rand_num}"
139
136
  if peak_dict.pop("run_selection") is True:
140
137
  log.debug("Starting peak selection")
141
138
 
142
- with Path(args.raw_filelist).open() as f:
143
- files = f.read().splitlines()
139
+ if (
140
+ isinstance(args.raw_filelist, list)
141
+ and args.raw_filelist[0].split(".")[-1] == "filelist"
142
+ ):
143
+ files = args.raw_filelist[0]
144
+ with Path(files).open() as f:
145
+ files = f.read().splitlines()
146
+ else:
147
+ files = args.raw_filelist
148
+
144
149
  raw_files = sorted(files)
145
150
 
146
151
  peaks_kev = peak_dict["peaks"]
@@ -158,8 +163,10 @@ def par_geds_dsp_evtsel() -> None:
158
163
  if lh5_path[-1] != "/":
159
164
  lh5_path += "/"
160
165
 
166
+ energy_field = peak_dict.get("energy_param", "daqenergy")
167
+
161
168
  tb = lh5.read(
162
- lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"]
169
+ lh5_path, raw_files, field_mask=[energy_field, "t_sat_lo", "timestamp"]
163
170
  )
164
171
 
165
172
  if args.no_pulse is False:
@@ -187,14 +194,14 @@ def par_geds_dsp_evtsel() -> None:
187
194
  "operations"
188
195
  ]
189
196
  else:
190
- E_uncal = tb.daqenergy.nda
197
+ E_uncal = tb[energy_field].nda
191
198
  E_uncal = E_uncal[E_uncal > 200]
192
199
  guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess
193
200
 
194
201
  # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins)
195
202
 
196
203
  hpge_cal = pgc.HPGeCalibration(
197
- "daqenergy",
204
+ energy_field,
198
205
  peaks_kev,
199
206
  guess_keV,
200
207
  0,
@@ -205,7 +212,7 @@ def par_geds_dsp_evtsel() -> None:
205
212
  roughpars = hpge_cal.pars
206
213
  raw_dict = {
207
214
  "daqenergy_cal": {
208
- "expression": "daqenergy*a",
215
+ "expression": f"{energy_field}*a",
209
216
  "parameters": {"a": round(float(roughpars[1]), 5)},
210
217
  }
211
218
  }
@@ -377,7 +384,7 @@ def par_geds_dsp_evtsel() -> None:
377
384
  lh5.write(
378
385
  out_tbl,
379
386
  name=lh5_path,
380
- lh5_file=temp_output,
387
+ lh5_file=args.peak_file,
381
388
  wo_mode="a",
382
389
  )
383
390
  peak_dict["obj_buf"] = None
@@ -408,7 +415,7 @@ def par_geds_dsp_evtsel() -> None:
408
415
  lh5.write(
409
416
  out_tbl,
410
417
  name=lh5_path,
411
- lh5_file=temp_output,
418
+ lh5_file=args.peak_file,
412
419
  wo_mode="a",
413
420
  )
414
421
  peak_dict["obj_buf"] = None
@@ -423,7 +430,6 @@ def par_geds_dsp_evtsel() -> None:
423
430
  log.debug(msg)
424
431
 
425
432
  else:
426
- Path(temp_output).touch()
433
+ Path(args.peak_file).touch()
427
434
  msg = f"event selection completed in {time.time() - t0} seconds"
428
435
  log.debug(msg)
429
- Path(temp_output).rename(args.peak_file)
@@ -52,7 +52,7 @@ def par_geds_dsp_nopt() -> None:
52
52
  t0 = time.time()
53
53
 
54
54
  opt_dict = Props.read_from(args.config_file)
55
- db_dict = Props.read_from(args.decay_const)
55
+ db_dict = Props.read_from(args.database)
56
56
 
57
57
  if opt_dict.pop("run_nopt") is True:
58
58
  with Path(args.raw_filelist).open() as f:
@@ -21,7 +21,6 @@ from ....utils import (
21
21
 
22
22
  def par_geds_dsp_pz() -> None:
23
23
  argparser = argparse.ArgumentParser()
24
- argparser.add_argument("--configs", help="configs path", type=str, required=True)
25
24
  argparser.add_argument("--log", help="log file", type=str)
26
25
  argparser.add_argument(
27
26
  "-p", "--no-pulse", help="no pulser present", action="store_true"
@@ -71,7 +71,7 @@ def plot_2614_timemap(
71
71
  norm=LogNorm(),
72
72
  )
73
73
 
74
- ticks, labels = plt.xticks()
74
+ ticks, _ = plt.xticks()
75
75
  plt.xlabel(
76
76
  f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}"
77
77
  )
@@ -125,7 +125,7 @@ def plot_pulser_timemap(
125
125
  norm=LogNorm(),
126
126
  )
127
127
  plt.ylim([mean - n_spread * spread, mean + n_spread * spread])
128
- ticks, labels = plt.xticks()
128
+ ticks, _ = plt.xticks()
129
129
  plt.xlabel(
130
130
  f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}"
131
131
  )
@@ -264,7 +264,7 @@ def bin_survival_fraction(
264
264
  data.query(selection_string)[cal_energy_param],
265
265
  bins=np.arange(erange[0], erange[1] + dx, dx),
266
266
  )
267
- counts_fail, bins_fail, _ = pgh.get_hist(
267
+ counts_fail, _, _ = pgh.get_hist(
268
268
  data.query(f"(~{cut_field})&(~{pulser_field})")[cal_energy_param],
269
269
  bins=np.arange(erange[0], erange[1] + dx, dx),
270
270
  )
@@ -303,7 +303,7 @@ def plot_baseline_timemap(
303
303
  norm=LogNorm(),
304
304
  )
305
305
 
306
- ticks, labels = plt.xticks()
306
+ ticks, _ = plt.xticks()
307
307
  plt.xlabel(
308
308
  f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}"
309
309
  )
@@ -42,6 +42,11 @@ def build_filedb() -> None:
42
42
  ignore = []
43
43
 
44
44
  fdb = FileDB(config, scan=False)
45
+ try:
46
+ fdb.scan_files([args.scan_path])
47
+ except Exception as e:
48
+ msg = f"error when building {args.output} from {args.scan_path}"
49
+ raise RuntimeError(msg) from e
45
50
  fdb.scan_files([args.scan_path])
46
51
  fdb.scan_tables_columns(dir_files_conform=True)
47
52
 
@@ -86,7 +91,7 @@ def build_filedb() -> None:
86
91
  if (
87
92
  (loc_timestamps == default).all() or not found
88
93
  ) and row.raw_file not in ignore:
89
- msg = "something went wrong! no valid first timestamp found. Likely: the file is empty"
94
+ msg = "something went wrong! no valid first timestamp found. Likely: the file {row.raw_file} is empty"
90
95
  raise RuntimeError(msg)
91
96
 
92
97
  timestamps[i] = np.min(loc_timestamps)
@@ -97,7 +102,7 @@ def build_filedb() -> None:
97
102
  if (
98
103
  timestamps[i] < 0 or timestamps[i] > 4102444800
99
104
  ) and row.raw_file not in ignore:
100
- msg = f"something went wrong! timestamp {timestamps[i]} does not make sense"
105
+ msg = f"something went wrong! timestamp {timestamps[i]} does not make sense in {row.raw_file}"
101
106
  raise RuntimeError(msg)
102
107
 
103
108
  fdb.df["first_timestamp"] = timestamps
@@ -49,7 +49,8 @@ def subst_vars(
49
49
  if use_env:
50
50
  combined_var_values = dict(iter(os.environ.items()))
51
51
  combined_var_values.update(copy.copy(var_values))
52
- subst_vars_impl(props, combined_var_values, ignore_missing)
52
+
53
+ return subst_vars_impl(props, combined_var_values, ignore_missing)
53
54
 
54
55
 
55
56
  def subst_vars_in_snakemake_config(workflow, config):
@@ -65,10 +66,11 @@ def subst_vars_in_snakemake_config(workflow, config):
65
66
  use_env=True,
66
67
  ignore_missing=False,
67
68
  )
68
- if "system" in config:
69
- config["execenv"] = config["execenv"][config["system"]]
70
- else:
71
- config["execenv"] = config["execenv"]["bare"]
69
+ if "execenv" in config:
70
+ if "system" in config:
71
+ config["execenv"] = config["execenv"][config["system"]]
72
+ else:
73
+ config["execenv"] = config["execenv"]["bare"]
72
74
 
73
75
 
74
76
  def set_last_rule_name(workflow, new_name):