lstosa 0.10.14__py3-none-any.whl → 0.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/METADATA +1 -1
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/RECORD +17 -16
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/WHEEL +1 -1
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/entry_points.txt +1 -0
- osa/_version.py +2 -2
- osa/configs/sequencer.cfg +5 -1
- osa/job.py +17 -2
- osa/scripts/gain_selection.py +367 -186
- osa/scripts/gainsel_webmaker.py +157 -0
- osa/scripts/sequencer.py +71 -3
- osa/scripts/sequencer_webmaker.py +4 -4
- osa/scripts/tests/test_osa_scripts.py +27 -0
- osa/tests/test_jobs.py +9 -3
- osa/utils/cliopts.py +8 -0
- osa/workflow/stages.py +13 -6
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/LICENSE +0 -0
- {lstosa-0.10.14.dist-info → lstosa-0.10.16.dist-info}/top_level.txt +0 -0
osa/scripts/gain_selection.py
CHANGED
|
@@ -1,24 +1,28 @@
|
|
|
1
1
|
"""Script to run the gain selection over a list of dates."""
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
|
-
import shutil
|
|
5
4
|
import glob
|
|
6
5
|
import pandas as pd
|
|
7
6
|
import subprocess as sp
|
|
8
7
|
from pathlib import Path
|
|
9
8
|
from textwrap import dedent
|
|
10
|
-
from io import StringIO
|
|
11
9
|
import argparse
|
|
10
|
+
import sys
|
|
12
11
|
|
|
13
12
|
from astropy.table import Table
|
|
14
|
-
from lstchain.paths import
|
|
13
|
+
from lstchain.paths import parse_r0_filename
|
|
14
|
+
from datetime import datetime
|
|
15
15
|
|
|
16
16
|
from osa.scripts.reprocessing import get_list_of_dates, check_job_status_and_wait
|
|
17
|
-
from osa.utils.utils import wait_for_daytime
|
|
17
|
+
from osa.utils.utils import wait_for_daytime, date_to_dir, date_to_iso
|
|
18
18
|
from osa.utils.logging import myLogger
|
|
19
|
-
from osa.
|
|
19
|
+
from osa.utils.iofile import append_to_file
|
|
20
|
+
from osa.utils.cliopts import valid_date
|
|
21
|
+
from osa.job import get_sacct_output, run_sacct, job_finished_in_timeout
|
|
20
22
|
from osa.configs.config import cfg
|
|
21
23
|
from osa.paths import DEFAULT_CFG
|
|
24
|
+
from osa.nightsummary.nightsummary import run_summary_table
|
|
25
|
+
|
|
22
26
|
|
|
23
27
|
log = myLogger(logging.getLogger(__name__))
|
|
24
28
|
|
|
@@ -49,23 +53,16 @@ parser.add_argument(
|
|
|
49
53
|
"-d",
|
|
50
54
|
"--date",
|
|
51
55
|
default=None,
|
|
52
|
-
type=
|
|
53
|
-
help="Night to apply the gain selection in
|
|
56
|
+
type=valid_date,
|
|
57
|
+
help="Night to apply the gain selection in YYYY-MM-DD format",
|
|
54
58
|
)
|
|
55
59
|
parser.add_argument(
|
|
56
60
|
"-l",
|
|
57
61
|
"--dates-file",
|
|
58
62
|
default=None,
|
|
59
63
|
help="List of dates to apply the gain selection. The input file should list"
|
|
60
|
-
"the dates in the format
|
|
61
|
-
)
|
|
62
|
-
parser.add_argument(
|
|
63
|
-
"-o",
|
|
64
|
-
"--output-basedir",
|
|
65
|
-
type=Path,
|
|
66
|
-
default=Path("/fefs/aswg/data/real/R0G"),
|
|
67
|
-
help="Output directory of the gain selected files. Default is /fefs/aswg/data/real/R0G."
|
|
68
|
-
)
|
|
64
|
+
"the dates in the format YYYY-MM-DD, one date per line.",
|
|
65
|
+
)
|
|
69
66
|
parser.add_argument(
|
|
70
67
|
"-s",
|
|
71
68
|
"--start-time",
|
|
@@ -86,67 +83,205 @@ parser.add_argument(
|
|
|
86
83
|
type=str,
|
|
87
84
|
default=None,
|
|
88
85
|
help="Choose tool to apply the gain selection regardless the date. Possible options are: lst_dvr (by default used for dates "
|
|
89
|
-
"previous to
|
|
86
|
+
"previous to 2023-12-05) and lstchain_r0_to_r0g (by default used for dates later than 2023-12-05).",
|
|
87
|
+
)
|
|
88
|
+
parser.add_argument(
|
|
89
|
+
"--simulate",
|
|
90
|
+
action="store_true",
|
|
91
|
+
default=False,
|
|
92
|
+
help="Simulate launching of the gain selection script. Dry run.",
|
|
93
|
+
)
|
|
94
|
+
parser.add_argument(
|
|
95
|
+
"-v",
|
|
96
|
+
"--verbose",
|
|
97
|
+
action="store_true",
|
|
98
|
+
default=False,
|
|
99
|
+
help="Activate debugging mode.",
|
|
90
100
|
)
|
|
91
101
|
|
|
92
102
|
def get_sbatch_script(
|
|
93
|
-
run_id
|
|
103
|
+
run_id: str,
|
|
104
|
+
subrun: str,
|
|
105
|
+
input_file: Path,
|
|
106
|
+
output_dir: Path,
|
|
107
|
+
log_dir: Path,
|
|
108
|
+
log_file: Path,
|
|
109
|
+
ref_time: int,
|
|
110
|
+
ref_counter: int,
|
|
111
|
+
module: int,
|
|
112
|
+
ref_source: str,
|
|
113
|
+
tool: str
|
|
94
114
|
):
|
|
95
115
|
"""Build the sbatch job pilot script for running the gain selection."""
|
|
96
|
-
|
|
97
|
-
|
|
116
|
+
mem_per_job = cfg.get("SLURM", "MEMSIZE_GAINSEL")
|
|
117
|
+
sbatch_script = dedent(
|
|
98
118
|
f"""\
|
|
99
119
|
#!/bin/bash
|
|
100
120
|
|
|
101
121
|
#SBATCH -D {log_dir}
|
|
102
122
|
#SBATCH -o "gain_selection_{run_id:05d}_{subrun:04d}_%j.log"
|
|
103
123
|
#SBATCH --job-name "gain_selection_{run_id:05d}"
|
|
104
|
-
#SBATCH --export {PATH}
|
|
105
124
|
#SBATCH --partition=short,long
|
|
125
|
+
#SBATCH --mem={mem_per_job}
|
|
126
|
+
"""
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
if tool == "lst_dvr":
|
|
130
|
+
sbatch_script += dedent(
|
|
131
|
+
f"""
|
|
132
|
+
#SBATCH --export {PATH}
|
|
106
133
|
|
|
107
134
|
lst_dvr {input_file} {output_dir} {ref_time} {ref_counter} {module} {ref_source}
|
|
108
135
|
"""
|
|
109
136
|
)
|
|
137
|
+
|
|
110
138
|
elif tool == "lstchain_r0_to_r0g":
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
139
|
+
cmd = f"lstchain_r0_to_r0g --R0-file={input_file} --output-dir={output_dir} --log={log_file}"
|
|
140
|
+
if not cfg.getboolean("lstchain", "use_ff_heuristic_gain_selection"):
|
|
141
|
+
cmd += " --no-flatfield-heuristic"
|
|
142
|
+
sbatch_script += dedent(cmd)
|
|
114
143
|
|
|
115
|
-
|
|
116
|
-
#SBATCH -o "gain_selection_{run_id:05d}_{subrun:04d}_%j.log"
|
|
117
|
-
#SBATCH --job-name "gain_selection_{run_id:05d}"
|
|
118
|
-
#SBATCH --mem=40GB
|
|
119
|
-
#SBATCH --partition=short,long
|
|
144
|
+
return sbatch_script
|
|
120
145
|
|
|
121
|
-
lstchain_r0_to_r0g --R0-file={input_file} --output-dir={output_dir} --log={log_file} --no-flatfield-heuristic
|
|
122
|
-
"""
|
|
123
|
-
)
|
|
124
146
|
|
|
125
|
-
def
|
|
147
|
+
def launch_gainsel_for_data_run(
|
|
148
|
+
date: datetime, run: Table, output_dir: Path, r0_dir: Path, log_dir: Path, tool: str, simulate: bool = False
|
|
149
|
+
):
|
|
150
|
+
"""
|
|
151
|
+
Create the gain selection sbatch script and launch it for a given run.
|
|
152
|
+
|
|
153
|
+
Runs from before 20231205 without UCTS or TIB info are directly copied to the final directory.
|
|
154
|
+
Subruns that do not have four streams are also directly copied.
|
|
155
|
+
"""
|
|
156
|
+
run_id = run["run_id"]
|
|
157
|
+
ref_time = run["dragon_reference_time"]
|
|
158
|
+
ref_counter = run["dragon_reference_counter"]
|
|
159
|
+
module = run["dragon_reference_module_index"]
|
|
160
|
+
ref_source = run["dragon_reference_source"].upper()
|
|
161
|
+
|
|
162
|
+
files = glob.glob(f"{r0_dir}/LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
163
|
+
subrun_numbers = [int(file[-12:-8]) for file in files]
|
|
164
|
+
|
|
165
|
+
if tool == "lst_dvr" and ref_source not in ["UCTS", "TIB"]:
|
|
166
|
+
input_files = r0_dir.glob(f"LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
167
|
+
|
|
168
|
+
if is_run_already_copied(date, run_id):
|
|
169
|
+
log.info(f"The R0 files corresponding to run {run_id} have already been copied to the R0G directory.")
|
|
170
|
+
else:
|
|
171
|
+
if not simulate:
|
|
172
|
+
for file in input_files:
|
|
173
|
+
log.debug(
|
|
174
|
+
f"Run {run_id} does not have UCTS or TIB info, so gain selection cannot"
|
|
175
|
+
f"be applied. Copying directly the R0 files to {output_dir}."
|
|
176
|
+
)
|
|
177
|
+
sp.run(["cp", file, output_dir])
|
|
178
|
+
|
|
179
|
+
else:
|
|
180
|
+
log.info(
|
|
181
|
+
f"Run {run_id} does not have UCTS or TIB info, so gain selection cannot"
|
|
182
|
+
f"be applied. Simulate copy of the R0 files directly to {output_dir}."
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
else:
|
|
186
|
+
n_subruns = max(subrun_numbers)
|
|
187
|
+
|
|
188
|
+
for subrun in range(n_subruns + 1):
|
|
189
|
+
|
|
190
|
+
r0_files = glob.glob(f"{r0_dir}/LST-1.?.Run{run_id:05d}.{subrun:04d}.fits.fz")
|
|
191
|
+
|
|
192
|
+
if len(r0_files) != 4:
|
|
193
|
+
if not simulate and not is_run_already_copied(date, run_id):
|
|
194
|
+
log.debug(f"Run {run_id:05d}.{subrun:04d} does not have 4 streams of R0 files, so gain"
|
|
195
|
+
f"selection cannot be applied. Copying directly the R0 files to {output_dir}.")
|
|
196
|
+
for file in r0_files:
|
|
197
|
+
sp.run(["cp", file, output_dir])
|
|
198
|
+
elif is_run_already_copied(date, run_id):
|
|
199
|
+
log.debug(f"Run {run_id:05d}.{subrun:04d} does not have 4 streams of R0 files. The R0 files"
|
|
200
|
+
f"have already been copied to {output_dir}.")
|
|
201
|
+
elif simulate:
|
|
202
|
+
log.debug(f"Run {run_id:05d}.{subrun:04d} does not have 4 streams of R0 files, so gain"
|
|
203
|
+
f"selection cannot be applied. Simulate copy of the R0 files directly to {output_dir}.")
|
|
204
|
+
|
|
205
|
+
else:
|
|
206
|
+
history_file = log_dir / f"gain_selection_{run_id:05d}.{subrun:04d}.history"
|
|
207
|
+
if history_file.exists():
|
|
208
|
+
if not simulate:
|
|
209
|
+
update_history_file(run_id, subrun, log_dir, history_file)
|
|
210
|
+
|
|
211
|
+
if history_file.read_text() == "": # history_file is empty
|
|
212
|
+
log.debug(f"Gain selection is still running for run {run_id:05d}.{subrun:04d}")
|
|
213
|
+
continue
|
|
214
|
+
else:
|
|
215
|
+
gainsel_rc = history_file.read_text().splitlines()[-1][-1]
|
|
216
|
+
if gainsel_rc == "1":
|
|
217
|
+
job_id = get_last_job_id(run_id, subrun, log_dir)
|
|
218
|
+
if job_finished_in_timeout(job_id) and not simulate:
|
|
219
|
+
# Relaunch the job that finished in TIMEOUT
|
|
220
|
+
job_file = log_dir / f"gain_selection_{run_id:05d}.{subrun:04d}.sh"
|
|
221
|
+
sp.run(["sbatch", job_file], stdout=sp.PIPE, stderr=sp.STDOUT, check=True)
|
|
222
|
+
else:
|
|
223
|
+
log.warning(f"Gain selection failed for run {run_id:05d}.{subrun:04d}")
|
|
224
|
+
elif gainsel_rc == "0":
|
|
225
|
+
log.debug(f"Gain selection finished successfully for run {run_id:05d}.{subrun:04d},"
|
|
226
|
+
"no additional jobs will be submitted for this subrun.")
|
|
227
|
+
else:
|
|
228
|
+
log.debug("Creating and launching the gain selection sbatch script for subrun {run_id:05d}.{subrun:04d}")
|
|
229
|
+
if not simulate:
|
|
230
|
+
log_file = log_dir / f"r0_to_r0g_{run_id:05d}.{subrun:04d}.log"
|
|
231
|
+
job_file = log_dir / f"gain_selection_{run_id:05d}.{subrun:04d}.sh"
|
|
232
|
+
r0_files.sort()
|
|
233
|
+
with open(job_file, "w") as f:
|
|
234
|
+
f.write(
|
|
235
|
+
get_sbatch_script(
|
|
236
|
+
run_id,
|
|
237
|
+
subrun,
|
|
238
|
+
r0_files[0],
|
|
239
|
+
output_dir,
|
|
240
|
+
log_dir,
|
|
241
|
+
log_file,
|
|
242
|
+
ref_time,
|
|
243
|
+
ref_counter,
|
|
244
|
+
module,
|
|
245
|
+
ref_source,
|
|
246
|
+
tool,
|
|
247
|
+
)
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
#submit job
|
|
251
|
+
history_file.touch()
|
|
252
|
+
sp.run(["sbatch", job_file], stdout=sp.PIPE, stderr=sp.STDOUT, check=True)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def apply_gain_selection(date: datetime, start: int, end: int, tool: str = None, no_queue_check: bool = False, simulate: bool = False):
|
|
126
256
|
"""
|
|
127
257
|
Submit the jobs to apply the gain selection to the data for a given date
|
|
128
258
|
on a subrun-by-subrun basis.
|
|
129
259
|
"""
|
|
130
260
|
|
|
131
261
|
if not tool:
|
|
132
|
-
if date < "20231205":
|
|
262
|
+
if date_to_dir(date) < "20231205":
|
|
133
263
|
tool = "lst_dvr"
|
|
134
264
|
else:
|
|
135
265
|
tool = "lstchain_r0_to_r0g"
|
|
136
266
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
summary_table
|
|
267
|
+
summary_table = run_summary_table(date)
|
|
268
|
+
|
|
269
|
+
if len(summary_table) == 0:
|
|
270
|
+
log.warning(f"No runs are found in the run summary of {date_to_iso(date)}. Nothing to do. Exiting.")
|
|
271
|
+
sys.exit(0)
|
|
272
|
+
|
|
140
273
|
# Apply gain selection only to DATA runs
|
|
141
274
|
data_runs = summary_table[summary_table["run_type"] == "DATA"]
|
|
142
275
|
log.info(f"Found {len(data_runs)} DATA runs to which apply the gain selection")
|
|
143
276
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
277
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
278
|
+
date_str = date_to_dir(date)
|
|
279
|
+
r0_dir = base_dir / "R0" / date_str
|
|
280
|
+
output_dir = base_dir / f"R0G/{date_str}"
|
|
281
|
+
log_dir = base_dir / f"R0G/log/{date_str}"
|
|
282
|
+
if not simulate:
|
|
283
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
284
|
+
log_dir.mkdir(parents=True, exist_ok=True)
|
|
150
285
|
|
|
151
286
|
for run in data_runs:
|
|
152
287
|
if not no_queue_check:
|
|
@@ -156,171 +291,210 @@ def apply_gain_selection(date: str, start: int, end: int, output_basedir: Path =
|
|
|
156
291
|
# Avoid running jobs while it is still night time
|
|
157
292
|
wait_for_daytime(start, end)
|
|
158
293
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
files = glob.glob(f"{r0_dir}/LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
166
|
-
subrun_numbers = [int(file[-12:-8]) for file in files]
|
|
167
|
-
input_files = []
|
|
168
|
-
|
|
169
|
-
if tool == "lst_dvr" and ref_source not in ["UCTS", "TIB"]:
|
|
170
|
-
input_files = r0_dir.glob(f"LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
171
|
-
log.info(
|
|
172
|
-
f"Run {run_id} does not have UCTS or TIB info, so gain selection cannot"
|
|
173
|
-
f"be applied. Copying directly the R0 files to {output_dir}."
|
|
174
|
-
)
|
|
175
|
-
for file in input_files:
|
|
176
|
-
sp.run(["cp", file, output_dir])
|
|
294
|
+
if not is_closed(date, run["run_id"]):
|
|
295
|
+
launch_gainsel_for_data_run(date, run, output_dir, r0_dir, log_dir, tool, simulate)
|
|
296
|
+
|
|
297
|
+
calib_runs = summary_table[summary_table["run_type"] != "DATA"]
|
|
298
|
+
log.info(f"Found {len(calib_runs)} NO-DATA runs")
|
|
177
299
|
|
|
300
|
+
for run in calib_runs:
|
|
301
|
+
run_id = run["run_id"]
|
|
302
|
+
|
|
303
|
+
if is_run_already_copied(date, run_id):
|
|
304
|
+
log.info(f"The R0 files corresponding to run {run_id:05d} have already been copied, nothing to do.")
|
|
178
305
|
else:
|
|
179
|
-
|
|
306
|
+
log.info(f"Copying R0 files corresponding to run {run_id} directly to {output_dir}")
|
|
307
|
+
if not simulate:
|
|
308
|
+
# Avoid copying files while it is still night time
|
|
309
|
+
wait_for_daytime(start, end)
|
|
180
310
|
|
|
181
|
-
|
|
182
|
-
new_files = glob.glob(f"{r0_dir}/LST-1.?.Run{run_id:05d}.{subrun:04d}.fits.fz")
|
|
311
|
+
r0_files = r0_dir.glob(f"LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
183
312
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
f"selection cannot be applied. Copying directly the R0 files to {output_dir}."
|
|
187
|
-
)
|
|
188
|
-
for file in new_files:
|
|
189
|
-
sp.run(["cp", file, output_dir])
|
|
313
|
+
for file in r0_files:
|
|
314
|
+
sp.run(["cp", file, output_dir])
|
|
190
315
|
|
|
191
|
-
else:
|
|
192
|
-
new_files.sort()
|
|
193
|
-
input_files.append(new_files[0])
|
|
194
|
-
|
|
195
|
-
log.info("Creating and launching the sbatch scripts for the rest of the runs to apply gain selection")
|
|
196
|
-
for file in input_files:
|
|
197
|
-
run_info = run_info_from_filename(file)
|
|
198
|
-
job_file = log_dir / f"gain_selection_{run_info.run:05d}.{run_info.subrun:04d}.sh"
|
|
199
|
-
with open(job_file, "w") as f:
|
|
200
|
-
f.write(
|
|
201
|
-
get_sbatch_script(
|
|
202
|
-
run_id,
|
|
203
|
-
run_info.subrun,
|
|
204
|
-
file,
|
|
205
|
-
output_dir,
|
|
206
|
-
log_dir,
|
|
207
|
-
log_file,
|
|
208
|
-
ref_time,
|
|
209
|
-
ref_counter,
|
|
210
|
-
module,
|
|
211
|
-
ref_source,
|
|
212
|
-
tool,
|
|
213
|
-
)
|
|
214
|
-
)
|
|
215
|
-
sp.run(["sbatch", job_file], check=True)
|
|
216
316
|
|
|
217
|
-
|
|
218
|
-
|
|
317
|
+
def get_last_job_id(run_id: str, subrun: str, log_dir: Path) -> int:
|
|
318
|
+
"""Get job id of the last gain selection job that was launched for a given subrun."""
|
|
319
|
+
filenames = glob.glob(f"{log_dir}/gain_selection_{run_id:05d}_{subrun:04d}_*.log")
|
|
320
|
+
if filenames:
|
|
321
|
+
match = re.search(f'gain_selection_{run_id:05d}_{subrun:04d}_(\d+).log', sorted(filenames)[-1])
|
|
322
|
+
job_id = match.group(1)
|
|
323
|
+
return job_id
|
|
219
324
|
|
|
220
|
-
for run in calib_runs:
|
|
221
|
-
run_id = run["run_id"]
|
|
222
|
-
log.info(f"Copying R0 files corresponding to run {run_id} directly to {output_dir}")
|
|
223
|
-
# Avoid copying files while it is still night time
|
|
224
|
-
wait_for_daytime(start, end)
|
|
225
325
|
|
|
226
|
-
|
|
227
|
-
|
|
326
|
+
def update_history_file(run_id: str, subrun: str, log_dir: Path, history_file: Path):
|
|
327
|
+
"""
|
|
328
|
+
Update the gain selection history file with the result
|
|
329
|
+
of the last job launched for a given subrun.
|
|
330
|
+
"""
|
|
331
|
+
job_id = get_last_job_id(run_id, subrun, log_dir)
|
|
332
|
+
if not job_id:
|
|
333
|
+
log.debug(f"Cannot find a job_id for the run {run_id:05d}.{subrun:04d}")
|
|
334
|
+
else:
|
|
335
|
+
job_status = get_sacct_output(run_sacct(job_id=job_id))["State"]
|
|
336
|
+
if job_status.item() in ["RUNNING", "PENDING"]:
|
|
337
|
+
log.info(f"Job {job_id} is still running.")
|
|
338
|
+
return
|
|
339
|
+
|
|
340
|
+
elif job_status.item() == "COMPLETED":
|
|
341
|
+
log.debug(f"Job {job_id} finished successfully, updating history file.")
|
|
342
|
+
string_to_write = (
|
|
343
|
+
f"{run_id:05d}.{subrun:04d} gain_selection 0\n"
|
|
344
|
+
)
|
|
345
|
+
append_to_file(history_file, string_to_write)
|
|
346
|
+
|
|
347
|
+
else:
|
|
348
|
+
log.info(f"Job {job_id} failed, updating history file.")
|
|
349
|
+
string_to_write = (
|
|
350
|
+
f"{run_id:05d}.{subrun:04d} gain_selection 1\n"
|
|
351
|
+
)
|
|
352
|
+
append_to_file(history_file, string_to_write)
|
|
228
353
|
|
|
229
|
-
for file in r0_files:
|
|
230
|
-
sp.run(["cp", file, output_dir])
|
|
231
354
|
|
|
232
|
-
def
|
|
233
|
-
"""
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
355
|
+
def is_run_already_copied(date: datetime, run_id: int) -> bool:
|
|
356
|
+
"""Check if the R0 files of a given run have already been copied to the R0G directory."""
|
|
357
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
358
|
+
r0_files = glob.glob(f"{base_dir}/R0/{date_to_dir(date)}/LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
359
|
+
r0g_files = glob.glob(f"{base_dir}/R0G/{date_to_dir(date)}/LST-1.?.Run{run_id:05d}.????.fits.fz")
|
|
360
|
+
return len(r0_files)==len(r0g_files)
|
|
237
361
|
|
|
238
|
-
sacct_cmd = [
|
|
239
|
-
"sacct",
|
|
240
|
-
"-n",
|
|
241
|
-
"--parsable2",
|
|
242
|
-
"--delimiter=,",
|
|
243
|
-
"--units=G",
|
|
244
|
-
"-o",
|
|
245
|
-
",".join(FORMAT_SLURM),
|
|
246
|
-
"-j",
|
|
247
|
-
job,
|
|
248
|
-
]
|
|
249
362
|
|
|
250
|
-
|
|
363
|
+
def is_closed(date: datetime, run_id: str) -> bool:
|
|
364
|
+
"""Check if run is already closed."""
|
|
365
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
366
|
+
log_dir = base_dir / f"R0G/log/{date_to_dir(date)}"
|
|
367
|
+
closed_run_file = log_dir / f"gain_selection_{run_id:05d}.closed"
|
|
368
|
+
return closed_run_file.exists()
|
|
251
369
|
|
|
252
370
|
|
|
253
|
-
def GainSel_flag_file(date:
|
|
371
|
+
def GainSel_flag_file(date: datetime) -> Path:
|
|
372
|
+
"""Return the path to the file indicating the completion of the gain selection stage."""
|
|
254
373
|
filename = cfg.get("LSTOSA", "gain_selection_check")
|
|
255
374
|
GainSel_dir = Path(cfg.get("LST1", "GAIN_SELECTION_FLAG_DIR"))
|
|
256
|
-
flagfile = GainSel_dir / date / filename
|
|
375
|
+
flagfile = GainSel_dir / date_to_dir(date) / filename
|
|
257
376
|
return flagfile.resolve()
|
|
258
377
|
|
|
259
378
|
|
|
260
|
-
def GainSel_finished(date:
|
|
379
|
+
def GainSel_finished(date: datetime) -> bool:
|
|
261
380
|
"""Check if gain selection finished successfully."""
|
|
262
381
|
flagfile = GainSel_flag_file(date)
|
|
263
382
|
return flagfile.exists()
|
|
383
|
+
|
|
264
384
|
|
|
265
|
-
|
|
266
|
-
def check_failed_jobs(date: str, output_basedir: Path = None):
|
|
385
|
+
def check_gainsel_jobs_runwise(date: datetime, run_id: int) -> bool:
|
|
267
386
|
"""Search for failed jobs in the log directory."""
|
|
268
|
-
|
|
269
|
-
log_dir =
|
|
270
|
-
|
|
271
|
-
|
|
387
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
388
|
+
log_dir = base_dir / f"R0G/log/{date_to_dir(date)}"
|
|
389
|
+
history_files = list(log_dir.glob(f"gain_selection_{run_id:05d}.????.history"))
|
|
390
|
+
summary_table = run_summary_table(date)
|
|
391
|
+
n_subruns = summary_table[summary_table["run_id"] == run_id]["n_subruns"]
|
|
392
|
+
|
|
393
|
+
if len(history_files) != n_subruns:
|
|
394
|
+
log.debug(f"All history files of run {run_id} were not created yet")
|
|
395
|
+
return False
|
|
396
|
+
|
|
397
|
+
failed_subruns = []
|
|
398
|
+
log.info(f"Checking all history files of run {run_id}")
|
|
399
|
+
|
|
400
|
+
for file in history_files:
|
|
401
|
+
match = re.search(f"gain_selection_{run_id:05d}.(\d+).history", str(file))
|
|
402
|
+
subrun = match.group(1)
|
|
403
|
+
if file.read_text() != "":
|
|
404
|
+
gainsel_rc = file.read_text().splitlines()[-1][-1]
|
|
405
|
+
|
|
406
|
+
if gainsel_rc == "1":
|
|
407
|
+
log.warning(f"Gain selection failed for run {run_id}.{subrun}")
|
|
408
|
+
failed_subruns.append(file)
|
|
409
|
+
|
|
410
|
+
elif gainsel_rc == "0":
|
|
411
|
+
log.debug(f"Gain selection finished successfully for run {run_id}.{subrun}")
|
|
412
|
+
else:
|
|
413
|
+
log.info(f"Gain selection is still running for run {run_id}.{subrun}")
|
|
414
|
+
return False
|
|
415
|
+
|
|
416
|
+
if failed_subruns:
|
|
417
|
+
log.warning(f"{date_to_iso(date)}: Some gain selection jobs did not finish successfully for run {run_id}")
|
|
418
|
+
return False
|
|
419
|
+
else:
|
|
420
|
+
log.info(f"{date_to_iso(date)}: All jobs finished successfully for run {run_id}, creating the corresponding .closed file")
|
|
421
|
+
closed_run_file = log_dir / f"gain_selection_{run_id:05d}.closed"
|
|
422
|
+
closed_run_file.touch()
|
|
423
|
+
return True
|
|
272
424
|
|
|
273
|
-
for job in jobs:
|
|
274
|
-
output = run_sacct_j(job)
|
|
275
|
-
df = get_sacct_output(output)
|
|
276
425
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
426
|
+
def check_warnings_in_logs(date: datetime, run_id: int):
|
|
427
|
+
"""Look for warnings in the log files created by lstchain_r0_to_r0g."""
|
|
428
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
429
|
+
log_dir = base_dir / f"R0G/log/{date_to_dir(date)}"
|
|
430
|
+
log_files = log_dir.glob(f"r0_to_r0g_{run_id:05d}.*.log")
|
|
431
|
+
for file in log_files:
|
|
432
|
+
content = file.read_text().splitlines()
|
|
433
|
+
for line in content:
|
|
434
|
+
if "FlatField(FF)-like events are not tagged as FF" in line:
|
|
435
|
+
log.warning(f"Warning for run {run_id}: {line}")
|
|
280
436
|
|
|
281
|
-
if failed_jobs:
|
|
282
|
-
log.warning(f"{date}: some jobs did not finish successfully")
|
|
283
437
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
run_summary_dir = Path("/fefs/aswg/data/real/monitoring/RunSummary")
|
|
289
|
-
run_summary_file = run_summary_dir / f"RunSummary_{date}.ecsv"
|
|
290
|
-
summary_table = Table.read(run_summary_file)
|
|
291
|
-
runs = summary_table["run_id"]
|
|
292
|
-
missing_runs = []
|
|
293
|
-
|
|
294
|
-
r0_files = glob.glob(f"/fefs/aswg/data/real/R0/{date}/LST-1.?.Run?????.????.fits.fz")
|
|
295
|
-
r0g_files = glob.glob(f"/fefs/aswg/data/real/R0G/{date}/LST-1.?.Run?????.????.fits.fz")
|
|
296
|
-
all_r0_runs = [parse_r0_filename(i).run for i in r0_files]
|
|
297
|
-
all_r0g_runs = [parse_r0_filename(i).run for i in r0g_files]
|
|
298
|
-
|
|
299
|
-
for run in all_r0_runs:
|
|
300
|
-
if run not in runs:
|
|
301
|
-
if run not in all_r0g_runs:
|
|
302
|
-
missing_runs.append(run)
|
|
303
|
-
|
|
304
|
-
missing_runs.sort()
|
|
305
|
-
if missing_runs:
|
|
306
|
-
log.info(
|
|
307
|
-
f"Some runs are missing. Copying R0 files of runs {pd.Series(missing_runs).unique()} "
|
|
308
|
-
f"directly to /fefs/aswg/data/real/R0G/{date}"
|
|
309
|
-
)
|
|
438
|
+
def check_failed_jobs(date: datetime):
|
|
439
|
+
"""Search for failed jobs in the log directory."""
|
|
310
440
|
|
|
311
|
-
|
|
312
|
-
output_dir = Path(f"/fefs/aswg/data/real/R0G/{date}/")
|
|
313
|
-
files = glob.glob(f"/fefs/aswg/data/real/R0/{date}/LST-1.?.Run{run:05d}.????.fits.fz")
|
|
314
|
-
for file in files:
|
|
315
|
-
sp.run(["cp", file, output_dir])
|
|
441
|
+
summary_table = run_summary_table(date)
|
|
316
442
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
443
|
+
if len(summary_table) == 0:
|
|
444
|
+
log.warning(f"No runs are found in the run summary of {date_to_iso(date)}. Nothing to do. Exiting.")
|
|
445
|
+
sys.exit(0)
|
|
320
446
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
447
|
+
data_runs = summary_table[summary_table["run_type"] == "DATA"]
|
|
448
|
+
failed_runs = []
|
|
449
|
+
|
|
450
|
+
for run in data_runs:
|
|
451
|
+
run_id = run["run_id"]
|
|
452
|
+
check_warnings_in_logs(date, run_id)
|
|
453
|
+
if not is_closed(date, run_id):
|
|
454
|
+
if not check_gainsel_jobs_runwise(date, run_id):
|
|
455
|
+
log.warning(f"Gain selection did not finish successfully for run {run_id}.")
|
|
456
|
+
failed_runs.append(run)
|
|
457
|
+
|
|
458
|
+
if failed_runs:
|
|
459
|
+
log.warning(f"Gain selection did not finish successfully for {date_to_iso(date)}, cannot create the flag file.")
|
|
460
|
+
return
|
|
461
|
+
|
|
462
|
+
runs = summary_table["run_id"]
|
|
463
|
+
missing_runs = []
|
|
464
|
+
|
|
465
|
+
date_str = date_to_dir(date)
|
|
466
|
+
base_dir = Path(cfg.get("LST1", "BASE"))
|
|
467
|
+
r0_files = glob.glob(f"{base_dir}/R0/{date_str}/LST-1.?.Run?????.????.fits.fz")
|
|
468
|
+
r0g_files = glob.glob(f"{base_dir}/R0G/{date_str}/LST-1.?.Run?????.????.fits.fz")
|
|
469
|
+
all_r0_runs = [parse_r0_filename(i).run for i in r0_files]
|
|
470
|
+
all_r0g_runs = [parse_r0_filename(i).run for i in r0g_files]
|
|
471
|
+
|
|
472
|
+
for run in all_r0_runs:
|
|
473
|
+
if run not in runs:
|
|
474
|
+
if run not in all_r0g_runs:
|
|
475
|
+
missing_runs.append(run)
|
|
476
|
+
|
|
477
|
+
missing_runs.sort()
|
|
478
|
+
if missing_runs:
|
|
479
|
+
output_dir = base_dir / f"R0G/{date_str}/"
|
|
480
|
+
log.info(
|
|
481
|
+
f"Some runs are missing. Copying R0 files of runs {pd.Series(missing_runs).unique()} "
|
|
482
|
+
f"directly to {output_dir}"
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
for run in missing_runs:
|
|
486
|
+
|
|
487
|
+
files = base_dir.glob(f"R0/{date_str}/LST-1.?.Run{run:05d}.????.fits.fz")
|
|
488
|
+
for file in files:
|
|
489
|
+
sp.run(["cp", file, output_dir])
|
|
490
|
+
|
|
491
|
+
GainSel_dir = Path(cfg.get("LST1", "GAIN_SELECTION_FLAG_DIR"))
|
|
492
|
+
flagfile_dir = GainSel_dir / date_str
|
|
493
|
+
flagfile_dir.mkdir(parents=True, exist_ok=True)
|
|
494
|
+
|
|
495
|
+
flagfile = GainSel_flag_file(date)
|
|
496
|
+
log.info(f"Gain selection finished successfully, creating flag file for date {date_to_iso(date)} ({flagfile})")
|
|
497
|
+
flagfile.touch()
|
|
324
498
|
|
|
325
499
|
|
|
326
500
|
def main():
|
|
@@ -329,22 +503,29 @@ def main():
|
|
|
329
503
|
script for each of them. The input file should list the dates in the format
|
|
330
504
|
YYYYMMDD one date per line.
|
|
331
505
|
"""
|
|
332
|
-
log.setLevel(logging.INFO)
|
|
333
506
|
args = parser.parse_args()
|
|
507
|
+
|
|
508
|
+
if args.verbose:
|
|
509
|
+
log.setLevel(logging.DEBUG)
|
|
510
|
+
else:
|
|
511
|
+
log.setLevel(logging.INFO)
|
|
334
512
|
|
|
335
513
|
if args.date:
|
|
336
|
-
if args.
|
|
337
|
-
log.
|
|
338
|
-
|
|
514
|
+
if GainSel_finished(args.date):
|
|
515
|
+
log.warning(f"Gain selection already done for date {date_to_iso(args.date)}. Exiting.")
|
|
516
|
+
sys.exit(0)
|
|
517
|
+
elif args.check:
|
|
518
|
+
log.info(f"Checking gain selection status for date {date_to_iso(args.date)}")
|
|
519
|
+
check_failed_jobs(args.date)
|
|
339
520
|
else:
|
|
340
|
-
log.info(f"
|
|
521
|
+
log.info(f"\nApplying gain selection to date {date_to_iso(args.date)}")
|
|
341
522
|
apply_gain_selection(
|
|
342
523
|
args.date,
|
|
343
524
|
args.start_time,
|
|
344
|
-
args.end_time,
|
|
345
|
-
args.output_basedir,
|
|
525
|
+
args.end_time,
|
|
346
526
|
args.tool,
|
|
347
527
|
no_queue_check=args.no_queue_check,
|
|
528
|
+
simulate=args.simulate,
|
|
348
529
|
)
|
|
349
530
|
|
|
350
531
|
|
|
@@ -355,7 +536,7 @@ def main():
|
|
|
355
536
|
if args.check:
|
|
356
537
|
for date in list_of_dates:
|
|
357
538
|
log.info(f"Checking gain selection status for date {date}")
|
|
358
|
-
check_failed_jobs(date
|
|
539
|
+
check_failed_jobs(date)
|
|
359
540
|
else:
|
|
360
541
|
for date in list_of_dates:
|
|
361
542
|
log.info(f"Applying gain selection to date {date}")
|
|
@@ -363,9 +544,9 @@ def main():
|
|
|
363
544
|
date,
|
|
364
545
|
args.start_time,
|
|
365
546
|
args.end_time,
|
|
366
|
-
args.output_basedir,
|
|
367
547
|
args.tool,
|
|
368
548
|
no_queue_check=args.no_queue_check,
|
|
549
|
+
simulate=args.simulate,
|
|
369
550
|
)
|
|
370
551
|
log.info("Done! No more dates to process.")
|
|
371
552
|
|