km3smk-0.2.0-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- km3smk/__init__.py +13 -0
- km3smk/bookkeeping.py +49 -0
- km3smk/onerror.py +253 -0
- km3smk/onstart.py +134 -0
- km3smk/tools.py +76 -0
- km3smk/version.py +34 -0
- km3smk/workflow_settings.py +60 -0
- km3smk-0.2.0.dist-info/LICENSE +29 -0
- km3smk-0.2.0.dist-info/METADATA +108 -0
- km3smk-0.2.0.dist-info/RECORD +12 -0
- km3smk-0.2.0.dist-info/WHEEL +6 -0
- km3smk-0.2.0.dist-info/top_level.txt +1 -0
km3smk/__init__.py
ADDED
@@ -0,0 +1,13 @@
try:
    from importlib.metadata import version as get_version

    version = get_version(__name__)
except ImportError:
    from pkg_resources import get_distribution

    version = get_distribution(__name__).version

from . import workflow_settings
from . import onstart
from . import onerror
from .tools import *
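Importing the package pulls in all three hook modules, so a Snakefile only needs a single import. Below is a minimal, hypothetical wiring sketch, not part of the package itself; `workflow` and `config` are the globals Snakemake exposes while parsing a Snakefile, and the handler placement is an assumption:

# Hypothetical Snakefile sketch using the hooks exposed by km3smk.
import km3smk

km3smk.workflow_settings.default(workflow)  # apptainer args, socket limits, cache dir

onstart:
    km3smk.onstart.default(workflow, config)

onerror:
    km3smk.onerror.default(workflow, config)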
km3smk/bookkeeping.py
ADDED
@@ -0,0 +1,49 @@
import os
import yaml
import shutil

from pathlib import Path
from snakemake.logging import logger
from snakemake.shell import shell


def bookkeep_inputs(workflow, config):
    """
    Bookkeeping function for the inputs. Stores:
    - all input configfiles in a configfiles directory
    - a config.yaml file that contains the dump of the config dictionary
    - input_runs_list which contains the runs list used, if one was provided
    - git_commit which contains a trace of the currently used commit
    - a symlink to the snakemake log file
    """

    log_file = Path(workflow.logger_manager.get_logfile()[0])
    output_path = Path(f"bookkeeping/inputs/{log_file.name.split('.')[0]}/")

    if output_path.exists():
        raise Exception(f"{output_path} already exists!")

    output_path.mkdir(parents=True)

    # Create a symlink to the log file
    (output_path / "log_file").symlink_to(log_file)

    # Export configuration files
    output_path_configfiles = output_path / "configfiles"
    output_path_configfiles.mkdir()
    for configfile in workflow.configfiles:
        shutil.copy(configfile, output_path_configfiles)

    # Export effective configuration
    with open(output_path / "config.yaml", "w") as yaml_file:
        yaml.dump(config, yaml_file)

    # Export runs_list
    if "runs_list" in config:
        shutil.copy(config["runs_list"], output_path / "input_runs_list")

    # Export the git commit from the rbdp repo
    git_trace = output_path / "git_commit"
    shell(f"git -C {workflow.basedir} describe --dirty --always --long > {git_trace}")

    logger.info(f"Inputs bookkeeping done in {output_path}")
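For orientation, a hedged sketch of the directory naming used above; the exact Snakemake log-file name format depends on the Snakemake version, so the name below is an assumption:

# Hypothetical illustration of the bookkeeping directory naming.
from pathlib import Path

log_file = Path(".snakemake/log/2024-05-01T103000_123456.snakemake.log")  # assumed name
print(f"bookkeeping/inputs/{log_file.name.split('.')[0]}/")
# -> bookkeeping/inputs/2024-05-01T103000_123456/
#    containing: log_file (symlink), configfiles/, config.yaml,
#    input_runs_list (if provided) and git_commit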
km3smk/onerror.py
ADDED
@@ -0,0 +1,253 @@
import os
import pandas as pd
import numpy as np
import shutil
import tarfile
from pathlib import Path
from snakemake.logging import logger
from snakemake.jobs import Job, GroupJob
import subprocess


def default(workflow, config):
    logger.info("-" * 20 + " On error actions " + "-" * 20)
    error_path = Path("./error_reports/")
    logger.info(f"Creating reproducible trace in {error_path}")

    if not error_path.exists():
        error_path.mkdir(parents=True)

    for job in workflow.scheduler.failed:
        pack_errors(job, error_path)

    # logger.error("Execution ended with error, printing some custom details ...")
    onerror_report(workflow, max_printed_details=0)
    logger.info("-" * 20 + " Completed " + "-" * 20)


def jobs2df(jobs, tag="#", return_tag_only=False):
    """Convert a jobs iterable into a pandas dataframe"""

    df = []
    for j in jobs:
        if isinstance(j, Job):
            df.append(
                {
                    **{
                        "rule": j.name,
                        "logs": j.log,
                        "jobobj": j,
                        "job id": j.jobid,
                        "inputs": j.input,
                        "outputs": j.output,
                    },
                    **j.wildcards,
                }
            )
        elif isinstance(j, GroupJob):
            df.append(
                {
                    **{
                        "rule": j.name,
                        "logs": j.log,
                        "jobobj": j,
                        # "job id": j.jobid,
                        "group id": j.jobid,
                        "inputs": j.input,
                        "outputs": j.output,
                    }
                }
            )
        else:
            raise Exception(f"Unknown job format: {type(j)}")

    if len(df) == 0:
        return None
    df = pd.DataFrame(df).set_index("jobobj")
    df[tag] = 1
    if return_tag_only:
        df = df[[tag]]
    return df


def update_flag_failed_group_jobs(jobs, df, tag="Failed group"):
    """Update the failed tag of rules that are part of a failed group job"""
    for i, gj in enumerate(jobs):
        if isinstance(gj, GroupJob):
            for j in gj.jobs:
                df.at[j, tag] = True


def sort_subjobs_outputs(groupjob):
    """Sort information about the subjobs of a grouped job"""
    df = jobs2df(groupjob.jobs)
    full_products = []
    last_time_updated = []
    wildcards = []
    for ind, row in df.iterrows():
        wildcards += list(ind.wildcards.keys())
        latest = [os.path.getmtime(f) for f in ind.products() if os.path.exists(f)]
        if len(latest) < len(ind.products()):
            full_products.append(False)
        else:
            full_products.append(True)
        if len(latest) == 0:
            last_time_updated.append(np.nan)
        else:
            last_time_updated.append(np.max(latest))
    df["Products available"] = full_products
    df["Latest output"] = pd.to_datetime(last_time_updated, unit="s")
    wildcards = set(wildcards)
    wildcards.remove("detid")
    wildcards.remove("run")
    df = df.sort_values(["detid", "run", "rule", "Latest output"])
    df = df.set_index(["detid", "run", "rule"])
    return df


def onerror_report(workflow, max_printed_details=10):
    """
    Print a report about the failure.
    """
    tags = ["Job in DAG", "Planned", "Waiting", "Failed", "Failed group"]

    # Collect information about jobs in the DAG: to run, failed, etc.
    df = pd.concat(
        (
            jobs2df(workflow.persistence.dag.jobs, "Job in DAG"),
            jobs2df(
                workflow.persistence.dag.needrun_jobs(exclude_finished=False),
                "Planned",
                True,
            ),
            jobs2df(workflow.persistence.dag.needrun_jobs(), "Waiting", True),
            jobs2df(workflow.scheduler.failed, "Failed", True),
        ),
        axis=1,
    )

    # Check which rule was part of a failing group
    update_flag_failed_group_jobs(workflow.scheduler.failed, df)

    # Somehow group jobs are missing in needrun_jobs
    if "group id" in df.columns:
        group_jobs = df.dropna(subset=["group id"]).index
        df.loc[group_jobs, "Planned"] = 1

    # Make a boolean matrix for the tags
    for tag in tags:
        if tag not in df.columns:
            df[tag] = False
        else:
            df[tag] = df[tag].replace(np.nan, 0).astype(bool)

    df["Waiting"] = (
        df["Waiting"].values & (df["Failed"] == False) & (df["Failed group"] == False)
    )

    # Create rule-wise and run-wise summary tables
    df_rules = df.groupby("rule").sum(numeric_only=True)[tags]
    df_rules["Failure [%]"] = (
        100.0 * df_rules["Failed"] / (df_rules["Planned"] - df_rules["Waiting"])
    )

    df_run = df.groupby(["detid", "run"]).sum(numeric_only=True)[tags]

    # Print details about the failing rules
    n_failed_rules = df["Failed"].sum()
    if max_printed_details < 0:
        max_printed_details = n_failed_rules
    if n_failed_rules > max_printed_details and n_failed_rules > 0:
        n_failed_rules = max_printed_details

    logger.info(f"Printing short details for {n_failed_rules} failed rules ...")
    list_print = ["logs", "inputs", "outputs"]
    for i in range(n_failed_rules):
        row = df[df["Failed"]].iloc[i].drop(index=tags)
        job = row.name
        logger.info("\n" * 2 + "-" * 10 + f" Error in {row.name} " + "-" * 10)
        logger.info(row.dropna().drop(index=list_print).to_string())
        # Handle grouped job details
        if isinstance(job, GroupJob):
            df_group = sort_subjobs_outputs(job)
            with pd.option_context("display.max_rows", None):
                logger.info(df_group.drop(columns=["inputs", "outputs", "#"]))

        # Handle single task details
        else:
            for l in list_print:
                if len(row[l]) > 0:
                    logger.info(f"{l} :")
                    for el in row[l]:
                        logger.info(f"\t - {el}")

    # Print a generic summary about failure rates etc.
    logger.info("\n" + "-" * 30)
    logger.info("Rule-wise summary")
    logger.info(str(df_rules.replace(np.nan, "-")))
    logger.info("\n" + "-" * 30)
    logger.info("Run-wise failure summary")
    logger.info(str(df_run[df_run["Failed"].astype(bool)]))


def pack_errors(job, basedir, make_archive=False):
    """
    Create a directory containing the instructions and inputs for reproducibility

    :parameters: - ``job`` is a Job object from Snakemake
                 - ``basedir`` is the path in which the workdir will be created
                 - ``make_archive`` will also archive the directory
    """
    dir_name = "_".join(v for v in job.wildcards)
    output_dir = basedir / job.name / dir_name

    if output_dir.exists():
        shutil.rmtree(output_dir)

    output_dir.mkdir(parents=True)

    shell_path = output_dir / "shell.sh"
    with open(shell_path, "w") as fout:
        fout.write("#!/usr/bin/env bash\n")
        if job.shellcmd:
            fout.write(job.shellcmd)

    shell_path.chmod(int("0o100775", 8))

    input_files = []
    for input_file in job.input:
        p = output_dir / input_file
        p.parent.mkdir(parents=True, exist_ok=True)
        p.symlink_to(Path(input_file).absolute())
        input_files.append(p)

    log_files = []
    for log_file in job.log:
        p = output_dir / log_file
        p.parent.mkdir(parents=True, exist_ok=True)
        p.symlink_to(Path(log_file).absolute())
        log_files.append(p)

    archive_script_path = output_dir / "archive.sh"
    archive_path = output_dir / f"{job.name}.{dir_name}.tar.gz"

    with open(archive_script_path, "w") as fout:
        fout.write("#!/usr/bin/env bash\n")
        fout.write(
            "export FILES_LIST='{}'\n".format(
                " ".join([str(p.relative_to(output_dir)) for p in input_files])
            )
        )
        fout.write(
            "export LOGS_LIST='{}'\n".format(
                " ".join([str(p.relative_to(output_dir)) for p in log_files])
            )
        )

        fout.write(
            f"tar --dereference -cvzf {archive_path.name} shell.sh $FILES_LIST $LOGS_LIST\n"
        )
    archive_script_path.chmod(int("0o100775", 8))

    if make_archive:
        subprocess.run(f"./{archive_script_path.name}", cwd=archive_script_path.parent)
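The per-job directories written by ``pack_errors`` are meant to be replayed by hand. A hypothetical session follows; the rule name and wildcard values in the path are made up, only the ``<rule>/<wildcards>`` layout and the two scripts come from the code above:

# Hypothetical replay of a packed failing job; the directory path is illustrative.
import subprocess

error_dir = "error_reports/my_rule/00000133_00012345"  # <rule>/<joined wildcard values>
subprocess.run("./shell.sh", cwd=error_dir)    # re-run the failing shell command in place
subprocess.run("./archive.sh", cwd=error_dir)  # bundle shell.sh, inputs and logs into a tarball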
km3smk/onstart.py
ADDED
@@ -0,0 +1,134 @@
import os
import km3db
import json
import subprocess
import shutil

from snakemake.logging import logger
from snakemake.shell import shell

from .bookkeeping import bookkeep_inputs


def generate_iRods_profile(
    filename, host="ccirods.in2p3.fr", port=5530, zone_name="in2p3"
):
    """Generate a json configuration file for iRods"""
    config = {
        "irods_home": "/in2p3/km3net",
        "irods_host": host,
        "irods_port": port,
        "irods_zone_name": zone_name,
    }

    json_object = json.dumps(config, indent=4)

    logger.info("irods json configuration generated:")
    logger.info(json_object)

    with open(filename, "w") as f:
        f.write(json_object)


def is_resource_needed(workflow, resource):
    """Check if the given resource appears in a job that will be run"""
    jobs = [
        job.name
        for job in workflow.persistence.dag.needrun_jobs()
        if resource in job.resources.keys()
    ]
    logger.debug(f"Jobs requiring {resource}: {jobs}")
    return len(jobs) > 0


def check_ressources_availability(workflow, config):
    """Check KM3NeT resources access from the current session"""
    km3net_db_need = is_resource_needed(workflow, "km3net_db_socket")
    km3net_gitlab_need = is_resource_needed(workflow, "km3net_gitlab_socket")
    irods_need = is_resource_needed(workflow, "irods_socket")

    try:
        # DB access
        if km3net_db_need:
            logger.info("Check DB access and cookie validity ...")
            sds = km3db.tools.StreamDS()
            if "DBCOOKIE" not in os.environ:
                # DBCOOKIE is needed to provide login information within the JPP container
                # to get the default PMT parameters from the DB
                logger.info("Exporting cookie to environment variable DBCOOKIE ...")
                os.environ["DBCOOKIE"] = km3db.DBManager().session_cookie
                shutil.copy2(km3db.core.COOKIE_FILENAME, "./")
        else:
            logger.info(
                "No jobs requiring DB socket in the DAG, skip the cookie check."
            )

        if irods_need:
            # iRods credentials
            logger.info("Check iRods credentials ...")

            # Check if a container is provided
            container = None
            if "irods" in config:
                if "container" in config["irods"]:
                    container = config["irods"]["container"]

            if not os.path.exists(".irods/.irodsA"):
                os.makedirs(".irods", exist_ok=True)
                logger.info("Create iRods configuration files ...")
                generate_iRods_profile(".irods/irods_environment.json")
                # Call iinit from the container to create the cookie
                try:
                    shell(
                        """
                        cd $HOME
                        HOME="./"
                        iinit
                        """,
                        container_img=container,
                        apptainer_args=workflow.deployment_settings.apptainer_args,
                    )

                except subprocess.CalledProcessError as E:
                    logger.error(
                        "Error when trying to call iinit. Maybe a wrong iRods password?"
                    )
                    os.remove(".irods/.irodsA")
                    raise E
        else:
            logger.info(
                "No jobs requiring iRods in the DAG, skip the credential check."
            )
        # ssh ready for git.km3net.de
        if km3net_gitlab_need:
            logger.info("Check ssh connection with git.km3net.de ...")
            shell("ssh -T git@git.km3net.de")
        else:
            logger.info(
                "No jobs requiring the km3net gitlab socket in the DAG, skip the connectivity check."
            )

    except Exception as E:
        logger.error("Failed, exit")
        raise E


def default(workflow, config):
    """Wrapper for onstart actions"""

    logger.info("-" * 20 + " On start checks " + "-" * 20)

    check_ressources_availability(workflow, config)

    logger.info("-" * 20 + " Completed " + "-" * 20)


def default_rbdp(workflow, config):
    """Wrapper for onstart actions in a run-based data-processing context"""

    logger.info("-" * 20 + " On start checks " + "-" * 20)

    check_ressources_availability(workflow, config)
    bookkeep_inputs(workflow, config)

    logger.info("-" * 20 + " Completed " + "-" * 20)
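``generate_iRods_profile`` can also be exercised standalone. A sketch, assuming km3smk and its km3db dependency are installed (the output path is arbitrary); the JSON shown is simply the function's defaults:

# Illustration of the default iRods profile written by generate_iRods_profile.
from km3smk.onstart import generate_iRods_profile

generate_iRods_profile("/tmp/irods_environment.json")
# /tmp/irods_environment.json now contains:
# {
#     "irods_home": "/in2p3/km3net",
#     "irods_host": "ccirods.in2p3.fr",
#     "irods_port": 5530,
#     "irods_zone_name": "in2p3"
# }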
km3smk/tools.py
ADDED
@@ -0,0 +1,76 @@
"""
Utilities to operate Snakemake workflows
"""

import re
import pprint
import typing
from collections.abc import Callable
from logging import Logger


def update_scope(target: dict, default: dict):
    """
    Recursive function to update a sub-dictionary
    """
    mod = {}
    for key in default.keys():
        if key not in target:
            target[key] = default[key]
            mod[key] = default[key]
        elif isinstance(default[key], dict):
            changes = update_scope(target[key], default[key])
            if len(changes) != 0:
                mod[key] = changes
        else:
            continue
    return mod


def set_default_config(
    default: dict,
    config: dict,
    scope: str = "<not specified>",
    logger: typing.Optional[Logger] = None,
):
    """
    Function taking a dictionary representing the default
    configuration and filling the missing config entries with the
    default values
    """
    updated = update_scope(config, default)

    if logger and updated:
        logger.warning(
            f"Missing config values added in {scope}. Run with --verbose to see which"
        )
        logger.debug("Config key:values updated:")
        logger.debug(pprint.pformat(updated, width=1, indent=2))

    return updated


def parse_runs_list(runslist_file):
    """
    Parse a runs list file

    :parameter: - ``runslist_file`` containing detid:run pairs, space separated, 8-digit numbers

    :return: - ``dict`` with detid:runs pairs
    """

    runs = {}
    with open(runslist_file) as fin:
        for line in fin:
            match = re.match(r"(?P<detid>\d{8})\s+(?P<run>\d{8}).*", line)
            if match is None:
                continue
            detid = match.group("detid")
            run = match.group("run")

            if detid not in runs:
                runs[detid] = []

            runs[detid].append(run)

    return runs
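A hedged usage sketch for the two helpers above; the config values and the runs-list content are made up for illustration:

# Illustrative use of set_default_config and parse_runs_list.
from km3smk.tools import set_default_config, parse_runs_list

config = {"irods": {"container": "irods.sif"}}
default = {"irods": {"container": None, "port": 5530}, "runs_list": None}
added = set_default_config(default, config, scope="example")
# config is now {"irods": {"container": "irods.sif", "port": 5530}, "runs_list": None}
# and `added` holds exactly the keys that were filled in.

# A runs list file holds space-separated 8-digit "detid run" pairs, e.g.
#   00000133 00012345
#   00000133 00012346
runs = parse_runs_list("runs.txt")  # -> {"00000133": ["00012345", "00012346"]}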
km3smk/version.py
ADDED
@@ -0,0 +1,34 @@
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.2.0'
__version_tuple__ = version_tuple = (0, 2, 0)

__commit_id__ = commit_id = 'g02cb70992'
km3smk/workflow_settings.py
ADDED
@@ -0,0 +1,60 @@
import os
from snakemake.logging import logger
from snakemake_interface_executor_plugins.settings import DeploymentMethod


def default(workflow):
    set_apptainer_args(workflow)
    set_default_ressources(workflow)
    set_default_cache_directory(workflow)


def set_apptainer_args(workflow):
    """
    Set apptainer arguments for the workflow
    """
    # - Force the use of apptainer
    workflow.deployment_settings.deployment_method.add(DeploymentMethod.APPTAINER)

    # - When using apptainer, do not export the host environment
    workflow.deployment_settings.apptainer_args += " --cleanenv"
    # - Export proxy variables if set:
    if "http_proxy" in os.environ:
        workflow.deployment_settings.apptainer_args += (
            " --env http_proxy=" + os.environ["http_proxy"]
        )
    if "https_proxy" in os.environ:
        workflow.deployment_settings.apptainer_args += (
            " --env https_proxy=" + os.environ["https_proxy"]
        )

    # For some reason, if only one arg is provided we need quotes
    # if " " not in workflow.deployment_settings.apptainer_args:
    #     workflow.deployment_settings.apptainer_args = (
    #         '"' + workflow.deployment_settings.apptainer_args + '"'
    #     )


def set_default_ressources(workflow):
    """If not manually set, fix limits on resources"""

    # Only set resources from the steering process
    if not workflow.is_main_process:
        return

    # If not manually set, limit irods/xrootd/km3db to 5 simultaneous jobs
    if "irods_socket" not in workflow.global_resources:
        workflow.global_resources["irods_socket"] = 5
    if "xrootd_socket" not in workflow.global_resources:
        workflow.global_resources["xrootd_socket"] = 5
    if "km3net_db_socket" not in workflow.global_resources:
        workflow.global_resources["km3net_db_socket"] = 5


def set_default_cache_directory(workflow):
    """Set XDG_CACHE_HOME, where sources are cached, to point into the workdir"""
    if "XDG_CACHE_HOME" not in os.environ:
        os.environ["XDG_CACHE_HOME"] = f"{workflow.workdir_init}/.snakemake_cache"
        logger.info(f"Set XDG_CACHE_HOME to {os.environ['XDG_CACHE_HOME']}")
    else:
        logger.info(f"XDG_CACHE_HOME already set to {os.environ['XDG_CACHE_HOME']}")
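The socket limits above act as global Snakemake resources: jobs that declare one of them get throttled to the configured concurrency. A hypothetical rule that opts in (the rule name, output pattern and iget path are assumptions, not part of the package):

# Hypothetical Snakefile rule consuming one irods_socket slot; with the default
# limit of 5 set above, at most five such jobs run concurrently.
rule fetch_from_irods:
    resources:
        irods_socket=1
    output:
        "data/{detid}_{run}.root"
    shell:
        "iget -f /in2p3/km3net/path/to/{wildcards.detid}/{wildcards.run} {output}"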
km3smk-0.2.0.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) YEAR, The KM3NeT collaboration
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
km3smk-0.2.0.dist-info/METADATA
ADDED
@@ -0,0 +1,108 @@
Metadata-Version: 2.1
Name: km3smk
Version: 0.2.0
Summary: The km3smk project
Home-page: https://git.km3net.de/vpestel/km3smk
Author: Valentin Pestel
Author-email: vpestel@km3net.de
Maintainer: Valentin Pestel
Maintainer-email: vpestel@km3net.de
License: MIT
Keywords: neutrino,astroparticle,physics,HEP
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Scientific/Engineering
Requires-Python: >=3.6
Description-Content-Type: text/x-rst
License-File: LICENSE
Requires-Dist: setuptools-scm
Requires-Dist: km3db>=0.14.1
Requires-Dist: pandas
Provides-Extra: all
Requires-Dist: black; extra == "all"
Requires-Dist: matplotlib; extra == "all"
Requires-Dist: numpydoc; extra == "all"
Requires-Dist: ipykernel; extra == "all"
Requires-Dist: pillow; extra == "all"
Requires-Dist: pytest; extra == "all"
Requires-Dist: pytest-cov; extra == "all"
Requires-Dist: pytest-flake8; extra == "all"
Requires-Dist: pylint; extra == "all"
Requires-Dist: pytest-watch; extra == "all"
Requires-Dist: sphinx; extra == "all"
Requires-Dist: sphinx-autoapi; extra == "all"
Requires-Dist: sphinx-gallery>=0.12.1; extra == "all"
Requires-Dist: sphinx-rtd-theme; extra == "all"
Requires-Dist: sphinxcontrib-versioning; extra == "all"
Requires-Dist: wheel; extra == "all"
Provides-Extra: dev
Requires-Dist: black; extra == "dev"
Requires-Dist: matplotlib; extra == "dev"
Requires-Dist: numpydoc; extra == "dev"
Requires-Dist: ipykernel; extra == "dev"
Requires-Dist: pillow; extra == "dev"
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: pytest-flake8; extra == "dev"
Requires-Dist: pylint; extra == "dev"
Requires-Dist: pytest-watch; extra == "dev"
Requires-Dist: sphinx; extra == "dev"
Requires-Dist: sphinx-autoapi; extra == "dev"
Requires-Dist: sphinx-gallery>=0.1.12; extra == "dev"
Requires-Dist: sphinx-rtd-theme; extra == "dev"
Requires-Dist: sphinxcontrib-versioning; extra == "dev"
Requires-Dist: wheel; extra == "dev"

The km3smk project
==================

.. image:: https://git.km3net.de/vpestel/km3smk/badges/master/pipeline.svg
    :target: https://git.km3net.de/vpestel/km3smk/pipelines

.. image:: https://git.km3net.de/vpestel/km3smk/badges/master/coverage.svg
    :target: https://vpestel.pages.km3net.de/km3smk/coverage

.. image:: https://git.km3net.de/examples/km3badges/-/raw/master/docs-latest-brightgreen.svg
    :target: https://vpestel.pages.km3net.de/km3smk


Installation
~~~~~~~~~~~~

It is recommended to first create an isolated virtual environment so as not to
interfere with other Python projects::

    git clone https://git.km3net.de/vpestel/km3smk
    cd km3smk
    python3 -m venv venv
    . venv/bin/activate

Install directly from the Git server via ``pip`` (no cloning needed)::

    pip install git+https://git.km3net.de/vpestel/km3smk

Or clone the repository and run::

    make install

To install all the development dependencies, in case you want to contribute or
run the test suite::

    make install-dev
    make test


---

*Created with ``cookiecutter https://git.km3net.de/templates/python-project``*
km3smk-0.2.0.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
km3smk/__init__.py,sha256=9AR7yrhn9i1s4HWWPGJ6kI4epXHYAAEidD7T8FBoaD4,315
km3smk/bookkeeping.py,sha256=v50CpjiZFA-yBSkrTvWoINSYhrJkYCGdS5vx8jTKbiQ,1636
km3smk/onerror.py,sha256=W3ZH8IrEbkXoBhQ549yZ9QvkfzkvBFjLbCVAUXr1kuI,8407
km3smk/onstart.py,sha256=In2DRG__7pkMYflPMl4jguudDGUsnjDrfYz-zEq6JEw,4488
km3smk/tools.py,sha256=G1NAaZBYAG4bg-PMvsikDml4_-xgUoy5KWOuqq4sSZU,1903
km3smk/version.py,sha256=6qOLasbz9wqAxgrvUUaYPkWNHbPSBllRoUk8-vUtduI,712
km3smk/workflow_settings.py,sha256=NZg-tGmrpvj2grdyeyxrh7tDlHKlvSixSPOHV2dkmeQ,2275
km3smk-0.2.0.dist-info/LICENSE,sha256=r6jdhbSLttMxFBglePOJ-GFlTIO_WLBMZt7gghKtUSk,1524
km3smk-0.2.0.dist-info/METADATA,sha256=IROEP02hZDbvZKvJnmkvMznxOuv5RABarfM-MmXD4DY,3669
km3smk-0.2.0.dist-info/WHEEL,sha256=YqkG3aKUxtNWfdKBz3DzkIakbboD_OAzmgL1ryH-ru4,110
km3smk-0.2.0.dist-info/top_level.txt,sha256=8l0LYESsvTJMKF0MfyRQVP8hZeXvaGm9BYyCfbdz-Ls,7
km3smk-0.2.0.dist-info/RECORD,,
km3smk-0.2.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
km3smk