looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- looper/__init__.py +3 -498
- looper/__main__.py +2 -2
- looper/_version.py +1 -1
- looper/cli_divvy.py +182 -0
- looper/cli_looper.py +776 -0
- looper/conductor.py +53 -206
- looper/const.py +51 -3
- looper/divvy.py +28 -196
- looper/exceptions.py +18 -0
- looper/looper.py +177 -612
- looper/plugins.py +160 -0
- looper/processed_project.py +1 -1
- looper/project.py +229 -117
- looper/utils.py +119 -43
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/METADATA +6 -6
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/RECORD +20 -20
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/WHEEL +1 -1
- looper/html_reports.py +0 -1057
- looper/html_reports_pipestat.py +0 -924
- looper/html_reports_project_pipestat.py +0 -269
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/LICENSE.txt +0 -0
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/entry_points.txt +0 -0
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/top_level.txt +0 -0
looper/plugins.py
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
from .const import *
|
4
|
+
from .conductor import _get_yaml_path
|
5
|
+
|
6
|
+
_LOGGER = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
|
9
|
+
def write_sample_yaml_prj(namespaces):
    """
    Plugin: dump the sample, together with a reference to its project, to YAML.

    The output location can be parametrized through the path value/template
    given in 'pipeline.var_templates.sample_yaml_prj_path'. That value must
    be a complete, absolute path to the file in which the sample YAML
    representation should be stored.

    :param dict namespaces: variable namespaces dict
    :return dict: sample namespace dict
    """
    # Look the sample up first so a missing 'sample' namespace fails before
    # any path resolution is attempted.
    current_sample = namespaces["sample"]
    destination = _get_yaml_path(namespaces, SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj")
    current_sample.to_yaml(destination, add_prj_ref=True)
    return {"sample": current_sample}
|
27
|
+
|
28
|
+
|
29
|
+
def write_custom_template(namespaces):
    """
    Plugin: Populates a user-provided jinja template

    Parameterize by providing pipeline.var_templates.custom_template

    The template file is rendered with the whole namespaces dict as its
    context. The rendered text is written (UTF-8 encoded) to the path
    returned by ``_get_yaml_path`` and that path is stored on the sample
    under 'custom_template_output'.

    :param dict namespaces: variable namespaces dict
    :return dict | None: sample namespace dict, or None (after logging an
        error) when no 'custom_template' is configured for the pipeline
    """
    pipeline = namespaces["pipeline"]

    # A single guard covers both "no var_templates section" and
    # "var_templates present but without custom_template".
    if (
        "var_templates" not in pipeline
        or "custom_template" not in pipeline["var_templates"]
    ):
        _LOGGER.error(
            "Custom template plugin requires a template in var_templates.custom_template"
        )
        return None

    # Function-local import: jinja2 is not loaded at module import time,
    # only once a template is actually going to be rendered.
    import jinja2

    # Read explicitly as UTF-8 so the input encoding matches the UTF-8
    # encoding used for the rendered output below, independent of locale.
    with open(
        pipeline["var_templates"]["custom_template"], "r", encoding="utf-8"
    ) as f:
        tpl = jinja2.Template(f.read())

    content = tpl.render(namespaces)
    pth = _get_yaml_path(namespaces, "custom_template_output", "config")
    namespaces["sample"]["custom_template_output"] = pth
    # Binary mode keeps the rendered newlines exactly as produced.
    with open(pth, "wb") as fh:
        fh.write(content.encode())

    return {"sample": namespaces["sample"]}
|
64
|
+
|
65
|
+
|
66
|
+
def write_sample_yaml_cwl(namespaces):
    """
    Plugin: Produce a cwl-compatible yaml representation of the sample

    Also sets the 'sample_yaml_cwl' attribute on the sample object, which
    points to the file produced.

    This plugin can be parametrized by providing the path value/template in
    'pipeline.var_templates.sample_cwl_yaml_path'. This needs to be a complete and
    absolute path to the file where sample YAML representation is to be
    stored.

    :param dict namespaces: variable namespaces dict
    :return dict: dict holding the updated sample under the 'sample' key
    """
    from eido import read_schema
    from ubiquerg import is_url

    def _get_schema_source(
        schema_source, piface_dir=namespaces["looper"]["piface_dir"]
    ):
        # Stolen from piface object; should be a better way to do this...
        # URLs and absolute paths are returned untouched; a relative path is
        # resolved against the pipeline interface directory.
        if is_url(schema_source):
            return schema_source
        elif not os.path.isabs(schema_source):
            schema_source = os.path.join(piface_dir, schema_source)
        return schema_source

    # To be compatible as a CWL job input, we need to handle the
    # File and Directory object types directly.
    sample = namespaces["sample"]
    sample.sample_yaml_cwl = _get_yaml_path(
        namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl"
    )

    if "input_schema" in namespaces["pipeline"]:
        schema_path = _get_schema_source(namespaces["pipeline"]["input_schema"])

        # Read the schema once and collect both attribute lists in a single
        # pass instead of loading the same source twice.
        file_list = []
        directory_list = []
        for ischema in read_schema(schema_path):
            sample_items = ischema["properties"]["samples"]["items"]
            if "files" in sample_items:
                file_list.extend(sample_items["files"])
            if "directories" in sample_items:
                directory_list.extend(sample_items["directories"])

        for file_attr in file_list:
            _LOGGER.debug("CWL-ing file attribute: {}".format(file_attr))
            file_attr_value = sample[file_attr]
            # file paths are assumed relative to the sample table;
            # but CWL assumes they are relative to the yaml output file,
            # so we convert here.
            file_attr_rel = os.path.relpath(
                file_attr_value, os.path.dirname(sample.sample_yaml_cwl)
            )
            sample[file_attr] = {"class": "File", "path": file_attr_rel}

        for dir_attr in directory_list:
            _LOGGER.debug("CWL-ing directory attribute: {}".format(dir_attr))
            dir_attr_value = sample[dir_attr]
            # NOTE: unlike file attributes, the directory value is stored
            # as-is under 'location'; no relative-path conversion is done.
            sample[dir_attr] = {"class": "Directory", "location": dir_attr_value}
    else:
        _LOGGER.warning(
            "No 'input_schema' defined, producing a regular "
            "sample YAML representation"
        )
    _LOGGER.info("Writing sample yaml to {}".format(sample.sample_yaml_cwl))
    sample.to_yaml(sample.sample_yaml_cwl)
    return {"sample": sample}
|
141
|
+
|
142
|
+
|
143
|
+
def write_sample_yaml(namespaces):
    """
    Plugin: dump the sample representation to YAML (no project reference).

    The output location can be parametrized through the path value/template
    given in 'pipeline.var_templates.sample_yaml_path'. That value must be a
    complete, absolute path to the file in which the sample YAML
    representation should be stored.

    :param dict namespaces: variable namespaces dict
    :return dict: sample namespace dict
    """
    current_sample = namespaces["sample"]
    resolved_path = _get_yaml_path(namespaces, SAMPLE_YAML_PATH_KEY, "_sample")
    # Record the destination on the sample itself, then write to the value
    # read back from the sample.
    current_sample["sample_yaml_path"] = resolved_path
    current_sample.to_yaml(current_sample["sample_yaml_path"], add_prj_ref=False)
    return {"sample": current_sample}
|
looper/processed_project.py
CHANGED
@@ -203,7 +203,7 @@ def populate_sample_paths(sample, schema, check_exist=False):
|
|
203
203
|
raise TypeError("Can only populate paths in peppy.Sample objects")
|
204
204
|
# schema = schema[-1] # use only first schema, in case there are imports
|
205
205
|
if PROP_KEY in schema and "samples" in schema[PROP_KEY]:
|
206
|
-
_populate_paths(sample, schema
|
206
|
+
_populate_paths(sample, schema, check_exist)
|
207
207
|
|
208
208
|
|
209
209
|
def populate_project_paths(project, schema, check_exist=False):
|
looper/project.py
CHANGED
@@ -19,6 +19,8 @@ from peppy import Project as peppyProject
|
|
19
19
|
from peppy.utils import make_abs_via_cfg
|
20
20
|
from pipestat import PipestatError, PipestatManager
|
21
21
|
from ubiquerg import expandpath, is_command_callable
|
22
|
+
from yacman import YAMLConfigManager
|
23
|
+
from .conductor import write_pipestat_config
|
22
24
|
|
23
25
|
from .exceptions import *
|
24
26
|
from .pipeline_interface import PipelineInterface
|
@@ -34,7 +36,13 @@ class ProjectContext(object):
|
|
34
36
|
"""Wrap a Project to provide protocol-specific Sample selection."""
|
35
37
|
|
36
38
|
def __init__(
|
37
|
-
self,
|
39
|
+
self,
|
40
|
+
prj,
|
41
|
+
selector_attribute=None,
|
42
|
+
selector_include=None,
|
43
|
+
selector_exclude=None,
|
44
|
+
selector_flag=None,
|
45
|
+
exclusion_flag=None,
|
38
46
|
):
|
39
47
|
"""Project and what to include/exclude defines the context."""
|
40
48
|
if not isinstance(selector_attribute, str):
|
@@ -46,6 +54,8 @@ class ProjectContext(object):
|
|
46
54
|
self.include = selector_include
|
47
55
|
self.exclude = selector_exclude
|
48
56
|
self.attribute = selector_attribute
|
57
|
+
self.selector_flag = selector_flag
|
58
|
+
self.exclusion_flag = exclusion_flag
|
49
59
|
|
50
60
|
def __getattr__(self, item):
|
51
61
|
"""Samples are context-specific; other requests are handled
|
@@ -56,13 +66,18 @@ class ProjectContext(object):
|
|
56
66
|
selector_attribute=self.attribute,
|
57
67
|
selector_include=self.include,
|
58
68
|
selector_exclude=self.exclude,
|
69
|
+
selector_flag=self.selector_flag,
|
70
|
+
exclusion_flag=self.exclusion_flag,
|
59
71
|
)
|
60
72
|
if item in ["prj", "include", "exclude"]:
|
61
73
|
# Attributes requests that this context/wrapper handles
|
62
74
|
return self.__dict__[item]
|
63
75
|
else:
|
64
76
|
# Dispatch attribute request to Project.
|
65
|
-
|
77
|
+
if hasattr(self.prj, item):
|
78
|
+
return getattr(self.prj, item)
|
79
|
+
else:
|
80
|
+
return self.prj.get(item)
|
66
81
|
|
67
82
|
def __getitem__(self, item):
|
68
83
|
"""Provide the Mapping-like item access to the instance's Project."""
|
@@ -101,13 +116,17 @@ class Project(peppyProject):
|
|
101
116
|
):
|
102
117
|
super(Project, self).__init__(cfg=cfg, amendments=amendments)
|
103
118
|
prj_dict = kwargs.get("project_dict")
|
119
|
+
pep_config = kwargs.get("pep_config", None)
|
120
|
+
if pep_config:
|
121
|
+
self["pep_config"] = pep_config
|
104
122
|
|
105
|
-
# init project from pephub:
|
123
|
+
# init project from pephub pep_config:
|
106
124
|
if prj_dict is not None and cfg is None:
|
107
125
|
self.from_dict(prj_dict)
|
108
|
-
self["_config_file"] = os.getcwd()
|
126
|
+
self["_config_file"] = os.getcwd() # for finding pipeline interface
|
127
|
+
self["pep_config"] = pep_config
|
109
128
|
|
110
|
-
|
129
|
+
self[EXTRA_KEY] = {}
|
111
130
|
|
112
131
|
# add sample pipeline interface to the project
|
113
132
|
if kwargs.get(SAMPLE_PL_ARG):
|
@@ -115,7 +134,8 @@ class Project(peppyProject):
|
|
115
134
|
|
116
135
|
for attr_name in CLI_PROJ_ATTRS:
|
117
136
|
if attr_name in kwargs:
|
118
|
-
|
137
|
+
self[EXTRA_KEY][attr_name] = kwargs[attr_name]
|
138
|
+
# setattr(self[EXTRA_KEY], attr_name, kwargs[attr_name])
|
119
139
|
self._samples_by_interface = self._samples_by_piface(self.piface_key)
|
120
140
|
self._interfaces_by_sample = self._piface_by_samples()
|
121
141
|
self.linked_sample_interfaces = self._get_linked_pifaces()
|
@@ -128,7 +148,7 @@ class Project(peppyProject):
|
|
128
148
|
if divcfg_path is None
|
129
149
|
else ComputingConfiguration(filepath=divcfg_path)
|
130
150
|
)
|
131
|
-
if
|
151
|
+
if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
|
132
152
|
_LOGGER.debug("Ensuring project directories exist")
|
133
153
|
self.make_project_dirs()
|
134
154
|
|
@@ -184,7 +204,8 @@ class Project(peppyProject):
|
|
184
204
|
found
|
185
205
|
"""
|
186
206
|
try:
|
187
|
-
result =
|
207
|
+
result = self[EXTRA_KEY][attr_name]
|
208
|
+
# getattr(self[EXTRA_KEY], attr_name))
|
188
209
|
except (AttributeError, KeyError):
|
189
210
|
pass
|
190
211
|
else:
|
@@ -452,12 +473,14 @@ class Project(peppyProject):
|
|
452
473
|
"""
|
453
474
|
try:
|
454
475
|
if project_level:
|
455
|
-
self._get_pipestat_configuration(
|
476
|
+
pipestat_configured = self._get_pipestat_configuration(
|
456
477
|
sample_name=None, project_level=project_level
|
457
478
|
)
|
458
479
|
else:
|
459
480
|
for s in self.samples:
|
460
|
-
self._get_pipestat_configuration(
|
481
|
+
pipestat_configured = self._get_pipestat_configuration(
|
482
|
+
sample_name=s.sample_name
|
483
|
+
)
|
461
484
|
except Exception as e:
|
462
485
|
context = (
|
463
486
|
f"Project '{self.name}'"
|
@@ -469,92 +492,105 @@ class Project(peppyProject):
|
|
469
492
|
f"caught exception: {getattr(e, 'message', repr(e))}"
|
470
493
|
)
|
471
494
|
return False
|
472
|
-
|
495
|
+
else:
|
496
|
+
if pipestat_configured is not None and pipestat_configured != {}:
|
497
|
+
return True
|
498
|
+
else:
|
499
|
+
return False
|
473
500
|
|
474
501
|
def _get_pipestat_configuration(self, sample_name=None, project_level=False):
|
475
502
|
"""
|
476
|
-
Get all required pipestat configuration variables
|
503
|
+
Get all required pipestat configuration variables from looper_config file
|
477
504
|
"""
|
478
505
|
|
479
|
-
def _get_val_from_attr(pipestat_sect, object, attr_name, default, no_err=False):
|
480
|
-
"""
|
481
|
-
Get configuration value from an object's attribute or return default
|
482
|
-
|
483
|
-
:param dict pipestat_sect: pipestat section for sample or project
|
484
|
-
:param peppy.Sample | peppy.Project object: object to get the
|
485
|
-
configuration values for
|
486
|
-
:param str attr_name: attribute name with the value to retrieve
|
487
|
-
:param str default: default attribute name
|
488
|
-
:param bool no_err: do not raise error in case the attribute is missing,
|
489
|
-
in order to use the values specified in a different way, e.g. in pipestat config
|
490
|
-
:return str: retrieved configuration value
|
491
|
-
"""
|
492
|
-
if pipestat_sect is not None and attr_name in pipestat_sect:
|
493
|
-
return pipestat_sect[attr_name]
|
494
|
-
try:
|
495
|
-
return getattr(object, default)
|
496
|
-
except AttributeError:
|
497
|
-
if no_err:
|
498
|
-
return None
|
499
|
-
raise AttributeError(f"'{default}' attribute is missing")
|
500
|
-
|
501
506
|
ret = {}
|
502
507
|
if not project_level and sample_name is None:
|
503
508
|
raise ValueError(
|
504
509
|
"Must provide the sample_name to determine the "
|
505
510
|
"sample to get the PipestatManagers for"
|
506
511
|
)
|
507
|
-
|
508
|
-
if
|
509
|
-
|
510
|
-
and LOOPER_KEY in self[CONFIG_KEY]
|
511
|
-
and PIPESTAT_KEY in self[CONFIG_KEY][LOOPER_KEY]
|
512
|
-
and key in self[CONFIG_KEY][LOOPER_KEY][PIPESTAT_KEY]
|
513
|
-
):
|
514
|
-
pipestat_section = self[CONFIG_KEY][LOOPER_KEY][PIPESTAT_KEY][key]
|
512
|
+
|
513
|
+
if PIPESTAT_KEY in self[EXTRA_KEY]:
|
514
|
+
pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
|
515
515
|
else:
|
516
516
|
_LOGGER.debug(
|
517
517
|
f"'{PIPESTAT_KEY}' not found in '{LOOPER_KEY}' section of the "
|
518
|
-
f"project configuration file.
|
518
|
+
f"project configuration file."
|
519
519
|
)
|
520
|
-
|
521
|
-
|
522
|
-
pipestat_section,
|
523
|
-
self.config if project_level else self.get_sample(sample_name),
|
524
|
-
PIPESTAT_CONFIG_ATTR_KEY,
|
525
|
-
DEFAULT_PIPESTAT_CONFIG_ATTR,
|
526
|
-
True, # allow for missing pipestat cfg attr, the settings may be provided as Project/Sample attrs
|
527
|
-
)
|
520
|
+
# We cannot use pipestat without it being defined in the looper config file.
|
521
|
+
raise ValueError
|
528
522
|
|
529
|
-
|
523
|
+
# Expand paths in the event ENV variables were used in config files
|
524
|
+
output_dir = expandpath(self.output_dir)
|
525
|
+
|
526
|
+
# Get looper user configured items first and update the pipestat_config_dict
|
527
|
+
try:
|
528
|
+
results_file_path = expandpath(pipestat_config_dict["results_file_path"])
|
529
|
+
if not os.path.exists(os.path.dirname(results_file_path)):
|
530
|
+
results_file_path = os.path.join(
|
531
|
+
os.path.dirname(output_dir), results_file_path
|
532
|
+
)
|
533
|
+
pipestat_config_dict.update({"results_file_path": results_file_path})
|
534
|
+
except KeyError:
|
535
|
+
results_file_path = None
|
536
|
+
|
537
|
+
try:
|
538
|
+
flag_file_dir = expandpath(pipestat_config_dict["flag_file_dir"])
|
539
|
+
if not os.path.isabs(flag_file_dir):
|
540
|
+
flag_file_dir = os.path.join(os.path.dirname(output_dir), flag_file_dir)
|
541
|
+
pipestat_config_dict.update({"flag_file_dir": flag_file_dir})
|
542
|
+
except KeyError:
|
543
|
+
flag_file_dir = None
|
544
|
+
|
545
|
+
if sample_name:
|
546
|
+
pipestat_config_dict.update({"record_identifier": sample_name})
|
547
|
+
|
548
|
+
if project_level and "project_name" in pipestat_config_dict:
|
549
|
+
pipestat_config_dict.update(
|
550
|
+
{"project_name": pipestat_config_dict["project_name"]}
|
551
|
+
)
|
552
|
+
|
553
|
+
if project_level and "{record_identifier}" in results_file_path:
|
554
|
+
# if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
|
555
|
+
pipestat_config_dict.update(
|
556
|
+
{"record_identifier": "default_project_record_identifier"}
|
557
|
+
)
|
558
|
+
|
559
|
+
pipestat_config_dict.update({"output_dir": output_dir})
|
530
560
|
|
531
|
-
results_file_path = _get_val_from_attr(
|
532
|
-
pipestat_section,
|
533
|
-
self.config if project_level else self.get_sample(sample_name),
|
534
|
-
PIPESTAT_RESULTS_FILE_ATTR_KEY,
|
535
|
-
DEFAULT_PIPESTAT_RESULTS_FILE_ATTR,
|
536
|
-
pipestat_config and os.path.exists(pipestat_config),
|
537
|
-
)
|
538
|
-
if results_file_path is not None:
|
539
|
-
results_file_path = expandpath(results_file_path)
|
540
|
-
if not os.path.isabs(results_file_path):
|
541
|
-
results_file_path = os.path.join(self.output_dir, results_file_path)
|
542
561
|
pifaces = (
|
543
562
|
self.project_pipeline_interfaces
|
544
563
|
if project_level
|
545
564
|
else self._interfaces_by_sample[sample_name]
|
546
565
|
)
|
566
|
+
|
547
567
|
for piface in pifaces:
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
568
|
+
# We must also obtain additional pipestat items from the pipeline author's piface
|
569
|
+
if "output_schema" in piface.data:
|
570
|
+
schema_path = expandpath(piface.data["output_schema"])
|
571
|
+
if not os.path.isabs(schema_path):
|
572
|
+
# Get path relative to the pipeline_interface
|
573
|
+
schema_path = os.path.join(
|
574
|
+
os.path.dirname(piface.pipe_iface_file), schema_path
|
575
|
+
)
|
576
|
+
pipestat_config_dict.update({"schema_path": schema_path})
|
577
|
+
if "pipeline_name" in piface.data:
|
578
|
+
pipestat_config_dict.update(
|
579
|
+
{"pipeline_name": piface.data["pipeline_name"]}
|
580
|
+
)
|
581
|
+
if "pipeline_type" in piface.data:
|
582
|
+
pipestat_config_dict.update(
|
583
|
+
{"pipeline_type": piface.data["pipeline_type"]}
|
584
|
+
)
|
585
|
+
|
586
|
+
# Pipestat_dict_ is now updated from all sources and can be written to a yaml.
|
587
|
+
looper_pipestat_config_path = os.path.join(
|
588
|
+
os.path.dirname(output_dir), "looper_pipestat_config.yaml"
|
552
589
|
)
|
590
|
+
write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
|
591
|
+
|
553
592
|
ret[piface.pipeline_name] = {
|
554
|
-
"config_file":
|
555
|
-
"results_file_path": results_file_path,
|
556
|
-
"sample_name": rec_id,
|
557
|
-
"schema_path": piface.get_pipeline_schemas(OUTPUT_SCHEMA_KEY),
|
593
|
+
"config_file": looper_pipestat_config_path,
|
558
594
|
}
|
559
595
|
return ret
|
560
596
|
|
@@ -701,15 +737,20 @@ class Project(peppyProject):
|
|
701
737
|
|
702
738
|
:param list | str sample_piface: sample pipeline interface
|
703
739
|
"""
|
704
|
-
self.
|
705
|
-
self.
|
740
|
+
self.config.setdefault("sample_modifiers", {})
|
741
|
+
self.config["sample_modifiers"].setdefault("append", {})
|
706
742
|
self.config["sample_modifiers"]["append"]["pipeline_interfaces"] = sample_piface
|
707
743
|
|
708
744
|
self.modify_samples()
|
709
745
|
|
710
746
|
|
711
747
|
def fetch_samples(
|
712
|
-
prj,
|
748
|
+
prj,
|
749
|
+
selector_attribute=None,
|
750
|
+
selector_include=None,
|
751
|
+
selector_exclude=None,
|
752
|
+
selector_flag=None,
|
753
|
+
exclusion_flag=None,
|
713
754
|
):
|
714
755
|
"""
|
715
756
|
Collect samples of particular protocol(s).
|
@@ -730,6 +771,8 @@ def fetch_samples(
|
|
730
771
|
:param Iterable[str] | str selector_include: protocol(s) of interest;
|
731
772
|
if specified, a Sample must
|
732
773
|
:param Iterable[str] | str selector_exclude: protocol(s) to include
|
774
|
+
:param Iterable[str] | str selector_flag: flag to select on, e.g. FAILED, COMPLETED
|
775
|
+
:param Iterable[str] | str exclusion_flag: flag to exclude on, e.g. FAILED, COMPLETED
|
733
776
|
:return list[Sample]: Collection of this Project's samples with
|
734
777
|
protocol that either matches one of those in selector_include,
|
735
778
|
or either
|
@@ -741,10 +784,15 @@ def fetch_samples(
|
|
741
784
|
Python2;
|
742
785
|
also possible if name of attribute for selection isn't a string
|
743
786
|
"""
|
787
|
+
|
788
|
+
kept_samples = prj.samples
|
789
|
+
|
744
790
|
if not selector_include and not selector_exclude:
|
745
791
|
# Default case where user does not use selector_include or selector exclude.
|
746
792
|
# Assume that user wants to exclude samples if toggle = 0.
|
747
|
-
if any([hasattr(s, "toggle") for s in prj.samples]):
|
793
|
+
# if any([hasattr(s, "toggle") for s in prj.samples]):
|
794
|
+
# if any("toggle" in s for s in prj.samples):
|
795
|
+
if "toggle" in prj.samples[0]: # assume the samples have the same schema
|
748
796
|
selector_exclude = [0]
|
749
797
|
|
750
798
|
def keep(s):
|
@@ -753,9 +801,16 @@ def fetch_samples(
|
|
753
801
|
or getattr(s, selector_attribute) not in selector_exclude
|
754
802
|
)
|
755
803
|
|
756
|
-
|
804
|
+
kept_samples = list(filter(keep, prj.samples))
|
757
805
|
else:
|
758
|
-
|
806
|
+
kept_samples = prj.samples
|
807
|
+
|
808
|
+
# Intersection between selector_include and selector_exclude is
|
809
|
+
# nonsense user error.
|
810
|
+
if selector_include and selector_exclude:
|
811
|
+
raise TypeError(
|
812
|
+
"Specify only selector_include or selector_exclude parameter, " "not both."
|
813
|
+
)
|
759
814
|
|
760
815
|
if not isinstance(selector_attribute, str):
|
761
816
|
raise TypeError(
|
@@ -766,46 +821,103 @@ def fetch_samples(
|
|
766
821
|
|
767
822
|
# At least one of the samples has to have the specified attribute
|
768
823
|
if prj.samples and not any([hasattr(s, selector_attribute) for s in prj.samples]):
|
769
|
-
|
770
|
-
|
771
|
-
|
824
|
+
if selector_attribute == "toggle":
|
825
|
+
# this is the default, so silently pass.
|
826
|
+
pass
|
827
|
+
else:
|
828
|
+
raise AttributeError(
|
829
|
+
"The Project samples do not have the attribute '{attr}'".format(
|
830
|
+
attr=selector_attribute
|
831
|
+
)
|
772
832
|
)
|
773
|
-
)
|
774
833
|
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
834
|
+
if prj.samples:
|
835
|
+
# Use the attr check here rather than exception block in case the
|
836
|
+
# hypothetical AttributeError would occur; we want such
|
837
|
+
# an exception to arise, not to catch it as if the Sample lacks
|
838
|
+
# "protocol"
|
839
|
+
if not selector_include:
|
840
|
+
# Loose; keep all samples not in the selector_exclude.
|
841
|
+
def keep(s):
|
842
|
+
return not hasattr(s, selector_attribute) or getattr(
|
843
|
+
s, selector_attribute
|
844
|
+
) not in make_set(selector_exclude)
|
781
845
|
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
846
|
+
else:
|
847
|
+
# Strict; keep only samples in the selector_include.
|
848
|
+
def keep(s):
|
849
|
+
return hasattr(s, selector_attribute) and getattr(
|
850
|
+
s, selector_attribute
|
851
|
+
) in make_set(selector_include)
|
852
|
+
|
853
|
+
kept_samples = list(filter(keep, kept_samples))
|
854
|
+
|
855
|
+
if selector_flag and exclusion_flag:
|
856
|
+
raise TypeError("Specify only selector_flag or exclusion_flag not both.")
|
857
|
+
|
858
|
+
flags = selector_flag or exclusion_flag or None
|
859
|
+
if flags:
|
860
|
+
# Collect uppercase flags or error if not str
|
861
|
+
if not isinstance(flags, list):
|
862
|
+
flags = [str(flags)]
|
863
|
+
for flag in flags:
|
864
|
+
if not isinstance(flag, str):
|
865
|
+
raise TypeError(
|
866
|
+
f"Supplied flags must be a string! Flag:{flag} {type(flag)}"
|
867
|
+
)
|
868
|
+
flags.remove(flag)
|
869
|
+
flags.insert(0, flag.upper())
|
870
|
+
# Look for flags
|
871
|
+
# Is pipestat configured? Then, the user may have set the flag folder
|
872
|
+
if prj.pipestat_configured:
|
873
|
+
try:
|
874
|
+
flag_dir = expandpath(prj[EXTRA_KEY][PIPESTAT_KEY]["flag_file_dir"])
|
875
|
+
if not os.path.isabs(flag_dir):
|
876
|
+
flag_dir = os.path.join(
|
877
|
+
os.path.dirname(prj.output_dir), flag_dir
|
878
|
+
)
|
879
|
+
except KeyError:
|
880
|
+
_LOGGER.warning(
|
881
|
+
"Pipestat is configured but no flag_file_dir supplied, defaulting to output_dir"
|
882
|
+
)
|
883
|
+
flag_dir = prj.output_dir
|
884
|
+
else:
|
885
|
+
# if pipestat not configured, check the looper output dir
|
886
|
+
flag_dir = prj.output_dir
|
887
|
+
|
888
|
+
# Using flag_dir, search for flags:
|
889
|
+
for sample in kept_samples:
|
890
|
+
sample_pifaces = prj.get_sample_piface(sample[prj.sample_table_index])
|
891
|
+
pl_name = sample_pifaces[0].pipeline_name
|
892
|
+
flag_files = fetch_sample_flags(prj, sample, pl_name, flag_dir)
|
893
|
+
status = get_sample_status(sample.sample_name, flag_files)
|
894
|
+
sample.update({"status": status})
|
895
|
+
|
896
|
+
if not selector_flag:
|
897
|
+
# Loose; keep all samples not in the exclusion_flag.
|
898
|
+
def keep(s):
|
899
|
+
return not hasattr(s, "status") or getattr(
|
900
|
+
s, "status"
|
901
|
+
) not in make_set(flags)
|
902
|
+
|
903
|
+
else:
|
904
|
+
# Strict; keep only samples in the selector_flag
|
905
|
+
def keep(s):
|
906
|
+
return hasattr(s, "status") and getattr(s, "status") in make_set(
|
907
|
+
flags
|
908
|
+
)
|
909
|
+
|
910
|
+
kept_samples = list(filter(keep, kept_samples))
|
911
|
+
|
912
|
+
return kept_samples
|
913
|
+
|
914
|
+
|
915
|
+
def make_set(items):
    """
    Normalize inclusion/exclusion criteria for use in membership tests.

    A bare string is wrapped in a one-element list, so that a later
    ``value in make_set(...)`` check compares whole values instead of doing
    a substring match. When exactly one criterion is given, it is converted
    to int if possible (a user may have supplied a single integer value);
    otherwise the criteria are returned unchanged.

    Despite the name, the result is a list (or the original object), not a
    set.

    :param Iterable | str items: selection criteria
    :return list | object: list of criteria; an unsized, non-string input is
        returned as-is
    """
    if isinstance(items, str):
        items = [items]
    try:
        # Check if user input single integer value for inclusion/exclusion criteria
        if len(items) == 1:
            items = list(map(int, items))
    except (TypeError, ValueError, OverflowError):
        # Either the input has no length or its single element is not
        # integer-like; keep the criteria as they are.
        pass
    return items
|