looper 1.7.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- looper/__main__.py +1 -1
- looper/_version.py +2 -1
- looper/cli_divvy.py +10 -6
- looper/cli_pydantic.py +413 -0
- looper/command_models/DEVELOPER.md +85 -0
- looper/command_models/README.md +4 -0
- looper/command_models/__init__.py +6 -0
- looper/command_models/arguments.py +293 -0
- looper/command_models/commands.py +335 -0
- looper/conductor.py +147 -28
- looper/const.py +9 -0
- looper/divvy.py +56 -47
- looper/exceptions.py +9 -1
- looper/looper.py +196 -169
- looper/pipeline_interface.py +2 -12
- looper/project.py +154 -176
- looper/schemas/pipeline_interface_schema_generic.yaml +14 -6
- looper/utils.py +450 -78
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/METADATA +24 -14
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/RECORD +24 -19
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/WHEEL +1 -1
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/entry_points.txt +1 -1
- looper/cli_looper.py +0 -796
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/LICENSE.txt +0 -0
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/top_level.txt +0 -0
looper/project.py
CHANGED
@@ -3,29 +3,28 @@
|
|
3
3
|
import itertools
|
4
4
|
import os
|
5
5
|
|
6
|
+
from yaml import safe_load
|
7
|
+
|
6
8
|
try:
|
7
9
|
from functools import cached_property
|
8
10
|
except ImportError:
|
9
11
|
# cached_property was introduced in python 3.8
|
10
12
|
cached_property = property
|
11
|
-
from logging import getLogger
|
12
13
|
|
13
14
|
from .divvy import ComputingConfiguration
|
14
15
|
from eido import PathAttrNotFoundError, read_schema
|
15
16
|
from jsonschema import ValidationError
|
16
17
|
from pandas.core.common import flatten
|
17
|
-
from peppy import CONFIG_KEY, OUTDIR_KEY
|
18
|
-
from peppy import Project as peppyProject
|
19
18
|
from peppy.utils import make_abs_via_cfg
|
20
|
-
from pipestat import
|
21
|
-
|
22
|
-
from yacman import YAMLConfigManager
|
19
|
+
from pipestat import PipestatManager
|
20
|
+
|
23
21
|
from .conductor import write_pipestat_config
|
24
22
|
|
25
23
|
from .exceptions import *
|
26
24
|
from .pipeline_interface import PipelineInterface
|
27
25
|
from .processed_project import populate_project_paths, populate_sample_paths
|
28
26
|
from .utils import *
|
27
|
+
from .const import PipelineLevel
|
29
28
|
|
30
29
|
__all__ = ["Project"]
|
31
30
|
|
@@ -126,6 +125,12 @@ class Project(peppyProject):
|
|
126
125
|
|
127
126
|
self[EXTRA_KEY] = {}
|
128
127
|
|
128
|
+
try:
|
129
|
+
# For loading PEPs via CSV, Peppy cannot infer project name.
|
130
|
+
name = self.name
|
131
|
+
except NotImplementedError:
|
132
|
+
self.name = None
|
133
|
+
|
129
134
|
# add sample pipeline interface to the project
|
130
135
|
if kwargs.get(SAMPLE_PL_ARG):
|
131
136
|
self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG))
|
@@ -144,7 +149,7 @@ class Project(peppyProject):
|
|
144
149
|
self.dcc = (
|
145
150
|
None
|
146
151
|
if divcfg_path is None
|
147
|
-
else ComputingConfiguration(filepath=divcfg_path)
|
152
|
+
else ComputingConfiguration.from_yaml_file(filepath=divcfg_path)
|
148
153
|
)
|
149
154
|
if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
|
150
155
|
_LOGGER.debug("Ensuring project directories exist")
|
@@ -300,7 +305,7 @@ class Project(peppyProject):
|
|
300
305
|
:return list[looper.PipelineInterface]: list of pipeline interfaces
|
301
306
|
"""
|
302
307
|
return [
|
303
|
-
PipelineInterface(pi, pipeline_type=
|
308
|
+
PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value)
|
304
309
|
for pi in self.project_pipeline_interface_sources
|
305
310
|
]
|
306
311
|
|
@@ -343,7 +348,9 @@ class Project(peppyProject):
|
|
343
348
|
|
344
349
|
:return bool: whether pipestat configuration is complete
|
345
350
|
"""
|
346
|
-
return self._check_if_pipestat_configured(
|
351
|
+
return self._check_if_pipestat_configured(
|
352
|
+
pipeline_type=PipelineLevel.PROJECT.value
|
353
|
+
)
|
347
354
|
|
348
355
|
def get_sample_piface(self, sample_name):
|
349
356
|
"""
|
@@ -363,65 +370,6 @@ class Project(peppyProject):
|
|
363
370
|
except KeyError:
|
364
371
|
return None
|
365
372
|
|
366
|
-
def build_submission_bundles(self, protocol, priority=True):
|
367
|
-
"""
|
368
|
-
Create pipelines to submit for each sample of a particular protocol.
|
369
|
-
|
370
|
-
With the argument (flag) to the priority parameter, there's control
|
371
|
-
over whether to submit pipeline(s) from only one of the project's
|
372
|
-
known pipeline locations with a match for the protocol, or whether to
|
373
|
-
submit pipelines created from all locations with a match for the
|
374
|
-
protocol.
|
375
|
-
|
376
|
-
:param str protocol: name of the protocol/library for which to
|
377
|
-
create pipeline(s)
|
378
|
-
:param bool priority: to only submit pipeline(s) from the first of the
|
379
|
-
pipelines location(s) (indicated in the project config file) that
|
380
|
-
has a match for the given protocol; optional, default True
|
381
|
-
:return Iterable[(PipelineInterface, type, str, str)]:
|
382
|
-
:raises AssertionError: if there's a failure in the attempt to
|
383
|
-
partition an interface's pipeline scripts into disjoint subsets of
|
384
|
-
those already mapped and those not yet mapped
|
385
|
-
"""
|
386
|
-
|
387
|
-
if not priority:
|
388
|
-
raise NotImplementedError(
|
389
|
-
"Currently, only prioritized protocol mapping is supported "
|
390
|
-
"(i.e., pipeline interfaces collection is a prioritized list, "
|
391
|
-
"so only the first interface with a protocol match is used.)"
|
392
|
-
)
|
393
|
-
|
394
|
-
# Pull out the collection of interfaces (potentially one from each of
|
395
|
-
# the locations indicated in the project configuration file) as a
|
396
|
-
# sort of pool of information about possible ways in which to submit
|
397
|
-
# pipeline(s) for sample(s) of the indicated protocol.
|
398
|
-
pifaces = self.interfaces.get_pipeline_interface(protocol)
|
399
|
-
if not pifaces:
|
400
|
-
raise PipelineInterfaceConfigError(
|
401
|
-
"No interfaces for protocol: {}".format(protocol)
|
402
|
-
)
|
403
|
-
|
404
|
-
# convert to a list, in the future we might allow to match multiple
|
405
|
-
pifaces = pifaces if isinstance(pifaces, str) else [pifaces]
|
406
|
-
|
407
|
-
job_submission_bundles = []
|
408
|
-
new_jobs = []
|
409
|
-
|
410
|
-
_LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol))
|
411
|
-
|
412
|
-
for pipe_iface in pifaces:
|
413
|
-
# Determine how to reference the pipeline and where it is.
|
414
|
-
path = pipe_iface["path"]
|
415
|
-
if not (os.path.exists(path) or is_command_callable(path)):
|
416
|
-
_LOGGER.warning("Missing pipeline script: {}".format(path))
|
417
|
-
continue
|
418
|
-
|
419
|
-
# Add this bundle to the collection of ones relevant for the
|
420
|
-
# current PipelineInterface.
|
421
|
-
new_jobs.append(pipe_iface)
|
422
|
-
job_submission_bundles.append(new_jobs)
|
423
|
-
return list(itertools.chain(*job_submission_bundles))
|
424
|
-
|
425
373
|
@staticmethod
|
426
374
|
def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY):
|
427
375
|
"""
|
@@ -441,73 +389,95 @@ class Project(peppyProject):
|
|
441
389
|
schema_set.update([schema_file])
|
442
390
|
return list(schema_set)
|
443
391
|
|
444
|
-
def
|
445
|
-
"""
|
446
|
-
Get a collection of pipestat managers for the selected sample or project.
|
392
|
+
def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value):
|
447
393
|
|
448
|
-
|
449
|
-
output schemas in the pipeline interfaces specified by the sample or project.
|
394
|
+
# First check if pipestat key is in looper_config, if not return false
|
450
395
|
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
return {
|
461
|
-
pipeline_name: PipestatManager(**pipestat_vars)
|
462
|
-
for pipeline_name, pipestat_vars in pipestat_configs.items()
|
463
|
-
}
|
396
|
+
if PIPESTAT_KEY not in self[EXTRA_KEY]:
|
397
|
+
return False
|
398
|
+
elif PIPESTAT_KEY in self[EXTRA_KEY]:
|
399
|
+
if self[EXTRA_KEY][PIPESTAT_KEY] is None:
|
400
|
+
return False
|
401
|
+
else:
|
402
|
+
# If pipestat key is available assume user desires pipestat usage
|
403
|
+
# This should return True OR raise an exception at this point.
|
404
|
+
return self._get_pipestat_configuration(pipeline_type)
|
464
405
|
|
465
|
-
def
|
466
|
-
"""
|
467
|
-
A helper method determining whether pipestat configuration is complete
|
406
|
+
def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
|
468
407
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
408
|
+
# First check if it already exists
|
409
|
+
|
410
|
+
if pipeline_type == PipelineLevel.SAMPLE.value:
|
411
|
+
for piface in self.pipeline_interfaces:
|
412
|
+
|
413
|
+
pipestat_config_path = self._check_for_existing_pipestat_config(piface)
|
414
|
+
|
415
|
+
if not pipestat_config_path:
|
416
|
+
self._create_pipestat_config(piface, pipeline_type)
|
417
|
+
else:
|
418
|
+
piface.psm = PipestatManager(
|
419
|
+
config_file=pipestat_config_path,
|
420
|
+
multi_pipelines=True,
|
421
|
+
pipeline_type="sample",
|
422
|
+
)
|
423
|
+
|
424
|
+
elif pipeline_type == PipelineLevel.PROJECT.value:
|
425
|
+
for prj_piface in self.project_pipeline_interfaces:
|
426
|
+
pipestat_config_path = self._check_for_existing_pipestat_config(
|
427
|
+
prj_piface
|
476
428
|
)
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
429
|
+
|
430
|
+
if not pipestat_config_path:
|
431
|
+
self._create_pipestat_config(prj_piface, pipeline_type)
|
432
|
+
else:
|
433
|
+
prj_piface.psm = PipestatManager(
|
434
|
+
config_file=pipestat_config_path,
|
435
|
+
multi_pipelines=True,
|
436
|
+
pipeline_type="project",
|
481
437
|
)
|
482
|
-
except Exception as e:
|
483
|
-
context = (
|
484
|
-
f"Project '{self.name}'"
|
485
|
-
if project_level
|
486
|
-
else f"Sample '{s.sample_name}'"
|
487
|
-
)
|
488
|
-
_LOGGER.debug(
|
489
|
-
f"Pipestat configuration incomplete for {context}; "
|
490
|
-
f"caught exception: {getattr(e, 'message', repr(e))}"
|
491
|
-
)
|
492
|
-
return False
|
493
438
|
else:
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
439
|
+
_LOGGER.error(
|
440
|
+
msg="No pipeline type specified during pipestat configuration"
|
441
|
+
)
|
442
|
+
|
443
|
+
return True
|
498
444
|
|
499
|
-
def
|
445
|
+
def _check_for_existing_pipestat_config(self, piface):
|
500
446
|
"""
|
501
|
-
|
447
|
+
|
448
|
+
config files should be in looper output directory and named as:
|
449
|
+
|
450
|
+
pipestat_config_pipelinename.yaml
|
451
|
+
|
502
452
|
"""
|
503
453
|
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
454
|
+
# Cannot do much if we cannot retrieve the pipeline_name
|
455
|
+
try:
|
456
|
+
pipeline_name = piface.data["pipeline_name"]
|
457
|
+
except KeyError:
|
458
|
+
raise Exception(
|
459
|
+
"To use pipestat, a pipeline_name must be set in the pipeline interface."
|
509
460
|
)
|
510
461
|
|
462
|
+
config_file_name = f"pipestat_config_{pipeline_name}.yaml"
|
463
|
+
output_dir = expandpath(self.output_dir)
|
464
|
+
|
465
|
+
config_file_path = os.path.join(
|
466
|
+
# os.path.dirname(output_dir), config_file_name
|
467
|
+
output_dir,
|
468
|
+
config_file_name,
|
469
|
+
)
|
470
|
+
|
471
|
+
if os.path.exists(config_file_path):
|
472
|
+
return config_file_path
|
473
|
+
else:
|
474
|
+
return None
|
475
|
+
|
476
|
+
def _create_pipestat_config(self, piface, pipeline_type):
|
477
|
+
"""
|
478
|
+
Each piface needs its own config file and associated psm
|
479
|
+
"""
|
480
|
+
|
511
481
|
if PIPESTAT_KEY in self[EXTRA_KEY]:
|
512
482
|
pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
|
513
483
|
else:
|
@@ -521,13 +491,56 @@ class Project(peppyProject):
|
|
521
491
|
# Expand paths in the event ENV variables were used in config files
|
522
492
|
output_dir = expandpath(self.output_dir)
|
523
493
|
|
524
|
-
|
494
|
+
pipestat_config_dict.update({"output_dir": output_dir})
|
495
|
+
|
496
|
+
if "output_schema" in piface.data:
|
497
|
+
schema_path = expandpath(piface.data["output_schema"])
|
498
|
+
if not os.path.isabs(schema_path):
|
499
|
+
# Get path relative to the pipeline_interface
|
500
|
+
schema_path = os.path.join(
|
501
|
+
os.path.dirname(piface.pipe_iface_file), schema_path
|
502
|
+
)
|
503
|
+
pipestat_config_dict.update({"schema_path": schema_path})
|
504
|
+
try:
|
505
|
+
with open(schema_path, "r") as f:
|
506
|
+
output_schema_data = safe_load(f)
|
507
|
+
output_schema_pipeline_name = output_schema_data[
|
508
|
+
PIPELINE_INTERFACE_PIPELINE_NAME_KEY
|
509
|
+
]
|
510
|
+
except Exception:
|
511
|
+
output_schema_pipeline_name = None
|
512
|
+
else:
|
513
|
+
output_schema_pipeline_name = None
|
514
|
+
if "pipeline_name" in piface.data:
|
515
|
+
pipeline_name = piface.data["pipeline_name"]
|
516
|
+
pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
|
517
|
+
else:
|
518
|
+
pipeline_name = None
|
519
|
+
|
520
|
+
# Warn user if there is a mismatch in pipeline_names from sources!!!
|
521
|
+
if pipeline_name != output_schema_pipeline_name:
|
522
|
+
_LOGGER.warning(
|
523
|
+
msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
|
524
|
+
)
|
525
|
+
|
525
526
|
try:
|
526
527
|
results_file_path = expandpath(pipestat_config_dict["results_file_path"])
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
528
|
+
|
529
|
+
if not os.path.isabs(results_file_path):
|
530
|
+
# e.g. user configures "results.yaml" as results_file_path
|
531
|
+
if "{record_identifier}" in results_file_path:
|
532
|
+
# this is specifically to check if the user wishes to generate a file for EACH record
|
533
|
+
if not os.path.exists(os.path.dirname(results_file_path)):
|
534
|
+
results_file_path = os.path.join(output_dir, results_file_path)
|
535
|
+
else:
|
536
|
+
if not os.path.exists(os.path.dirname(results_file_path)):
|
537
|
+
results_file_path = os.path.join(
|
538
|
+
output_dir, f"{pipeline_name}/", results_file_path
|
539
|
+
)
|
540
|
+
else:
|
541
|
+
# Do nothing because the user has given an absolute file path
|
542
|
+
pass
|
543
|
+
|
531
544
|
pipestat_config_dict.update({"results_file_path": results_file_path})
|
532
545
|
except KeyError:
|
533
546
|
results_file_path = None
|
@@ -540,57 +553,20 @@ class Project(peppyProject):
|
|
540
553
|
except KeyError:
|
541
554
|
flag_file_dir = None
|
542
555
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
pipestat_config_dict.update(
|
548
|
-
{"project_name": pipestat_config_dict["project_name"]}
|
549
|
-
)
|
550
|
-
|
551
|
-
if project_level and "{record_identifier}" in results_file_path:
|
552
|
-
# if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
|
553
|
-
pipestat_config_dict.update(
|
554
|
-
{"record_identifier": "default_project_record_identifier"}
|
555
|
-
)
|
556
|
-
|
557
|
-
pipestat_config_dict.update({"output_dir": output_dir})
|
558
|
-
|
559
|
-
pifaces = (
|
560
|
-
self.project_pipeline_interfaces
|
561
|
-
if project_level
|
562
|
-
else self._interfaces_by_sample[sample_name]
|
556
|
+
# pipestat_config_dict is now updated from all sources and can be written to a yaml.
|
557
|
+
pipestat_config_path = os.path.join(
|
558
|
+
output_dir,
|
559
|
+
f"pipestat_config_{pipeline_name}.yaml",
|
563
560
|
)
|
564
561
|
|
565
|
-
|
566
|
-
|
567
|
-
if "output_schema" in piface.data:
|
568
|
-
schema_path = expandpath(piface.data["output_schema"])
|
569
|
-
if not os.path.isabs(schema_path):
|
570
|
-
# Get path relative to the pipeline_interface
|
571
|
-
schema_path = os.path.join(
|
572
|
-
os.path.dirname(piface.pipe_iface_file), schema_path
|
573
|
-
)
|
574
|
-
pipestat_config_dict.update({"schema_path": schema_path})
|
575
|
-
if "pipeline_name" in piface.data:
|
576
|
-
pipestat_config_dict.update(
|
577
|
-
{"pipeline_name": piface.data["pipeline_name"]}
|
578
|
-
)
|
579
|
-
if "pipeline_type" in piface.data:
|
580
|
-
pipestat_config_dict.update(
|
581
|
-
{"pipeline_type": piface.data["pipeline_type"]}
|
582
|
-
)
|
562
|
+
# Two end goals: create a config file and attach a PipestatManager (psm) to the piface
|
563
|
+
write_pipestat_config(pipestat_config_path, pipestat_config_dict)
|
583
564
|
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
)
|
588
|
-
write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
|
565
|
+
piface.psm = PipestatManager(
|
566
|
+
config_file=pipestat_config_path, multi_pipelines=True
|
567
|
+
)
|
589
568
|
|
590
|
-
|
591
|
-
"config_file": looper_pipestat_config_path,
|
592
|
-
}
|
593
|
-
return ret
|
569
|
+
return None
|
594
570
|
|
595
571
|
def populate_pipeline_outputs(self):
|
596
572
|
"""
|
@@ -657,7 +633,7 @@ class Project(peppyProject):
|
|
657
633
|
pifaces_by_sample = {}
|
658
634
|
for source, sample_names in self._samples_by_interface.items():
|
659
635
|
try:
|
660
|
-
pi = PipelineInterface(source, pipeline_type=
|
636
|
+
pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value)
|
661
637
|
except PipelineInterfaceConfigError as e:
|
662
638
|
_LOGGER.debug(f"Skipping pipeline interface creation: {e}")
|
663
639
|
else:
|
@@ -708,7 +684,9 @@ class Project(peppyProject):
|
|
708
684
|
for source in piface_srcs:
|
709
685
|
source = self._resolve_path_with_cfg(source)
|
710
686
|
try:
|
711
|
-
PipelineInterface(
|
687
|
+
PipelineInterface(
|
688
|
+
source, pipeline_type=PipelineLevel.SAMPLE.value
|
689
|
+
)
|
712
690
|
except (
|
713
691
|
ValidationError,
|
714
692
|
IOError,
|
@@ -9,12 +9,20 @@ properties:
|
|
9
9
|
type: string
|
10
10
|
enum: ["project", "sample"]
|
11
11
|
description: "type of the pipeline, either 'project' or 'sample'"
|
12
|
-
|
13
|
-
type:
|
14
|
-
description: "
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
sample_interface:
|
13
|
+
type: object
|
14
|
+
description: "Section that defines the sample-level pipeline interface"
|
15
|
+
properties:
|
16
|
+
command_template:
|
17
|
+
type: string
|
18
|
+
description: "Jinja2-like template to construct the command to run"
|
19
|
+
project_interface:
|
20
|
+
type: object
|
21
|
+
description: "Section that defines the project-level pipeline interface"
|
22
|
+
properties:
|
23
|
+
command_template:
|
24
|
+
type: string
|
25
|
+
description: "Jinja2-like template to construct the command to run"
|
18
26
|
compute:
|
19
27
|
type: object
|
20
28
|
description: "Section that defines compute environment settings"
|