looper 1.7.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- looper/__main__.py +1 -1
- looper/_version.py +2 -1
- looper/cli_divvy.py +10 -6
- looper/cli_pydantic.py +413 -0
- looper/command_models/DEVELOPER.md +85 -0
- looper/command_models/README.md +4 -0
- looper/command_models/__init__.py +6 -0
- looper/command_models/arguments.py +293 -0
- looper/command_models/commands.py +335 -0
- looper/conductor.py +147 -28
- looper/const.py +9 -0
- looper/divvy.py +56 -47
- looper/exceptions.py +9 -1
- looper/looper.py +196 -169
- looper/pipeline_interface.py +2 -12
- looper/project.py +154 -176
- looper/schemas/pipeline_interface_schema_generic.yaml +14 -6
- looper/utils.py +450 -78
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/METADATA +24 -14
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/RECORD +24 -19
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/WHEEL +1 -1
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/entry_points.txt +1 -1
- looper/cli_looper.py +0 -796
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/LICENSE.txt +0 -0
- {looper-1.7.0.dist-info → looper-2.0.0.dist-info}/top_level.txt +0 -0
looper/project.py
CHANGED
@@ -3,29 +3,28 @@
|
|
3
3
|
import itertools
|
4
4
|
import os
|
5
5
|
|
6
|
+
from yaml import safe_load
|
7
|
+
|
6
8
|
try:
|
7
9
|
from functools import cached_property
|
8
10
|
except ImportError:
|
9
11
|
# cached_property was introduced in python 3.8
|
10
12
|
cached_property = property
|
11
|
-
from logging import getLogger
|
12
13
|
|
13
14
|
from .divvy import ComputingConfiguration
|
14
15
|
from eido import PathAttrNotFoundError, read_schema
|
15
16
|
from jsonschema import ValidationError
|
16
17
|
from pandas.core.common import flatten
|
17
|
-
from peppy import CONFIG_KEY, OUTDIR_KEY
|
18
|
-
from peppy import Project as peppyProject
|
19
18
|
from peppy.utils import make_abs_via_cfg
|
20
|
-
from pipestat import
|
21
|
-
|
22
|
-
from yacman import YAMLConfigManager
|
19
|
+
from pipestat import PipestatManager
|
20
|
+
|
23
21
|
from .conductor import write_pipestat_config
|
24
22
|
|
25
23
|
from .exceptions import *
|
26
24
|
from .pipeline_interface import PipelineInterface
|
27
25
|
from .processed_project import populate_project_paths, populate_sample_paths
|
28
26
|
from .utils import *
|
27
|
+
from .const import PipelineLevel
|
29
28
|
|
30
29
|
__all__ = ["Project"]
|
31
30
|
|
@@ -126,6 +125,12 @@ class Project(peppyProject):
|
|
126
125
|
|
127
126
|
self[EXTRA_KEY] = {}
|
128
127
|
|
128
|
+
try:
|
129
|
+
# For loading PEPs via CSV, Peppy cannot infer project name.
|
130
|
+
name = self.name
|
131
|
+
except NotImplementedError:
|
132
|
+
self.name = None
|
133
|
+
|
129
134
|
# add sample pipeline interface to the project
|
130
135
|
if kwargs.get(SAMPLE_PL_ARG):
|
131
136
|
self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG))
|
@@ -144,7 +149,7 @@ class Project(peppyProject):
|
|
144
149
|
self.dcc = (
|
145
150
|
None
|
146
151
|
if divcfg_path is None
|
147
|
-
else ComputingConfiguration(filepath=divcfg_path)
|
152
|
+
else ComputingConfiguration.from_yaml_file(filepath=divcfg_path)
|
148
153
|
)
|
149
154
|
if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
|
150
155
|
_LOGGER.debug("Ensuring project directories exist")
|
@@ -300,7 +305,7 @@ class Project(peppyProject):
|
|
300
305
|
:return list[looper.PipelineInterface]: list of pipeline interfaces
|
301
306
|
"""
|
302
307
|
return [
|
303
|
-
PipelineInterface(pi, pipeline_type=
|
308
|
+
PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value)
|
304
309
|
for pi in self.project_pipeline_interface_sources
|
305
310
|
]
|
306
311
|
|
@@ -343,7 +348,9 @@ class Project(peppyProject):
|
|
343
348
|
|
344
349
|
:return bool: whether pipestat configuration is complete
|
345
350
|
"""
|
346
|
-
return self._check_if_pipestat_configured(
|
351
|
+
return self._check_if_pipestat_configured(
|
352
|
+
pipeline_type=PipelineLevel.PROJECT.value
|
353
|
+
)
|
347
354
|
|
348
355
|
def get_sample_piface(self, sample_name):
|
349
356
|
"""
|
@@ -363,65 +370,6 @@ class Project(peppyProject):
|
|
363
370
|
except KeyError:
|
364
371
|
return None
|
365
372
|
|
366
|
-
def build_submission_bundles(self, protocol, priority=True):
|
367
|
-
"""
|
368
|
-
Create pipelines to submit for each sample of a particular protocol.
|
369
|
-
|
370
|
-
With the argument (flag) to the priority parameter, there's control
|
371
|
-
over whether to submit pipeline(s) from only one of the project's
|
372
|
-
known pipeline locations with a match for the protocol, or whether to
|
373
|
-
submit pipelines created from all locations with a match for the
|
374
|
-
protocol.
|
375
|
-
|
376
|
-
:param str protocol: name of the protocol/library for which to
|
377
|
-
create pipeline(s)
|
378
|
-
:param bool priority: to only submit pipeline(s) from the first of the
|
379
|
-
pipelines location(s) (indicated in the project config file) that
|
380
|
-
has a match for the given protocol; optional, default True
|
381
|
-
:return Iterable[(PipelineInterface, type, str, str)]:
|
382
|
-
:raises AssertionError: if there's a failure in the attempt to
|
383
|
-
partition an interface's pipeline scripts into disjoint subsets of
|
384
|
-
those already mapped and those not yet mapped
|
385
|
-
"""
|
386
|
-
|
387
|
-
if not priority:
|
388
|
-
raise NotImplementedError(
|
389
|
-
"Currently, only prioritized protocol mapping is supported "
|
390
|
-
"(i.e., pipeline interfaces collection is a prioritized list, "
|
391
|
-
"so only the first interface with a protocol match is used.)"
|
392
|
-
)
|
393
|
-
|
394
|
-
# Pull out the collection of interfaces (potentially one from each of
|
395
|
-
# the locations indicated in the project configuration file) as a
|
396
|
-
# sort of pool of information about possible ways in which to submit
|
397
|
-
# pipeline(s) for sample(s) of the indicated protocol.
|
398
|
-
pifaces = self.interfaces.get_pipeline_interface(protocol)
|
399
|
-
if not pifaces:
|
400
|
-
raise PipelineInterfaceConfigError(
|
401
|
-
"No interfaces for protocol: {}".format(protocol)
|
402
|
-
)
|
403
|
-
|
404
|
-
# convert to a list; in the future we might allow matching multiple
|
405
|
-
pifaces = pifaces if isinstance(pifaces, str) else [pifaces]
|
406
|
-
|
407
|
-
job_submission_bundles = []
|
408
|
-
new_jobs = []
|
409
|
-
|
410
|
-
_LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol))
|
411
|
-
|
412
|
-
for pipe_iface in pifaces:
|
413
|
-
# Determine how to reference the pipeline and where it is.
|
414
|
-
path = pipe_iface["path"]
|
415
|
-
if not (os.path.exists(path) or is_command_callable(path)):
|
416
|
-
_LOGGER.warning("Missing pipeline script: {}".format(path))
|
417
|
-
continue
|
418
|
-
|
419
|
-
# Add this bundle to the collection of ones relevant for the
|
420
|
-
# current PipelineInterface.
|
421
|
-
new_jobs.append(pipe_iface)
|
422
|
-
job_submission_bundles.append(new_jobs)
|
423
|
-
return list(itertools.chain(*job_submission_bundles))
|
424
|
-
|
425
373
|
@staticmethod
|
426
374
|
def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY):
|
427
375
|
"""
|
@@ -441,73 +389,95 @@ class Project(peppyProject):
|
|
441
389
|
schema_set.update([schema_file])
|
442
390
|
return list(schema_set)
|
443
391
|
|
444
|
-
def
|
445
|
-
"""
|
446
|
-
Get a collection of pipestat managers for the selected sample or project.
|
392
|
+
def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value):
|
447
393
|
|
448
|
-
|
449
|
-
output schemas in the pipeline interfaces specified by the sample or project.
|
394
|
+
# First check if pipestat key is in looper_config, if not return false
|
450
395
|
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
return {
|
461
|
-
pipeline_name: PipestatManager(**pipestat_vars)
|
462
|
-
for pipeline_name, pipestat_vars in pipestat_configs.items()
|
463
|
-
}
|
396
|
+
if PIPESTAT_KEY not in self[EXTRA_KEY]:
|
397
|
+
return False
|
398
|
+
elif PIPESTAT_KEY in self[EXTRA_KEY]:
|
399
|
+
if self[EXTRA_KEY][PIPESTAT_KEY] is None:
|
400
|
+
return False
|
401
|
+
else:
|
402
|
+
# If pipestat key is available assume user desires pipestat usage
|
403
|
+
# This should return True OR raise an exception at this point.
|
404
|
+
return self._get_pipestat_configuration(pipeline_type)
|
464
405
|
|
465
|
-
def
|
466
|
-
"""
|
467
|
-
A helper method determining whether pipestat configuration is complete
|
406
|
+
def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
|
468
407
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
408
|
+
# First check if it already exists
|
409
|
+
|
410
|
+
if pipeline_type == PipelineLevel.SAMPLE.value:
|
411
|
+
for piface in self.pipeline_interfaces:
|
412
|
+
|
413
|
+
pipestat_config_path = self._check_for_existing_pipestat_config(piface)
|
414
|
+
|
415
|
+
if not pipestat_config_path:
|
416
|
+
self._create_pipestat_config(piface, pipeline_type)
|
417
|
+
else:
|
418
|
+
piface.psm = PipestatManager(
|
419
|
+
config_file=pipestat_config_path,
|
420
|
+
multi_pipelines=True,
|
421
|
+
pipeline_type="sample",
|
422
|
+
)
|
423
|
+
|
424
|
+
elif pipeline_type == PipelineLevel.PROJECT.value:
|
425
|
+
for prj_piface in self.project_pipeline_interfaces:
|
426
|
+
pipestat_config_path = self._check_for_existing_pipestat_config(
|
427
|
+
prj_piface
|
476
428
|
)
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
429
|
+
|
430
|
+
if not pipestat_config_path:
|
431
|
+
self._create_pipestat_config(prj_piface, pipeline_type)
|
432
|
+
else:
|
433
|
+
prj_piface.psm = PipestatManager(
|
434
|
+
config_file=pipestat_config_path,
|
435
|
+
multi_pipelines=True,
|
436
|
+
pipeline_type="project",
|
481
437
|
)
|
482
|
-
except Exception as e:
|
483
|
-
context = (
|
484
|
-
f"Project '{self.name}'"
|
485
|
-
if project_level
|
486
|
-
else f"Sample '{s.sample_name}'"
|
487
|
-
)
|
488
|
-
_LOGGER.debug(
|
489
|
-
f"Pipestat configuration incomplete for {context}; "
|
490
|
-
f"caught exception: {getattr(e, 'message', repr(e))}"
|
491
|
-
)
|
492
|
-
return False
|
493
438
|
else:
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
439
|
+
_LOGGER.error(
|
440
|
+
msg="No pipeline type specified during pipestat configuration"
|
441
|
+
)
|
442
|
+
|
443
|
+
return True
|
498
444
|
|
499
|
-
def
|
445
|
+
def _check_for_existing_pipestat_config(self, piface):
|
500
446
|
"""
|
501
|
-
|
447
|
+
|
448
|
+
config files should be in looper output directory and named as:
|
449
|
+
|
450
|
+
pipestat_config_pipelinename.yaml
|
451
|
+
|
502
452
|
"""
|
503
453
|
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
454
|
+
# Cannot do much if we cannot retrieve the pipeline_name
|
455
|
+
try:
|
456
|
+
pipeline_name = piface.data["pipeline_name"]
|
457
|
+
except KeyError:
|
458
|
+
raise Exception(
|
459
|
+
"To use pipestat, a pipeline_name must be set in the pipeline interface."
|
509
460
|
)
|
510
461
|
|
462
|
+
config_file_name = f"pipestat_config_{pipeline_name}.yaml"
|
463
|
+
output_dir = expandpath(self.output_dir)
|
464
|
+
|
465
|
+
config_file_path = os.path.join(
|
466
|
+
# os.path.dirname(output_dir), config_file_name
|
467
|
+
output_dir,
|
468
|
+
config_file_name,
|
469
|
+
)
|
470
|
+
|
471
|
+
if os.path.exists(config_file_path):
|
472
|
+
return config_file_path
|
473
|
+
else:
|
474
|
+
return None
|
475
|
+
|
476
|
+
def _create_pipestat_config(self, piface, pipeline_type):
|
477
|
+
"""
|
478
|
+
Each piface needs its own config file and associated psm
|
479
|
+
"""
|
480
|
+
|
511
481
|
if PIPESTAT_KEY in self[EXTRA_KEY]:
|
512
482
|
pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
|
513
483
|
else:
|
@@ -521,13 +491,56 @@ class Project(peppyProject):
|
|
521
491
|
# Expand paths in the event ENV variables were used in config files
|
522
492
|
output_dir = expandpath(self.output_dir)
|
523
493
|
|
524
|
-
|
494
|
+
pipestat_config_dict.update({"output_dir": output_dir})
|
495
|
+
|
496
|
+
if "output_schema" in piface.data:
|
497
|
+
schema_path = expandpath(piface.data["output_schema"])
|
498
|
+
if not os.path.isabs(schema_path):
|
499
|
+
# Get path relative to the pipeline_interface
|
500
|
+
schema_path = os.path.join(
|
501
|
+
os.path.dirname(piface.pipe_iface_file), schema_path
|
502
|
+
)
|
503
|
+
pipestat_config_dict.update({"schema_path": schema_path})
|
504
|
+
try:
|
505
|
+
with open(schema_path, "r") as f:
|
506
|
+
output_schema_data = safe_load(f)
|
507
|
+
output_schema_pipeline_name = output_schema_data[
|
508
|
+
PIPELINE_INTERFACE_PIPELINE_NAME_KEY
|
509
|
+
]
|
510
|
+
except Exception:
|
511
|
+
output_schema_pipeline_name = None
|
512
|
+
else:
|
513
|
+
output_schema_pipeline_name = None
|
514
|
+
if "pipeline_name" in piface.data:
|
515
|
+
pipeline_name = piface.data["pipeline_name"]
|
516
|
+
pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
|
517
|
+
else:
|
518
|
+
pipeline_name = None
|
519
|
+
|
520
|
+
# Warn user if there is a mismatch in pipeline_names from sources!!!
|
521
|
+
if pipeline_name != output_schema_pipeline_name:
|
522
|
+
_LOGGER.warning(
|
523
|
+
msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
|
524
|
+
)
|
525
|
+
|
525
526
|
try:
|
526
527
|
results_file_path = expandpath(pipestat_config_dict["results_file_path"])
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
528
|
+
|
529
|
+
if not os.path.isabs(results_file_path):
|
530
|
+
# e.g. user configures "results.yaml" as results_file_path
|
531
|
+
if "{record_identifier}" in results_file_path:
|
532
|
+
# this is specifically to check if the user wishes to generate a file for EACH record
|
533
|
+
if not os.path.exists(os.path.dirname(results_file_path)):
|
534
|
+
results_file_path = os.path.join(output_dir, results_file_path)
|
535
|
+
else:
|
536
|
+
if not os.path.exists(os.path.dirname(results_file_path)):
|
537
|
+
results_file_path = os.path.join(
|
538
|
+
output_dir, f"{pipeline_name}/", results_file_path
|
539
|
+
)
|
540
|
+
else:
|
541
|
+
# Do nothing because the user has given an absolute file path
|
542
|
+
pass
|
543
|
+
|
531
544
|
pipestat_config_dict.update({"results_file_path": results_file_path})
|
532
545
|
except KeyError:
|
533
546
|
results_file_path = None
|
@@ -540,57 +553,20 @@ class Project(peppyProject):
|
|
540
553
|
except KeyError:
|
541
554
|
flag_file_dir = None
|
542
555
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
pipestat_config_dict.update(
|
548
|
-
{"project_name": pipestat_config_dict["project_name"]}
|
549
|
-
)
|
550
|
-
|
551
|
-
if project_level and "{record_identifier}" in results_file_path:
|
552
|
-
# if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
|
553
|
-
pipestat_config_dict.update(
|
554
|
-
{"record_identifier": "default_project_record_identifier"}
|
555
|
-
)
|
556
|
-
|
557
|
-
pipestat_config_dict.update({"output_dir": output_dir})
|
558
|
-
|
559
|
-
pifaces = (
|
560
|
-
self.project_pipeline_interfaces
|
561
|
-
if project_level
|
562
|
-
else self._interfaces_by_sample[sample_name]
|
556
|
+
# pipestat_config_dict is now updated from all sources and can be written to a YAML file.
|
557
|
+
pipestat_config_path = os.path.join(
|
558
|
+
output_dir,
|
559
|
+
f"pipestat_config_{pipeline_name}.yaml",
|
563
560
|
)
|
564
561
|
|
565
|
-
|
566
|
-
|
567
|
-
if "output_schema" in piface.data:
|
568
|
-
schema_path = expandpath(piface.data["output_schema"])
|
569
|
-
if not os.path.isabs(schema_path):
|
570
|
-
# Get path relative to the pipeline_interface
|
571
|
-
schema_path = os.path.join(
|
572
|
-
os.path.dirname(piface.pipe_iface_file), schema_path
|
573
|
-
)
|
574
|
-
pipestat_config_dict.update({"schema_path": schema_path})
|
575
|
-
if "pipeline_name" in piface.data:
|
576
|
-
pipestat_config_dict.update(
|
577
|
-
{"pipeline_name": piface.data["pipeline_name"]}
|
578
|
-
)
|
579
|
-
if "pipeline_type" in piface.data:
|
580
|
-
pipestat_config_dict.update(
|
581
|
-
{"pipeline_type": piface.data["pipeline_type"]}
|
582
|
-
)
|
562
|
+
# Two end goals: write the config file, then create the psm from it
|
563
|
+
write_pipestat_config(pipestat_config_path, pipestat_config_dict)
|
583
564
|
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
)
|
588
|
-
write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
|
565
|
+
piface.psm = PipestatManager(
|
566
|
+
config_file=pipestat_config_path, multi_pipelines=True
|
567
|
+
)
|
589
568
|
|
590
|
-
|
591
|
-
"config_file": looper_pipestat_config_path,
|
592
|
-
}
|
593
|
-
return ret
|
569
|
+
return None
|
594
570
|
|
595
571
|
def populate_pipeline_outputs(self):
|
596
572
|
"""
|
@@ -657,7 +633,7 @@ class Project(peppyProject):
|
|
657
633
|
pifaces_by_sample = {}
|
658
634
|
for source, sample_names in self._samples_by_interface.items():
|
659
635
|
try:
|
660
|
-
pi = PipelineInterface(source, pipeline_type=
|
636
|
+
pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value)
|
661
637
|
except PipelineInterfaceConfigError as e:
|
662
638
|
_LOGGER.debug(f"Skipping pipeline interface creation: {e}")
|
663
639
|
else:
|
@@ -708,7 +684,9 @@ class Project(peppyProject):
|
|
708
684
|
for source in piface_srcs:
|
709
685
|
source = self._resolve_path_with_cfg(source)
|
710
686
|
try:
|
711
|
-
PipelineInterface(
|
687
|
+
PipelineInterface(
|
688
|
+
source, pipeline_type=PipelineLevel.SAMPLE.value
|
689
|
+
)
|
712
690
|
except (
|
713
691
|
ValidationError,
|
714
692
|
IOError,
|
looper/schemas/pipeline_interface_schema_generic.yaml
CHANGED
@@ -9,12 +9,20 @@ properties:
|
|
9
9
|
type: string
|
10
10
|
enum: ["project", "sample"]
|
11
11
|
description: "type of the pipeline, either 'project' or 'sample'"
|
12
|
-
|
13
|
-
type:
|
14
|
-
description: "
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
sample_interface:
|
13
|
+
type: object
|
14
|
+
description: "Section that defines compute environment settings"
|
15
|
+
properties:
|
16
|
+
command_template:
|
17
|
+
type: string
|
18
|
+
description: "Jinja2-like template to construct the command to run"
|
19
|
+
project_interface:
|
20
|
+
type: object
|
21
|
+
description: "Section that defines compute environment settings"
|
22
|
+
properties:
|
23
|
+
command_template:
|
24
|
+
type: string
|
25
|
+
description: "Jinja2-like template to construct the command to run"
|
18
26
|
compute:
|
19
27
|
type: object
|
20
28
|
description: "Section that defines compute environment settings"
|