looper 1.7.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
looper/project.py CHANGED
@@ -3,29 +3,28 @@
3
3
  import itertools
4
4
  import os
5
5
 
6
+ from yaml import safe_load
7
+
6
8
  try:
7
9
  from functools import cached_property
8
10
  except ImportError:
9
11
  # cached_property was introduced in python 3.8
10
12
  cached_property = property
11
- from logging import getLogger
12
13
 
13
14
  from .divvy import ComputingConfiguration
14
15
  from eido import PathAttrNotFoundError, read_schema
15
16
  from jsonschema import ValidationError
16
17
  from pandas.core.common import flatten
17
- from peppy import CONFIG_KEY, OUTDIR_KEY
18
- from peppy import Project as peppyProject
19
18
  from peppy.utils import make_abs_via_cfg
20
- from pipestat import PipestatError, PipestatManager
21
- from ubiquerg import expandpath, is_command_callable
22
- from yacman import YAMLConfigManager
19
+ from pipestat import PipestatManager
20
+
23
21
  from .conductor import write_pipestat_config
24
22
 
25
23
  from .exceptions import *
26
24
  from .pipeline_interface import PipelineInterface
27
25
  from .processed_project import populate_project_paths, populate_sample_paths
28
26
  from .utils import *
27
+ from .const import PipelineLevel
29
28
 
30
29
  __all__ = ["Project"]
31
30
 
@@ -126,6 +125,12 @@ class Project(peppyProject):
126
125
 
127
126
  self[EXTRA_KEY] = {}
128
127
 
128
+ try:
129
+ # For loading PEPs via CSV, Peppy cannot infer project name.
130
+ name = self.name
131
+ except NotImplementedError:
132
+ self.name = None
133
+
129
134
  # add sample pipeline interface to the project
130
135
  if kwargs.get(SAMPLE_PL_ARG):
131
136
  self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG))
@@ -144,7 +149,7 @@ class Project(peppyProject):
144
149
  self.dcc = (
145
150
  None
146
151
  if divcfg_path is None
147
- else ComputingConfiguration(filepath=divcfg_path)
152
+ else ComputingConfiguration.from_yaml_file(filepath=divcfg_path)
148
153
  )
149
154
  if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
150
155
  _LOGGER.debug("Ensuring project directories exist")
@@ -300,7 +305,7 @@ class Project(peppyProject):
300
305
  :return list[looper.PipelineInterface]: list of pipeline interfaces
301
306
  """
302
307
  return [
303
- PipelineInterface(pi, pipeline_type="project")
308
+ PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value)
304
309
  for pi in self.project_pipeline_interface_sources
305
310
  ]
306
311
 
@@ -343,7 +348,9 @@ class Project(peppyProject):
343
348
 
344
349
  :return bool: whether pipestat configuration is complete
345
350
  """
346
- return self._check_if_pipestat_configured(project_level=True)
351
+ return self._check_if_pipestat_configured(
352
+ pipeline_type=PipelineLevel.PROJECT.value
353
+ )
347
354
 
348
355
  def get_sample_piface(self, sample_name):
349
356
  """
@@ -363,65 +370,6 @@ class Project(peppyProject):
363
370
  except KeyError:
364
371
  return None
365
372
 
366
- def build_submission_bundles(self, protocol, priority=True):
367
- """
368
- Create pipelines to submit for each sample of a particular protocol.
369
-
370
- With the argument (flag) to the priority parameter, there's control
371
- over whether to submit pipeline(s) from only one of the project's
372
- known pipeline locations with a match for the protocol, or whether to
373
- submit pipelines created from all locations with a match for the
374
- protocol.
375
-
376
- :param str protocol: name of the protocol/library for which to
377
- create pipeline(s)
378
- :param bool priority: to only submit pipeline(s) from the first of the
379
- pipelines location(s) (indicated in the project config file) that
380
- has a match for the given protocol; optional, default True
381
- :return Iterable[(PipelineInterface, type, str, str)]:
382
- :raises AssertionError: if there's a failure in the attempt to
383
- partition an interface's pipeline scripts into disjoint subsets of
384
- those already mapped and those not yet mapped
385
- """
386
-
387
- if not priority:
388
- raise NotImplementedError(
389
- "Currently, only prioritized protocol mapping is supported "
390
- "(i.e., pipeline interfaces collection is a prioritized list, "
391
- "so only the first interface with a protocol match is used.)"
392
- )
393
-
394
- # Pull out the collection of interfaces (potentially one from each of
395
- # the locations indicated in the project configuration file) as a
396
- # sort of pool of information about possible ways in which to submit
397
- # pipeline(s) for sample(s) of the indicated protocol.
398
- pifaces = self.interfaces.get_pipeline_interface(protocol)
399
- if not pifaces:
400
- raise PipelineInterfaceConfigError(
401
- "No interfaces for protocol: {}".format(protocol)
402
- )
403
-
404
- # convert to a list; in the future we might allow matching multiple
405
- pifaces = pifaces if isinstance(pifaces, str) else [pifaces]
406
-
407
- job_submission_bundles = []
408
- new_jobs = []
409
-
410
- _LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol))
411
-
412
- for pipe_iface in pifaces:
413
- # Determine how to reference the pipeline and where it is.
414
- path = pipe_iface["path"]
415
- if not (os.path.exists(path) or is_command_callable(path)):
416
- _LOGGER.warning("Missing pipeline script: {}".format(path))
417
- continue
418
-
419
- # Add this bundle to the collection of ones relevant for the
420
- # current PipelineInterface.
421
- new_jobs.append(pipe_iface)
422
- job_submission_bundles.append(new_jobs)
423
- return list(itertools.chain(*job_submission_bundles))
424
-
425
373
  @staticmethod
426
374
  def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY):
427
375
  """
@@ -441,73 +389,95 @@ class Project(peppyProject):
441
389
  schema_set.update([schema_file])
442
390
  return list(schema_set)
443
391
 
444
- def get_pipestat_managers(self, sample_name=None, project_level=False):
445
- """
446
- Get a collection of pipestat managers for the selected sample or project.
392
+ def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value):
447
393
 
448
- The number of pipestat managers corresponds to the number of unique
449
- output schemas in the pipeline interfaces specified by the sample or project.
394
+ # First check if pipestat key is in looper_config, if not return false
450
395
 
451
- :param str sample_name: sample name to get pipestat managers for
452
- :param bool project_level: whether the project PipestatManagers
453
- should be returned
454
- :return dict[str, pipestat.PipestatManager]: a mapping of pipestat
455
- managers by pipeline interface name
456
- """
457
- pipestat_configs = self._get_pipestat_configuration(
458
- sample_name=sample_name, project_level=project_level
459
- )
460
- return {
461
- pipeline_name: PipestatManager(**pipestat_vars)
462
- for pipeline_name, pipestat_vars in pipestat_configs.items()
463
- }
396
+ if PIPESTAT_KEY not in self[EXTRA_KEY]:
397
+ return False
398
+ elif PIPESTAT_KEY in self[EXTRA_KEY]:
399
+ if self[EXTRA_KEY][PIPESTAT_KEY] is None:
400
+ return False
401
+ else:
402
+ # If pipestat key is available assume user desires pipestat usage
403
+ # This should return True OR raise an exception at this point.
404
+ return self._get_pipestat_configuration(pipeline_type)
464
405
 
465
- def _check_if_pipestat_configured(self, project_level=False):
466
- """
467
- A helper method determining whether pipestat configuration is complete
406
+ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
468
407
 
469
- :param bool project_level: whether the project pipestat config should be checked
470
- :return bool: whether pipestat configuration is complete
471
- """
472
- try:
473
- if project_level:
474
- pipestat_configured = self._get_pipestat_configuration(
475
- sample_name=None, project_level=project_level
408
+ # First check if it already exists
409
+
410
+ if pipeline_type == PipelineLevel.SAMPLE.value:
411
+ for piface in self.pipeline_interfaces:
412
+
413
+ pipestat_config_path = self._check_for_existing_pipestat_config(piface)
414
+
415
+ if not pipestat_config_path:
416
+ self._create_pipestat_config(piface, pipeline_type)
417
+ else:
418
+ piface.psm = PipestatManager(
419
+ config_file=pipestat_config_path,
420
+ multi_pipelines=True,
421
+ pipeline_type="sample",
422
+ )
423
+
424
+ elif pipeline_type == PipelineLevel.PROJECT.value:
425
+ for prj_piface in self.project_pipeline_interfaces:
426
+ pipestat_config_path = self._check_for_existing_pipestat_config(
427
+ prj_piface
476
428
  )
477
- else:
478
- for s in self.samples:
479
- pipestat_configured = self._get_pipestat_configuration(
480
- sample_name=s.sample_name
429
+
430
+ if not pipestat_config_path:
431
+ self._create_pipestat_config(prj_piface, pipeline_type)
432
+ else:
433
+ prj_piface.psm = PipestatManager(
434
+ config_file=pipestat_config_path,
435
+ multi_pipelines=True,
436
+ pipeline_type="project",
481
437
  )
482
- except Exception as e:
483
- context = (
484
- f"Project '{self.name}'"
485
- if project_level
486
- else f"Sample '{s.sample_name}'"
487
- )
488
- _LOGGER.debug(
489
- f"Pipestat configuration incomplete for {context}; "
490
- f"caught exception: {getattr(e, 'message', repr(e))}"
491
- )
492
- return False
493
438
  else:
494
- if pipestat_configured is not None and pipestat_configured != {}:
495
- return True
496
- else:
497
- return False
439
+ _LOGGER.error(
440
+ msg="No pipeline type specified during pipestat configuration"
441
+ )
442
+
443
+ return True
498
444
 
499
- def _get_pipestat_configuration(self, sample_name=None, project_level=False):
445
+ def _check_for_existing_pipestat_config(self, piface):
500
446
  """
501
- Get all required pipestat configuration variables from looper_config file
447
+
448
+ config files should be in looper output directory and named as:
449
+
450
+ pipestat_config_pipelinename.yaml
451
+
502
452
  """
503
453
 
504
- ret = {}
505
- if not project_level and sample_name is None:
506
- raise ValueError(
507
- "Must provide the sample_name to determine the "
508
- "sample to get the PipestatManagers for"
454
+ # Cannot do much if we cannot retrieve the pipeline_name
455
+ try:
456
+ pipeline_name = piface.data["pipeline_name"]
457
+ except KeyError:
458
+ raise Exception(
459
+ "To use pipestat, a pipeline_name must be set in the pipeline interface."
509
460
  )
510
461
 
462
+ config_file_name = f"pipestat_config_{pipeline_name}.yaml"
463
+ output_dir = expandpath(self.output_dir)
464
+
465
+ config_file_path = os.path.join(
466
+ # os.path.dirname(output_dir), config_file_name
467
+ output_dir,
468
+ config_file_name,
469
+ )
470
+
471
+ if os.path.exists(config_file_path):
472
+ return config_file_path
473
+ else:
474
+ return None
475
+
476
+ def _create_pipestat_config(self, piface, pipeline_type):
477
+ """
478
+ Each piface needs its own config file and associated psm
479
+ """
480
+
511
481
  if PIPESTAT_KEY in self[EXTRA_KEY]:
512
482
  pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
513
483
  else:
@@ -521,13 +491,56 @@ class Project(peppyProject):
521
491
  # Expand paths in the event ENV variables were used in config files
522
492
  output_dir = expandpath(self.output_dir)
523
493
 
524
- # Get looper user configured items first and update the pipestat_config_dict
494
+ pipestat_config_dict.update({"output_dir": output_dir})
495
+
496
+ if "output_schema" in piface.data:
497
+ schema_path = expandpath(piface.data["output_schema"])
498
+ if not os.path.isabs(schema_path):
499
+ # Get path relative to the pipeline_interface
500
+ schema_path = os.path.join(
501
+ os.path.dirname(piface.pipe_iface_file), schema_path
502
+ )
503
+ pipestat_config_dict.update({"schema_path": schema_path})
504
+ try:
505
+ with open(schema_path, "r") as f:
506
+ output_schema_data = safe_load(f)
507
+ output_schema_pipeline_name = output_schema_data[
508
+ PIPELINE_INTERFACE_PIPELINE_NAME_KEY
509
+ ]
510
+ except Exception:
511
+ output_schema_pipeline_name = None
512
+ else:
513
+ output_schema_pipeline_name = None
514
+ if "pipeline_name" in piface.data:
515
+ pipeline_name = piface.data["pipeline_name"]
516
+ pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
517
+ else:
518
+ pipeline_name = None
519
+
520
+ # Warn user if there is a mismatch in pipeline_names from sources!!!
521
+ if pipeline_name != output_schema_pipeline_name:
522
+ _LOGGER.warning(
523
+ msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
524
+ )
525
+
525
526
  try:
526
527
  results_file_path = expandpath(pipestat_config_dict["results_file_path"])
527
- if not os.path.exists(os.path.dirname(results_file_path)):
528
- results_file_path = os.path.join(
529
- os.path.dirname(output_dir), results_file_path
530
- )
528
+
529
+ if not os.path.isabs(results_file_path):
530
+ # e.g. user configures "results.yaml" as results_file_path
531
+ if "{record_identifier}" in results_file_path:
532
+ # this is specifically to check if the user wishes to generate a file for EACH record
533
+ if not os.path.exists(os.path.dirname(results_file_path)):
534
+ results_file_path = os.path.join(output_dir, results_file_path)
535
+ else:
536
+ if not os.path.exists(os.path.dirname(results_file_path)):
537
+ results_file_path = os.path.join(
538
+ output_dir, f"{pipeline_name}/", results_file_path
539
+ )
540
+ else:
541
+ # Do nothing because the user has given an absolute file path
542
+ pass
543
+
531
544
  pipestat_config_dict.update({"results_file_path": results_file_path})
532
545
  except KeyError:
533
546
  results_file_path = None
@@ -540,57 +553,20 @@ class Project(peppyProject):
540
553
  except KeyError:
541
554
  flag_file_dir = None
542
555
 
543
- if sample_name:
544
- pipestat_config_dict.update({"record_identifier": sample_name})
545
-
546
- if project_level and "project_name" in pipestat_config_dict:
547
- pipestat_config_dict.update(
548
- {"project_name": pipestat_config_dict["project_name"]}
549
- )
550
-
551
- if project_level and "{record_identifier}" in results_file_path:
552
- # if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
553
- pipestat_config_dict.update(
554
- {"record_identifier": "default_project_record_identifier"}
555
- )
556
-
557
- pipestat_config_dict.update({"output_dir": output_dir})
558
-
559
- pifaces = (
560
- self.project_pipeline_interfaces
561
- if project_level
562
- else self._interfaces_by_sample[sample_name]
556
+ # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
557
+ pipestat_config_path = os.path.join(
558
+ output_dir,
559
+ f"pipestat_config_{pipeline_name}.yaml",
563
560
  )
564
561
 
565
- for piface in pifaces:
566
- # We must also obtain additional pipestat items from the pipeline author's piface
567
- if "output_schema" in piface.data:
568
- schema_path = expandpath(piface.data["output_schema"])
569
- if not os.path.isabs(schema_path):
570
- # Get path relative to the pipeline_interface
571
- schema_path = os.path.join(
572
- os.path.dirname(piface.pipe_iface_file), schema_path
573
- )
574
- pipestat_config_dict.update({"schema_path": schema_path})
575
- if "pipeline_name" in piface.data:
576
- pipestat_config_dict.update(
577
- {"pipeline_name": piface.data["pipeline_name"]}
578
- )
579
- if "pipeline_type" in piface.data:
580
- pipestat_config_dict.update(
581
- {"pipeline_type": piface.data["pipeline_type"]}
582
- )
562
+ # Two end goals, create a config file
563
+ write_pipestat_config(pipestat_config_path, pipestat_config_dict)
583
564
 
584
- # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
585
- looper_pipestat_config_path = os.path.join(
586
- os.path.dirname(output_dir), "looper_pipestat_config.yaml"
587
- )
588
- write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
565
+ piface.psm = PipestatManager(
566
+ config_file=pipestat_config_path, multi_pipelines=True
567
+ )
589
568
 
590
- ret[piface.pipeline_name] = {
591
- "config_file": looper_pipestat_config_path,
592
- }
593
- return ret
569
+ return None
594
570
 
595
571
  def populate_pipeline_outputs(self):
596
572
  """
@@ -657,7 +633,7 @@ class Project(peppyProject):
657
633
  pifaces_by_sample = {}
658
634
  for source, sample_names in self._samples_by_interface.items():
659
635
  try:
660
- pi = PipelineInterface(source, pipeline_type="sample")
636
+ pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value)
661
637
  except PipelineInterfaceConfigError as e:
662
638
  _LOGGER.debug(f"Skipping pipeline interface creation: {e}")
663
639
  else:
@@ -708,7 +684,9 @@ class Project(peppyProject):
708
684
  for source in piface_srcs:
709
685
  source = self._resolve_path_with_cfg(source)
710
686
  try:
711
- PipelineInterface(source, pipeline_type="sample")
687
+ PipelineInterface(
688
+ source, pipeline_type=PipelineLevel.SAMPLE.value
689
+ )
712
690
  except (
713
691
  ValidationError,
714
692
  IOError,
@@ -9,12 +9,20 @@ properties:
9
9
  type: string
10
10
  enum: ["project", "sample"]
11
11
  description: "type of the pipeline, either 'project' or 'sample'"
12
- command_template:
13
- type: string
14
- description: "Jinja2-like template to construct the command to run"
15
- path:
16
- type: string
17
- description: "path to the pipeline program. Relative to pipeline interface file or absolute."
12
+ sample_interface:
13
+ type: object
14
+ description: "Section that defines compute environment settings"
15
+ properties:
16
+ command_template:
17
+ type: string
18
+ description: "Jinja2-like template to construct the command to run"
19
+ project_interface:
20
+ type: object
21
+ description: "Section that defines compute environment settings"
22
+ properties:
23
+ command_template:
24
+ type: string
25
+ description: "Jinja2-like template to construct the command to run"
18
26
  compute:
19
27
  type: object
20
28
  description: "Section that defines compute environment settings"