looper 1.7.0a1__py3-none-any.whl → 2.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
looper/project.py CHANGED
@@ -3,29 +3,28 @@
3
3
  import itertools
4
4
  import os
5
5
 
6
+ from yaml import safe_load
7
+
6
8
  try:
7
9
  from functools import cached_property
8
10
  except ImportError:
9
11
  # cached_property was introduced in python 3.8
10
12
  cached_property = property
11
- from logging import getLogger
12
13
 
13
14
  from .divvy import ComputingConfiguration
14
15
  from eido import PathAttrNotFoundError, read_schema
15
16
  from jsonschema import ValidationError
16
17
  from pandas.core.common import flatten
17
- from peppy import CONFIG_KEY, OUTDIR_KEY
18
- from peppy import Project as peppyProject
19
18
  from peppy.utils import make_abs_via_cfg
20
- from pipestat import PipestatError, PipestatManager
21
- from ubiquerg import expandpath, is_command_callable
22
- from yacman import YAMLConfigManager
19
+ from pipestat import PipestatManager
20
+
23
21
  from .conductor import write_pipestat_config
24
22
 
25
23
  from .exceptions import *
26
24
  from .pipeline_interface import PipelineInterface
27
25
  from .processed_project import populate_project_paths, populate_sample_paths
28
26
  from .utils import *
27
+ from .const import PipelineLevel
29
28
 
30
29
  __all__ = ["Project"]
31
30
 
@@ -126,6 +125,12 @@ class Project(peppyProject):
126
125
 
127
126
  self[EXTRA_KEY] = {}
128
127
 
128
+ try:
129
+ # For loading PEPs via CSV, Peppy cannot infer project name.
130
+ name = self.name
131
+ except NotImplementedError:
132
+ self.name = None
133
+
129
134
  # add sample pipeline interface to the project
130
135
  if kwargs.get(SAMPLE_PL_ARG):
131
136
  self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG))
@@ -144,7 +149,7 @@ class Project(peppyProject):
144
149
  self.dcc = (
145
150
  None
146
151
  if divcfg_path is None
147
- else ComputingConfiguration(filepath=divcfg_path)
152
+ else ComputingConfiguration.from_yaml_file(filepath=divcfg_path)
148
153
  )
149
154
  if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
150
155
  _LOGGER.debug("Ensuring project directories exist")
@@ -300,7 +305,7 @@ class Project(peppyProject):
300
305
  :return list[looper.PipelineInterface]: list of pipeline interfaces
301
306
  """
302
307
  return [
303
- PipelineInterface(pi, pipeline_type="project")
308
+ PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value)
304
309
  for pi in self.project_pipeline_interface_sources
305
310
  ]
306
311
 
@@ -343,7 +348,9 @@ class Project(peppyProject):
343
348
 
344
349
  :return bool: whether pipestat configuration is complete
345
350
  """
346
- return self._check_if_pipestat_configured(project_level=True)
351
+ return self._check_if_pipestat_configured(
352
+ pipeline_type=PipelineLevel.PROJECT.value
353
+ )
347
354
 
348
355
  def get_sample_piface(self, sample_name):
349
356
  """
@@ -363,65 +370,6 @@ class Project(peppyProject):
363
370
  except KeyError:
364
371
  return None
365
372
 
366
- def build_submission_bundles(self, protocol, priority=True):
367
- """
368
- Create pipelines to submit for each sample of a particular protocol.
369
-
370
- With the argument (flag) to the priority parameter, there's control
371
- over whether to submit pipeline(s) from only one of the project's
372
- known pipeline locations with a match for the protocol, or whether to
373
- submit pipelines created from all locations with a match for the
374
- protocol.
375
-
376
- :param str protocol: name of the protocol/library for which to
377
- create pipeline(s)
378
- :param bool priority: to only submit pipeline(s) from the first of the
379
- pipelines location(s) (indicated in the project config file) that
380
- has a match for the given protocol; optional, default True
381
- :return Iterable[(PipelineInterface, type, str, str)]:
382
- :raises AssertionError: if there's a failure in the attempt to
383
- partition an interface's pipeline scripts into disjoint subsets of
384
- those already mapped and those not yet mapped
385
- """
386
-
387
- if not priority:
388
- raise NotImplementedError(
389
- "Currently, only prioritized protocol mapping is supported "
390
- "(i.e., pipeline interfaces collection is a prioritized list, "
391
- "so only the first interface with a protocol match is used.)"
392
- )
393
-
394
- # Pull out the collection of interfaces (potentially one from each of
395
- # the locations indicated in the project configuration file) as a
396
- # sort of pool of information about possible ways in which to submit
397
- # pipeline(s) for sample(s) of the indicated protocol.
398
- pifaces = self.interfaces.get_pipeline_interface(protocol)
399
- if not pifaces:
400
- raise PipelineInterfaceConfigError(
401
- "No interfaces for protocol: {}".format(protocol)
402
- )
403
-
404
- # convert to a list, in the future we might allow to match multiple
405
- pifaces = pifaces if isinstance(pifaces, str) else [pifaces]
406
-
407
- job_submission_bundles = []
408
- new_jobs = []
409
-
410
- _LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol))
411
-
412
- for pipe_iface in pifaces:
413
- # Determine how to reference the pipeline and where it is.
414
- path = pipe_iface["path"]
415
- if not (os.path.exists(path) or is_command_callable(path)):
416
- _LOGGER.warning("Missing pipeline script: {}".format(path))
417
- continue
418
-
419
- # Add this bundle to the collection of ones relevant for the
420
- # current PipelineInterface.
421
- new_jobs.append(pipe_iface)
422
- job_submission_bundles.append(new_jobs)
423
- return list(itertools.chain(*job_submission_bundles))
424
-
425
373
  @staticmethod
426
374
  def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY):
427
375
  """
@@ -441,73 +389,95 @@ class Project(peppyProject):
441
389
  schema_set.update([schema_file])
442
390
  return list(schema_set)
443
391
 
444
- def get_pipestat_managers(self, sample_name=None, project_level=False):
445
- """
446
- Get a collection of pipestat managers for the selected sample or project.
392
+ def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value):
447
393
 
448
- The number of pipestat managers corresponds to the number of unique
449
- output schemas in the pipeline interfaces specified by the sample or project.
394
+ # First check if pipestat key is in looper_config, if not return false
450
395
 
451
- :param str sample_name: sample name to get pipestat managers for
452
- :param bool project_level: whether the project PipestatManagers
453
- should be returned
454
- :return dict[str, pipestat.PipestatManager]: a mapping of pipestat
455
- managers by pipeline interface name
456
- """
457
- pipestat_configs = self._get_pipestat_configuration(
458
- sample_name=sample_name, project_level=project_level
459
- )
460
- return {
461
- pipeline_name: PipestatManager(**pipestat_vars)
462
- for pipeline_name, pipestat_vars in pipestat_configs.items()
463
- }
396
+ if PIPESTAT_KEY not in self[EXTRA_KEY]:
397
+ return False
398
+ elif PIPESTAT_KEY in self[EXTRA_KEY]:
399
+ if self[EXTRA_KEY][PIPESTAT_KEY] is None:
400
+ return False
401
+ else:
402
+ # If pipestat key is available assume user desires pipestat usage
403
+ # This should return True OR raise an exception at this point.
404
+ return self._get_pipestat_configuration(pipeline_type)
464
405
 
465
- def _check_if_pipestat_configured(self, project_level=False):
466
- """
467
- A helper method determining whether pipestat configuration is complete
406
+ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
468
407
 
469
- :param bool project_level: whether the project pipestat config should be checked
470
- :return bool: whether pipestat configuration is complete
471
- """
472
- try:
473
- if project_level:
474
- pipestat_configured = self._get_pipestat_configuration(
475
- sample_name=None, project_level=project_level
408
+ # First check if it already exists
409
+
410
+ if pipeline_type == PipelineLevel.SAMPLE.value:
411
+ for piface in self.pipeline_interfaces:
412
+
413
+ pipestat_config_path = self._check_for_existing_pipestat_config(piface)
414
+
415
+ if not pipestat_config_path:
416
+ self._create_pipestat_config(piface, pipeline_type)
417
+ else:
418
+ piface.psm = PipestatManager(
419
+ config_file=pipestat_config_path,
420
+ multi_pipelines=True,
421
+ pipeline_type="sample",
422
+ )
423
+
424
+ elif pipeline_type == PipelineLevel.PROJECT.value:
425
+ for prj_piface in self.project_pipeline_interfaces:
426
+ pipestat_config_path = self._check_for_existing_pipestat_config(
427
+ prj_piface
476
428
  )
477
- else:
478
- for s in self.samples:
479
- pipestat_configured = self._get_pipestat_configuration(
480
- sample_name=s.sample_name
429
+
430
+ if not pipestat_config_path:
431
+ self._create_pipestat_config(prj_piface, pipeline_type)
432
+ else:
433
+ prj_piface.psm = PipestatManager(
434
+ config_file=pipestat_config_path,
435
+ multi_pipelines=True,
436
+ pipeline_type="project",
481
437
  )
482
- except Exception as e:
483
- context = (
484
- f"Project '{self.name}'"
485
- if project_level
486
- else f"Sample '{s.sample_name}'"
487
- )
488
- _LOGGER.debug(
489
- f"Pipestat configuration incomplete for {context}; "
490
- f"caught exception: {getattr(e, 'message', repr(e))}"
491
- )
492
- return False
493
438
  else:
494
- if pipestat_configured is not None and pipestat_configured != {}:
495
- return True
496
- else:
497
- return False
439
+ _LOGGER.error(
440
+ msg="No pipeline type specified during pipestat configuration"
441
+ )
442
+
443
+ return True
498
444
 
499
- def _get_pipestat_configuration(self, sample_name=None, project_level=False):
445
+ def _check_for_existing_pipestat_config(self, piface):
500
446
  """
501
- Get all required pipestat configuration variables from looper_config file
447
+
448
+ config files should be in looper output directory and named as:
449
+
450
+ pipestat_config_pipelinename.yaml
451
+
502
452
  """
503
453
 
504
- ret = {}
505
- if not project_level and sample_name is None:
506
- raise ValueError(
507
- "Must provide the sample_name to determine the "
508
- "sample to get the PipestatManagers for"
454
+ # Cannot do much if we cannot retrieve the pipeline_name
455
+ try:
456
+ pipeline_name = piface.data["pipeline_name"]
457
+ except KeyError:
458
+ raise Exception(
459
+ "To use pipestat, a pipeline_name must be set in the pipeline interface."
509
460
  )
510
461
 
462
+ config_file_name = f"pipestat_config_{pipeline_name}.yaml"
463
+ output_dir = expandpath(self.output_dir)
464
+
465
+ config_file_path = os.path.join(
466
+ # os.path.dirname(output_dir), config_file_name
467
+ output_dir,
468
+ config_file_name,
469
+ )
470
+
471
+ if os.path.exists(config_file_path):
472
+ return config_file_path
473
+ else:
474
+ return None
475
+
476
+ def _create_pipestat_config(self, piface, pipeline_type):
477
+ """
478
+ Each piface needs its own config file and associated psm
479
+ """
480
+
511
481
  if PIPESTAT_KEY in self[EXTRA_KEY]:
512
482
  pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
513
483
  else:
@@ -521,13 +491,56 @@ class Project(peppyProject):
521
491
  # Expand paths in the event ENV variables were used in config files
522
492
  output_dir = expandpath(self.output_dir)
523
493
 
524
- # Get looper user configured items first and update the pipestat_config_dict
494
+ pipestat_config_dict.update({"output_dir": output_dir})
495
+
496
+ if "output_schema" in piface.data:
497
+ schema_path = expandpath(piface.data["output_schema"])
498
+ if not os.path.isabs(schema_path):
499
+ # Get path relative to the pipeline_interface
500
+ schema_path = os.path.join(
501
+ os.path.dirname(piface.pipe_iface_file), schema_path
502
+ )
503
+ pipestat_config_dict.update({"schema_path": schema_path})
504
+ try:
505
+ with open(schema_path, "r") as f:
506
+ output_schema_data = safe_load(f)
507
+ output_schema_pipeline_name = output_schema_data[
508
+ PIPELINE_INTERFACE_PIPELINE_NAME_KEY
509
+ ]
510
+ except Exception:
511
+ output_schema_pipeline_name = None
512
+ else:
513
+ output_schema_pipeline_name = None
514
+ if "pipeline_name" in piface.data:
515
+ pipeline_name = piface.data["pipeline_name"]
516
+ pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
517
+ else:
518
+ pipeline_name = None
519
+
520
+ # Warn user if there is a mismatch in pipeline_names from sources!!!
521
+ if pipeline_name != output_schema_pipeline_name:
522
+ _LOGGER.warning(
523
+ msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
524
+ )
525
+
525
526
  try:
526
527
  results_file_path = expandpath(pipestat_config_dict["results_file_path"])
527
- if not os.path.exists(os.path.dirname(results_file_path)):
528
- results_file_path = os.path.join(
529
- os.path.dirname(output_dir), results_file_path
530
- )
528
+
529
+ if not os.path.isabs(results_file_path):
530
+ # e.g. user configures "results.yaml" as results_file_path
531
+ if "{record_identifier}" in results_file_path:
532
+ # this is specifically to check if the user wishes to generate a file for EACH record
533
+ if not os.path.exists(os.path.dirname(results_file_path)):
534
+ results_file_path = os.path.join(output_dir, results_file_path)
535
+ else:
536
+ if not os.path.exists(os.path.dirname(results_file_path)):
537
+ results_file_path = os.path.join(
538
+ output_dir, f"{pipeline_name}/", results_file_path
539
+ )
540
+ else:
541
+ # Do nothing because the user has given an absolute file path
542
+ pass
543
+
531
544
  pipestat_config_dict.update({"results_file_path": results_file_path})
532
545
  except KeyError:
533
546
  results_file_path = None
@@ -540,57 +553,20 @@ class Project(peppyProject):
540
553
  except KeyError:
541
554
  flag_file_dir = None
542
555
 
543
- if sample_name:
544
- pipestat_config_dict.update({"record_identifier": sample_name})
545
-
546
- if project_level and "project_name" in pipestat_config_dict:
547
- pipestat_config_dict.update(
548
- {"project_name": pipestat_config_dict["project_name"]}
549
- )
550
-
551
- if project_level and "{record_identifier}" in results_file_path:
552
- # if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
553
- pipestat_config_dict.update(
554
- {"record_identifier": "default_project_record_identifier"}
555
- )
556
-
557
- pipestat_config_dict.update({"output_dir": output_dir})
558
-
559
- pifaces = (
560
- self.project_pipeline_interfaces
561
- if project_level
562
- else self._interfaces_by_sample[sample_name]
556
+ # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
557
+ pipestat_config_path = os.path.join(
558
+ output_dir,
559
+ f"pipestat_config_{pipeline_name}.yaml",
563
560
  )
564
561
 
565
- for piface in pifaces:
566
- # We must also obtain additional pipestat items from the pipeline author's piface
567
- if "output_schema" in piface.data:
568
- schema_path = expandpath(piface.data["output_schema"])
569
- if not os.path.isabs(schema_path):
570
- # Get path relative to the pipeline_interface
571
- schema_path = os.path.join(
572
- os.path.dirname(piface.pipe_iface_file), schema_path
573
- )
574
- pipestat_config_dict.update({"schema_path": schema_path})
575
- if "pipeline_name" in piface.data:
576
- pipestat_config_dict.update(
577
- {"pipeline_name": piface.data["pipeline_name"]}
578
- )
579
- if "pipeline_type" in piface.data:
580
- pipestat_config_dict.update(
581
- {"pipeline_type": piface.data["pipeline_type"]}
582
- )
562
+ # Two end goals: write the config file, then attach a PipestatManager built from it to the piface
563
+ write_pipestat_config(pipestat_config_path, pipestat_config_dict)
583
564
 
584
- # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
585
- looper_pipestat_config_path = os.path.join(
586
- os.path.dirname(output_dir), "looper_pipestat_config.yaml"
587
- )
588
- write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
565
+ piface.psm = PipestatManager(
566
+ config_file=pipestat_config_path, multi_pipelines=True
567
+ )
589
568
 
590
- ret[piface.pipeline_name] = {
591
- "config_file": looper_pipestat_config_path,
592
- }
593
- return ret
569
+ return None
594
570
 
595
571
  def populate_pipeline_outputs(self):
596
572
  """
@@ -657,7 +633,7 @@ class Project(peppyProject):
657
633
  pifaces_by_sample = {}
658
634
  for source, sample_names in self._samples_by_interface.items():
659
635
  try:
660
- pi = PipelineInterface(source, pipeline_type="sample")
636
+ pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value)
661
637
  except PipelineInterfaceConfigError as e:
662
638
  _LOGGER.debug(f"Skipping pipeline interface creation: {e}")
663
639
  else:
@@ -708,7 +684,9 @@ class Project(peppyProject):
708
684
  for source in piface_srcs:
709
685
  source = self._resolve_path_with_cfg(source)
710
686
  try:
711
- PipelineInterface(source, pipeline_type="sample")
687
+ PipelineInterface(
688
+ source, pipeline_type=PipelineLevel.SAMPLE.value
689
+ )
712
690
  except (
713
691
  ValidationError,
714
692
  IOError,
@@ -9,12 +9,20 @@ properties:
9
9
  type: string
10
10
  enum: ["project", "sample"]
11
11
  description: "type of the pipeline, either 'project' or 'sample'"
12
- command_template:
13
- type: string
14
- description: "Jinja2-like template to construct the command to run"
15
- path:
16
- type: string
17
- description: "path to the pipeline program. Relative to pipeline interface file or absolute."
12
+ sample_interface:
13
+ type: object
14
+ description: "Section that defines the sample-level pipeline interface"
15
+ properties:
16
+ command_template:
17
+ type: string
18
+ description: "Jinja2-like template to construct the command to run"
19
+ project_interface:
20
+ type: object
21
+ description: "Section that defines the project-level pipeline interface"
22
+ properties:
23
+ command_template:
24
+ type: string
25
+ description: "Jinja2-like template to construct the command to run"
18
26
  compute:
19
27
  type: object
20
28
  description: "Section that defines compute environment settings"