looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

looper/looper.py CHANGED
@@ -4,17 +4,12 @@ Looper: a pipeline submission engine. https://github.com/pepkit/looper
 """

 import abc
+import argparse
 import csv
 import logging
 import subprocess
-import sys
-
-if sys.version_info < (3, 3):
-    from collections import Mapping
-else:
-    from collections.abc import Mapping
-
-import logmuse
+import yaml
+import os
 import pandas as _pd

 # Need specific sequence of actions for colorama imports?
@@ -23,11 +18,12 @@ from colorama import init
 init()
 from shutil import rmtree

+# from collections.abc import Mapping
+from collections import defaultdict
 from colorama import Fore, Style
-from eido import inspect_project, validate_config, validate_sample
+from eido import validate_config, validate_sample
 from eido.exceptions import EidoValidationError
 from jsonschema import ValidationError
-from pephubclient import PEPHubClient
 from peppy.const import *
 from peppy.exceptions import RemoteYAMLError
 from rich.color import Color
@@ -36,21 +32,20 @@ from rich.table import Table
 from ubiquerg.cli_tools import query_yes_no
 from ubiquerg.collection import uniqify

-from . import __version__, build_parser, validate_post_parse
+
 from .conductor import SubmissionConductor
+
+from .exceptions import *
 from .const import *
-from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config
-from .exceptions import (
-    JobSubmissionException,
-    MisconfigurationException,
-    SampleFailedException,
-)
-from .html_reports import HTMLReportBuilderOld
-from .html_reports_pipestat import HTMLReportBuilder, fetch_pipeline_results
-from .html_reports_project_pipestat import HTMLReportBuilderProject
 from .pipeline_interface import PipelineInterface
-from .project import Project, ProjectContext
-from .utils import *
+from .project import Project
+from .utils import (
+    desired_samples_range_skipped,
+    desired_samples_range_limited,
+    sample_folder,
+)
+from pipestat.reports import get_file_for_table
+from pipestat.reports import get_file_for_project

 _PKGNAME = "looper"
 _LOGGER = logging.getLogger(_PKGNAME)
@@ -104,7 +99,7 @@ class Checker(Executor):
         for sample in self.prj.samples:
             psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
             for pipeline_name, psm in psms.items():
-                s = psm.get_status(sample_name=sample.sample_name)
+                s = psm.get_status(record_identifier=sample.sample_name)
                 status.setdefault(pipeline_name, {})
                 status[pipeline_name][sample.sample_name] = s
                 _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}")
@@ -171,60 +166,7 @@ class Checker(Executor):
             desc = ""
             table.add_row(status, desc)
         console.print(table)
-
-
-class CheckerOld(Executor):
-    def __call__(self, flags=None, all_folders=False, max_file_count=30):
-        """
-        Check Project status, based on flag files.
-
-        :param Iterable[str] | str flags: Names of flags to check, optional;
-            if unspecified, all known flags will be checked.
-        :param bool all_folders: Whether to check flags in all folders, not
-            just those for samples in the config file from which the Project
-            was created.
-        :param int max_file_count: Maximum number of filepaths to display for a
-            given flag.
-        """
-
-        # Handle single or multiple flags, and alphabetize.
-        flags = sorted([flags] if isinstance(flags, str) else list(flags or FLAGS))
-        flag_text = ", ".join(flags)
-
-        # Collect the files by flag and sort by flag name.
-        _LOGGER.debug("Checking project folders for flags: %s", flag_text)
-        if all_folders:
-            files_by_flag = fetch_flag_files(
-                results_folder=self.prj.results_folder, flags=flags
-            )
-        else:
-            files_by_flag = fetch_flag_files(prj=self.prj, flags=flags)
-
-        # For each flag, output occurrence count.
-        for flag in flags:
-            _LOGGER.info("%s: %d", flag.upper(), len(files_by_flag[flag]))
-
-        # For each flag, output filepath(s) if not overly verbose.
-        for flag in flags:
-            try:
-                files = files_by_flag[flag]
-            except Exception as e:
-                _LOGGER.debug(
-                    "No files for {} flag. Caught exception: {}".format(
-                        flags, getattr(e, "message", repr(e))
-                    )
-                )
-                continue
-            # If checking on a specific flag, do not limit the number of
-            # reported filepaths, but do not report empty file lists
-            if len(flags) == 1 and len(files) > 0:
-                _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
-            # Regardless of whether 0-count flags are previously reported,
-            # don't report an empty file list for a flag that's absent.
-            # If the flag-to-files mapping is defaultdict, absent flag (key)
-            # will fetch an empty collection, so check for length of 0.
-            if 0 < len(files) <= max_file_count:
-                _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
+        return status


 class Cleaner(Executor):
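
Note: with `CheckerOld` gone, `Checker.__call__` now also returns the nested mapping it builds, `{pipeline_name: {sample_name: status}}`, instead of only rendering it to the console. An illustration of the returned shape, with made-up pipeline and sample names:

```python
# Illustration only: the shape of the mapping Checker.__call__ returns.
status = {
    "example_pipeline": {
        "sample1": "completed",
        "sample2": "failed",
    }
}
failed = [s for s, st in status["example_pipeline"].items() if st == "failed"]
print(failed)  # ['sample2']
```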
@@ -270,7 +212,8 @@ class Cleaner(Executor):
         return self(args, preview_flag=False)


-def select_samples(prj: Project, args: argparse.Namespace) -> Iterable[Any]:
+# NOTE: Adding type hint -> Iterable[Any] gives me TypeError: 'ABCMeta' object is not subscriptable
+def select_samples(prj: Project, args: argparse.Namespace):
     """Use CLI limit/skip arguments to select subset of project's samples."""
     # TODO: get proper element type for signature.
     num_samples = len(prj.samples)
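
Note: the TypeError mentioned in the new comment occurs because `collections.abc.Iterable` only supports subscripting (`Iterable[Any]`) from Python 3.9 onward. The annotation could be kept on older interpreters by importing from `typing` or by enabling postponed annotation evaluation; neither workaround is applied in this diff, which simply drops the return hint:

```python
# Two standard workarounds for "'ABCMeta' object is not subscriptable";
# neither is used in the diff above, which drops the annotation instead.
from __future__ import annotations  # option 1: annotations stay unevaluated

from typing import Any, Iterable  # option 2: typing.Iterable is subscriptable

def select_samples_sketch(prj, args) -> Iterable[Any]:
    return prj.samples
```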
@@ -310,7 +253,17 @@ class Destroyer(Executor):
             _remove_or_dry_run(sample_output_folder, args.dry_run)

         _LOGGER.info("Removing summary:")
-        destroy_summary(self.prj, args.dry_run)
+        use_pipestat = (
+            self.prj.pipestat_configured_project
+            if args.project
+            else self.prj.pipestat_configured
+        )
+        if use_pipestat:
+            destroy_summary(self.prj, args.dry_run, args.project)
+        else:
+            _LOGGER.warning(
+                "Pipestat must be configured to destroy any created summaries."
+            )

         if not preview_flag:
             _LOGGER.info("Destroy complete.")
@@ -354,6 +307,7 @@ class Collator(Executor):
             arguments, recognized by looper
         """
         jobs = 0
+        self.debug = {}
         project_pifaces = self.prj.project_pipeline_interface_sources
         if not project_pifaces:
             raise MisconfigurationException(
@@ -399,6 +353,8 @@ class Collator(Executor):
             jobs += conductor.num_job_submissions
         _LOGGER.info("\nLooper finished")
         _LOGGER.info("Jobs submitted: {}".format(jobs))
+        self.debug[DEBUG_JOBS] = jobs
+        return self.debug


 class Runner(Executor):
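
Note: `Collator.__call__` (and `Runner.__call__`, below) now return a small debug mapping instead of `None`, which makes submission counts observable to callers and tests. A sketch of consuming it; `DEBUG_JOBS` and `DEBUG_COMMANDS` are assumed to come from looper's `const` module, which this file star-imports:

```python
# Sketch: reading the debug mapping now returned by Collator/Runner.
# DEBUG_JOBS and DEBUG_COMMANDS are assumed to be defined in looper.const.
from looper.const import DEBUG_COMMANDS, DEBUG_JOBS

def log_submission_counts(debug: dict) -> None:
    print(f"jobs: {debug.get(DEBUG_JOBS)}")
    print(f"commands: {debug.get(DEBUG_COMMANDS)}")
```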
@@ -415,6 +371,7 @@ class Runner(Executor):
         :param bool rerun: whether the given sample is being rerun rather than
             run for the first time
         """
+        self.debug = {}  # initialize empty dict for return values
         max_cmds = sum(list(map(len, self.prj._samples_by_interface.values())))
         self.counter.total = max_cmds
         failures = defaultdict(list)  # Collect problems by sample.
@@ -453,6 +410,9 @@ class Runner(Executor):
             submission_conductors[piface.pipe_iface_file] = conductor

         _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}")
+        self.debug["Pipestat compatible"] = (
+            self.prj.pipestat_configured_project or self.prj.pipestat_configured
+        )

         for sample in select_samples(prj=self.prj, args=args):
             pl_fails = []
@@ -474,10 +434,17 @@ class Runner(Executor):
                 try:
                     validate_sample(self.prj, sample.sample_name, schema_file)
                 except EidoValidationError as e:
-                    _LOGGER.error(f"Short-circuiting due to validation error: {e}")
+                    _LOGGER.error(
+                        f"Short-circuiting due to validation error!\nSchema file: "
+                        f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                    )
+                    self.debug[DEBUG_EIDO_VALIDATION] = (
+                        f"Short-circuiting due to validation error!\nSchema file: "
+                        f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                    )
                     return False
                 except RemoteYAMLError:
-                    _LOGGER.warn(
+                    _LOGGER.warning(
                         f"Could not read remote schema, skipping '{sample.sample_name}' "
                         f"sample validation against {schema_file}"
                     )
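
Note: the expanded error handling relies on `EidoValidationError.errors_by_type`, a mapping that groups schema failures by kind, exactly as the new code uses it. The guard pattern in isolation:

```python
# Sketch of the validation guard above, assuming eido's EidoValidationError
# exposes errors_by_type as the new code uses it.
from eido import validate_sample
from eido.exceptions import EidoValidationError

def validation_problem(prj, sample_name, schema_file):
    """Return a short failure description, or None if the sample validates."""
    try:
        validate_sample(prj, sample_name, schema_file)
    except EidoValidationError as e:
        return f"{schema_file}: {list(e.errors_by_type.keys())}"
    return None
```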
@@ -518,9 +485,15 @@ class Runner(Executor):
             )
         )
         _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
-        _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+        self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds)
         if args.dry_run:
-            _LOGGER.info("Dry run. No jobs were actually submitted.")
+            job_sub_total_if_real = job_sub_total
+            job_sub_total = 0
+            _LOGGER.info(
+                f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been."
+            )
+        _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+        self.debug[DEBUG_JOBS] = job_sub_total

         # Restructure sample/failure data for display.
         samples_by_reason = defaultdict(set)
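
Note: on a dry run, `job_sub_total` is zeroed only after the would-be count is captured, so the unconditional "Jobs submitted" line and the `DEBUG_JOBS` entry both report 0 while the log still states what a real run would have produced. The accounting in isolation:

```python
# Sketch of the dry-run accounting above: report 0 submitted jobs while
# still stating how many a real run would have produced.
def dry_run_count(job_sub_total: int, dry_run: bool) -> int:
    if dry_run:
        job_sub_total_if_real = job_sub_total
        job_sub_total = 0
        print(f"Dry run: {job_sub_total_if_real} job(s) would have been submitted.")
    return job_sub_total
```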
@@ -528,6 +501,7 @@ class Runner(Executor):
         for sample, failures in failures.items():
             for f in failures:
                 samples_by_reason[f].add(sample)
+                self.debug[f] = sample
         # Collect samples by pipeline with submission failure.
         for piface, conductor in submission_conductors.items():
             # Don't add failure key if there are no samples that failed for
@@ -562,6 +536,8 @@ class Runner(Executor):
             _LOGGER.debug("Raising SampleFailedException")
             raise SampleFailedException

+        return self.debug
+

 class Reporter(Executor):
     """Combine project outputs into a browsable HTML report"""
@@ -576,305 +552,82 @@ class Reporter(Executor):
             print(psms)
             for name, psm in psms.items():
                 # Summarize will generate the static HTML Report Function
-                psm.summarize()
+                report_directory = psm.summarize(looper_samples=self.prj.samples)
+                print(f"Report directory: {report_directory}")
         else:
-            for sample in p.prj.samples:
-                psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
+            for piface_source_samples in self.prj._samples_by_piface(
+                self.prj.piface_key
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
+                )
                 print(psms)
                 for name, psm in psms.items():
                     # Summarize will generate the static HTML Report Function
-                    psm.summarize()
+                    report_directory = psm.summarize(looper_samples=self.prj.samples)
+                    print(f"Report directory: {report_directory}")


-class Tabulator(Executor):
-    """Project/Sample statistics and table output generator"""
+class Linker(Executor):
+    """Create symlinks for reported results. Requires pipestat to be configured."""

     def __call__(self, args):
+        # initialize the report builder
+        p = self.prj
         project_level = args.project
+        link_dir = args.output_dir
+
         if project_level:
-            self.counter = LooperCounter(len(self.prj.project_pipeline_interfaces))
-            for piface in self.prj.project_pipeline_interfaces:
-                # Do the stats and object summarization.
-                pipeline_name = piface.pipeline_name
-                # pull together all the fits and stats from each sample into
-                # project-combined spreadsheets.
-                self.stats = _create_stats_summary(
-                    self.prj, pipeline_name, project_level, self.counter
-                )
-                self.objs = _create_obj_summary(
-                    self.prj, pipeline_name, project_level, self.counter
-                )
+            psms = self.prj.get_pipestat_managers(project_level=True)
+            for name, psm in psms.items():
+                linked_results_path = psm.link(link_dir=link_dir)
+                print(f"Linked directory: {linked_results_path}")
         else:
-            for piface_source in self.prj._samples_by_piface(
+            for piface_source_samples in self.prj._samples_by_piface(
                 self.prj.piface_key
-            ).keys():
-                # Do the stats and object summarization.
-                pipeline_name = PipelineInterface(config=piface_source).pipeline_name
-                # pull together all the fits and stats from each sample into
-                # project-combined spreadsheets.
-                self.stats = _create_stats_summary(
-                    self.prj, pipeline_name, project_level, self.counter
-                )
-                self.objs = _create_obj_summary(
-                    self.prj, pipeline_name, project_level, self.counter
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
                 )
-        return self
+                for name, psm in psms.items():
+                    linked_results_path = psm.link(link_dir=link_dir)
+                    print(f"Linked directory: {linked_results_path}")


-def _create_stats_summary(project, pipeline_name, project_level, counter):
-    """
-    Create stats spreadsheet and columns to be considered in the report, save
-    the spreadsheet to file
-
-    :param looper.Project project: the project to be summarized
-    :param str pipeline_name: name of the pipeline to tabulate results for
-    :param bool project_level: whether the project-level pipeline resutlts
-        should be tabulated
-    :param looper.LooperCounter counter: a counter object
-    """
-    # Create stats_summary file
-    columns = set()
-    stats = []
-    _LOGGER.info("Creating stats summary")
-    if project_level:
-        _LOGGER.info(
-            counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-        )
-        reported_stats = {"project_name": project.name}
-        results = fetch_pipeline_results(
-            project=project,
-            pipeline_name=pipeline_name,
-            inclusion_fun=lambda x: x not in OBJECT_TYPES,
-        )
-        reported_stats.update(results)
-        stats.append(reported_stats)
-        columns |= set(reported_stats.keys())
+class Tabulator(Executor):
+    """Project/Sample statistics and table output generator

-    else:
-        for sample in project.samples:
-            sn = sample.sample_name
-            _LOGGER.info(counter.show(sn, pipeline_name))
-            reported_stats = {project.sample_table_index: sn}
-            results = fetch_pipeline_results(
-                project=project,
-                pipeline_name=pipeline_name,
-                sample_name=sn,
-                inclusion_fun=lambda x: x not in OBJECT_TYPES,
-            )
-            reported_stats.update(results)
-            stats.append(reported_stats)
-            columns |= set(reported_stats.keys())
-
-    tsv_outfile_path = get_file_for_project(project, pipeline_name, "stats_summary.tsv")
-    tsv_outfile = open(tsv_outfile_path, "w")
-    tsv_writer = csv.DictWriter(
-        tsv_outfile, fieldnames=list(columns), delimiter="\t", extrasaction="ignore"
-    )
-    tsv_writer.writeheader()
-    for row in stats:
-        tsv_writer.writerow(row)
-    tsv_outfile.close()
-    _LOGGER.info(
-        f"'{pipeline_name}' pipeline stats summary (n={len(stats)}):"
-        f" {tsv_outfile_path}"
-    )
-    counter.reset()
-    return stats
-
-
-def _create_obj_summary(project, pipeline_name, project_level, counter):
+    :return list[str|any] results: list containing output file paths of stats and objects
     """
-    Read sample specific objects files and save to a data frame
-
-    :param looper.Project project: the project to be summarized
-    :param str pipeline_name: name of the pipeline to tabulate results for
-    :param looper.LooperCounter counter: a counter object
-    :param bool project_level: whether the project-level pipeline resutlts
-        should be tabulated
-    """
-    _LOGGER.info("Creating objects summary")
-    reported_objects = {}
-    if project_level:
-        _LOGGER.info(
-            counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-        )
-        res = fetch_pipeline_results(
-            project=project,
-            pipeline_name=pipeline_name,
-            inclusion_fun=lambda x: x in OBJECT_TYPES,
-        )
-        # need to cast to a dict, since other mapping-like objects might
-        # cause issues when writing to the collective yaml file below
-        project_reported_objects = {k: dict(v) for k, v in res.items()}
-        reported_objects[project.name] = project_reported_objects
-    else:
-        for sample in project.samples:
-            sn = sample.sample_name
-            _LOGGER.info(counter.show(sn, pipeline_name))
-            res = fetch_pipeline_results(
-                project=project,
-                pipeline_name=pipeline_name,
-                sample_name=sn,
-                inclusion_fun=lambda x: x in OBJECT_TYPES,
-            )
-            # need to cast to a dict, since other mapping-like objects might
-            # cause issues when writing to the collective yaml file below
-            sample_reported_objects = {k: dict(v) for k, v in res.items()}
-            reported_objects[sn] = sample_reported_objects
-    objs_yaml_path = get_file_for_project(project, pipeline_name, "objs_summary.yaml")
-    with open(objs_yaml_path, "w") as outfile:
-        yaml.dump(reported_objects, outfile)
-    _LOGGER.info(
-        f"'{pipeline_name}' pipeline objects summary "
-        f"(n={len(reported_objects.keys())}): {objs_yaml_path}"
-    )
-    counter.reset()
-    return reported_objects
-
-
-class ReportOld(Executor):
-    """Combine project outputs into a browsable HTML report"""
-
-    def __init__(self, prj):
-        # call the inherited initialization
-        super(ReportOld, self).__init__(prj)
-        self.prj = prj

     def __call__(self, args):
-        # initialize the report builder
-        report_builder = HTMLReportBuilderOld(self.prj)
-
-        # Do the stats and object summarization.
-        table = TableOld(self.prj)()
-        # run the report builder. a set of HTML pages is produced
-        report_path = report_builder(table.objs, table.stats, uniqify(table.columns))
-
-        _LOGGER.info("HTML Report (n=" + str(len(table.stats)) + "): " + report_path)
-
-
-class TableOld(Executor):
-    """Project/Sample statistics and table output generator"""
-
-    def __init__(self, prj):
-        # call the inherited initialization
-        super(TableOld, self).__init__(prj)
-        self.prj = prj
-
-    def __call__(self):
-        def _create_stats_summary_old(project, counter):
-            """
-            Create stats spreadsheet and columns to be considered in the report, save
-            the spreadsheet to file
-            :param looper.Project project: the project to be summarized
-            :param looper.LooperCounter counter: a counter object
-            """
-            # Create stats_summary file
-            columns = []
-            stats = []
-            project_samples = project.samples
-            missing_files = []
-            _LOGGER.info("Creating stats summary...")
-            for sample in project_samples:
-                _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                sample_output_folder = sample_folder(project, sample)
-                # Grab the basic info from the annotation sheet for this sample.
-                # This will correspond to a row in the output.
-                sample_stats = sample.get_sheet_dict()
-                columns.extend(sample_stats.keys())
-                # Version 0.3 standardized all stats into a single file
-                stats_file = os.path.join(sample_output_folder, "stats.tsv")
-                if not os.path.isfile(stats_file):
-                    missing_files.append(stats_file)
-                    continue
-                t = _pd.read_csv(
-                    stats_file, sep="\t", header=None, names=["key", "value", "pl"]
-                )
-                t.drop_duplicates(subset=["key", "pl"], keep="last", inplace=True)
-                t.loc[:, "plkey"] = t["pl"] + ":" + t["key"]
-                dupes = t.duplicated(subset=["key"], keep=False)
-                t.loc[dupes, "key"] = t.loc[dupes, "plkey"]
-                sample_stats.update(t.set_index("key")["value"].to_dict())
-                stats.append(sample_stats)
-                columns.extend(t.key.tolist())
-            if missing_files:
-                _LOGGER.warning(
-                    "Stats files missing for {} samples: {}".format(
-                        len(missing_files), missing_files
-                    )
-                )
-            tsv_outfile_path = get_file_for_project_old(project, "stats_summary.tsv")
-            tsv_outfile = open(tsv_outfile_path, "w")
-            tsv_writer = csv.DictWriter(
-                tsv_outfile,
-                fieldnames=uniqify(columns),
-                delimiter="\t",
-                extrasaction="ignore",
-            )
-            tsv_writer.writeheader()
-            for row in stats:
-                tsv_writer.writerow(row)
-            tsv_outfile.close()
-            _LOGGER.info(
-                "Statistics summary (n=" + str(len(stats)) + "): " + tsv_outfile_path
-            )
-            counter.reset()
-            return stats, uniqify(columns)
-
-        def _create_obj_summary_old(project, counter):
-            """
-            Read sample specific objects files and save to a data frame
-            :param looper.Project project: the project to be summarized
-            :param looper.LooperCounter counter: a counter object
-            :return pandas.DataFrame: objects spreadsheet
-            """
-            _LOGGER.info("Creating objects summary...")
-            objs = _pd.DataFrame()
-            # Create objects summary file
-            missing_files = []
-            for sample in project.samples:
-                # Process any reported objects
-                _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                sample_output_folder = sample_folder(project, sample)
-                objs_file = os.path.join(sample_output_folder, "objects.tsv")
-                if not os.path.isfile(objs_file):
-                    missing_files.append(objs_file)
-                    continue
-                t = _pd.read_csv(
-                    objs_file,
-                    sep="\t",
-                    header=None,
-                    names=[
-                        "key",
-                        "filename",
-                        "anchor_text",
-                        "anchor_image",
-                        "annotation",
-                    ],
-                )
-                t["sample_name"] = sample.sample_name
-                objs = objs.append(t, ignore_index=True)
-            if missing_files:
-                _LOGGER.warning(
-                    "Object files missing for {} samples: {}".format(
-                        len(missing_files), missing_files
-                    )
+        # p = self.prj
+        project_level = args.project
+        results = []
+        if project_level:
+            psms = self.prj.get_pipestat_managers(project_level=True)
+            for name, psm in psms.items():
+                results = psm.table()
+        else:
+            for piface_source_samples in self.prj._samples_by_piface(
+                self.prj.piface_key
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.table
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
                 )
-            # create the path to save the objects file in
-            objs_file = get_file_for_project_old(project, "objs_summary.tsv")
-            objs.to_csv(objs_file, sep="\t")
-            _LOGGER.info(
-                "Objects summary (n="
-                + str(len(project.samples) - len(missing_files))
-                + "): "
-                + objs_file
-            )
-            return objs
-
-        # pull together all the fits and stats from each sample into
-        # project-combined spreadsheets.
-        self.stats, self.columns = _create_stats_summary_old(self.prj, self.counter)
-        self.objs = _create_obj_summary_old(self.prj, self.counter)
-        return self
+                for name, psm in psms.items():
+                    results = psm.table()
+        # Results contains paths to stats and object summaries.
+        return results


 def _create_failure_message(reason, samples):
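
Note: `Reporter`, `Linker`, and the rewritten `Tabulator` all share one pattern: for each pipeline interface, a single sample suffices to obtain the `PipestatManager`, because `psm.summarize()`, `psm.link()`, and `psm.table()` each operate over every record in the backend. Extracted as a standalone sketch (`prj` is a looper `Project`; the private helpers are used exactly as in the diff):

```python
# Sketch of the one-manager-per-interface pattern shared by Reporter,
# Linker, and Tabulator above.
def summarize_all(prj):
    report_dirs = []
    for samples in prj._samples_by_piface(prj.piface_key).values():
        first_sample_name = list(samples)[0]  # any sample from this interface works
        psms = prj.get_pipestat_managers(
            sample_name=first_sample_name, project_level=False
        )
        for name, psm in psms.items():
            # summarize() pulls ALL records, not just first_sample_name's
            report_dirs.append(psm.summarize(looper_samples=prj.samples))
    return report_dirs
```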
@@ -889,7 +642,7 @@ def _remove_or_dry_run(paths, dry_run=False):

     :param list|str paths: list of paths to files/dirs to be removed
     :param bool dry_run: logical indicating whether the files should remain
-        untouched and massage printed
+        untouched and message printed
     """
     paths = paths if isinstance(paths, list) else [paths]
     for path in paths:
@@ -906,20 +659,70 @@ def _remove_or_dry_run(paths, dry_run=False):
             _LOGGER.info(path + " does not exist.")


-def destroy_summary(prj, dry_run=False):
+def destroy_summary(prj, dry_run=False, project_level=False):
     """
     Delete the summary files if not in dry run mode
+    This function is for use with pipestat configured projects.
     """
-    # TODO: update after get_file_for_project signature change
-    _remove_or_dry_run(
-        [
-            get_file_for_project(prj, "summary.html"),
-            get_file_for_project(prj, "stats_summary.tsv"),
-            get_file_for_project(prj, "objs_summary.tsv"),
-            get_file_for_project(prj, "reports"),
-        ],
-        dry_run,
-    )
+
+    if project_level:
+        psms = prj.get_pipestat_managers(project_level=True)
+        for name, psm in psms.items():
+            _remove_or_dry_run(
+                [
+                    get_file_for_project(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        directory="reports",
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        appendix="stats_summary.tsv",
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        appendix="objs_summary.yaml",
+                    ),
+                    get_file_for_table(
+                        psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                    ),
+                ],
+                dry_run,
+            )
+    else:
+        for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
+            # For each piface_key, we have a list of samples, but we only need one sample from the list to
+            # call the related pipestat manager object which will pull ALL samples when using psm.table
+            first_sample_name = list(piface_source_samples)[0]
+            psms = prj.get_pipestat_managers(
+                sample_name=first_sample_name, project_level=False
+            )
+            for name, psm in psms.items():
+                _remove_or_dry_run(
+                    [
+                        get_file_for_project(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            directory="reports",
+                        ),
+                        get_file_for_table(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            appendix="stats_summary.tsv",
+                        ),
+                        get_file_for_table(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            appendix="objs_summary.yaml",
+                        ),
+                        get_file_for_table(
+                            psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                        ),
+                    ],
+                    dry_run,
+                )


 class LooperCounter(object):
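
Note: `destroy_summary` now derives each artifact path from the `PipestatManager` itself via the `pipestat.reports` helpers imported at the top of the file, instead of from hard-coded project-relative names. The target list, factored out as a sketch (helper signatures as used in the hunk above):

```python
# Sketch: the summary artifacts destroy_summary removes for one manager,
# calling the pipestat.reports helpers with the same arguments as above.
from pipestat.reports import get_file_for_project, get_file_for_table

def summary_artifacts(psm):
    name = psm["_pipeline_name"]
    return [
        get_file_for_project(psm, pipeline_name=name, directory="reports"),
        get_file_for_table(psm, pipeline_name=name, appendix="stats_summary.tsv"),
        get_file_for_table(psm, pipeline_name=name, appendix="objs_summary.yaml"),
        get_file_for_table(psm, pipeline_name=name, appendix="reports"),
    ]
```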
@@ -972,241 +775,3 @@ def _submission_status_text(
     if pipeline_name:
         txt += f"; pipeline: {pipeline_name}"
     return txt + Style.RESET_ALL
-
-
-def _proc_resources_spec(args):
-    """
-    Process CLI-sources compute setting specification. There are two sources
-    of compute settings in the CLI alone:
-        * YAML file (--settings argument)
-        * itemized compute settings (--compute argument)
-
-    The itemized compute specification is given priority
-
-    :param argparse.Namespace: arguments namespace
-    :return Mapping[str, str]: binding between resource setting name and value
-    :raise ValueError: if interpretation of the given specification as encoding
-        of key-value pairs fails
-    """
-    spec = getattr(args, "compute", None)
-    try:
-        settings_data = read_yaml_file(args.settings) or {}
-    except yaml.YAMLError:
-        _LOGGER.warning(
-            "Settings file ({}) does not follow YAML format,"
-            " disregarding".format(args.settings)
-        )
-        settings_data = {}
-    if not spec:
-        return settings_data
-    pairs = [(kv, kv.split("=")) for kv in spec]
-    bads = []
-    for orig, pair in pairs:
-        try:
-            k, v = pair
-        except ValueError:
-            bads.append(orig)
-        else:
-            settings_data[k] = v
-    if bads:
-        raise ValueError(
-            "Could not correctly parse itemized compute specification. "
-            "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT
-        )
-    return settings_data
-
-
-def main(test_args=None):
-    """Primary workflow"""
-    global _LOGGER
-
-    parser, aux_parser = build_parser()
-    aux_parser.suppress_defaults()
-
-    if test_args:
-        args, remaining_args = parser.parse_known_args(args=test_args)
-    else:
-        args, remaining_args = parser.parse_known_args()
-
-    cli_use_errors = validate_post_parse(args)
-    if cli_use_errors:
-        parser.print_help(sys.stderr)
-        parser.error(
-            f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}"
-        )
-    if args.command is None:
-        parser.print_help(sys.stderr)
-        sys.exit(1)
-    if "config_file" in vars(args):
-        if args.config_file is None:
-            msg = "No project config defined (peppy)"
-            try:
-                if args.looper_config:
-                    looper_config_dict = read_looper_config_file(args.looper_config)
-                else:
-                    looper_config_dict = read_looper_dotfile()
-                    print(
-                        msg + f", using: {read_looper_dotfile()}. "
-                        f"Read from dotfile ({dotfile_path()})."
-                    )
-
-                for looper_config_key, looper_config_item in looper_config_dict.items():
-                    setattr(args, looper_config_key, looper_config_item)
-
-            except OSError:
-                print(msg + f" and dotfile does not exist: {dotfile_path()}")
-                parser.print_help(sys.stderr)
-                sys.exit(1)
-        else:
-            _LOGGER.warning(
-                "The Looper config specification through the PEP project is deprecated and will "
-                "be removed in future versions. Please use the new running method by "
-                f"utilizing a looper config file. For more information: {'here is more information'} "
-            )
-
-    if args.command == "init":
-        sys.exit(
-            int(
-                not init_dotfile(
-                    dotfile_path(),
-                    args.config_file,
-                    args.output_dir,
-                    args.sample_pipeline_interfaces,
-                    args.project_pipeline_interfaces,
-                    args.force,
-                )
-            )
-        )
-
-    if args.command == "init-piface":
-        sys.exit(int(not init_generic_pipeline()))
-
-    args = enrich_args_via_cfg(args, aux_parser, test_args)
-
-    # If project pipeline interface defined in the cli, change name to: "pipeline_interface"
-    if vars(args)[PROJECT_PL_ARG]:
-        args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG]
-
-    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
-
-    _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args.command))
-
-    if len(remaining_args) > 0:
-        _LOGGER.warning(
-            "Unrecognized arguments: {}".format(
-                " ".join([str(x) for x in remaining_args])
-            )
-        )
-
-    divcfg = (
-        select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None
-    )
-
-    # Initialize project
-    if is_registry_path(args.config_file):
-        if vars(args)[SAMPLE_PL_ARG]:
-            p = Project(
-                amendments=args.amend,
-                divcfg_path=divcfg,
-                runp=args.command == "runp",
-                project_dict=PEPHubClient()._load_raw_pep(
-                    registry_path=args.config_file
-                ),
-                **{
-                    attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                },
-            )
-        else:
-            raise MisconfigurationException(
-                f"`sample_pipeline_interface` is missing. Provide it in the parameters."
-            )
-    else:
-        try:
-            p = Project(
-                cfg=args.config_file,
-                amendments=args.amend,
-                divcfg_path=divcfg,
-                runp=args.command == "runp",
-                **{
-                    attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                },
-            )
-        except yaml.parser.ParserError as e:
-            _LOGGER.error(f"Project config parse failed -- {e}")
-            sys.exit(1)
-
-    selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME
-    if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg):
-        _LOGGER.info(
-            "Failed to activate '{}' computing package. "
-            "Using the default one".format(selected_compute_pkg)
-        )
-
-    with ProjectContext(
-        prj=p,
-        selector_attribute=args.sel_attr,
-        selector_include=args.sel_incl,
-        selector_exclude=args.sel_excl,
-    ) as prj:
-        if args.command in ["run", "rerun"]:
-            run = Runner(prj)
-            try:
-                compute_kwargs = _proc_resources_spec(args)
-                run(args, rerun=(args.command == "rerun"), **compute_kwargs)
-            except SampleFailedException:
-                sys.exit(1)
-            except IOError:
-                _LOGGER.error(
-                    "{} pipeline_interfaces: '{}'".format(
-                        prj.__class__.__name__, prj.pipeline_interface_sources
-                    )
-                )
-                raise
-
-        if args.command == "runp":
-            compute_kwargs = _proc_resources_spec(args)
-            collate = Collator(prj)
-            collate(args, **compute_kwargs)
-
-        if args.command == "destroy":
-            return Destroyer(prj)(args)
-
-        # pipestat support introduces breaking changes and pipelines run
-        # with no pipestat reporting would not be compatible with
-        # commands: table, report and check. Therefore we plan maintain
-        # the old implementations for a couple of releases.
-        if hasattr(args, "project"):
-            use_pipestat = (
-                prj.pipestat_configured_project
-                if args.project
-                else prj.pipestat_configured
-            )
-        if args.command == "table":
-            if use_pipestat:
-                Tabulator(prj)(args)
-            else:
-                TableOld(prj)()
-
-        if args.command == "report":
-            if use_pipestat:
-                Reporter(prj)(args)
-            else:
-                ReportOld(prj)(args)
-
-        if args.command == "check":
-            if use_pipestat:
-                Checker(prj)(args)
-            else:
-                CheckerOld(prj)(flags=args.flags)
-
-        if args.command == "clean":
-            return Cleaner(prj)(args)
-
-        if args.command == "inspect":
-            inspect_project(p, args.sample_names, args.attr_limit)
-            from warnings import warn
-
-            warn(
-                "The inspect feature has moved to eido and will be removed in the future release of looper. "
-                "Use `eido inspect` from now on.",
-            )