looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
looper/looper.py CHANGED
@@ -4,17 +4,12 @@ Looper: a pipeline submission engine. https://github.com/pepkit/looper
  """
 
  import abc
+ import argparse
  import csv
  import logging
  import subprocess
- import sys
-
- if sys.version_info < (3, 3):
-     from collections import Mapping
- else:
-     from collections.abc import Mapping
-
- import logmuse
+ import yaml
+ import os
  import pandas as _pd
 
  # Need specific sequence of actions for colorama imports?
@@ -23,11 +18,12 @@ from colorama import init
  init()
  from shutil import rmtree
 
+ # from collections.abc import Mapping
+ from collections import defaultdict
  from colorama import Fore, Style
- from eido import inspect_project, validate_config, validate_sample
+ from eido import validate_config, validate_sample
  from eido.exceptions import EidoValidationError
  from jsonschema import ValidationError
- from pephubclient import PEPHubClient
  from peppy.const import *
  from peppy.exceptions import RemoteYAMLError
  from rich.color import Color
@@ -36,21 +32,20 @@ from rich.table import Table
  from ubiquerg.cli_tools import query_yes_no
  from ubiquerg.collection import uniqify
 
- from . import __version__, build_parser, validate_post_parse
+
  from .conductor import SubmissionConductor
+
+ from .exceptions import *
  from .const import *
- from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config
- from .exceptions import (
-     JobSubmissionException,
-     MisconfigurationException,
-     SampleFailedException,
- )
- from .html_reports import HTMLReportBuilderOld
- from .html_reports_pipestat import HTMLReportBuilder, fetch_pipeline_results
- from .html_reports_project_pipestat import HTMLReportBuilderProject
  from .pipeline_interface import PipelineInterface
- from .project import Project, ProjectContext
- from .utils import *
+ from .project import Project
+ from .utils import (
+     desired_samples_range_skipped,
+     desired_samples_range_limited,
+     sample_folder,
+ )
+ from pipestat.reports import get_file_for_table
+ from pipestat.reports import get_file_for_project
 
  _PKGNAME = "looper"
  _LOGGER = logging.getLogger(_PKGNAME)
@@ -104,7 +99,7 @@ class Checker(Executor):
          for sample in self.prj.samples:
              psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
              for pipeline_name, psm in psms.items():
-                 s = psm.get_status(sample_name=sample.sample_name)
+                 s = psm.get_status(record_identifier=sample.sample_name)
                  status.setdefault(pipeline_name, {})
                  status[pipeline_name][sample.sample_name] = s
                  _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}")
@@ -171,60 +166,7 @@ class Checker(Executor):
              desc = ""
              table.add_row(status, desc)
          console.print(table)
-
-
- class CheckerOld(Executor):
-     def __call__(self, flags=None, all_folders=False, max_file_count=30):
-         """
-         Check Project status, based on flag files.
-
-         :param Iterable[str] | str flags: Names of flags to check, optional;
-             if unspecified, all known flags will be checked.
-         :param bool all_folders: Whether to check flags in all folders, not
-             just those for samples in the config file from which the Project
-             was created.
-         :param int max_file_count: Maximum number of filepaths to display for a
-             given flag.
-         """
-
-         # Handle single or multiple flags, and alphabetize.
-         flags = sorted([flags] if isinstance(flags, str) else list(flags or FLAGS))
-         flag_text = ", ".join(flags)
-
-         # Collect the files by flag and sort by flag name.
-         _LOGGER.debug("Checking project folders for flags: %s", flag_text)
-         if all_folders:
-             files_by_flag = fetch_flag_files(
-                 results_folder=self.prj.results_folder, flags=flags
-             )
-         else:
-             files_by_flag = fetch_flag_files(prj=self.prj, flags=flags)
-
-         # For each flag, output occurrence count.
-         for flag in flags:
-             _LOGGER.info("%s: %d", flag.upper(), len(files_by_flag[flag]))
-
-         # For each flag, output filepath(s) if not overly verbose.
-         for flag in flags:
-             try:
-                 files = files_by_flag[flag]
-             except Exception as e:
-                 _LOGGER.debug(
-                     "No files for {} flag. Caught exception: {}".format(
-                         flags, getattr(e, "message", repr(e))
-                     )
-                 )
-                 continue
-             # If checking on a specific flag, do not limit the number of
-             # reported filepaths, but do not report empty file lists
-             if len(flags) == 1 and len(files) > 0:
-                 _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
-             # Regardless of whether 0-count flags are previously reported,
-             # don't report an empty file list for a flag that's absent.
-             # If the flag-to-files mapping is defaultdict, absent flag (key)
-             # will fetch an empty collection, so check for length of 0.
-             if 0 < len(files) <= max_file_count:
-                 _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
+         return status
 
 
  class Cleaner(Executor):
@@ -270,7 +212,8 @@ class Cleaner(Executor):
          return self(args, preview_flag=False)
 
 
- def select_samples(prj: Project, args: argparse.Namespace) -> Iterable[Any]:
+ # NOTE: Adding type hint -> Iterable[Any] gives me TypeError: 'ABCMeta' object is not subscriptable
+ def select_samples(prj: Project, args: argparse.Namespace):
      """Use CLI limit/skip arguments to select subset of project's samples."""
      # TODO: get proper element type for signature.
      num_samples = len(prj.samples)
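
An aside on the NOTE comment in this hunk: the quoted TypeError is what subscripting the abstract classes in collections.abc raises before Python 3.9, where PEP 585 added generic support; typing.Iterable has accepted subscripts since it was introduced in Python 3.5. A minimal sketch of the difference (illustrative only, not part of the diff; select_samples_typed is a hypothetical name):

    import collections.abc
    from typing import Any, Iterable  # typing.Iterable accepts subscripts on 3.5+

    def select_samples_typed(prj, args) -> Iterable[Any]:  # hypothetical, portable hint
        return list(prj.samples)

    try:
        collections.abc.Iterable[Any]  # works on 3.9+ (PEP 585), raises below that
    except TypeError as err:
        print(err)  # 'ABCMeta' object is not subscriptable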
@@ -310,7 +253,17 @@ class Destroyer(Executor):
              _remove_or_dry_run(sample_output_folder, args.dry_run)
 
          _LOGGER.info("Removing summary:")
-         destroy_summary(self.prj, args.dry_run)
+         use_pipestat = (
+             self.prj.pipestat_configured_project
+             if args.project
+             else self.prj.pipestat_configured
+         )
+         if use_pipestat:
+             destroy_summary(self.prj, args.dry_run, args.project)
+         else:
+             _LOGGER.warning(
+                 "Pipestat must be configured to destroy any created summaries."
+             )
 
          if not preview_flag:
              _LOGGER.info("Destroy complete.")
@@ -354,6 +307,7 @@ class Collator(Executor):
              arguments, recognized by looper
          """
          jobs = 0
+         self.debug = {}
          project_pifaces = self.prj.project_pipeline_interface_sources
          if not project_pifaces:
              raise MisconfigurationException(
@@ -399,6 +353,8 @@
              jobs += conductor.num_job_submissions
          _LOGGER.info("\nLooper finished")
          _LOGGER.info("Jobs submitted: {}".format(jobs))
+         self.debug[DEBUG_JOBS] = jobs
+         return self.debug
 
 
  class Runner(Executor):
@@ -415,6 +371,7 @@
          :param bool rerun: whether the given sample is being rerun rather than
              run for the first time
          """
+         self.debug = {}  # initialize empty dict for return values
          max_cmds = sum(list(map(len, self.prj._samples_by_interface.values())))
          self.counter.total = max_cmds
          failures = defaultdict(list)  # Collect problems by sample.
@@ -453,6 +410,9 @@
              submission_conductors[piface.pipe_iface_file] = conductor
 
          _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}")
+         self.debug["Pipestat compatible"] = (
+             self.prj.pipestat_configured_project or self.prj.pipestat_configured
+         )
 
          for sample in select_samples(prj=self.prj, args=args):
              pl_fails = []
@@ -474,10 +434,17 @@
                  try:
                      validate_sample(self.prj, sample.sample_name, schema_file)
                  except EidoValidationError as e:
-                     _LOGGER.error(f"Short-circuiting due to validation error: {e}")
+                     _LOGGER.error(
+                         f"Short-circuiting due to validation error!\nSchema file: "
+                         f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                     )
+                     self.debug[DEBUG_EIDO_VALIDATION] = (
+                         f"Short-circuiting due to validation error!\nSchema file: "
+                         f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                     )
                      return False
                  except RemoteYAMLError:
-                     _LOGGER.warn(
+                     _LOGGER.warning(
                          f"Could not read remote schema, skipping '{sample.sample_name}' "
                          f"sample validation against {schema_file}"
                      )
@@ -518,9 +485,15 @@
              )
          )
          _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
-         _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+         self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds)
          if args.dry_run:
-             _LOGGER.info("Dry run. No jobs were actually submitted.")
+             job_sub_total_if_real = job_sub_total
+             job_sub_total = 0
+             _LOGGER.info(
+                 f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been."
+             )
+         _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+         self.debug[DEBUG_JOBS] = job_sub_total
 
          # Restructure sample/failure data for display.
          samples_by_reason = defaultdict(set)
@@ -528,6 +501,7 @@
          for sample, failures in failures.items():
              for f in failures:
                  samples_by_reason[f].add(sample)
+                 self.debug[f] = sample
          # Collect samples by pipeline with submission failure.
          for piface, conductor in submission_conductors.items():
              # Don't add failure key if there are no samples that failed for
@@ -562,6 +536,8 @@
              _LOGGER.debug("Raising SampleFailedException")
              raise SampleFailedException
 
+         return self.debug
+
 
  class Reporter(Executor):
      """Combine project outputs into a browsable HTML report"""
@@ -576,305 +552,82 @@
              print(psms)
              for name, psm in psms.items():
                  # Summarize will generate the static HTML Report Function
-                 psm.summarize()
+                 report_directory = psm.summarize(looper_samples=self.prj.samples)
+                 print(f"Report directory: {report_directory}")
          else:
-             for sample in p.prj.samples:
-                 psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
+             for piface_source_samples in self.prj._samples_by_piface(
+                 self.prj.piface_key
+             ).values():
+                 # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                 # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                 first_sample_name = list(piface_source_samples)[0]
+                 psms = self.prj.get_pipestat_managers(
+                     sample_name=first_sample_name, project_level=False
+                 )
                  print(psms)
                  for name, psm in psms.items():
                      # Summarize will generate the static HTML Report Function
-                     psm.summarize()
+                     report_directory = psm.summarize(looper_samples=self.prj.samples)
+                     print(f"Report directory: {report_directory}")
 
 
- class Tabulator(Executor):
-     """Project/Sample statistics and table output generator"""
+ class Linker(Executor):
+     """Create symlinks for reported results. Requires pipestat to be configured."""
 
      def __call__(self, args):
+         # initialize the report builder
+         p = self.prj
          project_level = args.project
+         link_dir = args.output_dir
+
          if project_level:
-             self.counter = LooperCounter(len(self.prj.project_pipeline_interfaces))
-             for piface in self.prj.project_pipeline_interfaces:
-                 # Do the stats and object summarization.
-                 pipeline_name = piface.pipeline_name
-                 # pull together all the fits and stats from each sample into
-                 # project-combined spreadsheets.
-                 self.stats = _create_stats_summary(
-                     self.prj, pipeline_name, project_level, self.counter
-                 )
-                 self.objs = _create_obj_summary(
-                     self.prj, pipeline_name, project_level, self.counter
-                 )
+             psms = self.prj.get_pipestat_managers(project_level=True)
+             for name, psm in psms.items():
+                 linked_results_path = psm.link(link_dir=link_dir)
+                 print(f"Linked directory: {linked_results_path}")
          else:
-             for piface_source in self.prj._samples_by_piface(
+             for piface_source_samples in self.prj._samples_by_piface(
                  self.prj.piface_key
-             ).keys():
-                 # Do the stats and object summarization.
-                 pipeline_name = PipelineInterface(config=piface_source).pipeline_name
-                 # pull together all the fits and stats from each sample into
-                 # project-combined spreadsheets.
-                 self.stats = _create_stats_summary(
-                     self.prj, pipeline_name, project_level, self.counter
-                 )
-                 self.objs = _create_obj_summary(
-                     self.prj, pipeline_name, project_level, self.counter
+             ).values():
+                 # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                 # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                 first_sample_name = list(piface_source_samples)[0]
+                 psms = self.prj.get_pipestat_managers(
+                     sample_name=first_sample_name, project_level=False
                  )
-         return self
+                 for name, psm in psms.items():
+                     linked_results_path = psm.link(link_dir=link_dir)
+                     print(f"Linked directory: {linked_results_path}")
 
 
- def _create_stats_summary(project, pipeline_name, project_level, counter):
-     """
-     Create stats spreadsheet and columns to be considered in the report, save
-     the spreadsheet to file
-
-     :param looper.Project project: the project to be summarized
-     :param str pipeline_name: name of the pipeline to tabulate results for
-     :param bool project_level: whether the project-level pipeline resutlts
-         should be tabulated
-     :param looper.LooperCounter counter: a counter object
-     """
-     # Create stats_summary file
-     columns = set()
-     stats = []
-     _LOGGER.info("Creating stats summary")
-     if project_level:
-         _LOGGER.info(
-             counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-         )
-         reported_stats = {"project_name": project.name}
-         results = fetch_pipeline_results(
-             project=project,
-             pipeline_name=pipeline_name,
-             inclusion_fun=lambda x: x not in OBJECT_TYPES,
-         )
-         reported_stats.update(results)
-         stats.append(reported_stats)
-         columns |= set(reported_stats.keys())
+ class Tabulator(Executor):
+     """Project/Sample statistics and table output generator
 
-     else:
-         for sample in project.samples:
-             sn = sample.sample_name
-             _LOGGER.info(counter.show(sn, pipeline_name))
-             reported_stats = {project.sample_table_index: sn}
-             results = fetch_pipeline_results(
-                 project=project,
-                 pipeline_name=pipeline_name,
-                 sample_name=sn,
-                 inclusion_fun=lambda x: x not in OBJECT_TYPES,
-             )
-             reported_stats.update(results)
-             stats.append(reported_stats)
-             columns |= set(reported_stats.keys())
-
-     tsv_outfile_path = get_file_for_project(project, pipeline_name, "stats_summary.tsv")
-     tsv_outfile = open(tsv_outfile_path, "w")
-     tsv_writer = csv.DictWriter(
-         tsv_outfile, fieldnames=list(columns), delimiter="\t", extrasaction="ignore"
-     )
-     tsv_writer.writeheader()
-     for row in stats:
-         tsv_writer.writerow(row)
-     tsv_outfile.close()
-     _LOGGER.info(
-         f"'{pipeline_name}' pipeline stats summary (n={len(stats)}):"
-         f" {tsv_outfile_path}"
-     )
-     counter.reset()
-     return stats
-
-
- def _create_obj_summary(project, pipeline_name, project_level, counter):
+     :return list[str|any] results: list containing output file paths of stats and objects
      """
-     Read sample specific objects files and save to a data frame
-
-     :param looper.Project project: the project to be summarized
-     :param str pipeline_name: name of the pipeline to tabulate results for
-     :param looper.LooperCounter counter: a counter object
-     :param bool project_level: whether the project-level pipeline resutlts
-         should be tabulated
-     """
-     _LOGGER.info("Creating objects summary")
-     reported_objects = {}
-     if project_level:
-         _LOGGER.info(
-             counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-         )
-         res = fetch_pipeline_results(
-             project=project,
-             pipeline_name=pipeline_name,
-             inclusion_fun=lambda x: x in OBJECT_TYPES,
-         )
-         # need to cast to a dict, since other mapping-like objects might
-         # cause issues when writing to the collective yaml file below
-         project_reported_objects = {k: dict(v) for k, v in res.items()}
-         reported_objects[project.name] = project_reported_objects
-     else:
-         for sample in project.samples:
-             sn = sample.sample_name
-             _LOGGER.info(counter.show(sn, pipeline_name))
-             res = fetch_pipeline_results(
-                 project=project,
-                 pipeline_name=pipeline_name,
-                 sample_name=sn,
-                 inclusion_fun=lambda x: x in OBJECT_TYPES,
-             )
-             # need to cast to a dict, since other mapping-like objects might
-             # cause issues when writing to the collective yaml file below
-             sample_reported_objects = {k: dict(v) for k, v in res.items()}
-             reported_objects[sn] = sample_reported_objects
-     objs_yaml_path = get_file_for_project(project, pipeline_name, "objs_summary.yaml")
-     with open(objs_yaml_path, "w") as outfile:
-         yaml.dump(reported_objects, outfile)
-     _LOGGER.info(
-         f"'{pipeline_name}' pipeline objects summary "
-         f"(n={len(reported_objects.keys())}): {objs_yaml_path}"
-     )
-     counter.reset()
-     return reported_objects
-
-
- class ReportOld(Executor):
-     """Combine project outputs into a browsable HTML report"""
-
-     def __init__(self, prj):
-         # call the inherited initialization
-         super(ReportOld, self).__init__(prj)
-         self.prj = prj
 
      def __call__(self, args):
-         # initialize the report builder
-         report_builder = HTMLReportBuilderOld(self.prj)
-
-         # Do the stats and object summarization.
-         table = TableOld(self.prj)()
-         # run the report builder. a set of HTML pages is produced
-         report_path = report_builder(table.objs, table.stats, uniqify(table.columns))
-
-         _LOGGER.info("HTML Report (n=" + str(len(table.stats)) + "): " + report_path)
-
-
- class TableOld(Executor):
-     """Project/Sample statistics and table output generator"""
-
-     def __init__(self, prj):
-         # call the inherited initialization
-         super(TableOld, self).__init__(prj)
-         self.prj = prj
-
-     def __call__(self):
-         def _create_stats_summary_old(project, counter):
-             """
-             Create stats spreadsheet and columns to be considered in the report, save
-             the spreadsheet to file
-             :param looper.Project project: the project to be summarized
-             :param looper.LooperCounter counter: a counter object
-             """
-             # Create stats_summary file
-             columns = []
-             stats = []
-             project_samples = project.samples
-             missing_files = []
-             _LOGGER.info("Creating stats summary...")
-             for sample in project_samples:
-                 _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                 sample_output_folder = sample_folder(project, sample)
-                 # Grab the basic info from the annotation sheet for this sample.
-                 # This will correspond to a row in the output.
-                 sample_stats = sample.get_sheet_dict()
-                 columns.extend(sample_stats.keys())
-                 # Version 0.3 standardized all stats into a single file
-                 stats_file = os.path.join(sample_output_folder, "stats.tsv")
-                 if not os.path.isfile(stats_file):
-                     missing_files.append(stats_file)
-                     continue
-                 t = _pd.read_csv(
-                     stats_file, sep="\t", header=None, names=["key", "value", "pl"]
-                 )
-                 t.drop_duplicates(subset=["key", "pl"], keep="last", inplace=True)
-                 t.loc[:, "plkey"] = t["pl"] + ":" + t["key"]
-                 dupes = t.duplicated(subset=["key"], keep=False)
-                 t.loc[dupes, "key"] = t.loc[dupes, "plkey"]
-                 sample_stats.update(t.set_index("key")["value"].to_dict())
-                 stats.append(sample_stats)
-                 columns.extend(t.key.tolist())
-             if missing_files:
-                 _LOGGER.warning(
-                     "Stats files missing for {} samples: {}".format(
-                         len(missing_files), missing_files
-                     )
-                 )
-             tsv_outfile_path = get_file_for_project_old(project, "stats_summary.tsv")
-             tsv_outfile = open(tsv_outfile_path, "w")
-             tsv_writer = csv.DictWriter(
-                 tsv_outfile,
-                 fieldnames=uniqify(columns),
-                 delimiter="\t",
-                 extrasaction="ignore",
-             )
-             tsv_writer.writeheader()
-             for row in stats:
-                 tsv_writer.writerow(row)
-             tsv_outfile.close()
-             _LOGGER.info(
-                 "Statistics summary (n=" + str(len(stats)) + "): " + tsv_outfile_path
-             )
-             counter.reset()
-             return stats, uniqify(columns)
-
-         def _create_obj_summary_old(project, counter):
-             """
-             Read sample specific objects files and save to a data frame
-             :param looper.Project project: the project to be summarized
-             :param looper.LooperCounter counter: a counter object
-             :return pandas.DataFrame: objects spreadsheet
-             """
-             _LOGGER.info("Creating objects summary...")
-             objs = _pd.DataFrame()
-             # Create objects summary file
-             missing_files = []
-             for sample in project.samples:
-                 # Process any reported objects
-                 _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                 sample_output_folder = sample_folder(project, sample)
-                 objs_file = os.path.join(sample_output_folder, "objects.tsv")
-                 if not os.path.isfile(objs_file):
-                     missing_files.append(objs_file)
-                     continue
-                 t = _pd.read_csv(
-                     objs_file,
-                     sep="\t",
-                     header=None,
-                     names=[
-                         "key",
-                         "filename",
-                         "anchor_text",
-                         "anchor_image",
-                         "annotation",
-                     ],
-                 )
-                 t["sample_name"] = sample.sample_name
-                 objs = objs.append(t, ignore_index=True)
-             if missing_files:
-                 _LOGGER.warning(
-                     "Object files missing for {} samples: {}".format(
-                         len(missing_files), missing_files
-                     )
+         # p = self.prj
+         project_level = args.project
+         results = []
+         if project_level:
+             psms = self.prj.get_pipestat_managers(project_level=True)
+             for name, psm in psms.items():
+                 results = psm.table()
+         else:
+             for piface_source_samples in self.prj._samples_by_piface(
+                 self.prj.piface_key
+             ).values():
+                 # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                 # call the related pipestat manager object which will pull ALL samples when using psm.table
+                 first_sample_name = list(piface_source_samples)[0]
+                 psms = self.prj.get_pipestat_managers(
+                     sample_name=first_sample_name, project_level=False
                  )
-             # create the path to save the objects file in
-             objs_file = get_file_for_project_old(project, "objs_summary.tsv")
-             objs.to_csv(objs_file, sep="\t")
-             _LOGGER.info(
-                 "Objects summary (n="
-                 + str(len(project.samples) - len(missing_files))
-                 + "): "
-                 + objs_file
-             )
-             return objs
-
-         # pull together all the fits and stats from each sample into
-         # project-combined spreadsheets.
-         self.stats, self.columns = _create_stats_summary_old(self.prj, self.counter)
-         self.objs = _create_obj_summary_old(self.prj, self.counter)
-         return self
+                 for name, psm in psms.items():
+                     results = psm.table()
+         # Results contains paths to stats and object summaries.
+         return results
 
 
  def _create_failure_message(reason, samples):
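
The comment repeated in Reporter, Linker, and Tabulator above describes one shared pattern: samples are grouped by pipeline-interface source, and a single representative sample per group is enough to construct the pipestat managers, because summarize/table/link then pull in all samples themselves. A condensed sketch of that pattern, using the internal _samples_by_piface and get_pipestat_managers calls exactly as they appear in this diff (prj is an assumed, already-built looper Project):

    for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
        # one representative sample per pipeline interface is sufficient
        first_sample_name = list(piface_source_samples)[0]
        psms = prj.get_pipestat_managers(
            sample_name=first_sample_name, project_level=False
        )
        for name, psm in psms.items():
            psm.table()  # or psm.summarize(...) / psm.link(...), per executor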
@@ -889,7 +642,7 @@ def _remove_or_dry_run(paths, dry_run=False):
 
      :param list|str paths: list of paths to files/dirs to be removed
      :param bool dry_run: logical indicating whether the files should remain
-         untouched and massage printed
+         untouched and message printed
      """
      paths = paths if isinstance(paths, list) else [paths]
      for path in paths:
@@ -906,20 +659,70 @@ def _remove_or_dry_run(paths, dry_run=False):
              _LOGGER.info(path + " does not exist.")
 
 
- def destroy_summary(prj, dry_run=False):
+ def destroy_summary(prj, dry_run=False, project_level=False):
      """
      Delete the summary files if not in dry run mode
+     This function is for use with pipestat configured projects.
      """
-     # TODO: update after get_file_for_project signature change
-     _remove_or_dry_run(
-         [
-             get_file_for_project(prj, "summary.html"),
-             get_file_for_project(prj, "stats_summary.tsv"),
-             get_file_for_project(prj, "objs_summary.tsv"),
-             get_file_for_project(prj, "reports"),
-         ],
-         dry_run,
-     )
+
+     if project_level:
+         psms = prj.get_pipestat_managers(project_level=True)
+         for name, psm in psms.items():
+             _remove_or_dry_run(
+                 [
+                     get_file_for_project(
+                         psm,
+                         pipeline_name=psm["_pipeline_name"],
+                         directory="reports",
+                     ),
+                     get_file_for_table(
+                         psm,
+                         pipeline_name=psm["_pipeline_name"],
+                         appendix="stats_summary.tsv",
+                     ),
+                     get_file_for_table(
+                         psm,
+                         pipeline_name=psm["_pipeline_name"],
+                         appendix="objs_summary.yaml",
+                     ),
+                     get_file_for_table(
+                         psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                     ),
+                 ],
+                 dry_run,
+             )
+     else:
+         for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
+             # For each piface_key, we have a list of samples, but we only need one sample from the list to
+             # call the related pipestat manager object which will pull ALL samples when using psm.table
+             first_sample_name = list(piface_source_samples)[0]
+             psms = prj.get_pipestat_managers(
+                 sample_name=first_sample_name, project_level=False
+             )
+             for name, psm in psms.items():
+                 _remove_or_dry_run(
+                     [
+                         get_file_for_project(
+                             psm,
+                             pipeline_name=psm["_pipeline_name"],
+                             directory="reports",
+                         ),
+                         get_file_for_table(
+                             psm,
+                             pipeline_name=psm["_pipeline_name"],
+                             appendix="stats_summary.tsv",
+                         ),
+                         get_file_for_table(
+                             psm,
+                             pipeline_name=psm["_pipeline_name"],
+                             appendix="objs_summary.yaml",
+                         ),
+                         get_file_for_table(
+                             psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                         ),
+                     ],
+                     dry_run,
+                 )
 
 
  class LooperCounter(object):
@@ -972,241 +775,3 @@ def _submission_status_text(
      if pipeline_name:
          txt += f"; pipeline: {pipeline_name}"
      return txt + Style.RESET_ALL
-
-
- def _proc_resources_spec(args):
-     """
-     Process CLI-sources compute setting specification. There are two sources
-     of compute settings in the CLI alone:
-         * YAML file (--settings argument)
-         * itemized compute settings (--compute argument)
-
-     The itemized compute specification is given priority
-
-     :param argparse.Namespace: arguments namespace
-     :return Mapping[str, str]: binding between resource setting name and value
-     :raise ValueError: if interpretation of the given specification as encoding
-         of key-value pairs fails
-     """
-     spec = getattr(args, "compute", None)
-     try:
-         settings_data = read_yaml_file(args.settings) or {}
-     except yaml.YAMLError:
-         _LOGGER.warning(
-             "Settings file ({}) does not follow YAML format,"
-             " disregarding".format(args.settings)
-         )
-         settings_data = {}
-     if not spec:
-         return settings_data
-     pairs = [(kv, kv.split("=")) for kv in spec]
-     bads = []
-     for orig, pair in pairs:
-         try:
-             k, v = pair
-         except ValueError:
-             bads.append(orig)
-         else:
-             settings_data[k] = v
-     if bads:
-         raise ValueError(
-             "Could not correctly parse itemized compute specification. "
-             "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT
-         )
-     return settings_data
-
-
- def main(test_args=None):
-     """Primary workflow"""
-     global _LOGGER
-
-     parser, aux_parser = build_parser()
-     aux_parser.suppress_defaults()
-
-     if test_args:
-         args, remaining_args = parser.parse_known_args(args=test_args)
-     else:
-         args, remaining_args = parser.parse_known_args()
-
-     cli_use_errors = validate_post_parse(args)
-     if cli_use_errors:
-         parser.print_help(sys.stderr)
-         parser.error(
-             f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}"
-         )
-     if args.command is None:
-         parser.print_help(sys.stderr)
-         sys.exit(1)
-     if "config_file" in vars(args):
-         if args.config_file is None:
-             msg = "No project config defined (peppy)"
-             try:
-                 if args.looper_config:
-                     looper_config_dict = read_looper_config_file(args.looper_config)
-                 else:
-                     looper_config_dict = read_looper_dotfile()
-                     print(
-                         msg + f", using: {read_looper_dotfile()}. "
-                         f"Read from dotfile ({dotfile_path()})."
-                     )
-
-                 for looper_config_key, looper_config_item in looper_config_dict.items():
-                     setattr(args, looper_config_key, looper_config_item)
-
-             except OSError:
-                 print(msg + f" and dotfile does not exist: {dotfile_path()}")
-                 parser.print_help(sys.stderr)
-                 sys.exit(1)
-         else:
-             _LOGGER.warning(
-                 "The Looper config specification through the PEP project is deprecated and will "
-                 "be removed in future versions. Please use the new running method by "
-                 f"utilizing a looper config file. For more information: {'here is more information'} "
-             )
-
-     if args.command == "init":
-         sys.exit(
-             int(
-                 not init_dotfile(
-                     dotfile_path(),
-                     args.config_file,
-                     args.output_dir,
-                     args.sample_pipeline_interfaces,
-                     args.project_pipeline_interfaces,
-                     args.force,
-                 )
-             )
-         )
-
-     if args.command == "init-piface":
-         sys.exit(int(not init_generic_pipeline()))
-
-     args = enrich_args_via_cfg(args, aux_parser, test_args)
-
-     # If project pipeline interface defined in the cli, change name to: "pipeline_interface"
-     if vars(args)[PROJECT_PL_ARG]:
-         args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG]
-
-     _LOGGER = logmuse.logger_via_cli(args, make_root=True)
-
-     _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args.command))
-
-     if len(remaining_args) > 0:
-         _LOGGER.warning(
-             "Unrecognized arguments: {}".format(
-                 " ".join([str(x) for x in remaining_args])
-             )
-         )
-
-     divcfg = (
-         select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None
-     )
-
-     # Initialize project
-     if is_registry_path(args.config_file):
-         if vars(args)[SAMPLE_PL_ARG]:
-             p = Project(
-                 amendments=args.amend,
-                 divcfg_path=divcfg,
-                 runp=args.command == "runp",
-                 project_dict=PEPHubClient()._load_raw_pep(
-                     registry_path=args.config_file
-                 ),
-                 **{
-                     attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                 },
-             )
-         else:
-             raise MisconfigurationException(
-                 f"`sample_pipeline_interface` is missing. Provide it in the parameters."
-             )
-     else:
-         try:
-             p = Project(
-                 cfg=args.config_file,
-                 amendments=args.amend,
-                 divcfg_path=divcfg,
-                 runp=args.command == "runp",
-                 **{
-                     attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                 },
-             )
-         except yaml.parser.ParserError as e:
-             _LOGGER.error(f"Project config parse failed -- {e}")
-             sys.exit(1)
-
-     selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME
-     if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg):
-         _LOGGER.info(
-             "Failed to activate '{}' computing package. "
-             "Using the default one".format(selected_compute_pkg)
-         )
-
-     with ProjectContext(
-         prj=p,
-         selector_attribute=args.sel_attr,
-         selector_include=args.sel_incl,
-         selector_exclude=args.sel_excl,
-     ) as prj:
-         if args.command in ["run", "rerun"]:
-             run = Runner(prj)
-             try:
-                 compute_kwargs = _proc_resources_spec(args)
-                 run(args, rerun=(args.command == "rerun"), **compute_kwargs)
-             except SampleFailedException:
-                 sys.exit(1)
-             except IOError:
-                 _LOGGER.error(
-                     "{} pipeline_interfaces: '{}'".format(
-                         prj.__class__.__name__, prj.pipeline_interface_sources
-                     )
-                 )
-                 raise
-
-         if args.command == "runp":
-             compute_kwargs = _proc_resources_spec(args)
-             collate = Collator(prj)
-             collate(args, **compute_kwargs)
-
-         if args.command == "destroy":
-             return Destroyer(prj)(args)
-
-         # pipestat support introduces breaking changes and pipelines run
-         # with no pipestat reporting would not be compatible with
-         # commands: table, report and check. Therefore we plan maintain
-         # the old implementations for a couple of releases.
-         if hasattr(args, "project"):
-             use_pipestat = (
-                 prj.pipestat_configured_project
-                 if args.project
-                 else prj.pipestat_configured
-             )
-         if args.command == "table":
-             if use_pipestat:
-                 Tabulator(prj)(args)
-             else:
-                 TableOld(prj)()
-
-         if args.command == "report":
-             if use_pipestat:
-                 Reporter(prj)(args)
-             else:
-                 ReportOld(prj)(args)
-
-         if args.command == "check":
-             if use_pipestat:
-                 Checker(prj)(args)
-             else:
-                 CheckerOld(prj)(flags=args.flags)
-
-         if args.command == "clean":
-             return Cleaner(prj)(args)
-
-         if args.command == "inspect":
-             inspect_project(p, args.sample_names, args.attr_limit)
-             from warnings import warn
-
-             warn(
-                 "The inspect feature has moved to eido and will be removed in the future release of looper. "
-                 "Use `eido inspect` from now on.",
-             )