looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl
- looper/__init__.py +3 -498
- looper/__main__.py +2 -2
- looper/_version.py +1 -1
- looper/cli_divvy.py +182 -0
- looper/cli_looper.py +776 -0
- looper/conductor.py +53 -206
- looper/const.py +51 -3
- looper/divvy.py +28 -196
- looper/exceptions.py +18 -0
- looper/looper.py +177 -612
- looper/plugins.py +160 -0
- looper/processed_project.py +1 -1
- looper/project.py +229 -117
- looper/utils.py +119 -43
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/METADATA +6 -6
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/RECORD +20 -20
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/WHEEL +1 -1
- looper/html_reports.py +0 -1057
- looper/html_reports_pipestat.py +0 -924
- looper/html_reports_project_pipestat.py +0 -269
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/LICENSE.txt +0 -0
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/entry_points.txt +0 -0
- {looper-1.5.0.dist-info → looper-1.6.0a1.dist-info}/top_level.txt +0 -0
looper/looper.py
CHANGED
@@ -4,17 +4,12 @@ Looper: a pipeline submission engine. https://github.com/pepkit/looper
 """
 
 import abc
+import argparse
 import csv
 import logging
 import subprocess
-import
-
-if sys.version_info < (3, 3):
-    from collections import Mapping
-else:
-    from collections.abc import Mapping
-
-import logmuse
+import yaml
+import os
 import pandas as _pd
 
 # Need specific sequence of actions for colorama imports?
@@ -23,11 +18,12 @@ from colorama import init
 init()
 from shutil import rmtree
 
+# from collections.abc import Mapping
+from collections import defaultdict
 from colorama import Fore, Style
-from eido import
+from eido import validate_config, validate_sample
 from eido.exceptions import EidoValidationError
 from jsonschema import ValidationError
-from pephubclient import PEPHubClient
 from peppy.const import *
 from peppy.exceptions import RemoteYAMLError
 from rich.color import Color
@@ -36,21 +32,20 @@ from rich.table import Table
 from ubiquerg.cli_tools import query_yes_no
 from ubiquerg.collection import uniqify
 
-
+
 from .conductor import SubmissionConductor
+
+from .exceptions import *
 from .const import *
-from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config
-from .exceptions import (
-    JobSubmissionException,
-    MisconfigurationException,
-    SampleFailedException,
-)
-from .html_reports import HTMLReportBuilderOld
-from .html_reports_pipestat import HTMLReportBuilder, fetch_pipeline_results
-from .html_reports_project_pipestat import HTMLReportBuilderProject
 from .pipeline_interface import PipelineInterface
-from .project import Project
-from .utils import
+from .project import Project
+from .utils import (
+    desired_samples_range_skipped,
+    desired_samples_range_limited,
+    sample_folder,
+)
+from pipestat.reports import get_file_for_table
+from pipestat.reports import get_file_for_project
 
 _PKGNAME = "looper"
 _LOGGER = logging.getLogger(_PKGNAME)
@@ -104,7 +99,7 @@ class Checker(Executor):
         for sample in self.prj.samples:
             psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
             for pipeline_name, psm in psms.items():
-                s = psm.get_status(
+                s = psm.get_status(record_identifier=sample.sample_name)
                 status.setdefault(pipeline_name, {})
                 status[pipeline_name][sample.sample_name] = s
                 _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}")
@@ -171,60 +166,7 @@ class Checker(Executor):
             desc = ""
             table.add_row(status, desc)
         console.print(table)
-
-
-class CheckerOld(Executor):
-    def __call__(self, flags=None, all_folders=False, max_file_count=30):
-        """
-        Check Project status, based on flag files.
-
-        :param Iterable[str] | str flags: Names of flags to check, optional;
-            if unspecified, all known flags will be checked.
-        :param bool all_folders: Whether to check flags in all folders, not
-            just those for samples in the config file from which the Project
-            was created.
-        :param int max_file_count: Maximum number of filepaths to display for a
-            given flag.
-        """
-
-        # Handle single or multiple flags, and alphabetize.
-        flags = sorted([flags] if isinstance(flags, str) else list(flags or FLAGS))
-        flag_text = ", ".join(flags)
-
-        # Collect the files by flag and sort by flag name.
-        _LOGGER.debug("Checking project folders for flags: %s", flag_text)
-        if all_folders:
-            files_by_flag = fetch_flag_files(
-                results_folder=self.prj.results_folder, flags=flags
-            )
-        else:
-            files_by_flag = fetch_flag_files(prj=self.prj, flags=flags)
-
-        # For each flag, output occurrence count.
-        for flag in flags:
-            _LOGGER.info("%s: %d", flag.upper(), len(files_by_flag[flag]))
-
-        # For each flag, output filepath(s) if not overly verbose.
-        for flag in flags:
-            try:
-                files = files_by_flag[flag]
-            except Exception as e:
-                _LOGGER.debug(
-                    "No files for {} flag. Caught exception: {}".format(
-                        flags, getattr(e, "message", repr(e))
-                    )
-                )
-                continue
-            # If checking on a specific flag, do not limit the number of
-            # reported filepaths, but do not report empty file lists
-            if len(flags) == 1 and len(files) > 0:
-                _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
-            # Regardless of whether 0-count flags are previously reported,
-            # don't report an empty file list for a flag that's absent.
-            # If the flag-to-files mapping is defaultdict, absent flag (key)
-            # will fetch an empty collection, so check for length of 0.
-            if 0 < len(files) <= max_file_count:
-                _LOGGER.info("%s (%d):\n%s", flag.upper(), len(files), "\n".join(files))
+        return status
 
 
 class Cleaner(Executor):
@@ -270,7 +212,8 @@ class Cleaner(Executor):
         return self(args, preview_flag=False)
 
 
-
+# NOTE: Adding type hint -> Iterable[Any] gives me TypeError: 'ABCMeta' object is not subscriptable
+def select_samples(prj: Project, args: argparse.Namespace):
     """Use CLI limit/skip arguments to select subset of project's samples."""
     # TODO: get proper element type for signature.
     num_samples = len(prj.samples)
@@ -310,7 +253,17 @@ class Destroyer(Executor):
             _remove_or_dry_run(sample_output_folder, args.dry_run)
 
         _LOGGER.info("Removing summary:")
-
+        use_pipestat = (
+            self.prj.pipestat_configured_project
+            if args.project
+            else self.prj.pipestat_configured
+        )
+        if use_pipestat:
+            destroy_summary(self.prj, args.dry_run, args.project)
+        else:
+            _LOGGER.warning(
+                "Pipestat must be configured to destroy any created summaries."
+            )
 
         if not preview_flag:
             _LOGGER.info("Destroy complete.")
@@ -354,6 +307,7 @@ class Collator(Executor):
             arguments, recognized by looper
         """
         jobs = 0
+        self.debug = {}
         project_pifaces = self.prj.project_pipeline_interface_sources
         if not project_pifaces:
             raise MisconfigurationException(
@@ -399,6 +353,8 @@ class Collator(Executor):
         jobs += conductor.num_job_submissions
         _LOGGER.info("\nLooper finished")
         _LOGGER.info("Jobs submitted: {}".format(jobs))
+        self.debug[DEBUG_JOBS] = jobs
+        return self.debug
 
 
 class Runner(Executor):
@@ -415,6 +371,7 @@ class Runner(Executor):
         :param bool rerun: whether the given sample is being rerun rather than
             run for the first time
         """
+        self.debug = {}  # initialize empty dict for return values
         max_cmds = sum(list(map(len, self.prj._samples_by_interface.values())))
         self.counter.total = max_cmds
         failures = defaultdict(list)  # Collect problems by sample.
@@ -453,6 +410,9 @@ class Runner(Executor):
             submission_conductors[piface.pipe_iface_file] = conductor
 
         _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}")
+        self.debug["Pipestat compatible"] = (
+            self.prj.pipestat_configured_project or self.prj.pipestat_configured
+        )
 
         for sample in select_samples(prj=self.prj, args=args):
             pl_fails = []
@@ -474,10 +434,17 @@ class Runner(Executor):
                 try:
                     validate_sample(self.prj, sample.sample_name, schema_file)
                 except EidoValidationError as e:
-                    _LOGGER.error(
+                    _LOGGER.error(
+                        f"Short-circuiting due to validation error!\nSchema file: "
+                        f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                    )
+                    self.debug[DEBUG_EIDO_VALIDATION] = (
+                        f"Short-circuiting due to validation error!\nSchema file: "
+                        f"{schema_file}\nError: {e}\n{list(e.errors_by_type.keys())}"
+                    )
                     return False
                 except RemoteYAMLError:
-                    _LOGGER.
+                    _LOGGER.warning(
                         f"Could not read remote schema, skipping '{sample.sample_name}' "
                         f"sample validation against {schema_file}"
                     )
@@ -518,9 +485,15 @@ class Runner(Executor):
             )
         )
         _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
-
+        self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds)
         if args.dry_run:
-
+            job_sub_total_if_real = job_sub_total
+            job_sub_total = 0
+            _LOGGER.info(
+                f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been."
+            )
+        _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+        self.debug[DEBUG_JOBS] = job_sub_total
 
         # Restructure sample/failure data for display.
         samples_by_reason = defaultdict(set)
@@ -528,6 +501,7 @@ class Runner(Executor):
         for sample, failures in failures.items():
             for f in failures:
                 samples_by_reason[f].add(sample)
+                self.debug[f] = sample
         # Collect samples by pipeline with submission failure.
         for piface, conductor in submission_conductors.items():
             # Don't add failure key if there are no samples that failed for
@@ -562,6 +536,8 @@ class Runner(Executor):
             _LOGGER.debug("Raising SampleFailedException")
             raise SampleFailedException
 
+        return self.debug
+
 
 class Reporter(Executor):
     """Combine project outputs into a browsable HTML report"""
@@ -576,305 +552,82 @@ class Reporter(Executor):
             print(psms)
             for name, psm in psms.items():
                 # Summarize will generate the static HTML Report Function
-                psm.summarize()
+                report_directory = psm.summarize(looper_samples=self.prj.samples)
+                print(f"Report directory: {report_directory}")
         else:
-            for
-
+            for piface_source_samples in self.prj._samples_by_piface(
+                self.prj.piface_key
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
+                )
                 print(psms)
                 for name, psm in psms.items():
                     # Summarize will generate the static HTML Report Function
-                    psm.summarize()
+                    report_directory = psm.summarize(looper_samples=self.prj.samples)
+                    print(f"Report directory: {report_directory}")
 
 
-class
-    """
+class Linker(Executor):
+    """Create symlinks for reported results. Requires pipestat to be configured."""
 
     def __call__(self, args):
+        # initialize the report builder
+        p = self.prj
         project_level = args.project
+        link_dir = args.output_dir
+
         if project_level:
-
-            for
-
-
-            # pull together all the fits and stats from each sample into
-            # project-combined spreadsheets.
-            self.stats = _create_stats_summary(
-                self.prj, pipeline_name, project_level, self.counter
-            )
-            self.objs = _create_obj_summary(
-                self.prj, pipeline_name, project_level, self.counter
-            )
+            psms = self.prj.get_pipestat_managers(project_level=True)
+            for name, psm in psms.items():
+                linked_results_path = psm.link(link_dir=link_dir)
+                print(f"Linked directory: {linked_results_path}")
         else:
-            for
+            for piface_source_samples in self.prj._samples_by_piface(
                 self.prj.piface_key
-            ).
-            #
-
-
-
-
-                self.prj, pipeline_name, project_level, self.counter
-            )
-            self.objs = _create_obj_summary(
-                self.prj, pipeline_name, project_level, self.counter
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
                 )
-
+                for name, psm in psms.items():
+                    linked_results_path = psm.link(link_dir=link_dir)
+                    print(f"Linked directory: {linked_results_path}")
 
 
-
-    """
-    Create stats spreadsheet and columns to be considered in the report, save
-    the spreadsheet to file
-
-    :param looper.Project project: the project to be summarized
-    :param str pipeline_name: name of the pipeline to tabulate results for
-    :param bool project_level: whether the project-level pipeline resutlts
-        should be tabulated
-    :param looper.LooperCounter counter: a counter object
-    """
-    # Create stats_summary file
-    columns = set()
-    stats = []
-    _LOGGER.info("Creating stats summary")
-    if project_level:
-        _LOGGER.info(
-            counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-        )
-        reported_stats = {"project_name": project.name}
-        results = fetch_pipeline_results(
-            project=project,
-            pipeline_name=pipeline_name,
-            inclusion_fun=lambda x: x not in OBJECT_TYPES,
-        )
-        reported_stats.update(results)
-        stats.append(reported_stats)
-        columns |= set(reported_stats.keys())
+class Tabulator(Executor):
+    """Project/Sample statistics and table output generator
 
-
-    for sample in project.samples:
-        sn = sample.sample_name
-        _LOGGER.info(counter.show(sn, pipeline_name))
-        reported_stats = {project.sample_table_index: sn}
-        results = fetch_pipeline_results(
-            project=project,
-            pipeline_name=pipeline_name,
-            sample_name=sn,
-            inclusion_fun=lambda x: x not in OBJECT_TYPES,
-        )
-        reported_stats.update(results)
-        stats.append(reported_stats)
-        columns |= set(reported_stats.keys())
-
-    tsv_outfile_path = get_file_for_project(project, pipeline_name, "stats_summary.tsv")
-    tsv_outfile = open(tsv_outfile_path, "w")
-    tsv_writer = csv.DictWriter(
-        tsv_outfile, fieldnames=list(columns), delimiter="\t", extrasaction="ignore"
-    )
-    tsv_writer.writeheader()
-    for row in stats:
-        tsv_writer.writerow(row)
-    tsv_outfile.close()
-    _LOGGER.info(
-        f"'{pipeline_name}' pipeline stats summary (n={len(stats)}):"
-        f" {tsv_outfile_path}"
-    )
-    counter.reset()
-    return stats
-
-
-def _create_obj_summary(project, pipeline_name, project_level, counter):
+    :return list[str|any] results: list containing output file paths of stats and objects
     """
-    Read sample specific objects files and save to a data frame
-
-    :param looper.Project project: the project to be summarized
-    :param str pipeline_name: name of the pipeline to tabulate results for
-    :param looper.LooperCounter counter: a counter object
-    :param bool project_level: whether the project-level pipeline resutlts
-        should be tabulated
-    """
-    _LOGGER.info("Creating objects summary")
-    reported_objects = {}
-    if project_level:
-        _LOGGER.info(
-            counter.show(name=project.name, type="project", pipeline_name=pipeline_name)
-        )
-        res = fetch_pipeline_results(
-            project=project,
-            pipeline_name=pipeline_name,
-            inclusion_fun=lambda x: x in OBJECT_TYPES,
-        )
-        # need to cast to a dict, since other mapping-like objects might
-        # cause issues when writing to the collective yaml file below
-        project_reported_objects = {k: dict(v) for k, v in res.items()}
-        reported_objects[project.name] = project_reported_objects
-    else:
-        for sample in project.samples:
-            sn = sample.sample_name
-            _LOGGER.info(counter.show(sn, pipeline_name))
-            res = fetch_pipeline_results(
-                project=project,
-                pipeline_name=pipeline_name,
-                sample_name=sn,
-                inclusion_fun=lambda x: x in OBJECT_TYPES,
-            )
-            # need to cast to a dict, since other mapping-like objects might
-            # cause issues when writing to the collective yaml file below
-            sample_reported_objects = {k: dict(v) for k, v in res.items()}
-            reported_objects[sn] = sample_reported_objects
-    objs_yaml_path = get_file_for_project(project, pipeline_name, "objs_summary.yaml")
-    with open(objs_yaml_path, "w") as outfile:
-        yaml.dump(reported_objects, outfile)
-    _LOGGER.info(
-        f"'{pipeline_name}' pipeline objects summary "
-        f"(n={len(reported_objects.keys())}): {objs_yaml_path}"
-    )
-    counter.reset()
-    return reported_objects
-
-
-class ReportOld(Executor):
-    """Combine project outputs into a browsable HTML report"""
-
-    def __init__(self, prj):
-        # call the inherited initialization
-        super(ReportOld, self).__init__(prj)
-        self.prj = prj
 
     def __call__(self, args):
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        super(TableOld, self).__init__(prj)
-        self.prj = prj
-
-    def __call__(self):
-        def _create_stats_summary_old(project, counter):
-            """
-            Create stats spreadsheet and columns to be considered in the report, save
-            the spreadsheet to file
-            :param looper.Project project: the project to be summarized
-            :param looper.LooperCounter counter: a counter object
-            """
-            # Create stats_summary file
-            columns = []
-            stats = []
-            project_samples = project.samples
-            missing_files = []
-            _LOGGER.info("Creating stats summary...")
-            for sample in project_samples:
-                _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                sample_output_folder = sample_folder(project, sample)
-                # Grab the basic info from the annotation sheet for this sample.
-                # This will correspond to a row in the output.
-                sample_stats = sample.get_sheet_dict()
-                columns.extend(sample_stats.keys())
-                # Version 0.3 standardized all stats into a single file
-                stats_file = os.path.join(sample_output_folder, "stats.tsv")
-                if not os.path.isfile(stats_file):
-                    missing_files.append(stats_file)
-                    continue
-                t = _pd.read_csv(
-                    stats_file, sep="\t", header=None, names=["key", "value", "pl"]
-                )
-                t.drop_duplicates(subset=["key", "pl"], keep="last", inplace=True)
-                t.loc[:, "plkey"] = t["pl"] + ":" + t["key"]
-                dupes = t.duplicated(subset=["key"], keep=False)
-                t.loc[dupes, "key"] = t.loc[dupes, "plkey"]
-                sample_stats.update(t.set_index("key")["value"].to_dict())
-                stats.append(sample_stats)
-                columns.extend(t.key.tolist())
-            if missing_files:
-                _LOGGER.warning(
-                    "Stats files missing for {} samples: {}".format(
-                        len(missing_files), missing_files
-                    )
-                )
-            tsv_outfile_path = get_file_for_project_old(project, "stats_summary.tsv")
-            tsv_outfile = open(tsv_outfile_path, "w")
-            tsv_writer = csv.DictWriter(
-                tsv_outfile,
-                fieldnames=uniqify(columns),
-                delimiter="\t",
-                extrasaction="ignore",
-            )
-            tsv_writer.writeheader()
-            for row in stats:
-                tsv_writer.writerow(row)
-            tsv_outfile.close()
-            _LOGGER.info(
-                "Statistics summary (n=" + str(len(stats)) + "): " + tsv_outfile_path
-            )
-            counter.reset()
-            return stats, uniqify(columns)
-
-        def _create_obj_summary_old(project, counter):
-            """
-            Read sample specific objects files and save to a data frame
-            :param looper.Project project: the project to be summarized
-            :param looper.LooperCounter counter: a counter object
-            :return pandas.DataFrame: objects spreadsheet
-            """
-            _LOGGER.info("Creating objects summary...")
-            objs = _pd.DataFrame()
-            # Create objects summary file
-            missing_files = []
-            for sample in project.samples:
-                # Process any reported objects
-                _LOGGER.info(counter.show(sample.sample_name, sample.protocol))
-                sample_output_folder = sample_folder(project, sample)
-                objs_file = os.path.join(sample_output_folder, "objects.tsv")
-                if not os.path.isfile(objs_file):
-                    missing_files.append(objs_file)
-                    continue
-                t = _pd.read_csv(
-                    objs_file,
-                    sep="\t",
-                    header=None,
-                    names=[
-                        "key",
-                        "filename",
-                        "anchor_text",
-                        "anchor_image",
-                        "annotation",
-                    ],
-                )
-                t["sample_name"] = sample.sample_name
-                objs = objs.append(t, ignore_index=True)
-            if missing_files:
-                _LOGGER.warning(
-                    "Object files missing for {} samples: {}".format(
-                        len(missing_files), missing_files
-                    )
+        # p = self.prj
+        project_level = args.project
+        results = []
+        if project_level:
+            psms = self.prj.get_pipestat_managers(project_level=True)
+            for name, psm in psms.items():
+                results = psm.table()
+        else:
+            for piface_source_samples in self.prj._samples_by_piface(
+                self.prj.piface_key
+            ).values():
+                # For each piface_key, we have a list of samples, but we only need one sample from the list to
+                # call the related pipestat manager object which will pull ALL samples when using psm.table
+                first_sample_name = list(piface_source_samples)[0]
+                psms = self.prj.get_pipestat_managers(
+                    sample_name=first_sample_name, project_level=False
                 )
-
-
-
-
-                "Objects summary (n="
-                + str(len(project.samples) - len(missing_files))
-                + "): "
-                + objs_file
-            )
-
-
-            # pull together all the fits and stats from each sample into
-            # project-combined spreadsheets.
-            self.stats, self.columns = _create_stats_summary_old(self.prj, self.counter)
-            self.objs = _create_obj_summary_old(self.prj, self.counter)
-            return self
+                for name, psm in psms.items():
+                    results = psm.table()
+        # Results contains paths to stats and object summaries.
+        return results
 
 
 def _create_failure_message(reason, samples):
@@ -889,7 +642,7 @@ def _remove_or_dry_run(paths, dry_run=False):
 
     :param list|str paths: list of paths to files/dirs to be removed
    :param bool dry_run: logical indicating whether the files should remain
-        untouched and
+        untouched and message printed
    """
    paths = paths if isinstance(paths, list) else [paths]
    for path in paths:
@@ -906,20 +659,70 @@ def _remove_or_dry_run(paths, dry_run=False):
         _LOGGER.info(path + " does not exist.")
 
 
-def destroy_summary(prj, dry_run=False):
+def destroy_summary(prj, dry_run=False, project_level=False):
     """
     Delete the summary files if not in dry run mode
+    This function is for use with pipestat configured projects.
     """
-
-
-
-
-
-
-
-
-
-
+
+    if project_level:
+        psms = prj.get_pipestat_managers(project_level=True)
+        for name, psm in psms.items():
+            _remove_or_dry_run(
+                [
+                    get_file_for_project(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        directory="reports",
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        appendix="stats_summary.tsv",
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm["_pipeline_name"],
+                        appendix="objs_summary.yaml",
+                    ),
+                    get_file_for_table(
+                        psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                    ),
+                ],
+                dry_run,
+            )
+    else:
+        for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
+            # For each piface_key, we have a list of samples, but we only need one sample from the list to
+            # call the related pipestat manager object which will pull ALL samples when using psm.table
+            first_sample_name = list(piface_source_samples)[0]
+            psms = prj.get_pipestat_managers(
+                sample_name=first_sample_name, project_level=False
+            )
+            for name, psm in psms.items():
+                _remove_or_dry_run(
+                    [
+                        get_file_for_project(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            directory="reports",
+                        ),
+                        get_file_for_table(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            appendix="stats_summary.tsv",
+                        ),
+                        get_file_for_table(
+                            psm,
+                            pipeline_name=psm["_pipeline_name"],
+                            appendix="objs_summary.yaml",
+                        ),
+                        get_file_for_table(
+                            psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                        ),
+                    ],
+                    dry_run,
+                )
 
 
 class LooperCounter(object):
@@ -972,241 +775,3 @@ def _submission_status_text(
     if pipeline_name:
         txt += f"; pipeline: {pipeline_name}"
     return txt + Style.RESET_ALL
-
-
-def _proc_resources_spec(args):
-    """
-    Process CLI-sources compute setting specification. There are two sources
-    of compute settings in the CLI alone:
-        * YAML file (--settings argument)
-        * itemized compute settings (--compute argument)
-
-    The itemized compute specification is given priority
-
-    :param argparse.Namespace: arguments namespace
-    :return Mapping[str, str]: binding between resource setting name and value
-    :raise ValueError: if interpretation of the given specification as encoding
-        of key-value pairs fails
-    """
-    spec = getattr(args, "compute", None)
-    try:
-        settings_data = read_yaml_file(args.settings) or {}
-    except yaml.YAMLError:
-        _LOGGER.warning(
-            "Settings file ({}) does not follow YAML format,"
-            " disregarding".format(args.settings)
-        )
-        settings_data = {}
-    if not spec:
-        return settings_data
-    pairs = [(kv, kv.split("=")) for kv in spec]
-    bads = []
-    for orig, pair in pairs:
-        try:
-            k, v = pair
-        except ValueError:
-            bads.append(orig)
-        else:
-            settings_data[k] = v
-    if bads:
-        raise ValueError(
-            "Could not correctly parse itemized compute specification. "
-            "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT
-        )
-    return settings_data
-
-
-def main(test_args=None):
-    """Primary workflow"""
-    global _LOGGER
-
-    parser, aux_parser = build_parser()
-    aux_parser.suppress_defaults()
-
-    if test_args:
-        args, remaining_args = parser.parse_known_args(args=test_args)
-    else:
-        args, remaining_args = parser.parse_known_args()
-
-    cli_use_errors = validate_post_parse(args)
-    if cli_use_errors:
-        parser.print_help(sys.stderr)
-        parser.error(
-            f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}"
-        )
-    if args.command is None:
-        parser.print_help(sys.stderr)
-        sys.exit(1)
-    if "config_file" in vars(args):
-        if args.config_file is None:
-            msg = "No project config defined (peppy)"
-            try:
-                if args.looper_config:
-                    looper_config_dict = read_looper_config_file(args.looper_config)
-                else:
-                    looper_config_dict = read_looper_dotfile()
-                    print(
-                        msg + f", using: {read_looper_dotfile()}. "
-                        f"Read from dotfile ({dotfile_path()})."
-                    )
-
-                for looper_config_key, looper_config_item in looper_config_dict.items():
-                    setattr(args, looper_config_key, looper_config_item)
-
-            except OSError:
-                print(msg + f" and dotfile does not exist: {dotfile_path()}")
-                parser.print_help(sys.stderr)
-                sys.exit(1)
-        else:
-            _LOGGER.warning(
-                "The Looper config specification through the PEP project is deprecated and will "
-                "be removed in future versions. Please use the new running method by "
-                f"utilizing a looper config file. For more information: {'here is more information'} "
-            )
-
-    if args.command == "init":
-        sys.exit(
-            int(
-                not init_dotfile(
-                    dotfile_path(),
-                    args.config_file,
-                    args.output_dir,
-                    args.sample_pipeline_interfaces,
-                    args.project_pipeline_interfaces,
-                    args.force,
-                )
-            )
-        )
-
-    if args.command == "init-piface":
-        sys.exit(int(not init_generic_pipeline()))
-
-    args = enrich_args_via_cfg(args, aux_parser, test_args)
-
-    # If project pipeline interface defined in the cli, change name to: "pipeline_interface"
-    if vars(args)[PROJECT_PL_ARG]:
-        args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG]
-
-    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
-
-    _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args.command))
-
-    if len(remaining_args) > 0:
-        _LOGGER.warning(
-            "Unrecognized arguments: {}".format(
-                " ".join([str(x) for x in remaining_args])
-            )
-        )
-
-    divcfg = (
-        select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None
-    )
-
-    # Initialize project
-    if is_registry_path(args.config_file):
-        if vars(args)[SAMPLE_PL_ARG]:
-            p = Project(
-                amendments=args.amend,
-                divcfg_path=divcfg,
-                runp=args.command == "runp",
-                project_dict=PEPHubClient()._load_raw_pep(
-                    registry_path=args.config_file
-                ),
-                **{
-                    attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                },
-            )
-        else:
-            raise MisconfigurationException(
-                f"`sample_pipeline_interface` is missing. Provide it in the parameters."
-            )
-    else:
-        try:
-            p = Project(
-                cfg=args.config_file,
-                amendments=args.amend,
-                divcfg_path=divcfg,
-                runp=args.command == "runp",
-                **{
-                    attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args
-                },
-            )
-        except yaml.parser.ParserError as e:
-            _LOGGER.error(f"Project config parse failed -- {e}")
-            sys.exit(1)
-
-    selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME
-    if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg):
-        _LOGGER.info(
-            "Failed to activate '{}' computing package. "
-            "Using the default one".format(selected_compute_pkg)
-        )
-
-    with ProjectContext(
-        prj=p,
-        selector_attribute=args.sel_attr,
-        selector_include=args.sel_incl,
-        selector_exclude=args.sel_excl,
-    ) as prj:
-        if args.command in ["run", "rerun"]:
-            run = Runner(prj)
-            try:
-                compute_kwargs = _proc_resources_spec(args)
-                run(args, rerun=(args.command == "rerun"), **compute_kwargs)
-            except SampleFailedException:
-                sys.exit(1)
-            except IOError:
-                _LOGGER.error(
-                    "{} pipeline_interfaces: '{}'".format(
-                        prj.__class__.__name__, prj.pipeline_interface_sources
-                    )
-                )
-                raise
-
-        if args.command == "runp":
-            compute_kwargs = _proc_resources_spec(args)
-            collate = Collator(prj)
-            collate(args, **compute_kwargs)
-
-        if args.command == "destroy":
-            return Destroyer(prj)(args)
-
-        # pipestat support introduces breaking changes and pipelines run
-        # with no pipestat reporting would not be compatible with
-        # commands: table, report and check. Therefore we plan maintain
-        # the old implementations for a couple of releases.
-        if hasattr(args, "project"):
-            use_pipestat = (
-                prj.pipestat_configured_project
-                if args.project
-                else prj.pipestat_configured
-            )
-        if args.command == "table":
-            if use_pipestat:
-                Tabulator(prj)(args)
-            else:
-                TableOld(prj)()
-
-        if args.command == "report":
-            if use_pipestat:
-                Reporter(prj)(args)
-            else:
-                ReportOld(prj)(args)
-
-        if args.command == "check":
-            if use_pipestat:
-                Checker(prj)(args)
-            else:
-                CheckerOld(prj)(flags=args.flags)
-
-        if args.command == "clean":
-            return Cleaner(prj)(args)
-
-        if args.command == "inspect":
-            inspect_project(p, args.sample_names, args.attr_limit)
-            from warnings import warn
-
-            warn(
-                "The inspect feature has moved to eido and will be removed in the future release of looper. "
-                "Use `eido inspect` from now on.",
-            )