looper 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
looper/looper.py CHANGED
@@ -6,6 +6,7 @@ Looper: a pipeline submission engine. https://github.com/pepkit/looper
 import abc
 import argparse
 import csv
+import glob
 import logging
 import subprocess
 import yaml
@@ -15,6 +16,8 @@ import pandas as _pd
 # Need specific sequence of actions for colorama imports?
 from colorama import init
 
+from .const import PipelineLevel
+
 init()
 from shutil import rmtree
 
@@ -45,7 +48,6 @@ from .utils import (
     sample_folder,
 )
 from pipestat.reports import get_file_for_table
-from pipestat.reports import get_file_for_project
 
 _PKGNAME = "looper"
 _LOGGER = logging.getLogger(_PKGNAME)
@@ -88,21 +90,29 @@ class Checker(Executor):
 
         # aggregate pipeline status data
         status = {}
-        if args.project:
-            psms = self.prj.get_pipestat_managers(project_level=True)
-            for pipeline_name, psm in psms.items():
-                s = psm.get_status() or "unknown"
-                status.setdefault(pipeline_name, {})
-                status[pipeline_name][self.prj.name] = s
-                _LOGGER.debug(f"{self.prj.name} ({pipeline_name}): {s}")
+
+        psms = {}
+        if getattr(args, "project", None):
+
+            for piface in self.prj.project_pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    s = piface.psm.get_status() or "unknown"
+                    status.setdefault(piface.psm.pipeline_name, {})
+                    status[piface.psm.pipeline_name][self.prj.name] = s
+                    _LOGGER.debug(f"{self.prj.name} ({piface.psm.pipeline_name}): {s}")
+
         else:
             for sample in self.prj.samples:
-                psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
-                for pipeline_name, psm in psms.items():
-                    s = psm.get_status(record_identifier=sample.sample_name)
-                    status.setdefault(pipeline_name, {})
-                    status[pipeline_name][sample.sample_name] = s
-                    _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}")
+                for piface in sample.project.pipeline_interfaces:
+                    if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                        psms[piface.psm.pipeline_name] = piface.psm
+                        s = piface.psm.get_status(record_identifier=sample.sample_name)
+                        status.setdefault(piface.psm.pipeline_name, {})
+                        status[piface.psm.pipeline_name][sample.sample_name] = s
+                        _LOGGER.debug(
+                            f"{sample.sample_name} ({piface.psm.pipeline_name}): {s}"
+                        )
 
         console = Console()
 
@@ -116,14 +126,14 @@ class Checker(Executor):
             )
             table.add_column(f"Status", justify="center")
             table.add_column("Jobs count/total jobs", justify="center")
-            for status_id in psm.status_schema.keys():
+            for status_id in psms[pipeline_name].status_schema.keys():
                 status_list = list(pipeline_status.values())
                 if status_id in status_list:
                     status_count = status_list.count(status_id)
                     table.add_row(status_id, f"{status_count}/{len(status_list)}")
             console.print(table)
 
-        if args.itemized:
+        if getattr(args, "itemized", None):
             for pipeline_name, pipeline_status in status.items():
                 table_title = f"Pipeline: '{pipeline_name}'"
                 table = Table(
@@ -141,7 +151,7 @@ class Checker(Executor):
                 for name, status_id in pipeline_status.items():
                     try:
                         color = Color.from_rgb(
-                            *psm.status_schema[status_id]["color"]
+                            *psms[pipeline_name].status_schema[status_id]["color"]
                         ).name
                     except KeyError:
                         color = "#bcbcbc"
@@ -150,16 +160,17 @@ class Checker(Executor):
                 console.print(table)
 
         if args.describe_codes:
+            # TODO this needs to be redone because it only takes the last psm in the list and gets status code and descriptions
             table = Table(
                 show_header=True,
                 header_style="bold magenta",
                 title=f"Status codes description",
-                width=len(psm.status_schema_source) + 20,
-                caption=f"Descriptions source: {psm.status_schema_source}",
+                width=len(psms[pipeline_name].status_schema_source) + 20,
+                caption=f"Descriptions source: {psms[pipeline_name].status_schema_source}",
             )
             table.add_column("Status code", justify="center")
             table.add_column("Description", justify="left")
-            for status, status_obj in psm.status_schema.items():
+            for status, status_obj in psms[pipeline_name].status_schema.items():
                 if "description" in status_obj:
                     desc = status_obj["description"]
                 else:
@@ -199,10 +210,10 @@ class Cleaner(Executor):
         if not preview_flag:
             _LOGGER.info("Clean complete.")
             return 0
-        if args.dry_run:
+        if getattr(args, "dry_run", None):
             _LOGGER.info("Dry run. No files cleaned.")
             return 0
-        if not args.force_yes and not query_yes_no(
+        if not getattr(args, "force_yes", None) and not query_yes_no(
             "Are you sure you want to permanently delete all "
             "intermediate pipeline results for this project?"
         ):
@@ -241,8 +252,22 @@ class Destroyer(Executor):
         :param bool preview_flag: whether to halt before actually removing files
         """
 
-        _LOGGER.info("Removing results:")
+        use_pipestat = (
+            self.prj.pipestat_configured_project
+            if getattr(args, "project", None)
+            else self.prj.pipestat_configured
+        )
+
+        if use_pipestat:
+            _LOGGER.info("Removing summary:")
+            destroy_summary(
+                self.prj,
+                dry_run=preview_flag,
+                project_level=getattr(args, "project", None),
+            )
 
+        _LOGGER.info("Removing results:")
+        psms = {}
         for sample in select_samples(prj=self.prj, args=args):
             _LOGGER.info(self.counter.show(sample.sample_name))
             sample_output_folder = sample_folder(self.prj, sample)
@@ -250,30 +275,26 @@ class Destroyer(Executor):
                 # Preview: Don't actually delete, just show files.
                 _LOGGER.info(str(sample_output_folder))
             else:
-                _remove_or_dry_run(sample_output_folder, args.dry_run)
-
-        _LOGGER.info("Removing summary:")
-        use_pipestat = (
-            self.prj.pipestat_configured_project
-            if args.project
-            else self.prj.pipestat_configured
-        )
-        if use_pipestat:
-            destroy_summary(self.prj, args.dry_run, args.project)
-        else:
-            _LOGGER.warning(
-                "Pipestat must be configured to destroy any created summaries."
-            )
+                if use_pipestat:
+                    for piface in sample.project.pipeline_interfaces:
+                        if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                            psms[piface.psm.pipeline_name] = piface.psm
+                    for pipeline_name, psm in psms.items():
+                        psm.backend.remove_record(
+                            record_identifier=sample.sample_name, rm_record=True
+                        )
+                else:
+                    _remove_or_dry_run(sample_output_folder, args.dry_run)
 
         if not preview_flag:
             _LOGGER.info("Destroy complete.")
             return 0
 
-        if args.dry_run:
+        if getattr(args, "dry_run", None):
             _LOGGER.info("Dry run. No files destroyed.")
             return 0
 
-        if not args.force_yes and not query_yes_no(
+        if not getattr(args, "force_yes", None) and not query_yes_no(
             "Are you sure you want to permanently delete all pipeline "
             "results for this project?"
         ):
@@ -308,7 +329,7 @@ class Collator(Executor):
         """
         jobs = 0
         self.debug = {}
-        project_pifaces = self.prj.project_pipeline_interface_sources
+        project_pifaces = self.prj.project_pipeline_interfaces
         if not project_pifaces:
             raise MisconfigurationException(
                 "Looper requires a pointer to at least one project pipeline. "
@@ -318,36 +339,26 @@ class Collator(Executor):
             )
         self.counter = LooperCounter(len(project_pifaces))
         for project_piface in project_pifaces:
-            try:
-                project_piface_object = PipelineInterface(
-                    project_piface, pipeline_type="project"
-                )
-            except (IOError, ValidationError) as e:
-                _LOGGER.warning(
-                    "Ignoring invalid pipeline interface source: {}. "
-                    "Caught exception: {}".format(
-                        project_piface, getattr(e, "message", repr(e))
-                    )
-                )
-                continue
             _LOGGER.info(
                 self.counter.show(
                     name=self.prj.name,
                     type="project",
-                    pipeline_name=project_piface_object.pipeline_name,
+                    pipeline_name=project_piface.pipeline_name,
                 )
             )
             conductor = SubmissionConductor(
-                pipeline_interface=project_piface_object,
+                pipeline_interface=project_piface,
                 prj=self.prj,
                 compute_variables=compute_kwargs,
-                delay=args.time_delay,
-                extra_args=args.command_extra,
-                extra_args_override=args.command_extra_override,
-                ignore_flags=args.ignore_flags,
+                delay=getattr(args, "time_delay", None),
+                extra_args=getattr(args, "command_extra", None),
+                extra_args_override=getattr(args, "command_extra_override", None),
+                ignore_flags=getattr(args, "ignore_flags", None),
                 collate=True,
             )
-            if conductor.is_project_submittable(force=args.ignore_flags):
+            if conductor.is_project_submittable(
+                force=getattr(args, "ignore_flags", None)
+            ):
                 conductor._pool = [None]
                 conductor.submit()
                 jobs += conductor.num_job_submissions
@@ -360,7 +371,7 @@ class Collator(Executor):
 class Runner(Executor):
     """The true submitter of pipelines"""
 
-    def __call__(self, args, rerun=False, **compute_kwargs):
+    def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs):
         """
         Do the Sample submission.
 
@@ -395,25 +406,24 @@ class Runner(Executor):
         )
 
         submission_conductors = {}
+
         for piface in self.prj.pipeline_interfaces:
             conductor = SubmissionConductor(
                 pipeline_interface=piface,
                 prj=self.prj,
                 compute_variables=comp_vars,
-                delay=args.time_delay,
-                extra_args=args.command_extra,
-                extra_args_override=args.command_extra_override,
-                ignore_flags=args.ignore_flags,
-                max_cmds=args.lump_n,
-                max_size=args.lump_s,
-                max_jobs=args.lump_j,
+                delay=getattr(args, "time_delay", None),
+                extra_args=getattr(args, "command_extra", None),
+                extra_args_override=getattr(args, "command_extra_override", None),
+                ignore_flags=getattr(args, "ignore_flags", None),
+                max_cmds=getattr(args, "lump_n", None),
+                max_size=getattr(args, "lump", None),
+                max_jobs=getattr(args, "lump_j", None),
             )
             submission_conductors[piface.pipe_iface_file] = conductor
 
-        _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}")
-        self.debug["Pipestat compatible"] = (
-            self.prj.pipestat_configured_project or self.prj.pipestat_configured
-        )
+        _LOGGER.debug(f"Pipestat compatible: {self.prj.pipestat_configured}")
+        self.debug["Pipestat compatible"] = self.prj.pipestat_configured
 
         for sample in select_samples(prj=self.prj, args=args):
             pl_fails = []
@@ -485,15 +495,15 @@ class Runner(Executor):
                 len(processed_samples), num_samples
             )
         )
-        _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
+        _LOGGER.debug("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
         self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds)
-        if args.dry_run:
+        if getattr(args, "dry_run", None):
             job_sub_total_if_real = job_sub_total
             job_sub_total = 0
             _LOGGER.info(
                 f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been."
             )
-        _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
+        _LOGGER.debug("Jobs submitted: {}".format(job_sub_total))
         self.debug[DEBUG_JOBS] = job_sub_total
 
         # Restructure sample/failure data for display.
@@ -545,37 +555,35 @@ class Reporter(Executor):
 
     def __call__(self, args):
         # initialize the report builder
+        self.debug = {}
         p = self.prj
-        project_level = args.project
+        project_level = getattr(args, "project", None)
 
         portable = args.portable
 
+        psms = {}
+
         if project_level:
-            psms = self.prj.get_pipestat_managers(project_level=True)
-            print(psms)
-            for name, psm in psms.items():
-                # Summarize will generate the static HTML Report Function
-                report_directory = psm.summarize(
-                    looper_samples=self.prj.samples, portable=portable
-                )
+
+            for piface in self.prj.project_pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    report_directory = piface.psm.summarize(
+                        looper_samples=self.prj.samples, portable=portable
+                    )
             print(f"Report directory: {report_directory}")
+            self.debug["report_directory"] = report_directory
+            return self.debug
         else:
-            for piface_source_samples in self.prj._samples_by_piface(
-                self.prj.piface_key
-            ).values():
-                # For each piface_key, we have a list of samples, but we only need one sample from the list to
-                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
-                first_sample_name = list(piface_source_samples)[0]
-                psms = self.prj.get_pipestat_managers(
-                    sample_name=first_sample_name, project_level=False
-                )
-                print(psms)
-                for name, psm in psms.items():
-                    # Summarize will generate the static HTML Report Function
-                    report_directory = psm.summarize(
+            for piface in self.prj.pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    report_directory = piface.psm.summarize(
                         looper_samples=self.prj.samples, portable=portable
                     )
             print(f"Report directory: {report_directory}")
+            self.debug["report_directory"] = report_directory
+            return self.debug
 
 
 class Linker(Executor):
@@ -584,26 +592,22 @@ class Linker(Executor):
     def __call__(self, args):
         # initialize the report builder
         p = self.prj
-        project_level = args.project
-        link_dir = args.output_dir
+        project_level = getattr(args, "project", None)
+        link_dir = getattr(args, "output_dir", None)
+
+        psms = {}
 
         if project_level:
-            psms = self.prj.get_pipestat_managers(project_level=True)
-            for name, psm in psms.items():
-                linked_results_path = psm.link(link_dir=link_dir)
-                print(f"Linked directory: {linked_results_path}")
+            for piface in self.prj.project_pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    linked_results_path = piface.psm.link(link_dir=link_dir)
+                    print(f"Linked directory: {linked_results_path}")
         else:
-            for piface_source_samples in self.prj._samples_by_piface(
-                self.prj.piface_key
-            ).values():
-                # For each piface_key, we have a list of samples, but we only need one sample from the list to
-                # call the related pipestat manager object which will pull ALL samples when using psm.summarize
-                first_sample_name = list(piface_source_samples)[0]
-                psms = self.prj.get_pipestat_managers(
-                    sample_name=first_sample_name, project_level=False
-                )
-                for name, psm in psms.items():
-                    linked_results_path = psm.link(link_dir=link_dir)
+            for piface in self.prj.pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    linked_results_path = piface.psm.link(link_dir=link_dir)
                     print(f"Linked directory: {linked_results_path}")
 
 
@@ -615,24 +619,19 @@ class Tabulator(Executor):
 
     def __call__(self, args):
         # p = self.prj
-        project_level = args.project
+        project_level = getattr(args, "project", None)
         results = []
+        psms = {}
         if project_level:
-            psms = self.prj.get_pipestat_managers(project_level=True)
-            for name, psm in psms.items():
-                results = psm.table()
+            for piface in self.prj.project_pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    results = piface.psm.table()
         else:
-            for piface_source_samples in self.prj._samples_by_piface(
-                self.prj.piface_key
-            ).values():
-                # For each piface_key, we have a list of samples, but we only need one sample from the list to
-                # call the related pipestat manager object which will pull ALL samples when using psm.table
-                first_sample_name = list(piface_source_samples)[0]
-                psms = self.prj.get_pipestat_managers(
-                    sample_name=first_sample_name, project_level=False
-                )
-                for name, psm in psms.items():
-                    results = psm.table()
+            for piface in self.prj.pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+                    results = piface.psm.table()
         # Results contains paths to stats and object summaries.
         return results
 
@@ -672,64 +671,60 @@ def destroy_summary(prj, dry_run=False, project_level=False):
     This function is for use with pipestat configured projects.
     """
 
+    psms = {}
     if project_level:
-        psms = prj.get_pipestat_managers(project_level=True)
+        for piface in prj.pipeline_interfaces:
+            if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                psms[piface.psm.pipeline_name] = piface.psm
+
        for name, psm in psms.items():
             _remove_or_dry_run(
                 [
-                    get_file_for_project(
-                        psm,
-                        pipeline_name=psm["_pipeline_name"],
-                        directory="reports",
+                    get_file_for_table(
+                        psm, pipeline_name=psm.pipeline_name, directory="reports"
                     ),
                     get_file_for_table(
                         psm,
-                        pipeline_name=psm["_pipeline_name"],
+                        pipeline_name=psm.pipeline_name,
                         appendix="stats_summary.tsv",
                     ),
                     get_file_for_table(
                         psm,
-                        pipeline_name=psm["_pipeline_name"],
+                        pipeline_name=psm.pipeline_name,
                         appendix="objs_summary.yaml",
                     ),
-                    get_file_for_table(
-                        psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
+                    os.path.join(
+                        os.path.dirname(psm.config_path), "aggregate_results.yaml"
                     ),
                 ],
                 dry_run,
             )
     else:
-        for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
-            # For each piface_key, we have a list of samples, but we only need one sample from the list to
-            # call the related pipestat manager object which will pull ALL samples when using psm.table
-            first_sample_name = list(piface_source_samples)[0]
-            psms = prj.get_pipestat_managers(
-                sample_name=first_sample_name, project_level=False
+        for piface in prj.pipeline_interfaces:
+            if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
+                psms[piface.psm.pipeline_name] = piface.psm
+        for name, psm in psms.items():
+            _remove_or_dry_run(
+                [
+                    get_file_for_table(
+                        psm, pipeline_name=psm.pipeline_name, directory="reports"
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm.pipeline_name,
+                        appendix="stats_summary.tsv",
+                    ),
+                    get_file_for_table(
+                        psm,
+                        pipeline_name=psm.pipeline_name,
+                        appendix="objs_summary.yaml",
+                    ),
+                    os.path.join(
+                        os.path.dirname(psm.config_path), "aggregate_results.yaml"
+                    ),
+                ],
+                dry_run,
             )
-        for name, psm in psms.items():
-            _remove_or_dry_run(
-                [
-                    get_file_for_project(
-                        psm,
-                        pipeline_name=psm["_pipeline_name"],
-                        directory="reports",
-                    ),
-                    get_file_for_table(
-                        psm,
-                        pipeline_name=psm["_pipeline_name"],
-                        appendix="stats_summary.tsv",
-                    ),
-                    get_file_for_table(
-                        psm,
-                        pipeline_name=psm["_pipeline_name"],
-                        appendix="objs_summary.yaml",
-                    ),
-                    get_file_for_table(
-                        psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
-                    ),
-                ],
-                dry_run,
-            )
 
 
 class LooperCounter(object):
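
Note: the recurring change in this diff is that pipestat managers are no longer fetched with prj.get_pipestat_managers(); each pipeline interface now carries its own manager as piface.psm, and callers filter on piface.psm.pipeline_type against the PipelineLevel enum. The following is a minimal illustrative sketch of that collection pattern, assuming a looper >= 1.8 Project-like object with pipeline_interfaces / project_pipeline_interfaces as shown above; the helper name collect_psms is hypothetical and not part of looper.

    from looper.const import PipelineLevel  # imported as .const inside the package

    def collect_psms(prj, project_level=False):
        # Hypothetical helper (not in looper): gather pipestat managers keyed by
        # pipeline name, mirroring the filtering used throughout this release.
        level = (
            PipelineLevel.PROJECT.value if project_level else PipelineLevel.SAMPLE.value
        )
        pifaces = (
            prj.project_pipeline_interfaces if project_level else prj.pipeline_interfaces
        )
        psms = {}
        for piface in pifaces:
            if piface.psm.pipeline_type == level:
                psms[piface.psm.pipeline_name] = piface.psm
        return psms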