looper 1.7.0a1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
looper/looper.py CHANGED
@@ -6,6 +6,7 @@ Looper: a pipeline submission engine. https://github.com/pepkit/looper
6
6
  import abc
7
7
  import argparse
8
8
  import csv
9
+ import glob
9
10
  import logging
10
11
  import subprocess
11
12
  import yaml
@@ -15,6 +16,8 @@ import pandas as _pd
15
16
  # Need specific sequence of actions for colorama imports?
16
17
  from colorama import init
17
18
 
19
+ from .const import PipelineLevel
20
+
18
21
  init()
19
22
  from shutil import rmtree
20
23
 
@@ -45,7 +48,6 @@ from .utils import (
45
48
  sample_folder,
46
49
  )
47
50
  from pipestat.reports import get_file_for_table
48
- from pipestat.reports import get_file_for_project
49
51
 
50
52
  _PKGNAME = "looper"
51
53
  _LOGGER = logging.getLogger(_PKGNAME)
@@ -88,21 +90,29 @@ class Checker(Executor):
88
90
 
89
91
  # aggregate pipeline status data
90
92
  status = {}
91
- if args.project:
92
- psms = self.prj.get_pipestat_managers(project_level=True)
93
- for pipeline_name, psm in psms.items():
94
- s = psm.get_status() or "unknown"
95
- status.setdefault(pipeline_name, {})
96
- status[pipeline_name][self.prj.name] = s
97
- _LOGGER.debug(f"{self.prj.name} ({pipeline_name}): {s}")
93
+
94
+ psms = {}
95
+ if getattr(args, "project", None):
96
+
97
+ for piface in self.prj.pipeline_interfaces:
98
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
99
+ psms[piface.psm.pipeline_name] = piface.psm
100
+ s = piface.psm.get_status() or "unknown"
101
+ status.setdefault(piface.psm.pipeline_name, {})
102
+ status[piface.psm.pipeline_name][self.prj.name] = s
103
+ _LOGGER.debug(f"{self.prj.name} ({piface.psm.pipeline_name}): {s}")
104
+
98
105
  else:
99
106
  for sample in self.prj.samples:
100
- psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
101
- for pipeline_name, psm in psms.items():
102
- s = psm.get_status(record_identifier=sample.sample_name)
103
- status.setdefault(pipeline_name, {})
104
- status[pipeline_name][sample.sample_name] = s
105
- _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}")
107
+ for piface in sample.project.pipeline_interfaces:
108
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
109
+ psms[piface.psm.pipeline_name] = piface.psm
110
+ s = piface.psm.get_status(record_identifier=sample.sample_name)
111
+ status.setdefault(piface.psm.pipeline_name, {})
112
+ status[piface.psm.pipeline_name][sample.sample_name] = s
113
+ _LOGGER.debug(
114
+ f"{sample.sample_name} ({piface.psm.pipeline_name}): {s}"
115
+ )
106
116
 
107
117
  console = Console()
108
118
 
@@ -116,14 +126,14 @@ class Checker(Executor):
116
126
  )
117
127
  table.add_column(f"Status", justify="center")
118
128
  table.add_column("Jobs count/total jobs", justify="center")
119
- for status_id in psm.status_schema.keys():
129
+ for status_id in psms[pipeline_name].status_schema.keys():
120
130
  status_list = list(pipeline_status.values())
121
131
  if status_id in status_list:
122
132
  status_count = status_list.count(status_id)
123
133
  table.add_row(status_id, f"{status_count}/{len(status_list)}")
124
134
  console.print(table)
125
135
 
126
- if args.itemized:
136
+ if getattr(args, "itemized", None):
127
137
  for pipeline_name, pipeline_status in status.items():
128
138
  table_title = f"Pipeline: '{pipeline_name}'"
129
139
  table = Table(
@@ -141,7 +151,7 @@ class Checker(Executor):
141
151
  for name, status_id in pipeline_status.items():
142
152
  try:
143
153
  color = Color.from_rgb(
144
- *psm.status_schema[status_id]["color"]
154
+ *psms[pipeline_name].status_schema[status_id]["color"]
145
155
  ).name
146
156
  except KeyError:
147
157
  color = "#bcbcbc"
@@ -150,16 +160,17 @@ class Checker(Executor):
150
160
  console.print(table)
151
161
 
152
162
  if args.describe_codes:
163
+ # TODO this needs to be redone because it only takes the last psm in the list and gets status code and descriptions
153
164
  table = Table(
154
165
  show_header=True,
155
166
  header_style="bold magenta",
156
167
  title=f"Status codes description",
157
- width=len(psm.status_schema_source) + 20,
158
- caption=f"Descriptions source: {psm.status_schema_source}",
168
+ width=len(psms[pipeline_name].status_schema_source) + 20,
169
+ caption=f"Descriptions source: {psms[pipeline_name].status_schema_source}",
159
170
  )
160
171
  table.add_column("Status code", justify="center")
161
172
  table.add_column("Description", justify="left")
162
- for status, status_obj in psm.status_schema.items():
173
+ for status, status_obj in psms[pipeline_name].status_schema.items():
163
174
  if "description" in status_obj:
164
175
  desc = status_obj["description"]
165
176
  else:
@@ -199,10 +210,10 @@ class Cleaner(Executor):
199
210
  if not preview_flag:
200
211
  _LOGGER.info("Clean complete.")
201
212
  return 0
202
- if args.dry_run:
213
+ if getattr(args, "dry_run", None):
203
214
  _LOGGER.info("Dry run. No files cleaned.")
204
215
  return 0
205
- if not args.force_yes and not query_yes_no(
216
+ if not getattr(args, "force_yes", None) and not query_yes_no(
206
217
  "Are you sure you want to permanently delete all "
207
218
  "intermediate pipeline results for this project?"
208
219
  ):
@@ -241,8 +252,22 @@ class Destroyer(Executor):
241
252
  :param bool preview_flag: whether to halt before actually removing files
242
253
  """
243
254
 
244
- _LOGGER.info("Removing results:")
255
+ use_pipestat = (
256
+ self.prj.pipestat_configured_project
257
+ if getattr(args, "project", None)
258
+ else self.prj.pipestat_configured
259
+ )
260
+
261
+ if use_pipestat:
262
+ _LOGGER.info("Removing summary:")
263
+ destroy_summary(
264
+ self.prj,
265
+ dry_run=preview_flag,
266
+ project_level=getattr(args, "project", None),
267
+ )
245
268
 
269
+ _LOGGER.info("Removing results:")
270
+ psms = {}
246
271
  for sample in select_samples(prj=self.prj, args=args):
247
272
  _LOGGER.info(self.counter.show(sample.sample_name))
248
273
  sample_output_folder = sample_folder(self.prj, sample)
@@ -250,30 +275,26 @@ class Destroyer(Executor):
250
275
  # Preview: Don't actually delete, just show files.
251
276
  _LOGGER.info(str(sample_output_folder))
252
277
  else:
253
- _remove_or_dry_run(sample_output_folder, args.dry_run)
254
-
255
- _LOGGER.info("Removing summary:")
256
- use_pipestat = (
257
- self.prj.pipestat_configured_project
258
- if args.project
259
- else self.prj.pipestat_configured
260
- )
261
- if use_pipestat:
262
- destroy_summary(self.prj, args.dry_run, args.project)
263
- else:
264
- _LOGGER.warning(
265
- "Pipestat must be configured to destroy any created summaries."
266
- )
278
+ if use_pipestat:
279
+ for piface in sample.project.pipeline_interfaces:
280
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
281
+ psms[piface.psm.pipeline_name] = piface.psm
282
+ for pipeline_name, psm in psms.items():
283
+ psm.backend.remove_record(
284
+ record_identifier=sample.sample_name, rm_record=True
285
+ )
286
+ else:
287
+ _remove_or_dry_run(sample_output_folder, args.dry_run)
267
288
 
268
289
  if not preview_flag:
269
290
  _LOGGER.info("Destroy complete.")
270
291
  return 0
271
292
 
272
- if args.dry_run:
293
+ if getattr(args, "dry_run", None):
273
294
  _LOGGER.info("Dry run. No files destroyed.")
274
295
  return 0
275
296
 
276
- if not args.force_yes and not query_yes_no(
297
+ if not getattr(args, "force_yes", None) and not query_yes_no(
277
298
  "Are you sure you want to permanently delete all pipeline "
278
299
  "results for this project?"
279
300
  ):
@@ -308,7 +329,7 @@ class Collator(Executor):
308
329
  """
309
330
  jobs = 0
310
331
  self.debug = {}
311
- project_pifaces = self.prj.project_pipeline_interface_sources
332
+ project_pifaces = self.prj.project_pipeline_interfaces
312
333
  if not project_pifaces:
313
334
  raise MisconfigurationException(
314
335
  "Looper requires a pointer to at least one project pipeline. "
@@ -318,36 +339,26 @@ class Collator(Executor):
318
339
  )
319
340
  self.counter = LooperCounter(len(project_pifaces))
320
341
  for project_piface in project_pifaces:
321
- try:
322
- project_piface_object = PipelineInterface(
323
- project_piface, pipeline_type="project"
324
- )
325
- except (IOError, ValidationError) as e:
326
- _LOGGER.warning(
327
- "Ignoring invalid pipeline interface source: {}. "
328
- "Caught exception: {}".format(
329
- project_piface, getattr(e, "message", repr(e))
330
- )
331
- )
332
- continue
333
342
  _LOGGER.info(
334
343
  self.counter.show(
335
344
  name=self.prj.name,
336
345
  type="project",
337
- pipeline_name=project_piface_object.pipeline_name,
346
+ pipeline_name=project_piface.pipeline_name,
338
347
  )
339
348
  )
340
349
  conductor = SubmissionConductor(
341
- pipeline_interface=project_piface_object,
350
+ pipeline_interface=project_piface,
342
351
  prj=self.prj,
343
352
  compute_variables=compute_kwargs,
344
- delay=args.time_delay,
345
- extra_args=args.command_extra,
346
- extra_args_override=args.command_extra_override,
347
- ignore_flags=args.ignore_flags,
353
+ delay=getattr(args, "time_delay", None),
354
+ extra_args=getattr(args, "command_extra", None),
355
+ extra_args_override=getattr(args, "command_extra_override", None),
356
+ ignore_flags=getattr(args, "ignore_flags", None),
348
357
  collate=True,
349
358
  )
350
- if conductor.is_project_submittable(force=args.ignore_flags):
359
+ if conductor.is_project_submittable(
360
+ force=getattr(args, "ignore_flags", None)
361
+ ):
351
362
  conductor._pool = [None]
352
363
  conductor.submit()
353
364
  jobs += conductor.num_job_submissions
@@ -360,7 +371,7 @@ class Collator(Executor):
360
371
  class Runner(Executor):
361
372
  """The true submitter of pipelines"""
362
373
 
363
- def __call__(self, args, rerun=False, **compute_kwargs):
374
+ def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs):
364
375
  """
365
376
  Do the Sample submission.
366
377
 
@@ -395,24 +406,24 @@ class Runner(Executor):
395
406
  )
396
407
 
397
408
  submission_conductors = {}
409
+
398
410
  for piface in self.prj.pipeline_interfaces:
399
411
  conductor = SubmissionConductor(
400
412
  pipeline_interface=piface,
401
413
  prj=self.prj,
402
414
  compute_variables=comp_vars,
403
- delay=args.time_delay,
404
- extra_args=args.command_extra,
405
- extra_args_override=args.command_extra_override,
406
- ignore_flags=args.ignore_flags,
407
- max_cmds=args.lumpn,
408
- max_size=args.lump,
415
+ delay=getattr(args, "time_delay", None),
416
+ extra_args=getattr(args, "command_extra", None),
417
+ extra_args_override=getattr(args, "command_extra_override", None),
418
+ ignore_flags=getattr(args, "ignore_flags", None),
419
+ max_cmds=getattr(args, "lump_n", None),
420
+ max_size=getattr(args, "lump", None),
421
+ max_jobs=getattr(args, "lump_j", None),
409
422
  )
410
423
  submission_conductors[piface.pipe_iface_file] = conductor
411
424
 
412
- _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}")
413
- self.debug["Pipestat compatible"] = (
414
- self.prj.pipestat_configured_project or self.prj.pipestat_configured
415
- )
425
+ _LOGGER.debug(f"Pipestat compatible: {self.prj.pipestat_configured}")
426
+ self.debug["Pipestat compatible"] = self.prj.pipestat_configured
416
427
 
417
428
  for sample in select_samples(prj=self.prj, args=args):
418
429
  pl_fails = []
@@ -484,15 +495,15 @@ class Runner(Executor):
484
495
  len(processed_samples), num_samples
485
496
  )
486
497
  )
487
- _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
498
+ _LOGGER.debug("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds))
488
499
  self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds)
489
- if args.dry_run:
500
+ if getattr(args, "dry_run", None):
490
501
  job_sub_total_if_real = job_sub_total
491
502
  job_sub_total = 0
492
503
  _LOGGER.info(
493
504
  f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been."
494
505
  )
495
- _LOGGER.info("Jobs submitted: {}".format(job_sub_total))
506
+ _LOGGER.debug("Jobs submitted: {}".format(job_sub_total))
496
507
  self.debug[DEBUG_JOBS] = job_sub_total
497
508
 
498
509
  # Restructure sample/failure data for display.
@@ -544,37 +555,35 @@ class Reporter(Executor):
544
555
 
545
556
  def __call__(self, args):
546
557
  # initialize the report builder
558
+ self.debug = {}
547
559
  p = self.prj
548
- project_level = args.project
560
+ project_level = getattr(args, "project", None)
549
561
 
550
562
  portable = args.portable
551
563
 
564
+ psms = {}
565
+
552
566
  if project_level:
553
- psms = self.prj.get_pipestat_managers(project_level=True)
554
- print(psms)
555
- for name, psm in psms.items():
556
- # Summarize will generate the static HTML Report Function
557
- report_directory = psm.summarize(
558
- looper_samples=self.prj.samples, portable=portable
559
- )
567
+
568
+ for piface in self.prj.pipeline_interfaces:
569
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
570
+ psms[piface.psm.pipeline_name] = piface.psm
571
+ report_directory = piface.psm.summarize(
572
+ looper_samples=self.prj.samples, portable=portable
573
+ )
560
574
  print(f"Report directory: {report_directory}")
575
+ self.debug["report_directory"] = report_directory
576
+ return self.debug
561
577
  else:
562
- for piface_source_samples in self.prj._samples_by_piface(
563
- self.prj.piface_key
564
- ).values():
565
- # For each piface_key, we have a list of samples, but we only need one sample from the list to
566
- # call the related pipestat manager object which will pull ALL samples when using psm.summarize
567
- first_sample_name = list(piface_source_samples)[0]
568
- psms = self.prj.get_pipestat_managers(
569
- sample_name=first_sample_name, project_level=False
570
- )
571
- print(psms)
572
- for name, psm in psms.items():
573
- # Summarize will generate the static HTML Report Function
574
- report_directory = psm.summarize(
578
+ for piface in self.prj.pipeline_interfaces:
579
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
580
+ psms[piface.psm.pipeline_name] = piface.psm
581
+ report_directory = piface.psm.summarize(
575
582
  looper_samples=self.prj.samples, portable=portable
576
583
  )
577
584
  print(f"Report directory: {report_directory}")
585
+ self.debug["report_directory"] = report_directory
586
+ return self.debug
578
587
 
579
588
 
580
589
  class Linker(Executor):
@@ -583,26 +592,22 @@ class Linker(Executor):
583
592
  def __call__(self, args):
584
593
  # initialize the report builder
585
594
  p = self.prj
586
- project_level = args.project
587
- link_dir = args.output_dir
595
+ project_level = getattr(args, "project", None)
596
+ link_dir = getattr(args, "output_dir", None)
597
+
598
+ psms = {}
588
599
 
589
600
  if project_level:
590
- psms = self.prj.get_pipestat_managers(project_level=True)
591
- for name, psm in psms.items():
592
- linked_results_path = psm.link(link_dir=link_dir)
593
- print(f"Linked directory: {linked_results_path}")
601
+ for piface in self.prj.pipeline_interfaces:
602
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
603
+ psms[piface.psm.pipeline_name] = piface.psm
604
+ linked_results_path = piface.psm.link(link_dir=link_dir)
605
+ print(f"Linked directory: {linked_results_path}")
594
606
  else:
595
- for piface_source_samples in self.prj._samples_by_piface(
596
- self.prj.piface_key
597
- ).values():
598
- # For each piface_key, we have a list of samples, but we only need one sample from the list to
599
- # call the related pipestat manager object which will pull ALL samples when using psm.summarize
600
- first_sample_name = list(piface_source_samples)[0]
601
- psms = self.prj.get_pipestat_managers(
602
- sample_name=first_sample_name, project_level=False
603
- )
604
- for name, psm in psms.items():
605
- linked_results_path = psm.link(link_dir=link_dir)
607
+ for piface in self.prj.pipeline_interfaces:
608
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
609
+ psms[piface.psm.pipeline_name] = piface.psm
610
+ linked_results_path = piface.psm.link(link_dir=link_dir)
606
611
  print(f"Linked directory: {linked_results_path}")
607
612
 
608
613
 
@@ -614,24 +619,19 @@ class Tabulator(Executor):
614
619
 
615
620
  def __call__(self, args):
616
621
  # p = self.prj
617
- project_level = args.project
622
+ project_level = getattr(args, "project", None)
618
623
  results = []
624
+ psms = {}
619
625
  if project_level:
620
- psms = self.prj.get_pipestat_managers(project_level=True)
621
- for name, psm in psms.items():
622
- results = psm.table()
626
+ for piface in self.prj.pipeline_interfaces:
627
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
628
+ psms[piface.psm.pipeline_name] = piface.psm
629
+ results = piface.psm.table()
623
630
  else:
624
- for piface_source_samples in self.prj._samples_by_piface(
625
- self.prj.piface_key
626
- ).values():
627
- # For each piface_key, we have a list of samples, but we only need one sample from the list to
628
- # call the related pipestat manager object which will pull ALL samples when using psm.table
629
- first_sample_name = list(piface_source_samples)[0]
630
- psms = self.prj.get_pipestat_managers(
631
- sample_name=first_sample_name, project_level=False
632
- )
633
- for name, psm in psms.items():
634
- results = psm.table()
631
+ for piface in self.prj.pipeline_interfaces:
632
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
633
+ psms[piface.psm.pipeline_name] = piface.psm
634
+ results = piface.psm.table()
635
635
  # Results contains paths to stats and object summaries.
636
636
  return results
637
637
 
@@ -671,64 +671,60 @@ def destroy_summary(prj, dry_run=False, project_level=False):
671
671
  This function is for use with pipestat configured projects.
672
672
  """
673
673
 
674
+ psms = {}
674
675
  if project_level:
675
- psms = prj.get_pipestat_managers(project_level=True)
676
+ for piface in prj.pipeline_interfaces:
677
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
678
+ psms[piface.psm.pipeline_name] = piface.psm
679
+
676
680
  for name, psm in psms.items():
677
681
  _remove_or_dry_run(
678
682
  [
679
- get_file_for_project(
680
- psm,
681
- pipeline_name=psm["_pipeline_name"],
682
- directory="reports",
683
+ get_file_for_table(
684
+ psm, pipeline_name=psm.pipeline_name, directory="reports"
683
685
  ),
684
686
  get_file_for_table(
685
687
  psm,
686
- pipeline_name=psm["_pipeline_name"],
688
+ pipeline_name=psm.pipeline_name,
687
689
  appendix="stats_summary.tsv",
688
690
  ),
689
691
  get_file_for_table(
690
692
  psm,
691
- pipeline_name=psm["_pipeline_name"],
693
+ pipeline_name=psm.pipeline_name,
692
694
  appendix="objs_summary.yaml",
693
695
  ),
694
- get_file_for_table(
695
- psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
696
+ os.path.join(
697
+ os.path.dirname(psm.config_path), "aggregate_results.yaml"
696
698
  ),
697
699
  ],
698
700
  dry_run,
699
701
  )
700
702
  else:
701
- for piface_source_samples in prj._samples_by_piface(prj.piface_key).values():
702
- # For each piface_key, we have a list of samples, but we only need one sample from the list to
703
- # call the related pipestat manager object which will pull ALL samples when using psm.table
704
- first_sample_name = list(piface_source_samples)[0]
705
- psms = prj.get_pipestat_managers(
706
- sample_name=first_sample_name, project_level=False
703
+ for piface in prj.pipeline_interfaces:
704
+ if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value:
705
+ psms[piface.psm.pipeline_name] = piface.psm
706
+ for name, psm in psms.items():
707
+ _remove_or_dry_run(
708
+ [
709
+ get_file_for_table(
710
+ psm, pipeline_name=psm.pipeline_name, directory="reports"
711
+ ),
712
+ get_file_for_table(
713
+ psm,
714
+ pipeline_name=psm.pipeline_name,
715
+ appendix="stats_summary.tsv",
716
+ ),
717
+ get_file_for_table(
718
+ psm,
719
+ pipeline_name=psm.pipeline_name,
720
+ appendix="objs_summary.yaml",
721
+ ),
722
+ os.path.join(
723
+ os.path.dirname(psm.config_path), "aggregate_results.yaml"
724
+ ),
725
+ ],
726
+ dry_run,
707
727
  )
708
- for name, psm in psms.items():
709
- _remove_or_dry_run(
710
- [
711
- get_file_for_project(
712
- psm,
713
- pipeline_name=psm["_pipeline_name"],
714
- directory="reports",
715
- ),
716
- get_file_for_table(
717
- psm,
718
- pipeline_name=psm["_pipeline_name"],
719
- appendix="stats_summary.tsv",
720
- ),
721
- get_file_for_table(
722
- psm,
723
- pipeline_name=psm["_pipeline_name"],
724
- appendix="objs_summary.yaml",
725
- ),
726
- get_file_for_table(
727
- psm, pipeline_name=psm["_pipeline_name"], appendix="reports"
728
- ),
729
- ],
730
- dry_run,
731
- )
732
728
 
733
729
 
734
730
  class LooperCounter(object):