looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

This diff compares publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry.
looper/conductor.py CHANGED
@@ -5,6 +5,7 @@ import logging
5
5
  import os
6
6
  import subprocess
7
7
  import time
8
+ import yaml
8
9
  from copy import copy, deepcopy
9
10
  from json import loads
10
11
  from subprocess import check_output
@@ -19,7 +20,7 @@ from peppy.exceptions import RemoteYAMLError
19
20
  from pipestat import PipestatError
20
21
  from ubiquerg import expandpath, is_command_callable
21
22
  from yaml import dump
22
- from yacman import YAMLConfigManager, expandpath as expath
23
+ from yacman import YAMLConfigManager
23
24
 
24
25
  from .const import *
25
26
  from .exceptions import JobSubmissionException, SampleFailedException
@@ -81,158 +82,15 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=
81
82
  return final_path
82
83
 
83
84
 
84
- def write_sample_yaml(namespaces):
85
+ def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict):
85
86
  """
86
- Plugin: saves sample representation to YAML.
87
-
88
- This plugin can be parametrized by providing the path value/template in
89
- 'pipeline.var_templates.sample_yaml_path'. This needs to be a complete and
90
- absolute path to the file where sample YAML representation is to be
91
- stored.
92
-
93
- :param dict namespaces: variable namespaces dict
94
- :return dict: sample namespace dict
95
- """
96
- sample = namespaces["sample"]
97
- sample["sample_yaml_path"] = _get_yaml_path(
98
- namespaces, SAMPLE_YAML_PATH_KEY, "_sample"
99
- )
100
- sample.to_yaml(sample["sample_yaml_path"], add_prj_ref=False)
101
- return {"sample": sample}
102
-
103
-
104
- def write_sample_yaml_prj(namespaces):
105
- """
106
- Plugin: saves sample representation with project reference to YAML.
107
-
108
- This plugin can be parametrized by providing the path value/template in
109
- 'pipeline.var_templates.sample_yaml_prj_path'. This needs to be a complete and
110
- absolute path to the file where sample YAML representation is to be
111
- stored.
112
-
113
- :param dict namespaces: variable namespaces dict
114
- :return dict: sample namespace dict
115
- """
116
- sample = namespaces["sample"]
117
- sample.to_yaml(
118
- _get_yaml_path(namespaces, SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj"),
119
- add_prj_ref=True,
120
- )
121
- return {"sample": sample}
122
-
123
-
124
- def write_custom_template(namespaces):
125
- """
126
- Plugin: Populates a user-provided jinja template
127
-
128
- Parameterize by providing pipeline.var_templates.custom_template
129
- """
130
-
131
- def load_template(pipeline):
132
- with open(namespaces["pipeline"]["var_templates"]["custom_template"], "r") as f:
133
- x = f.read()
134
- t = jinja2.Template(x)
135
- return t
136
-
137
- err_msg = (
138
- "Custom template plugin requires a template in var_templates.custom_template"
139
- )
140
- if "var_templates" not in namespaces["pipeline"].keys():
141
- _LOGGER.error(err_msg)
142
- return None
143
-
144
- if "custom_template" not in namespaces["pipeline"]["var_templates"].keys():
145
- _LOGGER.error(err_msg)
146
- return None
147
-
148
- import jinja2
149
-
150
- tpl = load_template(namespaces["pipeline"])
151
- content = tpl.render(namespaces)
152
- pth = _get_yaml_path(namespaces, "custom_template_output", "_config")
153
- namespaces["sample"]["custom_template_output"] = pth
154
- with open(pth, "wb") as fh:
155
- # print(content)
156
- fh.write(content.encode())
157
-
158
- return {"sample": namespaces["sample"]}
159
-
160
-
161
- def write_sample_yaml_cwl(namespaces):
162
- """
163
- Plugin: Produce a cwl-compatible yaml representation of the sample
164
-
165
- Also adds the 'cwl_yaml' attribute to sample objects, which points
166
- to the file produced.
167
-
168
- This plugin can be parametrized by providing the path value/template in
169
- 'pipeline.var_templates.sample_cwl_yaml_path'. This needs to be a complete and
170
- absolute path to the file where sample YAML representation is to be
171
- stored.
172
-
173
- :param dict namespaces: variable namespaces dict
174
- :return dict: updated variable namespaces dict
87
+ This is run at the project level, not at the sample level.
175
88
  """
176
- from eido import read_schema
177
- from ubiquerg import is_url
178
-
179
- def _get_schema_source(
180
- schema_source, piface_dir=namespaces["looper"]["piface_dir"]
181
- ):
182
- # Stolen from piface object; should be a better way to do this...
183
- if is_url(schema_source):
184
- return schema_source
185
- elif not os.path.isabs(schema_source):
186
- schema_source = os.path.join(piface_dir, schema_source)
187
- return schema_source
188
-
189
- # To be compatible as a CWL job input, we need to handle the
190
- # File and Directory object types directly.
191
- sample = namespaces["sample"]
192
- sample.sample_yaml_cwl = _get_yaml_path(
193
- namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl"
194
- )
195
-
196
- if "input_schema" in namespaces["pipeline"]:
197
- schema_path = _get_schema_source(namespaces["pipeline"]["input_schema"])
198
- file_list = []
199
- for ischema in read_schema(schema_path):
200
- if "files" in ischema["properties"]["samples"]["items"]:
201
- file_list.extend(ischema["properties"]["samples"]["items"]["files"])
202
-
203
- for file_attr in file_list:
204
- _LOGGER.debug("CWL-ing file attribute: {}".format(file_attr))
205
- file_attr_value = sample[file_attr]
206
- # file paths are assumed relative to the sample table;
207
- # but CWL assumes they are relative to the yaml output file,
208
- # so we convert here.
209
- file_attr_rel = os.path.relpath(
210
- file_attr_value, os.path.dirname(sample.sample_yaml_cwl)
211
- )
212
- sample[file_attr] = {"class": "File", "path": file_attr_rel}
89
+ with open(looper_pipestat_config_path, "w") as f:
90
+ yaml.dump(pipestat_config_dict, f)
91
+ print(f"Initialized looper config file: {looper_pipestat_config_path}")
213
92
 
214
- directory_list = []
215
- for ischema in read_schema(schema_path):
216
- if "directories" in ischema["properties"]["samples"]["items"]:
217
- directory_list.extend(
218
- ischema["properties"]["samples"]["items"]["directories"]
219
- )
220
-
221
- for dir_attr in directory_list:
222
- _LOGGER.debug("CWL-ing directory attribute: {}".format(dir_attr))
223
- dir_attr_value = sample[dir_attr]
224
- # file paths are assumed relative to the sample table;
225
- # but CWL assumes they are relative to the yaml output file,
226
- # so we convert here.
227
- sample[dir_attr] = {"class": "Directory", "location": dir_attr_value}
228
- else:
229
- _LOGGER.warning(
230
- "No 'input_schema' defined, producing a regular "
231
- "sample YAML representation"
232
- )
233
- _LOGGER.info("Writing sample yaml to {}".format(sample.sample_yaml_cwl))
234
- sample.to_yaml(sample.sample_yaml_cwl)
235
- return {"sample": sample}
93
+ return True
236
94
 
237
95
 
238
96
  def write_submission_yaml(namespaces):
@@ -245,7 +103,7 @@ def write_submission_yaml(namespaces):
245
103
  path = _get_yaml_path(namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_submission")
246
104
  my_namespaces = {}
247
105
  for namespace, values in namespaces.items():
248
- my_namespaces.update({str(namespace): values.to_dict()})
106
+ my_namespaces.update({str(namespace): dict(values)})
249
107
  with open(path, "w") as yamlfile:
250
108
  dump(my_namespaces, yamlfile)
251
109
  return my_namespaces
@@ -417,28 +275,40 @@ class SubmissionConductor(object):
417
275
  )
418
276
  if self.prj.pipestat_configured:
419
277
  psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
420
- sample_statuses = psms[self.pl_name].get_status()
278
+ sample_statuses = psms[self.pl_name].get_status(
279
+ record_identifier=sample.sample_name
280
+ )
281
+ if sample_statuses == "failed" and rerun is True:
282
+ psms[self.pl_name].set_status(
283
+ record_identifier=sample.sample_name, status_identifier="waiting"
284
+ )
285
+ sample_statuses = "waiting"
421
286
  sample_statuses = [sample_statuses] if sample_statuses else []
422
287
  else:
423
288
  sample_statuses = fetch_sample_flags(self.prj, sample, self.pl_name)
424
- use_this_sample = not rerun
425
289
 
426
- if sample_statuses or rerun:
427
- if not self.ignore_flags:
428
- use_this_sample = False
429
- # But rescue the sample in case rerun/failed passes
290
+ use_this_sample = True # default to running this sample
291
+ msg = None
292
+ if sample_statuses:
293
+ status_str = ", ".join(sample_statuses)
430
294
  failed_flag = any("failed" in x for x in sample_statuses)
295
+ if self.ignore_flags:
296
+ msg = f"> Found existing status: {status_str}. Ignoring."
297
+ else: # this pipeline already has a status
298
+ msg = f"> Found existing status: {status_str}. Skipping sample."
299
+ if failed_flag:
300
+ msg += " Use rerun to ignore failed status." # help guidance
301
+ use_this_sample = False
431
302
  if rerun:
303
+ # Rescue the sample if rerun requested, and failed flag is found
432
304
  if failed_flag:
433
- _LOGGER.info("> Re-running failed sample")
305
+ msg = f"> Re-running failed sample. Status: {status_str}"
434
306
  use_this_sample = True
435
307
  else:
308
+ msg = f"> Skipping sample because rerun requested, but no failed flag found. Status: {status_str}"
436
309
  use_this_sample = False
437
- if not use_this_sample:
438
- msg = "> Skipping sample"
439
- if sample_statuses:
440
- msg += f". Determined status: {', '.join(sample_statuses)}"
441
- _LOGGER.info(msg)
310
+ if msg:
311
+ _LOGGER.info(msg)
442
312
 
443
313
  skip_reasons = []
444
314
  validation = {}
@@ -512,7 +382,7 @@ class SubmissionConductor(object):
512
382
  if self.dry_run:
513
383
  _LOGGER.info("Dry run, not submitted")
514
384
  elif self._rendered_ok:
515
- sub_cmd = self.prj.dcc.compute.submission_command
385
+ sub_cmd = self.prj.dcc.compute["submission_command"]
516
386
  submission_command = "{} {}".format(sub_cmd, script)
517
387
  # Capture submission command return value so that we can
518
388
  # intercept and report basic submission failures; #167
@@ -600,7 +470,9 @@ class SubmissionConductor(object):
600
470
  :return yacman.YAMLConfigManager: looper/submission related settings
601
471
  """
602
472
  settings = YAMLConfigManager()
603
- settings["pep_config"] = self.prj.config_file
473
+ settings["config_file"] = self.prj.config_file
474
+ settings["pep_config"] = self.prj.pep_config
475
+
604
476
  settings[RESULTS_SUBDIR_KEY] = self.prj.results_folder
605
477
  settings[SUBMISSION_SUBDIR_KEY] = self.prj.submission_folder
606
478
  settings[OUTDIR_KEY] = self.prj.output_dir
@@ -659,11 +531,9 @@ class SubmissionConductor(object):
659
531
  return YAMLConfigManager()
660
532
  else:
661
533
  full_namespace = {
662
- "schema": psm.schema_path,
663
534
  "results_file": psm.file,
664
- "record_id": psm.sample_name,
665
- "namespace": psm.project_name,
666
- "config": psm.config_path,
535
+ "record_identifier": psm.record_identifier,
536
+ "config_file": psm.config_path,
667
537
  }
668
538
  filtered_namespace = {k: v for k, v in full_namespace.items() if v}
669
539
  return YAMLConfigManager(filtered_namespace)
@@ -703,10 +573,15 @@ class SubmissionConductor(object):
703
573
  namespaces.update({"sample": sample})
704
574
  else:
705
575
  namespaces.update({"samples": self.prj.samples})
706
- pipestat_namespace = self._set_pipestat_namespace(
707
- sample_name=sample.sample_name if sample else None
708
- )
709
- namespaces.update({"pipestat": pipestat_namespace})
576
+ if self.prj.pipestat_configured:
577
+ pipestat_namespace = self._set_pipestat_namespace(
578
+ sample_name=sample.sample_name if sample else None
579
+ )
580
+ namespaces.update({"pipestat": pipestat_namespace})
581
+ else:
582
+ # Pipestat isn't configured, simply place empty YAMLConfigManager object instead.
583
+ pipestat_namespace = YAMLConfigManager()
584
+ namespaces.update({"pipestat": pipestat_namespace})
710
585
  res_pkg = self.pl_iface.choose_resource_package(
711
586
  namespaces, size or 0
712
587
  ) # config
@@ -721,12 +596,9 @@ class SubmissionConductor(object):
721
596
  )
722
597
  _LOGGER.debug(f"namespace pipelines: { pl_iface }")
723
598
 
724
- # check here to ensure command is executable
725
- self.check_executable_path(pl_iface)
726
-
727
599
  namespaces["pipeline"]["var_templates"] = pl_iface[VAR_TEMPL_KEY] or {}
728
600
  for k, v in namespaces["pipeline"]["var_templates"].items():
729
- namespaces["pipeline"]["var_templates"][k] = expath(v)
601
+ namespaces["pipeline"]["var_templates"][k] = expandpath(v)
730
602
 
731
603
  # pre_submit hook namespace updates
732
604
  namespaces = _exec_pre_submit(pl_iface, namespaces)
@@ -735,6 +607,7 @@ class SubmissionConductor(object):
735
607
  argstring = jinja_render_template_strictly(
736
608
  template=templ, namespaces=namespaces
737
609
  )
610
+ print(argstring)
738
611
  except UndefinedError as jinja_exception:
739
612
  _LOGGER.warning(NOT_SUB_MSG.format(str(jinja_exception)))
740
613
  except KeyError as e:
@@ -761,7 +634,9 @@ class SubmissionConductor(object):
761
634
  _LOGGER.debug("compute namespace:\n{}".format(self.prj.dcc.compute))
762
635
  _LOGGER.debug("looper namespace:\n{}".format(looper))
763
636
  _LOGGER.debug("pipestat namespace:\n{}".format(pipestat_namespace))
764
- subm_base = os.path.join(self.prj.submission_folder, looper[JOB_NAME_KEY])
637
+ subm_base = os.path.join(
638
+ expandpath(self.prj.submission_folder), looper[JOB_NAME_KEY]
639
+ )
765
640
  return self.prj.dcc.write_script(
766
641
  output_path=subm_base + ".sub", extra_vars=[{"looper": looper}]
767
642
  )
@@ -775,34 +650,6 @@ class SubmissionConductor(object):
775
650
  self._curr_skip_pool = []
776
651
  self._curr_skip_size = 0
777
652
 
778
- def check_executable_path(self, pl_iface):
779
- """Determines if supplied pipelines are callable.
780
- Raises error and exits Looper if not callable
781
- :param dict pl_iface: pipeline interface that stores paths to executables
782
- :return bool: True if path is callable.
783
- """
784
- pipeline_commands = []
785
- if "path" in pl_iface.keys():
786
- pipeline_commands.append(pl_iface["path"])
787
-
788
- if (
789
- "var_templates" in pl_iface.keys()
790
- and "pipeline" in pl_iface["var_templates"].keys()
791
- ):
792
- pipeline_commands.append(pl_iface["var_templates"]["pipeline"])
793
- for command in pipeline_commands:
794
- try:
795
- result = is_command_callable(command)
796
- except:
797
- _LOGGER.error(f" {command} IS NOT EXECUTABLE. EXITING")
798
- raise SampleFailedException
799
- else:
800
- if not result:
801
- _LOGGER.error(f" {command} IS NOT EXECUTABLE. EXITING...")
802
- raise SampleFailedException
803
- else:
804
- return True
805
-
806
653
 
807
654
  def _use_sample(flag, skips):
808
655
  return flag and not skips
looper/const.py CHANGED
@@ -81,6 +81,17 @@ __all__ = [
81
81
  "DEFAULT_CONFIG_FILEPATH",
82
82
  "DEFAULT_CONFIG_SCHEMA",
83
83
  "DEFAULT_COMPUTE_RESOURCES_NAME",
84
+ "MESSAGE_BY_SUBCOMMAND",
85
+ "SAMPLE_SELECTION_ATTRIBUTE_OPTNAME",
86
+ "SAMPLE_EXCLUSION_OPTNAME",
87
+ "SAMPLE_INCLUSION_OPTNAME",
88
+ "SAMPLE_SELECTION_FLAG_OPTNAME",
89
+ "SAMPLE_EXCLUSION_FLAG_OPTNAME",
90
+ "DEBUG_JOBS",
91
+ "DEBUG_COMMANDS",
92
+ "DEBUG_EIDO_VALIDATION",
93
+ "LOOPER_GENERIC_OUTPUT_SCHEMA",
94
+ "LOOPER_GENERIC_COUNT_LINES",
84
95
  ]
85
96
 
86
97
  FLAGS = ["completed", "running", "failed", "waiting", "partial"]
@@ -112,6 +123,11 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG):
112
123
  return ret
113
124
 
114
125
 
126
+ # Debug keys
127
+ DEBUG_JOBS = "Jobs submitted"
128
+ DEBUG_COMMANDS = "Commands submitted"
129
+ DEBUG_EIDO_VALIDATION = "EidoValidationError"
130
+
115
131
  # Compute-related (for divvy)
116
132
  COMPUTE_SETTINGS_VARNAME = ["DIVCFG"]
117
133
  DEFAULT_COMPUTE_RESOURCES_NAME = "default"
@@ -145,7 +161,9 @@ PIFACE_SCHEMA_SRC = os.path.join(
145
161
  EXTRA_SAMPLE_CMD_TEMPLATE = (
146
162
  "{%- if sample.command_extra is defined %} {sample.command_extra} {% endif -%}"
147
163
  )
148
- EXTRA_PROJECT_CMD_TEMPLATE = "{%- if project.looper.command_extra is defined %} {project.looper.command_extra}{% endif -%}"
164
+ EXTRA_PROJECT_CMD_TEMPLATE = (
165
+ "{%- if looper.command_extra is defined %} {looper.command_extra}{% endif -%}"
166
+ )
149
167
  DOTFILE_CFG_PTH_KEY = "config_file_path"
150
168
  INPUT_SCHEMA_KEY = "input_schema"
151
169
  OUTPUT_SCHEMA_KEY = "output_schema"
@@ -175,7 +193,7 @@ DEFAULT_PIPESTAT_CONFIG_ATTR = "pipestat_config"
175
193
  DEFAULT_PIPESTAT_RESULTS_FILE_ATTR = "pipestat_results_file"
176
194
  PIPESTAT_NAMESPACE_ATTR_KEY = "namespace_attribute"
177
195
  PIPESTAT_CONFIG_ATTR_KEY = "config_attribute"
178
- PIPESTAT_RESULTS_FILE_ATTR_KEY = "results_file_attribute"
196
+ PIPESTAT_RESULTS_FILE_ATTR_KEY = "results_file_path"
179
197
 
180
198
  PIPE_ARGS_SECTION = "pipeline_args"
181
199
  CLI_KEY = "cli"
@@ -193,7 +211,9 @@ FILE_CHECKS_KEY = "skip_file_checks"
193
211
  EXAMPLE_COMPUTE_SPEC_FMT = "k1=v1 k2=v2"
194
212
  SUBMISSION_FAILURE_MESSAGE = "Cluster resource failure"
195
213
  LOOPER_DOTFILE_NAME = "." + LOOPER_KEY + ".yaml"
196
- LOOPER_GENERIC_PIPELINE = "generic_pipeline_interface.yaml"
214
+ LOOPER_GENERIC_PIPELINE = "pipeline_interface.yaml"
215
+ LOOPER_GENERIC_OUTPUT_SCHEMA = "output_schema.yaml"
216
+ LOOPER_GENERIC_COUNT_LINES = "count_lines.sh"
197
217
  POSITIONAL = [PEP_CONFIG_FILE_KEY, "command"]
198
218
  SELECTED_COMPUTE_PKG = "package"
199
219
  EXTRA_KEY = "_cli_extra"
@@ -201,6 +221,7 @@ ALL_SUBCMD_KEY = "all"
201
221
  SAMPLE_PL_ARG = "sample_pipeline_interfaces"
202
222
  PROJECT_PL_ARG = "project_pipeline_interfaces"
203
223
 
224
+
204
225
  DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME)
205
226
  CLI_PROJ_ATTRS = [
206
227
  OUTDIR_KEY,
@@ -212,6 +233,9 @@ CLI_PROJ_ATTRS = [
212
233
  DRY_RUN_KEY,
213
234
  FILE_CHECKS_KEY,
214
235
  SAMPLE_PL_ARG,
236
+ PIPESTAT_KEY,
237
+ DEFAULT_PIPESTAT_CONFIG_ATTR,
238
+ PEP_CONFIG_KEY,
215
239
  ]
216
240
 
217
241
  # resource package TSV-related consts
@@ -220,3 +244,27 @@ FILE_SIZE_COLNAME = "max_file_size"
220
244
  IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".svg", ".gif")
221
245
  # this strongly depends on pypiper's profile.tsv format
222
246
  PROFILE_COLNAMES = ["pid", "hash", "cid", "runtime", "mem", "cmd", "lock"]
247
+
248
+
249
+ # Argument option names
250
+
251
+ SAMPLE_SELECTION_ATTRIBUTE_OPTNAME = "sel-attr"
252
+ SAMPLE_EXCLUSION_OPTNAME = "sel-excl"
253
+ SAMPLE_INCLUSION_OPTNAME = "sel-incl"
254
+ SAMPLE_SELECTION_FLAG_OPTNAME = "sel-flag"
255
+ SAMPLE_EXCLUSION_FLAG_OPTNAME = "exc-flag"
256
+
257
+ MESSAGE_BY_SUBCOMMAND = {
258
+ "run": "Run or submit sample jobs.",
259
+ "rerun": "Resubmit sample jobs with failed flags.",
260
+ "runp": "Run or submit project jobs.",
261
+ "table": "Write summary stats table for project samples.",
262
+ "report": "Create browsable HTML report of project results.",
263
+ "destroy": "Remove output files of the project.",
264
+ "check": "Check flag status of current runs.",
265
+ "clean": "Run clean scripts of already processed jobs.",
266
+ "inspect": "Print information about a project.",
267
+ "init": "Initialize looper config file.",
268
+ "init-piface": "Initialize generic pipeline interface.",
269
+ "link": "Create directory of symlinks for reported results.",
270
+ }