looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

looper/conductor.py CHANGED
@@ -5,6 +5,7 @@ import logging
 import os
 import subprocess
 import time
+import yaml
 from copy import copy, deepcopy
 from json import loads
 from subprocess import check_output
@@ -19,7 +20,7 @@ from peppy.exceptions import RemoteYAMLError
 from pipestat import PipestatError
 from ubiquerg import expandpath, is_command_callable
 from yaml import dump
-from yacman import YAMLConfigManager, expandpath as expath
+from yacman import YAMLConfigManager
 
 from .const import *
 from .exceptions import JobSubmissionException, SampleFailedException
@@ -81,158 +82,15 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=
     return final_path
 
 
-def write_sample_yaml(namespaces):
+def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict):
     """
-    Plugin: saves sample representation to YAML.
-
-    This plugin can be parametrized by providing the path value/template in
-    'pipeline.var_templates.sample_yaml_path'. This needs to be a complete and
-    absolute path to the file where sample YAML representation is to be
-    stored.
-
-    :param dict namespaces: variable namespaces dict
-    :return dict: sample namespace dict
-    """
-    sample = namespaces["sample"]
-    sample["sample_yaml_path"] = _get_yaml_path(
-        namespaces, SAMPLE_YAML_PATH_KEY, "_sample"
-    )
-    sample.to_yaml(sample["sample_yaml_path"], add_prj_ref=False)
-    return {"sample": sample}
-
-
-def write_sample_yaml_prj(namespaces):
-    """
-    Plugin: saves sample representation with project reference to YAML.
-
-    This plugin can be parametrized by providing the path value/template in
-    'pipeline.var_templates.sample_yaml_prj_path'. This needs to be a complete and
-    absolute path to the file where sample YAML representation is to be
-    stored.
-
-    :param dict namespaces: variable namespaces dict
-    :return dict: sample namespace dict
-    """
-    sample = namespaces["sample"]
-    sample.to_yaml(
-        _get_yaml_path(namespaces, SAMPLE_YAML_PRJ_PATH_KEY, "_sample_prj"),
-        add_prj_ref=True,
-    )
-    return {"sample": sample}
-
-
-def write_custom_template(namespaces):
-    """
-    Plugin: Populates a user-provided jinja template
-
-    Parameterize by providing pipeline.var_templates.custom_template
-    """
-
-    def load_template(pipeline):
-        with open(namespaces["pipeline"]["var_templates"]["custom_template"], "r") as f:
-            x = f.read()
-        t = jinja2.Template(x)
-        return t
-
-    err_msg = (
-        "Custom template plugin requires a template in var_templates.custom_template"
-    )
-    if "var_templates" not in namespaces["pipeline"].keys():
-        _LOGGER.error(err_msg)
-        return None
-
-    if "custom_template" not in namespaces["pipeline"]["var_templates"].keys():
-        _LOGGER.error(err_msg)
-        return None
-
-    import jinja2
-
-    tpl = load_template(namespaces["pipeline"])
-    content = tpl.render(namespaces)
-    pth = _get_yaml_path(namespaces, "custom_template_output", "_config")
-    namespaces["sample"]["custom_template_output"] = pth
-    with open(pth, "wb") as fh:
-        # print(content)
-        fh.write(content.encode())
-
-    return {"sample": namespaces["sample"]}
-
-
-def write_sample_yaml_cwl(namespaces):
-    """
-    Plugin: Produce a cwl-compatible yaml representation of the sample
-
-    Also adds the 'cwl_yaml' attribute to sample objects, which points
-    to the file produced.
-
-    This plugin can be parametrized by providing the path value/template in
-    'pipeline.var_templates.sample_cwl_yaml_path'. This needs to be a complete and
-    absolute path to the file where sample YAML representation is to be
-    stored.
-
-    :param dict namespaces: variable namespaces dict
-    :return dict: updated variable namespaces dict
+    This is run at the project level, not at the sample level.
     """
-    from eido import read_schema
-    from ubiquerg import is_url
-
-    def _get_schema_source(
-        schema_source, piface_dir=namespaces["looper"]["piface_dir"]
-    ):
-        # Stolen from piface object; should be a better way to do this...
-        if is_url(schema_source):
-            return schema_source
-        elif not os.path.isabs(schema_source):
-            schema_source = os.path.join(piface_dir, schema_source)
-        return schema_source
-
-    # To be compatible as a CWL job input, we need to handle the
-    # File and Directory object types directly.
-    sample = namespaces["sample"]
-    sample.sample_yaml_cwl = _get_yaml_path(
-        namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_sample_cwl"
-    )
-
-    if "input_schema" in namespaces["pipeline"]:
-        schema_path = _get_schema_source(namespaces["pipeline"]["input_schema"])
-        file_list = []
-        for ischema in read_schema(schema_path):
-            if "files" in ischema["properties"]["samples"]["items"]:
-                file_list.extend(ischema["properties"]["samples"]["items"]["files"])
-
-        for file_attr in file_list:
-            _LOGGER.debug("CWL-ing file attribute: {}".format(file_attr))
-            file_attr_value = sample[file_attr]
-            # file paths are assumed relative to the sample table;
-            # but CWL assumes they are relative to the yaml output file,
-            # so we convert here.
-            file_attr_rel = os.path.relpath(
-                file_attr_value, os.path.dirname(sample.sample_yaml_cwl)
-            )
-            sample[file_attr] = {"class": "File", "path": file_attr_rel}
+    with open(looper_pipestat_config_path, "w") as f:
+        yaml.dump(pipestat_config_dict, f)
+    print(f"Initialized looper config file: {looper_pipestat_config_path}")
 
-        directory_list = []
-        for ischema in read_schema(schema_path):
-            if "directories" in ischema["properties"]["samples"]["items"]:
-                directory_list.extend(
-                    ischema["properties"]["samples"]["items"]["directories"]
-                )
-
-        for dir_attr in directory_list:
-            _LOGGER.debug("CWL-ing directory attribute: {}".format(dir_attr))
-            dir_attr_value = sample[dir_attr]
-            # file paths are assumed relative to the sample table;
-            # but CWL assumes they are relative to the yaml output file,
-            # so we convert here.
-            sample[dir_attr] = {"class": "Directory", "location": dir_attr_value}
-    else:
-        _LOGGER.warning(
-            "No 'input_schema' defined, producing a regular "
-            "sample YAML representation"
-        )
-    _LOGGER.info("Writing sample yaml to {}".format(sample.sample_yaml_cwl))
-    sample.to_yaml(sample.sample_yaml_cwl)
-    return {"sample": sample}
+    return True
 
 
 def write_submission_yaml(namespaces):
@@ -245,7 +103,7 @@ def write_submission_yaml(namespaces):
     path = _get_yaml_path(namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_submission")
     my_namespaces = {}
     for namespace, values in namespaces.items():
-        my_namespaces.update({str(namespace): values.to_dict()})
+        my_namespaces.update({str(namespace): dict(values)})
     with open(path, "w") as yamlfile:
         dump(my_namespaces, yamlfile)
     return my_namespaces
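
Note: the to_dict() → dict(values) change above relies on yacman's YAMLConfigManager behaving as a standard Mapping, so a plain dict() conversion suffices. A minimal sketch of the assumed equivalence (example values are hypothetical):

    from yacman import YAMLConfigManager

    values = YAMLConfigManager({"mem": "4G", "cores": 2})
    assert dict(values) == {"mem": "4G", "cores": 2}  # Mapping -> plain dict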
@@ -417,28 +275,40 @@ class SubmissionConductor(object):
         )
         if self.prj.pipestat_configured:
             psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
-            sample_statuses = psms[self.pl_name].get_status()
+            sample_statuses = psms[self.pl_name].get_status(
+                record_identifier=sample.sample_name
+            )
+            if sample_statuses == "failed" and rerun is True:
+                psms[self.pl_name].set_status(
+                    record_identifier=sample.sample_name, status_identifier="waiting"
+                )
+                sample_statuses = "waiting"
             sample_statuses = [sample_statuses] if sample_statuses else []
         else:
             sample_statuses = fetch_sample_flags(self.prj, sample, self.pl_name)
-        use_this_sample = not rerun
 
-        if sample_statuses or rerun:
-            if not self.ignore_flags:
-                use_this_sample = False
-            # But rescue the sample in case rerun/failed passes
+        use_this_sample = True  # default to running this sample
+        msg = None
+        if sample_statuses:
+            status_str = ", ".join(sample_statuses)
             failed_flag = any("failed" in x for x in sample_statuses)
+            if self.ignore_flags:
+                msg = f"> Found existing status: {status_str}. Ignoring."
+            else:  # this pipeline already has a status
+                msg = f"> Found existing status: {status_str}. Skipping sample."
+                if failed_flag:
+                    msg += " Use rerun to ignore failed status."  # help guidance
+                use_this_sample = False
             if rerun:
+                # Rescue the sample if rerun requested, and failed flag is found
                 if failed_flag:
-                    _LOGGER.info("> Re-running failed sample")
+                    msg = f"> Re-running failed sample. Status: {status_str}"
                     use_this_sample = True
                 else:
+                    msg = f"> Skipping sample because rerun requested, but no failed flag found. Status: {status_str}"
                     use_this_sample = False
-        if not use_this_sample:
-            msg = "> Skipping sample"
-            if sample_statuses:
-                msg += f". Determined status: {', '.join(sample_statuses)}"
-            _LOGGER.info(msg)
+        if msg:
+            _LOGGER.info(msg)
 
         skip_reasons = []
         validation = {}
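
Note: the hunk above replaces the old flag handling with an explicit decision table over existing statuses, the rerun request, and ignore_flags. A standalone sketch of the new logic (the function name and signature are illustrative, not part of looper's API):

    def decide_sample_run(sample_statuses, rerun, ignore_flags):
        """Return (use_this_sample, msg) per the rewritten conductor logic."""
        use_this_sample = True  # default to running this sample
        msg = None
        if sample_statuses:
            status_str = ", ".join(sample_statuses)
            failed_flag = any("failed" in s for s in sample_statuses)
            if ignore_flags:
                msg = f"> Found existing status: {status_str}. Ignoring."
            else:
                msg = f"> Found existing status: {status_str}. Skipping sample."
                if failed_flag:
                    msg += " Use rerun to ignore failed status."
                use_this_sample = False
            if rerun:
                # rerun only rescues samples whose existing status is failed
                if failed_flag:
                    msg = f"> Re-running failed sample. Status: {status_str}"
                    use_this_sample = True
                else:
                    msg = (
                        "> Skipping sample because rerun requested, "
                        f"but no failed flag found. Status: {status_str}"
                    )
                    use_this_sample = False
        return use_this_sample, msg

    # decide_sample_run(["failed"], rerun=True, ignore_flags=False)
    # -> (True, "> Re-running failed sample. Status: failed")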
@@ -512,7 +382,7 @@ class SubmissionConductor(object):
         if self.dry_run:
             _LOGGER.info("Dry run, not submitted")
         elif self._rendered_ok:
-            sub_cmd = self.prj.dcc.compute.submission_command
+            sub_cmd = self.prj.dcc.compute["submission_command"]
             submission_command = "{} {}".format(sub_cmd, script)
             # Capture submission command return value so that we can
             # intercept and report basic submission failures; #167
@@ -600,7 +470,9 @@ class SubmissionConductor(object):
         :return yacman.YAMLConfigManager: looper/submission related settings
         """
         settings = YAMLConfigManager()
-        settings["pep_config"] = self.prj.config_file
+        settings["config_file"] = self.prj.config_file
+        settings["pep_config"] = self.prj.pep_config
+
         settings[RESULTS_SUBDIR_KEY] = self.prj.results_folder
         settings[SUBMISSION_SUBDIR_KEY] = self.prj.submission_folder
         settings[OUTDIR_KEY] = self.prj.output_dir
@@ -659,11 +531,9 @@ class SubmissionConductor(object):
             return YAMLConfigManager()
         else:
             full_namespace = {
-                "schema": psm.schema_path,
                 "results_file": psm.file,
-                "record_id": psm.sample_name,
-                "namespace": psm.project_name,
-                "config": psm.config_path,
+                "record_identifier": psm.record_identifier,
+                "config_file": psm.config_path,
             }
             filtered_namespace = {k: v for k, v in full_namespace.items() if v}
             return YAMLConfigManager(filtered_namespace)
@@ -703,10 +573,15 @@ class SubmissionConductor(object):
             namespaces.update({"sample": sample})
         else:
             namespaces.update({"samples": self.prj.samples})
-        pipestat_namespace = self._set_pipestat_namespace(
-            sample_name=sample.sample_name if sample else None
-        )
-        namespaces.update({"pipestat": pipestat_namespace})
+        if self.prj.pipestat_configured:
+            pipestat_namespace = self._set_pipestat_namespace(
+                sample_name=sample.sample_name if sample else None
+            )
+            namespaces.update({"pipestat": pipestat_namespace})
+        else:
+            # Pipestat isn't configured, simply place empty YAMLConfigManager object instead.
+            pipestat_namespace = YAMLConfigManager()
+            namespaces.update({"pipestat": pipestat_namespace})
         res_pkg = self.pl_iface.choose_resource_package(
             namespaces, size or 0
         )  # config
@@ -721,12 +596,9 @@ class SubmissionConductor(object):
            )
            _LOGGER.debug(f"namespace pipelines: { pl_iface }")
 
-           # check here to ensure command is executable
-           self.check_executable_path(pl_iface)
-
            namespaces["pipeline"]["var_templates"] = pl_iface[VAR_TEMPL_KEY] or {}
            for k, v in namespaces["pipeline"]["var_templates"].items():
-               namespaces["pipeline"]["var_templates"][k] = expath(v)
+               namespaces["pipeline"]["var_templates"][k] = expandpath(v)
 
            # pre_submit hook namespace updates
            namespaces = _exec_pre_submit(pl_iface, namespaces)
@@ -735,6 +607,7 @@ class SubmissionConductor(object):
                argstring = jinja_render_template_strictly(
                    template=templ, namespaces=namespaces
                )
+               print(argstring)
            except UndefinedError as jinja_exception:
                _LOGGER.warning(NOT_SUB_MSG.format(str(jinja_exception)))
            except KeyError as e:
@@ -761,7 +634,9 @@ class SubmissionConductor(object):
         _LOGGER.debug("compute namespace:\n{}".format(self.prj.dcc.compute))
         _LOGGER.debug("looper namespace:\n{}".format(looper))
         _LOGGER.debug("pipestat namespace:\n{}".format(pipestat_namespace))
-        subm_base = os.path.join(self.prj.submission_folder, looper[JOB_NAME_KEY])
+        subm_base = os.path.join(
+            expandpath(self.prj.submission_folder), looper[JOB_NAME_KEY]
+        )
         return self.prj.dcc.write_script(
             output_path=subm_base + ".sub", extra_vars=[{"looper": looper}]
         )
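
Note: with the yacman alias expath dropped from the imports, both hunks above route path handling through ubiquerg's expandpath, which resolves environment variables and "~". A quick illustration (paths are hypothetical):

    import os
    from ubiquerg import expandpath

    os.environ["PROJECT_OUT"] = "/data/processed"
    expandpath("$PROJECT_OUT/submission")  # -> "/data/processed/submission"
    expandpath("~/submission")             # -> "/home/<user>/submission"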
@@ -775,34 +650,6 @@ class SubmissionConductor(object):
         self._curr_skip_pool = []
         self._curr_skip_size = 0
 
-    def check_executable_path(self, pl_iface):
-        """Determines if supplied pipelines are callable.
-        Raises error and exits Looper if not callable
-        :param dict pl_iface: pipeline interface that stores paths to executables
-        :return bool: True if path is callable.
-        """
-        pipeline_commands = []
-        if "path" in pl_iface.keys():
-            pipeline_commands.append(pl_iface["path"])
-
-        if (
-            "var_templates" in pl_iface.keys()
-            and "pipeline" in pl_iface["var_templates"].keys()
-        ):
-            pipeline_commands.append(pl_iface["var_templates"]["pipeline"])
-        for command in pipeline_commands:
-            try:
-                result = is_command_callable(command)
-            except:
-                _LOGGER.error(f" {command} IS NOT EXECUTABLE. EXITING")
-                raise SampleFailedException
-            else:
-                if not result:
-                    _LOGGER.error(f" {command} IS NOT EXECUTABLE. EXITING...")
-                    raise SampleFailedException
-                else:
-                    return True
-
 
 def _use_sample(flag, skips):
     return flag and not skips
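
Note: the removed sample-YAML/CWL plugin functions give way to a single project-level helper, write_pipestat_config, which dumps a pipestat configuration dict to YAML. A minimal usage sketch (the path and dict contents are hypothetical; looper assembles the real values from the project config):

    from looper.conductor import write_pipestat_config

    pipestat_config = {
        "project_name": "demo",
        "results_file_path": "results.yaml",
    }
    write_pipestat_config(".looper_pipestat_config.yaml", pipestat_config)
    # writes the YAML file, prints a confirmation, and returns True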
looper/const.py CHANGED
@@ -81,6 +81,17 @@ __all__ = [
     "DEFAULT_CONFIG_FILEPATH",
     "DEFAULT_CONFIG_SCHEMA",
     "DEFAULT_COMPUTE_RESOURCES_NAME",
+    "MESSAGE_BY_SUBCOMMAND",
+    "SAMPLE_SELECTION_ATTRIBUTE_OPTNAME",
+    "SAMPLE_EXCLUSION_OPTNAME",
+    "SAMPLE_INCLUSION_OPTNAME",
+    "SAMPLE_SELECTION_FLAG_OPTNAME",
+    "SAMPLE_EXCLUSION_FLAG_OPTNAME",
+    "DEBUG_JOBS",
+    "DEBUG_COMMANDS",
+    "DEBUG_EIDO_VALIDATION",
+    "LOOPER_GENERIC_OUTPUT_SCHEMA",
+    "LOOPER_GENERIC_COUNT_LINES",
 ]
 
 FLAGS = ["completed", "running", "failed", "waiting", "partial"]
@@ -112,6 +123,11 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG):
     return ret
 
 
+# Debug keys
+DEBUG_JOBS = "Jobs submitted"
+DEBUG_COMMANDS = "Commands submitted"
+DEBUG_EIDO_VALIDATION = "EidoValidationError"
+
 # Compute-related (for divvy)
 COMPUTE_SETTINGS_VARNAME = ["DIVCFG"]
 DEFAULT_COMPUTE_RESOURCES_NAME = "default"
@@ -145,7 +161,9 @@ PIFACE_SCHEMA_SRC = os.path.join(
 EXTRA_SAMPLE_CMD_TEMPLATE = (
     "{%- if sample.command_extra is defined %} {sample.command_extra} {% endif -%}"
 )
-EXTRA_PROJECT_CMD_TEMPLATE = "{%- if project.looper.command_extra is defined %} {project.looper.command_extra}{% endif -%}"
+EXTRA_PROJECT_CMD_TEMPLATE = (
+    "{%- if looper.command_extra is defined %} {looper.command_extra}{% endif -%}"
+)
 DOTFILE_CFG_PTH_KEY = "config_file_path"
 INPUT_SCHEMA_KEY = "input_schema"
 OUTPUT_SCHEMA_KEY = "output_schema"
@@ -175,7 +193,7 @@ DEFAULT_PIPESTAT_CONFIG_ATTR = "pipestat_config"
 DEFAULT_PIPESTAT_RESULTS_FILE_ATTR = "pipestat_results_file"
 PIPESTAT_NAMESPACE_ATTR_KEY = "namespace_attribute"
 PIPESTAT_CONFIG_ATTR_KEY = "config_attribute"
-PIPESTAT_RESULTS_FILE_ATTR_KEY = "results_file_attribute"
+PIPESTAT_RESULTS_FILE_ATTR_KEY = "results_file_path"
 
 PIPE_ARGS_SECTION = "pipeline_args"
 CLI_KEY = "cli"
@@ -193,7 +211,9 @@ FILE_CHECKS_KEY = "skip_file_checks"
 EXAMPLE_COMPUTE_SPEC_FMT = "k1=v1 k2=v2"
 SUBMISSION_FAILURE_MESSAGE = "Cluster resource failure"
 LOOPER_DOTFILE_NAME = "." + LOOPER_KEY + ".yaml"
-LOOPER_GENERIC_PIPELINE = "generic_pipeline_interface.yaml"
+LOOPER_GENERIC_PIPELINE = "pipeline_interface.yaml"
+LOOPER_GENERIC_OUTPUT_SCHEMA = "output_schema.yaml"
+LOOPER_GENERIC_COUNT_LINES = "count_lines.sh"
 POSITIONAL = [PEP_CONFIG_FILE_KEY, "command"]
 SELECTED_COMPUTE_PKG = "package"
 EXTRA_KEY = "_cli_extra"
@@ -201,6 +221,7 @@ ALL_SUBCMD_KEY = "all"
 SAMPLE_PL_ARG = "sample_pipeline_interfaces"
 PROJECT_PL_ARG = "project_pipeline_interfaces"
 
+
 DEFAULT_CFG_PATH = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME)
 CLI_PROJ_ATTRS = [
     OUTDIR_KEY,
@@ -212,6 +233,9 @@ CLI_PROJ_ATTRS = [
     DRY_RUN_KEY,
     FILE_CHECKS_KEY,
     SAMPLE_PL_ARG,
+    PIPESTAT_KEY,
+    DEFAULT_PIPESTAT_CONFIG_ATTR,
+    PEP_CONFIG_KEY,
 ]
 
 # resource package TSV-related consts
@@ -220,3 +244,27 @@ FILE_SIZE_COLNAME = "max_file_size"
 IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".svg", ".gif")
 # this strongly depends on pypiper's profile.tsv format
 PROFILE_COLNAMES = ["pid", "hash", "cid", "runtime", "mem", "cmd", "lock"]
+
+
+# Argument option names
+
+SAMPLE_SELECTION_ATTRIBUTE_OPTNAME = "sel-attr"
+SAMPLE_EXCLUSION_OPTNAME = "sel-excl"
+SAMPLE_INCLUSION_OPTNAME = "sel-incl"
+SAMPLE_SELECTION_FLAG_OPTNAME = "sel-flag"
+SAMPLE_EXCLUSION_FLAG_OPTNAME = "exc-flag"
+
+MESSAGE_BY_SUBCOMMAND = {
+    "run": "Run or submit sample jobs.",
+    "rerun": "Resubmit sample jobs with failed flags.",
+    "runp": "Run or submit project jobs.",
+    "table": "Write summary stats table for project samples.",
+    "report": "Create browsable HTML report of project results.",
+    "destroy": "Remove output files of the project.",
+    "check": "Check flag status of current runs.",
+    "clean": "Run clean scripts of already processed jobs.",
+    "inspect": "Print information about a project.",
+    "init": "Initialize looper config file.",
+    "init-piface": "Initialize generic pipeline interface.",
+    "link": "Create directory of symlinks for reported results.",
+}
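
Note: MESSAGE_BY_SUBCOMMAND centralizes the one-line description for each looper subcommand. A sketch of how such a mapping can drive argparse registration (build_parser is illustrative, not looper's actual CLI code):

    import argparse

    from looper.const import MESSAGE_BY_SUBCOMMAND

    def build_parser():
        parser = argparse.ArgumentParser(prog="looper")
        subparsers = parser.add_subparsers(dest="command")
        for name, message in MESSAGE_BY_SUBCOMMAND.items():
            # one subparser per verb, reusing the centralized description
            subparsers.add_parser(name, help=message, description=message)
        return parser

    assert build_parser().parse_args(["rerun"]).command == "rerun"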