looper 1.7.0a1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
looper/conductor.py CHANGED
@@ -6,6 +6,7 @@ import os
6
6
  import subprocess
7
7
  import time
8
8
  import yaml
9
+ from math import ceil
9
10
  from copy import copy, deepcopy
10
11
  from json import loads
11
12
  from subprocess import check_output
@@ -20,12 +21,13 @@ from peppy.exceptions import RemoteYAMLError
20
21
  from pipestat import PipestatError
21
22
  from ubiquerg import expandpath, is_command_callable
22
23
  from yaml import dump
23
- from yacman import YAMLConfigManager
24
+ from yacman import FutureYAMLConfigManager as YAMLConfigManager
24
25
 
25
26
  from .const import *
26
27
  from .exceptions import JobSubmissionException, SampleFailedException
27
28
  from .processed_project import populate_sample_paths
28
29
  from .utils import fetch_sample_flags, jinja_render_template_strictly
30
+ from .const import PipelineLevel
29
31
 
30
32
 
31
33
  _LOGGER = logging.getLogger(__name__)
@@ -84,11 +86,23 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=
84
86
 
85
87
  def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict):
86
88
  """
87
- This is run at the project level, not at the sample level.
89
+ This writes a combined configuration file to be passed to a PipestatManager.
90
+ :param str looper_pipestat_config_path: path to the created pipestat configuration file
91
+ :param dict pipestat_config_dict: the dict containing key value pairs to be written to the pipestat configuration
92
+ :return bool: True once the configuration file has been written
88
93
  """
94
+
95
+ if not os.path.exists(os.path.dirname(looper_pipestat_config_path)):
96
+ try:
97
+ os.makedirs(os.path.dirname(looper_pipestat_config_path))
98
+ except FileExistsError:
99
+ pass
100
+
89
101
  with open(looper_pipestat_config_path, "w") as f:
90
102
  yaml.dump(pipestat_config_dict, f)
91
- print(f"Initialized looper config file: {looper_pipestat_config_path}")
103
+ _LOGGER.debug(
104
+ msg=f"Initialized pipestat config file: {looper_pipestat_config_path}"
105
+ )
92
106
 
93
107
  return True
94
108
 
@@ -132,6 +146,7 @@ class SubmissionConductor(object):
132
146
  compute_variables=None,
133
147
  max_cmds=None,
134
148
  max_size=None,
149
+ max_jobs=None,
135
150
  automatic=True,
136
151
  collate=False,
137
152
  ):
@@ -166,6 +181,8 @@ class SubmissionConductor(object):
166
181
  include in a single job script.
167
182
  :param int | float | NoneType max_size: Upper bound on total file
168
183
  size of inputs used by the commands lumped into single job script.
184
+ :param int | float | NoneType max_jobs: Upper bound on total number of jobs to
185
+ group samples for submission.
169
186
  :param bool automatic: Whether the submission should be automatic once
170
187
  the pool reaches capacity.
171
188
  :param bool collate: Whether a collate job is to be submitted (runs on
@@ -200,6 +217,16 @@ class SubmissionConductor(object):
200
217
  "{}".format(self.extra_pipe_args)
201
218
  )
202
219
 
220
+ if max_jobs:
221
+ if max_jobs == 0 or max_jobs < 0:
222
+ raise ValueError(
223
+ "If specified, max job command count must be a positive integer, greater than zero."
224
+ )
225
+
226
+ num_samples = len(self.prj.samples)
227
+ samples_per_job = num_samples / max_jobs
228
+ max_cmds = ceil(samples_per_job)
229
+
203
230
  if not self.collate:
204
231
  self.automatic = automatic
205
232
  if max_cmds is None and max_size is None:
@@ -247,8 +274,12 @@ class SubmissionConductor(object):
247
274
 
248
275
  :param bool force: whether to force the project submission (ignore status/flags)
249
276
  """
277
+ psms = {}
250
278
  if self.prj.pipestat_configured_project:
251
- psm = self.prj.get_pipestat_managers(project_level=True)[self.pl_name]
279
+ for piface in self.prj.project_pipeline_interfaces:
280
+ if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
281
+ psms[piface.psm.pipeline_name] = piface.psm
282
+ psm = psms[self.pl_name]
252
283
  status = psm.get_status()
253
284
  if not force and status is not None:
254
285
  _LOGGER.info(f"> Skipping project. Determined status: {status}")
@@ -274,12 +305,11 @@ class SubmissionConductor(object):
274
305
  )
275
306
  )
276
307
  if self.prj.pipestat_configured:
277
- psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
278
- sample_statuses = psms[self.pl_name].get_status(
308
+ sample_statuses = self.pl_iface.psm.get_status(
279
309
  record_identifier=sample.sample_name
280
310
  )
281
311
  if sample_statuses == "failed" and rerun is True:
282
- psms[self.pl_name].set_status(
312
+ self.pl_iface.psm.set_status(
283
313
  record_identifier=sample.sample_name, status_identifier="waiting"
284
314
  )
285
315
  sample_statuses = "waiting"
@@ -289,23 +319,27 @@ class SubmissionConductor(object):
289
319
 
290
320
  use_this_sample = True # default to running this sample
291
321
  msg = None
322
+ if rerun and sample_statuses == []:
323
+ msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found."
324
+ use_this_sample = False
292
325
  if sample_statuses:
293
326
  status_str = ", ".join(sample_statuses)
294
327
  failed_flag = any("failed" in x for x in sample_statuses)
328
+ waiting_flag = any("waiting" in x for x in sample_statuses)
295
329
  if self.ignore_flags:
296
330
  msg = f"> Found existing status: {status_str}. Ignoring."
297
331
  else: # this pipeline already has a status
298
332
  msg = f"> Found existing status: {status_str}. Skipping sample."
299
- if failed_flag:
333
+ if failed_flag and not rerun:
300
334
  msg += " Use rerun to ignore failed status." # help guidance
301
335
  use_this_sample = False
302
336
  if rerun:
303
337
  # Rescue the sample if rerun requested, and failed flag is found
304
- if failed_flag:
305
- msg = f"> Re-running failed sample. Status: {status_str}"
338
+ if failed_flag or waiting_flag:
339
+ msg = f"> Re-running sample. Status: {status_str}"
306
340
  use_this_sample = True
307
341
  else:
308
- msg = f"> Skipping sample because rerun requested, but no failed flag found. Status: {status_str}"
342
+ msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found. Status: {status_str}"
309
343
  use_this_sample = False
310
344
  if msg:
311
345
  _LOGGER.info(msg)
@@ -514,12 +548,7 @@ class SubmissionConductor(object):
514
548
  :return yacman.YAMLConfigManager: pipestat namespace
515
549
  """
516
550
  try:
517
- psms = (
518
- self.prj.get_pipestat_managers(sample_name)
519
- if sample_name
520
- else self.prj.get_pipestat_managers(project_level=True)
521
- )
522
- psm = psms[self.pl_iface.pipeline_name]
551
+ psm = self.pl_iface.psm
523
552
  except (PipestatError, AttributeError) as e:
524
553
  # pipestat section faulty or not found in project.looper or sample
525
554
  # or project is missing required pipestat attributes
@@ -607,7 +636,6 @@ class SubmissionConductor(object):
607
636
  argstring = jinja_render_template_strictly(
608
637
  template=templ, namespaces=namespaces
609
638
  )
610
- print(argstring)
611
639
  except UndefinedError as jinja_exception:
612
640
  _LOGGER.warning(NOT_SUB_MSG.format(str(jinja_exception)))
613
641
  except KeyError as e:
looper/const.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """ Shared project constants """
2
2
 
3
3
  import os
4
+ from enum import Enum
4
5
 
5
6
  __author__ = "Databio lab"
6
7
  __email__ = "nathan@code.databio.org"
@@ -268,3 +269,10 @@ MESSAGE_BY_SUBCOMMAND = {
268
269
  "init-piface": "Initialize generic pipeline interface.",
269
270
  "link": "Create directory of symlinks for reported results.",
270
271
  }
272
+
273
+ # Add project/sample enum
274
+
275
+
276
+ class PipelineLevel(Enum):
277
+ SAMPLE = "sample"
278
+ PROJECT = "project"
looper/divvy.py CHANGED
@@ -6,11 +6,14 @@ import os
6
6
  import sys
7
7
  import shutil
8
8
  import yaml
9
- from yaml import SafeLoader
10
- from shutil import copytree
11
9
 
10
+
11
+ from shutil import copytree
12
+ from yacman import FutureYAMLConfigManager as YAMLConfigManager
13
+ from yacman import write_lock, FILEPATH_KEY, load_yaml, select_config
14
+ from yaml import SafeLoader
12
15
  from ubiquerg import is_writable, VersionInHelpParser
13
- import yacman
16
+
14
17
 
15
18
  from .const import (
16
19
  COMPUTE_SETTINGS_VARNAME,
@@ -28,7 +31,7 @@ _LOGGER = logging.getLogger(__name__)
28
31
  # This is the divvy.py submodule from divvy
29
32
 
30
33
 
31
- class ComputingConfiguration(yacman.YAMLConfigManager):
34
+ class ComputingConfiguration(YAMLConfigManager):
32
35
  """
33
36
  Represents computing configuration objects.
34
37
 
@@ -44,36 +47,31 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
44
47
  `DIVCFG` file)
45
48
  """
46
49
 
47
- def __init__(self, entries=None, filepath=None):
48
- if not entries and not filepath:
49
- # Handle the case of an empty one, when we'll use the default
50
- filepath = select_divvy_config(None)
51
-
52
- super(ComputingConfiguration, self).__init__(
53
- entries=entries,
54
- filepath=filepath,
55
- schema_source=DEFAULT_CONFIG_SCHEMA,
56
- validate_on_write=True,
50
+ def __init__(
51
+ self,
52
+ entries=None,
53
+ wait_max=None,
54
+ strict_ro_locks=False,
55
+ schema_source=None,
56
+ validate_on_write=False,
57
+ ):
58
+ super().__init__(
59
+ entries, wait_max, strict_ro_locks, schema_source, validate_on_write
57
60
  )
58
61
 
59
- if not "compute_packages" in self:
60
- raise Exception(
61
- "Your divvy config file is not in divvy config format "
62
- "(it lacks a compute_packages section): '{}'".format(filepath)
63
- )
64
- # We require that compute_packages be present, even if empty
62
+ if "compute_packages" not in self:
65
63
  self["compute_packages"] = {}
66
-
67
64
  # Initialize default compute settings.
68
65
  _LOGGER.debug("Establishing project compute settings")
69
66
  self.compute = None
70
67
  self.setdefault("adapters", None)
71
68
  self.activate_package(DEFAULT_COMPUTE_RESOURCES_NAME)
72
- self.config_file = self.filepath
73
69
 
74
70
  def write(self, filename=None):
75
- super(ComputingConfiguration, self).write(filepath=filename, exclude_case=True)
76
- filename = filename or getattr(self, yacman.FILEPATH_KEY)
71
+ with write_lock(self) as locked_ym:
72
+ locked_ym.rebase()
73
+ locked_ym.write()
74
+ filename = filename or getattr(self, FILEPATH_KEY)
77
75
  filedir = os.path.dirname(filename)
78
76
  # For this object, we *also* have to write the template files
79
77
  for pkg_name, pkg in self["compute_packages"].items():
@@ -151,12 +149,12 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
151
149
  # Augment compute, creating it if needed.
152
150
  if self.compute is None:
153
151
  _LOGGER.debug("Creating Project compute")
154
- self.compute = yacman.YAMLConfigManager()
152
+ self.compute = YAMLConfigManager()
155
153
  _LOGGER.debug(
156
154
  "Adding entries for package_name '{}'".format(package_name)
157
155
  )
158
156
 
159
- self.compute.update(self["compute_packages"][package_name])
157
+ self.compute.update_from_obj(self["compute_packages"][package_name])
160
158
 
161
159
  # Ensure submission template is absolute. This *used to be* handled
162
160
  # at update (so the paths were stored as absolutes in the packages),
@@ -165,7 +163,7 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
165
163
  if not os.path.isabs(self.compute["submission_template"]):
166
164
  try:
167
165
  self.compute["submission_template"] = os.path.join(
168
- os.path.dirname(self.filepath),
166
+ os.path.dirname(self.default_config_file),
169
167
  self.compute["submission_template"],
170
168
  )
171
169
  except AttributeError as e:
@@ -200,11 +198,11 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
200
198
  self.reset_active_settings()
201
199
  return self.activate_package(package_name)
202
200
 
203
- def get_active_package(self):
201
+ def get_active_package(self) -> YAMLConfigManager:
204
202
  """
205
203
  Returns settings for the currently active compute package
206
204
 
207
- :return yacman.YacAttMap: data defining the active compute package
205
+ :return YAMLConfigManager: data defining the active compute package
208
206
  """
209
207
  return self.compute
210
208
 
@@ -222,7 +220,7 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
222
220
 
223
221
  :return bool: success flag
224
222
  """
225
- self.compute = yacman.YacAttMap()
223
+ self.compute = YAMLConfigManager()
226
224
  return True
227
225
 
228
226
  def update_packages(self, config_file):
@@ -235,11 +233,11 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
235
233
 
236
234
  :param str config_file: path to file with new divvy configuration data
237
235
  """
238
- entries = yacman.load_yaml(config_file)
236
+ entries = load_yaml(config_file)
239
237
  self.update(entries)
240
238
  return True
241
239
 
242
- def get_adapters(self):
240
+ def get_adapters(self) -> YAMLConfigManager:
243
241
  """
244
242
  Get current adapters, if defined.
245
243
 
@@ -248,9 +246,9 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
248
246
  package-specific set of adapters, if any defined in 'adapters' section
249
247
  under currently active compute package.
250
248
 
251
- :return yacman.YAMLConfigManager: current adapters mapping
249
+ :return YAMLConfigManager: current adapters mapping
252
250
  """
253
- adapters = yacman.YAMLConfigManager()
251
+ adapters = YAMLConfigManager()
254
252
  if "adapters" in self and self["adapters"] is not None:
255
253
  adapters.update(self["adapters"])
256
254
  if "compute" in self and "adapters" in self.compute:
@@ -376,7 +374,7 @@ def select_divvy_config(filepath):
376
374
  :param str | NoneType filepath: direct file path specification
377
375
  :return str: path to the config file to read
378
376
  """
379
- divcfg = yacman.select_config(
377
+ divcfg = select_config(
380
378
  config_filepath=filepath,
381
379
  config_env_vars=COMPUTE_SETTINGS_VARNAME,
382
380
  default_config_filepath=DEFAULT_CONFIG_FILEPATH,