looper 1.7.0a1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- looper/__main__.py +1 -1
- looper/_version.py +1 -1
- looper/cli_pydantic.py +385 -0
- looper/command_models/DEVELOPER.md +85 -0
- looper/command_models/README.md +4 -0
- looper/command_models/__init__.py +6 -0
- looper/command_models/arguments.py +283 -0
- looper/command_models/commands.py +332 -0
- looper/conductor.py +46 -18
- looper/const.py +8 -0
- looper/divvy.py +33 -35
- looper/looper.py +161 -165
- looper/project.py +150 -111
- looper/utils.py +82 -30
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/METADATA +6 -5
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/RECORD +20 -15
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/WHEEL +1 -1
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/entry_points.txt +1 -1
- looper/cli_looper.py +0 -788
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/LICENSE.txt +0 -0
- {looper-1.7.0a1.dist-info → looper-1.8.0.dist-info}/top_level.txt +0 -0
looper/conductor.py
CHANGED
@@ -6,6 +6,7 @@ import os
|
|
6
6
|
import subprocess
|
7
7
|
import time
|
8
8
|
import yaml
|
9
|
+
from math import ceil
|
9
10
|
from copy import copy, deepcopy
|
10
11
|
from json import loads
|
11
12
|
from subprocess import check_output
|
@@ -20,12 +21,13 @@ from peppy.exceptions import RemoteYAMLError
|
|
20
21
|
from pipestat import PipestatError
|
21
22
|
from ubiquerg import expandpath, is_command_callable
|
22
23
|
from yaml import dump
|
23
|
-
from yacman import YAMLConfigManager
|
24
|
+
from yacman import FutureYAMLConfigManager as YAMLConfigManager
|
24
25
|
|
25
26
|
from .const import *
|
26
27
|
from .exceptions import JobSubmissionException, SampleFailedException
|
27
28
|
from .processed_project import populate_sample_paths
|
28
29
|
from .utils import fetch_sample_flags, jinja_render_template_strictly
|
30
|
+
from .const import PipelineLevel
|
29
31
|
|
30
32
|
|
31
33
|
_LOGGER = logging.getLogger(__name__)
|
@@ -84,11 +86,23 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=
|
|
84
86
|
|
85
87
|
def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict):
|
86
88
|
"""
|
87
|
-
This
|
89
|
+
This writes a combined configuration file to be passed to a PipestatManager.
|
90
|
+
:param str looper_pipestat_config_path: path to the created pipestat configuration file
|
91
|
+
:param dict pipestat_config_dict: the dict containing key-value pairs to be written to the pipestat configuration
|
92
|
+
:return bool: True once the configuration file has been written
|
88
93
|
"""
|
94
|
+
|
95
|
+
if not os.path.exists(os.path.dirname(looper_pipestat_config_path)):
|
96
|
+
try:
|
97
|
+
os.makedirs(os.path.dirname(looper_pipestat_config_path))
|
98
|
+
except FileExistsError:
|
99
|
+
pass
|
100
|
+
|
89
101
|
with open(looper_pipestat_config_path, "w") as f:
|
90
102
|
yaml.dump(pipestat_config_dict, f)
|
91
|
-
|
103
|
+
_LOGGER.debug(
|
104
|
+
msg=f"Initialized pipestat config file: {looper_pipestat_config_path}"
|
105
|
+
)
|
92
106
|
|
93
107
|
return True
|
94
108
|
|
@@ -132,6 +146,7 @@ class SubmissionConductor(object):
|
|
132
146
|
compute_variables=None,
|
133
147
|
max_cmds=None,
|
134
148
|
max_size=None,
|
149
|
+
max_jobs=None,
|
135
150
|
automatic=True,
|
136
151
|
collate=False,
|
137
152
|
):
|
@@ -166,6 +181,8 @@ class SubmissionConductor(object):
|
|
166
181
|
include in a single job script.
|
167
182
|
:param int | float | NoneType max_size: Upper bound on total file
|
168
183
|
size of inputs used by the commands lumped into single job script.
|
184
|
+
:param int | float | NoneType max_jobs: Upper bound on the total number of jobs
|
185
|
+
into which samples are grouped for submission.
|
169
186
|
:param bool automatic: Whether the submission should be automatic once
|
170
187
|
the pool reaches capacity.
|
171
188
|
:param bool collate: Whether a collate job is to be submitted (runs on
|
@@ -200,6 +217,16 @@ class SubmissionConductor(object):
|
|
200
217
|
"{}".format(self.extra_pipe_args)
|
201
218
|
)
|
202
219
|
|
220
|
+
if max_jobs:
|
221
|
+
if max_jobs == 0 or max_jobs < 0:
|
222
|
+
raise ValueError(
|
223
|
+
"If specified, max job command count must be a positive integer, greater than zero."
|
224
|
+
)
|
225
|
+
|
226
|
+
num_samples = len(self.prj.samples)
|
227
|
+
samples_per_job = num_samples / max_jobs
|
228
|
+
max_cmds = ceil(samples_per_job)
|
229
|
+
|
203
230
|
if not self.collate:
|
204
231
|
self.automatic = automatic
|
205
232
|
if max_cmds is None and max_size is None:
|
@@ -247,8 +274,12 @@ class SubmissionConductor(object):
|
|
247
274
|
|
248
275
|
:param bool force: whether to force the project submission (ignore status/flags)
|
249
276
|
"""
|
277
|
+
psms = {}
|
250
278
|
if self.prj.pipestat_configured_project:
|
251
|
-
|
279
|
+
for piface in self.prj.project_pipeline_interfaces:
|
280
|
+
if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
|
281
|
+
psms[piface.psm.pipeline_name] = piface.psm
|
282
|
+
psm = psms[self.pl_name]
|
252
283
|
status = psm.get_status()
|
253
284
|
if not force and status is not None:
|
254
285
|
_LOGGER.info(f"> Skipping project. Determined status: {status}")
|
@@ -274,12 +305,11 @@ class SubmissionConductor(object):
|
|
274
305
|
)
|
275
306
|
)
|
276
307
|
if self.prj.pipestat_configured:
|
277
|
-
|
278
|
-
sample_statuses = psms[self.pl_name].get_status(
|
308
|
+
sample_statuses = self.pl_iface.psm.get_status(
|
279
309
|
record_identifier=sample.sample_name
|
280
310
|
)
|
281
311
|
if sample_statuses == "failed" and rerun is True:
|
282
|
-
|
312
|
+
self.pl_iface.psm.set_status(
|
283
313
|
record_identifier=sample.sample_name, status_identifier="waiting"
|
284
314
|
)
|
285
315
|
sample_statuses = "waiting"
|
@@ -289,23 +319,27 @@ class SubmissionConductor(object):
|
|
289
319
|
|
290
320
|
use_this_sample = True # default to running this sample
|
291
321
|
msg = None
|
322
|
+
if rerun and sample_statuses == []:
|
323
|
+
msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found."
|
324
|
+
use_this_sample = False
|
292
325
|
if sample_statuses:
|
293
326
|
status_str = ", ".join(sample_statuses)
|
294
327
|
failed_flag = any("failed" in x for x in sample_statuses)
|
328
|
+
waiting_flag = any("waiting" in x for x in sample_statuses)
|
295
329
|
if self.ignore_flags:
|
296
330
|
msg = f"> Found existing status: {status_str}. Ignoring."
|
297
331
|
else: # this pipeline already has a status
|
298
332
|
msg = f"> Found existing status: {status_str}. Skipping sample."
|
299
|
-
if failed_flag:
|
333
|
+
if failed_flag and not rerun:
|
300
334
|
msg += " Use rerun to ignore failed status." # help guidance
|
301
335
|
use_this_sample = False
|
302
336
|
if rerun:
|
303
337
|
# Rescue the sample if rerun requested, and failed flag is found
|
304
|
-
if failed_flag:
|
305
|
-
msg = f"> Re-running
|
338
|
+
if failed_flag or waiting_flag:
|
339
|
+
msg = f"> Re-running sample. Status: {status_str}"
|
306
340
|
use_this_sample = True
|
307
341
|
else:
|
308
|
-
msg = f"> Skipping sample because rerun requested, but no failed flag found. Status: {status_str}"
|
342
|
+
msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found. Status: {status_str}"
|
309
343
|
use_this_sample = False
|
310
344
|
if msg:
|
311
345
|
_LOGGER.info(msg)
|
@@ -514,12 +548,7 @@ class SubmissionConductor(object):
|
|
514
548
|
:return yacman.YAMLConfigManager: pipestat namespace
|
515
549
|
"""
|
516
550
|
try:
|
517
|
-
|
518
|
-
self.prj.get_pipestat_managers(sample_name)
|
519
|
-
if sample_name
|
520
|
-
else self.prj.get_pipestat_managers(project_level=True)
|
521
|
-
)
|
522
|
-
psm = psms[self.pl_iface.pipeline_name]
|
551
|
+
psm = self.pl_iface.psm
|
523
552
|
except (PipestatError, AttributeError) as e:
|
524
553
|
# pipestat section faulty or not found in project.looper or sample
|
525
554
|
# or project is missing required pipestat attributes
|
@@ -607,7 +636,6 @@ class SubmissionConductor(object):
|
|
607
636
|
argstring = jinja_render_template_strictly(
|
608
637
|
template=templ, namespaces=namespaces
|
609
638
|
)
|
610
|
-
print(argstring)
|
611
639
|
except UndefinedError as jinja_exception:
|
612
640
|
_LOGGER.warning(NOT_SUB_MSG.format(str(jinja_exception)))
|
613
641
|
except KeyError as e:
|
looper/const.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
""" Shared project constants """
|
2
2
|
|
3
3
|
import os
|
4
|
+
from enum import Enum
|
4
5
|
|
5
6
|
__author__ = "Databio lab"
|
6
7
|
__email__ = "nathan@code.databio.org"
|
@@ -268,3 +269,10 @@ MESSAGE_BY_SUBCOMMAND = {
|
|
268
269
|
"init-piface": "Initialize generic pipeline interface.",
|
269
270
|
"link": "Create directory of symlinks for reported results.",
|
270
271
|
}
|
272
|
+
|
273
|
+
# Add project/sample enum
|
274
|
+
|
275
|
+
|
276
|
+
class PipelineLevel(Enum):
|
277
|
+
SAMPLE = "sample"
|
278
|
+
PROJECT = "project"
|
looper/divvy.py
CHANGED
@@ -6,11 +6,14 @@ import os
|
|
6
6
|
import sys
|
7
7
|
import shutil
|
8
8
|
import yaml
|
9
|
-
from yaml import SafeLoader
|
10
|
-
from shutil import copytree
|
11
9
|
|
10
|
+
|
11
|
+
from shutil import copytree
|
12
|
+
from yacman import FutureYAMLConfigManager as YAMLConfigManager
|
13
|
+
from yacman import write_lock, FILEPATH_KEY, load_yaml, select_config
|
14
|
+
from yaml import SafeLoader
|
12
15
|
from ubiquerg import is_writable, VersionInHelpParser
|
13
|
-
|
16
|
+
|
14
17
|
|
15
18
|
from .const import (
|
16
19
|
COMPUTE_SETTINGS_VARNAME,
|
@@ -28,7 +31,7 @@ _LOGGER = logging.getLogger(__name__)
|
|
28
31
|
# This is the divvy.py submodule from divvy
|
29
32
|
|
30
33
|
|
31
|
-
class ComputingConfiguration(yacman.YAMLConfigManager):
|
34
|
+
class ComputingConfiguration(YAMLConfigManager):
|
32
35
|
"""
|
33
36
|
Represents computing configuration objects.
|
34
37
|
|
@@ -44,36 +47,31 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
44
47
|
`DIVCFG` file)
|
45
48
|
"""
|
46
49
|
|
47
|
-
def __init__(
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
validate_on_write
|
50
|
+
def __init__(
|
51
|
+
self,
|
52
|
+
entries=None,
|
53
|
+
wait_max=None,
|
54
|
+
strict_ro_locks=False,
|
55
|
+
schema_source=None,
|
56
|
+
validate_on_write=False,
|
57
|
+
):
|
58
|
+
super().__init__(
|
59
|
+
entries, wait_max, strict_ro_locks, schema_source, validate_on_write
|
57
60
|
)
|
58
61
|
|
59
|
-
if
|
60
|
-
raise Exception(
|
61
|
-
"Your divvy config file is not in divvy config format "
|
62
|
-
"(it lacks a compute_packages section): '{}'".format(filepath)
|
63
|
-
)
|
64
|
-
# We require that compute_packages be present, even if empty
|
62
|
+
if "compute_packages" not in self:
|
65
63
|
self["compute_packages"] = {}
|
66
|
-
|
67
64
|
# Initialize default compute settings.
|
68
65
|
_LOGGER.debug("Establishing project compute settings")
|
69
66
|
self.compute = None
|
70
67
|
self.setdefault("adapters", None)
|
71
68
|
self.activate_package(DEFAULT_COMPUTE_RESOURCES_NAME)
|
72
|
-
self.config_file = self.filepath
|
73
69
|
|
74
70
|
def write(self, filename=None):
|
75
|
-
|
76
|
-
|
71
|
+
with write_lock(self) as locked_ym:
|
72
|
+
locked_ym.rebase()
|
73
|
+
locked_ym.write()
|
74
|
+
filename = filename or getattr(self, FILEPATH_KEY)
|
77
75
|
filedir = os.path.dirname(filename)
|
78
76
|
# For this object, we *also* have to write the template files
|
79
77
|
for pkg_name, pkg in self["compute_packages"].items():
|
@@ -151,12 +149,12 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
151
149
|
# Augment compute, creating it if needed.
|
152
150
|
if self.compute is None:
|
153
151
|
_LOGGER.debug("Creating Project compute")
|
154
|
-
self.compute =
|
152
|
+
self.compute = YAMLConfigManager()
|
155
153
|
_LOGGER.debug(
|
156
154
|
"Adding entries for package_name '{}'".format(package_name)
|
157
155
|
)
|
158
156
|
|
159
|
-
self.compute.
|
157
|
+
self.compute.update_from_obj(self["compute_packages"][package_name])
|
160
158
|
|
161
159
|
# Ensure submission template is absolute. This *used to be* handled
|
162
160
|
# at update (so the paths were stored as absolutes in the packages),
|
@@ -165,7 +163,7 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
165
163
|
if not os.path.isabs(self.compute["submission_template"]):
|
166
164
|
try:
|
167
165
|
self.compute["submission_template"] = os.path.join(
|
168
|
-
os.path.dirname(self.
|
166
|
+
os.path.dirname(self.default_config_file),
|
169
167
|
self.compute["submission_template"],
|
170
168
|
)
|
171
169
|
except AttributeError as e:
|
@@ -200,11 +198,11 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
200
198
|
self.reset_active_settings()
|
201
199
|
return self.activate_package(package_name)
|
202
200
|
|
203
|
-
def get_active_package(self):
|
201
|
+
def get_active_package(self) -> YAMLConfigManager:
|
204
202
|
"""
|
205
203
|
Returns settings for the currently active compute package
|
206
204
|
|
207
|
-
:return
|
205
|
+
:return YAMLConfigManager: data defining the active compute package
|
208
206
|
"""
|
209
207
|
return self.compute
|
210
208
|
|
@@ -222,7 +220,7 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
222
220
|
|
223
221
|
:return bool: success flag
|
224
222
|
"""
|
225
|
-
self.compute =
|
223
|
+
self.compute = YAMLConfigManager()
|
226
224
|
return True
|
227
225
|
|
228
226
|
def update_packages(self, config_file):
|
@@ -235,11 +233,11 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
235
233
|
|
236
234
|
:param str config_file: path to file with new divvy configuration data
|
237
235
|
"""
|
238
|
-
entries =
|
236
|
+
entries = load_yaml(config_file)
|
239
237
|
self.update(entries)
|
240
238
|
return True
|
241
239
|
|
242
|
-
def get_adapters(self):
|
240
|
+
def get_adapters(self) -> YAMLConfigManager:
|
243
241
|
"""
|
244
242
|
Get current adapters, if defined.
|
245
243
|
|
@@ -248,9 +246,9 @@ class ComputingConfiguration(yacman.YAMLConfigManager):
|
|
248
246
|
package-specific set of adapters, if any defined in 'adapters' section
|
249
247
|
under currently active compute package.
|
250
248
|
|
251
|
-
:return
|
249
|
+
:return YAMLConfigManager: current adapters mapping
|
252
250
|
"""
|
253
|
-
adapters =
|
251
|
+
adapters = YAMLConfigManager()
|
254
252
|
if "adapters" in self and self["adapters"] is not None:
|
255
253
|
adapters.update(self["adapters"])
|
256
254
|
if "compute" in self and "adapters" in self.compute:
|
@@ -376,7 +374,7 @@ def select_divvy_config(filepath):
|
|
376
374
|
:param str | NoneType filepath: direct file path specification
|
377
375
|
:return str: path to the config file to read
|
378
376
|
"""
|
379
|
-
divcfg =
|
377
|
+
divcfg = select_config(
|
380
378
|
config_filepath=filepath,
|
381
379
|
config_env_vars=COMPUTE_SETTINGS_VARNAME,
|
382
380
|
default_config_filepath=DEFAULT_CONFIG_FILEPATH,
|