scipion-pyworkflow 3.10.6__py3-none-any.whl → 3.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/config.py +131 -67
- pyworkflow/constants.py +2 -1
- pyworkflow/plugin.py +93 -44
- pyworkflow/resources/showj/arrowDown.png +0 -0
- pyworkflow/resources/showj/arrowUp.png +0 -0
- pyworkflow/resources/showj/background_section.png +0 -0
- pyworkflow/resources/showj/colRowModeOff.png +0 -0
- pyworkflow/resources/showj/colRowModeOn.png +0 -0
- pyworkflow/resources/showj/delete.png +0 -0
- pyworkflow/resources/showj/doc_icon.png +0 -0
- pyworkflow/resources/showj/download_icon.png +0 -0
- pyworkflow/resources/showj/enabled_gallery.png +0 -0
- pyworkflow/resources/showj/galleryViewOff.png +0 -0
- pyworkflow/resources/showj/galleryViewOn.png +0 -0
- pyworkflow/resources/showj/goto.png +0 -0
- pyworkflow/resources/showj/menu.png +0 -0
- pyworkflow/resources/showj/separator.png +0 -0
- pyworkflow/resources/showj/tableViewOff.png +0 -0
- pyworkflow/resources/showj/tableViewOn.png +0 -0
- pyworkflow/resources/showj/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- pyworkflow/resources/showj/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- pyworkflow/resources/showj/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- pyworkflow/resources/showj/volumeOff.png +0 -0
- pyworkflow/resources/showj/volumeOn.png +0 -0
- pyworkflow/viewer.py +23 -1
- pyworkflowtests/protocols.py +1 -3
- {scipion_pyworkflow-3.10.6.dist-info → scipion_pyworkflow-3.11.0.dist-info}/METADATA +13 -27
- scipion_pyworkflow-3.11.0.dist-info/RECORD +71 -0
- {scipion_pyworkflow-3.10.6.dist-info → scipion_pyworkflow-3.11.0.dist-info}/WHEEL +1 -1
- pyworkflow/apps/__init__.py +0 -29
- pyworkflow/apps/pw_manager.py +0 -37
- pyworkflow/apps/pw_plot.py +0 -51
- pyworkflow/apps/pw_project.py +0 -113
- pyworkflow/apps/pw_protocol_list.py +0 -143
- pyworkflow/apps/pw_protocol_run.py +0 -51
- pyworkflow/apps/pw_run_tests.py +0 -267
- pyworkflow/apps/pw_schedule_run.py +0 -322
- pyworkflow/apps/pw_sleep.py +0 -37
- pyworkflow/apps/pw_sync_data.py +0 -439
- pyworkflow/apps/pw_viewer.py +0 -78
- pyworkflow/gui/__init__.py +0 -36
- pyworkflow/gui/browser.py +0 -726
- pyworkflow/gui/canvas.py +0 -1190
- pyworkflow/gui/dialog.py +0 -977
- pyworkflow/gui/form.py +0 -2637
- pyworkflow/gui/graph.py +0 -247
- pyworkflow/gui/graph_layout.py +0 -271
- pyworkflow/gui/gui.py +0 -566
- pyworkflow/gui/matplotlib_image.py +0 -233
- pyworkflow/gui/plotter.py +0 -247
- pyworkflow/gui/project/__init__.py +0 -25
- pyworkflow/gui/project/base.py +0 -192
- pyworkflow/gui/project/constants.py +0 -139
- pyworkflow/gui/project/labels.py +0 -205
- pyworkflow/gui/project/project.py +0 -492
- pyworkflow/gui/project/searchprotocol.py +0 -154
- pyworkflow/gui/project/searchrun.py +0 -181
- pyworkflow/gui/project/steps.py +0 -171
- pyworkflow/gui/project/utils.py +0 -332
- pyworkflow/gui/project/variables.py +0 -179
- pyworkflow/gui/project/viewdata.py +0 -472
- pyworkflow/gui/project/viewprojects.py +0 -510
- pyworkflow/gui/project/viewprotocols.py +0 -2093
- pyworkflow/gui/project/viewprotocols_extra.py +0 -559
- pyworkflow/gui/text.py +0 -771
- pyworkflow/gui/tooltip.py +0 -185
- pyworkflow/gui/tree.py +0 -684
- pyworkflow/gui/widgets.py +0 -307
- pyworkflow/mapper/__init__.py +0 -26
- pyworkflow/mapper/mapper.py +0 -222
- pyworkflow/mapper/sqlite.py +0 -1581
- pyworkflow/mapper/sqlite_db.py +0 -145
- pyworkflow/project/__init__.py +0 -31
- pyworkflow/project/config.py +0 -454
- pyworkflow/project/manager.py +0 -180
- pyworkflow/project/project.py +0 -2007
- pyworkflow/protocol/__init__.py +0 -38
- pyworkflow/protocol/bibtex.py +0 -48
- pyworkflow/protocol/constants.py +0 -87
- pyworkflow/protocol/executor.py +0 -471
- pyworkflow/protocol/hosts.py +0 -314
- pyworkflow/protocol/launch.py +0 -270
- pyworkflow/protocol/package.py +0 -42
- pyworkflow/protocol/params.py +0 -741
- pyworkflow/protocol/protocol.py +0 -2641
- pyworkflow/tests/__init__.py +0 -29
- pyworkflow/tests/test_utils.py +0 -25
- pyworkflow/tests/tests.py +0 -341
- pyworkflow/utils/__init__.py +0 -38
- pyworkflow/utils/dataset.py +0 -414
- pyworkflow/utils/echo.py +0 -104
- pyworkflow/utils/graph.py +0 -169
- pyworkflow/utils/log.py +0 -284
- pyworkflow/utils/path.py +0 -528
- pyworkflow/utils/process.py +0 -153
- pyworkflow/utils/profiler.py +0 -92
- pyworkflow/utils/progressbar.py +0 -154
- pyworkflow/utils/properties.py +0 -631
- pyworkflow/utils/reflection.py +0 -129
- pyworkflow/utils/utils.py +0 -879
- pyworkflow/utils/which.py +0 -229
- pyworkflow/webservices/__init__.py +0 -8
- pyworkflow/webservices/config.py +0 -11
- pyworkflow/webservices/notifier.py +0 -162
- pyworkflow/webservices/repository.py +0 -59
- pyworkflow/webservices/workflowhub.py +0 -74
- pyworkflowtests/tests/__init__.py +0 -0
- pyworkflowtests/tests/test_canvas.py +0 -72
- pyworkflowtests/tests/test_domain.py +0 -45
- pyworkflowtests/tests/test_logs.py +0 -74
- pyworkflowtests/tests/test_mappers.py +0 -392
- pyworkflowtests/tests/test_object.py +0 -507
- pyworkflowtests/tests/test_project.py +0 -42
- pyworkflowtests/tests/test_protocol_execution.py +0 -142
- pyworkflowtests/tests/test_protocol_export.py +0 -78
- pyworkflowtests/tests/test_protocol_output.py +0 -158
- pyworkflowtests/tests/test_streaming.py +0 -47
- pyworkflowtests/tests/test_utils.py +0 -210
- scipion_pyworkflow-3.10.6.dist-info/RECORD +0 -140
- scipion_pyworkflow-3.10.6.dist-info/dependency_links.txt +0 -1
- {scipion_pyworkflow-3.10.6.dist-info → scipion_pyworkflow-3.11.0.dist-info}/entry_points.txt +0 -0
- {scipion_pyworkflow-3.10.6.dist-info → scipion_pyworkflow-3.11.0.dist-info}/licenses/LICENSE.txt +0 -0
- {scipion_pyworkflow-3.10.6.dist-info → scipion_pyworkflow-3.11.0.dist-info}/top_level.txt +0 -0
pyworkflow/protocol/__init__.py
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# **************************************************************************
|
2
|
-
# *
|
3
|
-
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
|
4
|
-
# *
|
5
|
-
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
|
6
|
-
# *
|
7
|
-
# * This program is free software; you can redistribute it and/or modify
|
8
|
-
# * it under the terms of the GNU General Public License as published by
|
9
|
-
# * the Free Software Foundation; either version 3 of the License, or
|
10
|
-
# * (at your option) any later version.
|
11
|
-
# *
|
12
|
-
# * This program is distributed in the hope that it will be useful,
|
13
|
-
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# * GNU General Public License for more details.
|
16
|
-
# *
|
17
|
-
# * You should have received a copy of the GNU General Public License
|
18
|
-
# * along with this program; if not, write to the Free Software
|
19
|
-
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
20
|
-
# * 02111-1307 USA
|
21
|
-
# *
|
22
|
-
# * All comments concerning this program package may be sent to the
|
23
|
-
# * e-mail address 'scipion@cnb.csic.es'
|
24
|
-
# *
|
25
|
-
# **************************************************************************
|
26
|
-
"""
|
27
|
-
This module contains classes required for the workflow
|
28
|
-
execution and tracking like: Step and Protocol
|
29
|
-
"""
|
30
|
-
|
31
|
-
from .protocol import *
|
32
|
-
from .executor import *
|
33
|
-
from .constants import *
|
34
|
-
from .params import *
|
35
|
-
|
36
|
-
from .launch import *
|
37
|
-
|
38
|
-
from .hosts import HostConfig
|
pyworkflow/protocol/bibtex.py
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# **************************************************************************
|
3
|
-
# *
|
4
|
-
# * Authors: Yaiza Rancel (cyrancel@cnb.csic.es)
|
5
|
-
# *
|
6
|
-
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
|
7
|
-
# *
|
8
|
-
# * This program is free software; you can redistribute it and/or modify
|
9
|
-
# * it under the terms of the GNU General Public License as published by
|
10
|
-
# * the Free Software Foundation; either version 3 of the License, or
|
11
|
-
# * (at your option) any later version.
|
12
|
-
# *
|
13
|
-
# * This program is distributed in the hope that it will be useful,
|
14
|
-
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
-
# * GNU General Public License for more details.
|
17
|
-
# *
|
18
|
-
# * You should have received a copy of the GNU General Public License
|
19
|
-
# * along with this program; if not, write to the Free Software
|
20
|
-
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
21
|
-
# * 02111-1307 USA
|
22
|
-
# *
|
23
|
-
# * All comments concerning this program package may be sent to the
|
24
|
-
# * e-mail address 'scipion@cnb.csic.es'
|
25
|
-
# *
|
26
|
-
# **************************************************************************
|
27
|
-
"""
|
28
|
-
@article{delaRosaTrevin201693,
|
29
|
-
title = "Scipion: A software framework toward integration, reproducibility and validation in 3D electron microscopy ",
|
30
|
-
journal = "Journal of Structural Biology",
|
31
|
-
volume = "195",
|
32
|
-
number = "1",
|
33
|
-
pages = "93 - 99",
|
34
|
-
year = "2016",
|
35
|
-
note = "",
|
36
|
-
issn = "1047-8477",
|
37
|
-
doi = "http://doi.org/10.1016/j.jsb.2016.04.010",
|
38
|
-
url = "http://www.sciencedirect.com/science/article/pii/S104784771630079X",
|
39
|
-
author = "J.M. de la Rosa-Trevín and A. Quintana and L. del Cano and A. Zaldívar and I. Foche and J. Gutiérrez and J. Gómez-Blanco and J. Burguet-Castell and J. Cuenca-Alba and V. Abrishami and J. Vargas and J. Otón and G. Sharov and J.L. Vilas and J. Navas and P. Conesa and M. Kazemi and R. Marabini and C.O.S. Sorzano and J.M. Carazo",
|
40
|
-
keywords = "Electron microscopy",
|
41
|
-
keywords = "Single particle analysis",
|
42
|
-
keywords = "Image processing",
|
43
|
-
keywords = "Software package",
|
44
|
-
keywords = "Workflows",
|
45
|
-
keywords = "Reproducibility "
|
46
|
-
}
|
47
|
-
"""
|
48
|
-
|
pyworkflow/protocol/constants.py
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
# **************************************************************************
|
2
|
-
# *
|
3
|
-
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
|
4
|
-
# *
|
5
|
-
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
|
6
|
-
# *
|
7
|
-
# * This program is free software; you can redistribute it and/or modify
|
8
|
-
# * it under the terms of the GNU General Public License as published by
|
9
|
-
# * the Free Software Foundation; either version 3 of the License, or
|
10
|
-
# * (at your option) any later version.
|
11
|
-
# *
|
12
|
-
# * This program is distributed in the hope that it will be useful,
|
13
|
-
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# * GNU General Public License for more details.
|
16
|
-
# *
|
17
|
-
# * You should have received a copy of the GNU General Public License
|
18
|
-
# * along with this program; if not, write to the Free Software
|
19
|
-
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
20
|
-
# * 02111-1307 USA
|
21
|
-
# *
|
22
|
-
# * All comments concerning this program package may be sent to the
|
23
|
-
# * e-mail address 'scipion@cnb.csic.es'
|
24
|
-
# *
|
25
|
-
# **************************************************************************
|
26
|
-
"""
|
27
|
-
This module contains classes required for the workflow
|
28
|
-
execution and tracking like: Step and Protocol
|
29
|
-
"""
|
30
|
-
|
31
|
-
# ------------------ Constants values -----------------------------------------

# Status a protocol run can be in; mainly used to monitor progress.
STATUS_SAVED = "saved"  # parameters saved for later use
STATUS_LAUNCHED = "launched"  # sent to the queue system, only useful for protocols
STATUS_NEW = "new"
STATUS_RUNNING = "running"  # currently executing
STATUS_FAILED = "failed"  # it ran and failed
STATUS_FINISHED = "finished"  # successfully finished
STATUS_ABORTED = "aborted"
STATUS_INTERACTIVE = "interactive"  # waiting for user interaction
STATUS_WAITING = "waiting"  # original note: following status is used for streaming only
# Allows scheduling jobs that are not ready to run yet
STATUS_SCHEDULED = "scheduled"

# Status values listed as "active"
ACTIVE_STATUS = [
    STATUS_LAUNCHED,
    STATUS_RUNNING,
    STATUS_INTERACTIVE,
    STATUS_SCHEDULED,
]

# Execution modes:
#   MODE_RESUME   -> try to start at the first changed step, skipping unchanged ones
#   MODE_RESTART  -> restart the protocol from the beginning, deleting previous results
#   MODE_CONTINUE -> continue from a specific step, not widely used (DEPRECATED)
MODE_RESUME, MODE_RESTART, MODE_CONTINUE = 0, 1, 2
# JMRT: the label 'Continue' is shown instead of 'Resume' since it is more
# intuitive for users.
MODE_CHOICES = ('Continue', 'Restart')

# Initial sleeping time (in seconds) before launching a scheduled protocol.
# This value is multiplied by the protocol level inside the workflow.
INITIAL_SLEEP_TIME = 30

# Maximum time (in seconds) a scheduled protocol waits between checks of
# whether it can be launched.
MAX_SLEEP_TIME = 120

# Steps execution mode:
#   STEPS_SERIAL   -> execute steps serially, some of the steps can be mpi programs
#   STEPS_PARALLEL -> execute steps in parallel through threads
STEPS_SERIAL, STEPS_PARALLEL = 0, 1

# Level of expertise for the input parameters, mainly used in the protocol form
LEVEL_NORMAL, LEVEL_ADVANCED = 0, 1
LEVEL_CHOICES = ('Normal', 'Advanced')

# Param names for GPU processing
USE_GPU = 'useGpu'
GPU_LIST = 'gpuList'
VOID_GPU = 99

# Job management
UNKNOWN_JOBID = -1

# File sizes, expressed as powers of 1024
SIZE_1KB = 2 ** 10
SIZE_1MB = SIZE_1KB ** 2
SIZE_1GB = SIZE_1KB ** 3
SIZE_1TB = SIZE_1KB ** 4
|
pyworkflow/protocol/executor.py
DELETED
@@ -1,471 +0,0 @@
|
|
1
|
-
# **************************************************************************
|
2
|
-
# *
|
3
|
-
# * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
|
4
|
-
# *
|
5
|
-
# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC
|
6
|
-
# *
|
7
|
-
# * This program is free software; you can redistribute it and/or modify
|
8
|
-
# * it under the terms of the GNU General Public License as published by
|
9
|
-
# * the Free Software Foundation; either version 3 of the License, or
|
10
|
-
# * (at your option) any later version.
|
11
|
-
# *
|
12
|
-
# * This program is distributed in the hope that it will be useful,
|
13
|
-
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# * GNU General Public License for more details.
|
16
|
-
# *
|
17
|
-
# * You should have received a copy of the GNU General Public License
|
18
|
-
# * along with this program; if not, write to the Free Software
|
19
|
-
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
20
|
-
# * 02111-1307 USA
|
21
|
-
# *
|
22
|
-
# * All comments concerning this program package may be sent to the
|
23
|
-
# * e-mail address 'scipion@cnb.csic.es'
|
24
|
-
# *
|
25
|
-
# **************************************************************************
|
26
|
-
"""
|
27
|
-
This module has the classes for execution of protocol steps.
|
28
|
-
The basic one will run steps, one by one, after completion.
|
29
|
-
There is one based on threads to execute steps in parallel
|
30
|
-
using different threads and the last one with MPI processes.
|
31
|
-
"""
|
32
|
-
|
33
|
-
import logging
|
34
|
-
logger = logging.getLogger(__name__)
|
35
|
-
import time
|
36
|
-
import datetime
|
37
|
-
import threading
|
38
|
-
import os
|
39
|
-
|
40
|
-
import pyworkflow.utils.process as process
|
41
|
-
from pyworkflow.utils.path import getParentFolder, removeExt
|
42
|
-
from pyworkflow.constants import PLUGIN_MODULE_VAR, RUN_JOB_GPU_PARAM_SEARCH
|
43
|
-
from . import constants as cts
|
44
|
-
|
45
|
-
from .launch import _submit, UNKNOWN_JOBID, _checkJobStatus
|
46
|
-
|
47
|
-
|
48
|
-
class StepExecutor:
    """ Executes a list of Protocol steps, one at a time. """

    def __init__(self, hostConfig, **kwargs):
        """ Keep the host configuration and the optional GPU list.

        :param hostConfig: host configuration forwarded to process.runJob
        :param kwargs: may carry the GPU ids under the key named by cts.GPU_LIST
        """
        self.hostConfig = hostConfig
        self.protocol = None
        self.gpuList = kwargs.get(cts.GPU_LIST, None)

    def getGpuList(self):
        """ Return the GPU list received at construction (may be None). """
        return self.gpuList

    def setProtocol(self, protocol):
        """ Store the protocol whose plugin/submit information is used when running jobs. """
        self.protocol = protocol

    def getRunContext(self):
        """ Return a dict mapping PLUGIN_MODULE_VAR to the name of the protocol's plugin. """
        pluginName = self.protocol.getPlugin().getName()
        return {PLUGIN_MODULE_VAR: pluginName}

    def runJob(self, log, programName, params,
               numberOfMpi=1, numberOfThreads=1,
               env=None, cwd=None, executable=None):
        """ Thin wrapper around process.runJob that adds the host configuration,
        the GPU list (only when the command uses the GPU placeholder) and the
        protocol's submit dictionary as context.
        """
        gpus = self._getGPUListForCommand(programName, params)
        submitContext = self.protocol.getSubmitDict()
        process.runJob(log, programName, params,
                       numberOfMpi, numberOfThreads,
                       self.hostConfig,
                       env=env, cwd=cwd, gpuList=gpus,
                       executable=executable, context=submitContext)

    def _getGPUListForCommand(self, program, params):
        """ Return the GPU list when the program or its params contain
        RUN_JOB_GPU_PARAM_SEARCH, otherwise an empty list. """
        placeholder = RUN_JOB_GPU_PARAM_SEARCH
        usesGpu = placeholder in params or placeholder in program
        return self.getGpuList() if usesGpu else []

    def _getRunnable(self, steps, n=1):
        """ Return up to n steps that are 'new', whose prerequisites are all
        finished and that pass _isStepRunnable. The list is empty when no
        step is ready.
        """
        ready = []

        for candidate in steps:
            if candidate.getStatus() != cts.STATUS_NEW:
                continue
            # Prerequisites are 1-based positions into the steps list
            if not all(steps[idx - 1].isFinished()
                       for idx in candidate._prerequisites):
                continue
            # Asked last: subclasses may reserve resources in this check
            if not self._isStepRunnable(candidate):
                continue

            ready.append(candidate)
            if len(ready) == n:
                break

        return ready

    def _isStepRunnable(self, step):
        """ Hook for subclasses to test extra conditions; always True here. """
        return True

    def _arePending(self, steps):
        """ Return True if any step is running or waiting, meaning that other
        steps may still become runnable.
        """
        for s in steps:
            if s.isRunning() or s.isWaiting():
                return True
        return False

    def runSteps(self, steps,
                 stepStartedCallback,
                 stepFinishedCallback,
                 stepsCheckCallback,
                 stepsCheckSecs=3):
        """ Run the steps in the calling thread, honouring their dependencies.

        :param steps: list of steps to run
        :param stepStartedCallback: called with the step right before running it
        :param stepFinishedCallback: called with the step after running it; a
            falsy return value stops the execution loop
        :param stepsCheckCallback: called with no arguments at most every
            stepsCheckSecs seconds and once more at the end
        :param stepsCheckSecs: seconds between stepsCheckCallback calls
        """
        # Same loop shape as the parallel executor, so the steps dependency
        # graph and the streaming case are taken into account here too.
        checkInterval = datetime.timedelta(seconds=stepsCheckSecs)
        previousCheck = datetime.datetime.now()

        while True:
            candidates = self._getRunnable(steps)

            if candidates:
                current = candidates[0]
                current.setRunning()
                stepStartedCallback(current)
                current.run()
                if not stepFinishedCallback(current):
                    break
            elif self._arePending(steps):
                # Nothing runnable, but some steps are running or waiting:
                # wait a bit to see if something changes
                time.sleep(0.5)
            else:
                # Nothing to run, nothing running or waiting: we are done,
                # either failed or finished
                break

            moment = datetime.datetime.now()
            if moment - previousCheck > checkInterval:
                stepsCheckCallback()
                previousCheck = moment

        stepsCheckCallback()  # one last check to finalize stuff
|
154
|
-
|
155
|
-
|
156
|
-
class StepThread(threading.Thread):
    """ Thread to run Steps in parallel. """

    def __init__(self, step, lock):
        """
        :param step: step to execute; its object id is kept as ``thId``
        :param lock: lock held while the final status of the step is updated
        """
        threading.Thread.__init__(self)
        self.thId = step.getObjId()
        self.step = step
        self.lock = lock

    def run(self):
        """ Run the step and record its final status under the lock.

        An ``Exception`` is logged and stored as the failure message. Any
        other ``BaseException`` (e.g. ``SystemExit``) also marks the step as
        failed and is then re-raised; previously that case fell through to
        ``finally`` with no error recorded and the step was marked finished.
        """
        error = None
        try:
            self.step._run()  # not self.step.run() , to avoid race conditions
        except Exception as e:
            error = str(e)
            logger.error("Couldn't run the code in a thread." , exc_info=e)
        except BaseException as e:
            # Not a regular error, but the step did not complete either:
            # make sure it is not reported as finished, then propagate.
            error = str(e) or type(e).__name__
            raise
        finally:
            with self.lock:
                if error is None:
                    self.step.setFinished()
                else:
                    self.step.setFailed(error)
|
177
|
-
|
178
|
-
|
179
|
-
class ThreadStepExecutor(StepExecutor):
    """ Run steps in parallel using threads.

    GPUs are handed out in "slots" kept in ``self.gpuDict``: a negative key
    is a free slot, a step id key is a slot booked by that step.
    """

    def __init__(self, hostConfig, nThreads, **kwargs):
        """
        :param hostConfig: host configuration, forwarded to StepExecutor
        :param nThreads: number of steps that may run concurrently
        :param kwargs: forwarded to StepExecutor (may carry the GPU list)
        """
        StepExecutor.__init__(self, hostConfig, **kwargs)
        self.numberOfProcs = nThreads
        # If the gpuList was specified, we need to distribute GPUs among
        # all the threads
        self.gpuDict = {}

        self._assignGPUperNode()

    def _assignGPUperNode(self):
        """ Fill ``self.gpuDict`` with the free GPU slots (negative keys).

        With more GPUs than threads each slot gets several GPUs; otherwise
        every GPU becomes its own slot. GPUs equal to cts.VOID_GPU are dropped.
        """
        # Nothing to distribute without GPUs
        if not self.gpuList:
            return

        nThreads = self.numberOfProcs
        nGpu = len(self.gpuList)

        if nGpu > nThreads:
            # GPUs per thread, floored: 3 GPUs and 2 threads gives 1, and the
            # remainder is spread one extra GPU at a time over the first nodes:
            #   GPUs: 0 1 2, threads: 2 -> node 1: GPU 0 1, node 2: GPU 2
            step = nGpu // nThreads
            spare = nGpu % nThreads
            fromPos = 0

            # Nodes: each concurrent step
            for node in range(1, nThreads + 1):
                extraGpu = 1 if spare > 0 else 0
                toPos = fromPos + step + extraGpu
                gpusForNode = list(self.gpuList[fromPos:toPos])

                newGpusForNode = self.cleanVoidGPUs(gpusForNode)
                if not newGpusForNode:
                    logger.info("Gpu slot cancelled: all were null Gpus -> %s", gpusForNode)
                else:
                    logger.info("GPUs %s assigned to node %s", newGpusForNode, node)
                    self.gpuDict[-node] = newGpusForNode

                fromPos = toPos
                spare -= 1
        else:
            # The GPU list is not repeated to reach nThreads items any more
            if nThreads > nGpu:
                logger.warning("GPUs are no longer extended. If you want all GPUs to match threads repeat as many "
                               "GPUs as threads.")

            for index, gpu in enumerate(self.gpuList):
                if gpu == cts.VOID_GPU:
                    logger.info("Void GPU (%s) found in the list. Skipping the slot.", cts.VOID_GPU)
                else:
                    logger.info("GPU slot for gpu %s.", gpu)
                    # Any negative number in the key means a free gpu slot. can't be 0!
                    self.gpuDict[-index - 1] = [gpu]

    def cleanVoidGPUs(self, gpuList):
        """ Return a new list with the items of gpuList that are not cts.VOID_GPU. """
        newGPUList = []
        for gpuid in gpuList:
            if gpuid == cts.VOID_GPU:
                logger.info("Void GPU detected in %s", gpuList)
            else:
                newGPUList.append(gpuid)
        return newGPUList

    def getGpuList(self):
        """ Return the GPU list assigned to current thread
        or empty list if not using GPUs.

        The current thread is expected to expose ``thId`` (as StepThread does).
        """
        nodeId = threading.current_thread().thId

        # Slot already booked for this id: reuse it
        if nodeId in self.gpuDict:
            gpus = self.gpuDict.get(nodeId)
            logger.info("Reusing GPUs (%s) slot for %s", gpus, nodeId)
            return gpus

        gpus = self.getFreeGpuSlot(nodeId)
        if gpus is None:
            logger.warning("Step on node %s is requesting GPUs but there isn't any available. "
                           "Review configuration of threads/GPUs. Returning an empty list.", nodeId)
            return []
        return gpus

    def getFreeGpuSlot(self, stepId=None):
        """ Return the GPUs of a free slot, or None when there is no free slot.

        :param stepId: when not None, the slot found is also booked under this id
        """
        # Iterate over a snapshot of the keys since booking changes the dict
        for node in list(self.gpuDict):
            # Negative key: this is a free slot
            if node < 0:
                gpus = self.gpuDict[node]

                if stepId is not None:
                    self.gpuDict.pop(node)
                    self.gpuDict[stepId] = gpus
                    logger.info("GPUs %s assigned to step %s", gpus, stepId)
                else:
                    logger.info("Free gpu slot found at %s", node)
                return gpus

        return None

    def freeGpusSlot(self, node):
        """ Release the GPU slot booked under ``node`` (a step id), if any.

        The slot goes back under a negative key. ``-node`` is preferred, but
        that key may still hold another free slot (initial free slots are
        keyed -1, -2, ...), so the first unused negative key at or below it
        is taken instead of overwriting, and losing, the other slot's GPUs.
        """
        gpus = self.gpuDict.get(node, None)

        # Some nodes/threads do not use gpus so may not be booked and not in the dictionary
        if gpus is None:
            logger.debug("step id %s not found in GPU slots" % node)
            return

        self.gpuDict.pop(node)
        freeKey = min(-node, -1)  # free keys must be strictly negative
        while freeKey in self.gpuDict:
            freeKey -= 1
        self.gpuDict[freeKey] = gpus
        logger.info("GPUs %s freed from step %s", gpus, node)

    def _isStepRunnable(self, step):
        """ Check GPU availability: a step that needs GPUs is runnable only
        if a free slot exists, and that slot is booked for the step here. """
        if self.gpuList and step.needsGPU() and self.getFreeGpuSlot(step.getObjId()) is None:
            logger.info("Can't run step %s. Needs gpus and there are no free gpu slots", step)
            return False

        return True

    def runSteps(self, steps,
                 stepStartedCallback,
                 stepFinishedCallback,
                 stepsCheckCallback,
                 stepsCheckSecs=5):
        """
        Creates threads and synchronize the steps execution.

        :param steps: list of steps to run
        :param stepStartedCallback: callback called with each step before it starts
        :param stepFinishedCallback: callback called with each step once it is no
            longer running; a falsy return value stops the execution loop
        :param stepsCheckCallback: callback to check if there are new steps to add (streaming)
        :param stepsCheckSecs: seconds between stepsCheckCallback calls

        """
        delta = datetime.timedelta(seconds=stepsCheckSecs)
        lastCheck = datetime.datetime.now()

        sharedLock = threading.Lock()

        runningSteps = {}  # currently running step in each node ({node: step})
        freeNodes = list(range(1, self.numberOfProcs + 1))  # available nodes to send jobs
        # Only the threads started here are joined at the end.
        # threading.enumerate() also lists foreign and dummy threads, which
        # may never end or cannot be joined at all.
        launchedThreads = []
        logger.info("Execution threads: %s", freeNodes)
        logger.info("Running steps using %s threads. 1 thread is used for this main proccess.", self.numberOfProcs)

        while True:
            # See which of the runningSteps are not really running anymore.
            # Update them and freeNodes, and call final callback for step.
            with sharedLock:
                nodesFinished = [node for node, step in runningSteps.items()
                                 if not step.isRunning()]
                doContinue = True
                for node in nodesFinished:
                    step = runningSteps.pop(node)  # remove entry from runningSteps
                    freeNodes.append(node)  # the node is available now
                    self.freeGpusSlot(step.getObjId())
                    # Notify steps termination and check if we should continue
                    doContinue = stepFinishedCallback(step)
                    if not doContinue:
                        break

            if not doContinue:
                break

            anyLaunched = False
            # If there are available nodes, send next runnable step.
            with sharedLock:
                if freeNodes:
                    # Forget threads that already ended so the list stays small
                    launchedThreads = [t for t in launchedThreads if t.is_alive()]
                    runnableSteps = self._getRunnable(steps, len(freeNodes))

                    for step in runnableSteps:
                        # We found a step to work in, so let's start a new
                        # thread to do the job and book it.
                        anyLaunched = True
                        step.setRunning()
                        stepStartedCallback(step)
                        node = freeNodes.pop(0)  # take an available node
                        runningSteps[node] = step
                        logger.debug("Running step %s on node %s", step, node)
                        t = StepThread(step, sharedLock)
                        # won't keep process up if main thread ends
                        t.daemon = True
                        t.start()
                        launchedThreads.append(t)
                anyPending = self._arePending(steps)

            if not anyLaunched:
                if anyPending:  # nothing launched, but steps running or waiting
                    time.sleep(3)
                else:
                    break  # yeah, we are done, either failed or finished :)

            now = datetime.datetime.now()
            if now - lastCheck > delta:
                stepsCheckCallback()
                lastCheck = now

        stepsCheckCallback()

        # Wait for all the step threads started by this call.
        for t in launchedThreads:
            t.join()
|
398
|
-
|
399
|
-
|
400
|
-
class QueueStepExecutor(ThreadStepExecutor):
    """ Thread based executor whose runJob submits each command to the
    queue system and polls until the job stops being reported as running. """

    def __init__(self, hostConfig, submitDict, nThreads, **kwargs):
        """
        :param hostConfig: host configuration (queue defaults are read from it)
        :param submitDict: submission variables that override the queue defaults
        :param nThreads: number of concurrent steps
        :param kwargs: forwarded to ThreadStepExecutor
        """
        ThreadStepExecutor.__init__(self, hostConfig, nThreads, **kwargs)
        self.submitDict = submitDict
        # Command counter per thread
        self.threadCommands = {}

        if nThreads > 1:
            self.runJobs = ThreadStepExecutor.runSteps
        else:
            self.runJobs = StepExecutor.runSteps

        self.renameGpuIds()

    def renameGpuIds(self):
        """ Reorganize the gpus ids starting from 0 since the queue engine is the one assigning them.
        https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars """
        for gpuList in self.gpuDict.values():
            # In-place, so the same list objects stay in the dictionary
            gpuList[:] = range(len(gpuList))

        logger.debug("Updated gpus ids rebase starting from 0: %s per thread" %self.gpuDict)

    def getThreadJobId(self, stepId):
        """ Returns the job id extension assigned to each thread/step:
        a counter per stepId, starting at 1 and increased on every call. """
        if stepId not in self.threadCommands:
            self.threadCommands[stepId] = 0

        self.threadCommands[stepId] += 1

        return self.threadCommands[stepId]

    def runJob(self, log, programName, params, numberOfMpi=1, numberOfThreads=1, env=None, cwd=None, executable=None):
        """ Submit the command to the queue and wait while it is reported as running.

        The job name, script and logs get a '-<threadId>-<counter>' suffix.
        The current thread is expected to expose ``thId`` (as StepThread does).

        :raises Exception: if the submission does not return a valid job id
        :return: cts.STATUS_RUNNING
        """
        threadId = threading.current_thread().thId
        submitDict = dict(self.hostConfig.getQueuesDefault())
        submitDict.update(self.submitDict)
        threadJobId = self.getThreadJobId(threadId)
        subthreadId = '-%s-%s' % (threadId, threadJobId)
        submitDict['JOB_NAME'] = submitDict['JOB_NAME'] + subthreadId
        submitDict['JOB_SCRIPT'] = os.path.abspath(removeExt(submitDict['JOB_SCRIPT']) + subthreadId + ".job")
        submitDict['JOB_LOGS'] = os.path.join(getParentFolder(submitDict['JOB_SCRIPT']), submitDict['JOB_NAME'])

        logger.debug("Variables available for replacement in submission command are: %s" % submitDict)

        submitDict['JOB_COMMAND'] = process.buildRunCommand(programName, params, numberOfMpi,
                                                            self.hostConfig, env,
                                                            gpuList=self._getGPUListForCommand(programName, params),
                                                            context=submitDict)

        jobid = _submit(self.hostConfig, submitDict, cwd, env)

        # Validate before recording: a failed submission must not be stored
        # among the protocol's active jobs.
        if (jobid is None) or (jobid == UNKNOWN_JOBID):
            logger.info("jobId is none therefore we set it to fail")
            raise Exception("Failed to submit to queue.")

        self.protocol.appendJobId(jobid)  # append active jobs
        self.protocol._store(self.protocol._jobId)

        # NOTE(review): this value is returned unchanged after the wait, so
        # the caller always gets STATUS_RUNNING - confirm this is intended.
        status = cts.STATUS_RUNNING
        wait = 3

        # Check status while job running, waiting 3 more seconds after each
        # check until the wait reaches 300 seconds.
        # REVIEW this to minimize the overhead in time put by this delay check
        while _checkJobStatus(self.hostConfig, jobid) == cts.STATUS_RUNNING:
            time.sleep(wait)
            if wait < 300:
                wait += 3

        self.protocol.removeJobId(jobid)  # After completion, remove inactive jobs.
        self.protocol._store(self.protocol._jobId)

        return status
|