scipion-pyworkflow 3.10.5__py3-none-any.whl → 3.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. pyworkflow/config.py +131 -67
  2. pyworkflow/constants.py +12 -2
  3. pyworkflow/object.py +3 -2
  4. pyworkflow/plugin.py +93 -44
  5. pyworkflow/project/scripts/fix_links.py +4 -1
  6. pyworkflow/resources/showj/arrowDown.png +0 -0
  7. pyworkflow/resources/showj/arrowUp.png +0 -0
  8. pyworkflow/resources/showj/background_section.png +0 -0
  9. pyworkflow/resources/showj/colRowModeOff.png +0 -0
  10. pyworkflow/resources/showj/colRowModeOn.png +0 -0
  11. pyworkflow/resources/showj/delete.png +0 -0
  12. pyworkflow/resources/showj/doc_icon.png +0 -0
  13. pyworkflow/resources/showj/download_icon.png +0 -0
  14. pyworkflow/resources/showj/enabled_gallery.png +0 -0
  15. pyworkflow/resources/showj/galleryViewOff.png +0 -0
  16. pyworkflow/resources/showj/galleryViewOn.png +0 -0
  17. pyworkflow/resources/showj/goto.png +0 -0
  18. pyworkflow/resources/showj/menu.png +0 -0
  19. pyworkflow/resources/showj/separator.png +0 -0
  20. pyworkflow/resources/showj/tableViewOff.png +0 -0
  21. pyworkflow/resources/showj/tableViewOn.png +0 -0
  22. pyworkflow/resources/showj/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  23. pyworkflow/resources/showj/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  24. pyworkflow/resources/showj/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  25. pyworkflow/resources/showj/volumeOff.png +0 -0
  26. pyworkflow/resources/showj/volumeOn.png +0 -0
  27. pyworkflow/viewer.py +23 -1
  28. pyworkflowtests/objects.py +2 -2
  29. pyworkflowtests/protocols.py +1 -3
  30. {scipion_pyworkflow-3.10.5.dist-info → scipion_pyworkflow-3.11.0.dist-info}/METADATA +21 -25
  31. scipion_pyworkflow-3.11.0.dist-info/RECORD +71 -0
  32. {scipion_pyworkflow-3.10.5.dist-info → scipion_pyworkflow-3.11.0.dist-info}/WHEEL +1 -1
  33. scipion_pyworkflow-3.11.0.dist-info/entry_points.txt +2 -0
  34. pyworkflow/apps/__init__.py +0 -29
  35. pyworkflow/apps/pw_manager.py +0 -37
  36. pyworkflow/apps/pw_plot.py +0 -51
  37. pyworkflow/apps/pw_project.py +0 -113
  38. pyworkflow/apps/pw_protocol_list.py +0 -143
  39. pyworkflow/apps/pw_protocol_run.py +0 -51
  40. pyworkflow/apps/pw_run_tests.py +0 -267
  41. pyworkflow/apps/pw_schedule_run.py +0 -322
  42. pyworkflow/apps/pw_sleep.py +0 -37
  43. pyworkflow/apps/pw_sync_data.py +0 -439
  44. pyworkflow/apps/pw_viewer.py +0 -78
  45. pyworkflow/gui/__init__.py +0 -36
  46. pyworkflow/gui/browser.py +0 -726
  47. pyworkflow/gui/canvas.py +0 -1190
  48. pyworkflow/gui/dialog.py +0 -977
  49. pyworkflow/gui/form.py +0 -2637
  50. pyworkflow/gui/graph.py +0 -247
  51. pyworkflow/gui/graph_layout.py +0 -271
  52. pyworkflow/gui/gui.py +0 -566
  53. pyworkflow/gui/matplotlib_image.py +0 -233
  54. pyworkflow/gui/plotter.py +0 -247
  55. pyworkflow/gui/project/__init__.py +0 -25
  56. pyworkflow/gui/project/base.py +0 -192
  57. pyworkflow/gui/project/constants.py +0 -139
  58. pyworkflow/gui/project/labels.py +0 -205
  59. pyworkflow/gui/project/project.py +0 -492
  60. pyworkflow/gui/project/searchprotocol.py +0 -154
  61. pyworkflow/gui/project/searchrun.py +0 -181
  62. pyworkflow/gui/project/steps.py +0 -171
  63. pyworkflow/gui/project/utils.py +0 -332
  64. pyworkflow/gui/project/variables.py +0 -179
  65. pyworkflow/gui/project/viewdata.py +0 -472
  66. pyworkflow/gui/project/viewprojects.py +0 -510
  67. pyworkflow/gui/project/viewprotocols.py +0 -2093
  68. pyworkflow/gui/project/viewprotocols_extra.py +0 -560
  69. pyworkflow/gui/text.py +0 -771
  70. pyworkflow/gui/tooltip.py +0 -185
  71. pyworkflow/gui/tree.py +0 -684
  72. pyworkflow/gui/widgets.py +0 -307
  73. pyworkflow/mapper/__init__.py +0 -26
  74. pyworkflow/mapper/mapper.py +0 -222
  75. pyworkflow/mapper/sqlite.py +0 -1578
  76. pyworkflow/mapper/sqlite_db.py +0 -145
  77. pyworkflow/project/__init__.py +0 -31
  78. pyworkflow/project/config.py +0 -454
  79. pyworkflow/project/manager.py +0 -180
  80. pyworkflow/project/project.py +0 -2010
  81. pyworkflow/protocol/__init__.py +0 -38
  82. pyworkflow/protocol/bibtex.py +0 -48
  83. pyworkflow/protocol/constants.py +0 -87
  84. pyworkflow/protocol/executor.py +0 -455
  85. pyworkflow/protocol/hosts.py +0 -313
  86. pyworkflow/protocol/launch.py +0 -270
  87. pyworkflow/protocol/package.py +0 -42
  88. pyworkflow/protocol/params.py +0 -741
  89. pyworkflow/protocol/protocol.py +0 -2582
  90. pyworkflow/tests/__init__.py +0 -29
  91. pyworkflow/tests/test_utils.py +0 -25
  92. pyworkflow/tests/tests.py +0 -341
  93. pyworkflow/utils/__init__.py +0 -38
  94. pyworkflow/utils/dataset.py +0 -414
  95. pyworkflow/utils/echo.py +0 -104
  96. pyworkflow/utils/graph.py +0 -169
  97. pyworkflow/utils/log.py +0 -284
  98. pyworkflow/utils/path.py +0 -528
  99. pyworkflow/utils/process.py +0 -132
  100. pyworkflow/utils/profiler.py +0 -92
  101. pyworkflow/utils/progressbar.py +0 -154
  102. pyworkflow/utils/properties.py +0 -631
  103. pyworkflow/utils/reflection.py +0 -129
  104. pyworkflow/utils/utils.py +0 -879
  105. pyworkflow/utils/which.py +0 -229
  106. pyworkflow/webservices/__init__.py +0 -8
  107. pyworkflow/webservices/config.py +0 -11
  108. pyworkflow/webservices/notifier.py +0 -162
  109. pyworkflow/webservices/repository.py +0 -59
  110. pyworkflow/webservices/workflowhub.py +0 -74
  111. pyworkflowtests/tests/__init__.py +0 -0
  112. pyworkflowtests/tests/test_canvas.py +0 -72
  113. pyworkflowtests/tests/test_domain.py +0 -45
  114. pyworkflowtests/tests/test_logs.py +0 -74
  115. pyworkflowtests/tests/test_mappers.py +0 -392
  116. pyworkflowtests/tests/test_object.py +0 -507
  117. pyworkflowtests/tests/test_project.py +0 -42
  118. pyworkflowtests/tests/test_protocol_execution.py +0 -135
  119. pyworkflowtests/tests/test_protocol_export.py +0 -78
  120. pyworkflowtests/tests/test_protocol_output.py +0 -158
  121. pyworkflowtests/tests/test_streaming.py +0 -47
  122. pyworkflowtests/tests/test_utils.py +0 -210
  123. scipion_pyworkflow-3.10.5.dist-info/RECORD +0 -140
  124. scipion_pyworkflow-3.10.5.dist-info/dependency_links.txt +0 -1
  125. scipion_pyworkflow-3.10.5.dist-info/entry_points.txt +0 -5
  126. {scipion_pyworkflow-3.10.5.dist-info → scipion_pyworkflow-3.11.0.dist-info/licenses}/LICENSE.txt +0 -0
  127. {scipion_pyworkflow-3.10.5.dist-info → scipion_pyworkflow-3.11.0.dist-info}/top_level.txt +0 -0
@@ -1,38 +0,0 @@
1
- # **************************************************************************
2
- # *
3
- # * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
4
- # *
5
- # * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6
- # *
7
- # * This program is free software; you can redistribute it and/or modify
8
- # * it under the terms of the GNU General Public License as published by
9
- # * the Free Software Foundation; either version 3 of the License, or
10
- # * (at your option) any later version.
11
- # *
12
- # * This program is distributed in the hope that it will be useful,
13
- # * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # * GNU General Public License for more details.
16
- # *
17
- # * You should have received a copy of the GNU General Public License
18
- # * along with this program; if not, write to the Free Software
19
- # * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20
- # * 02111-1307 USA
21
- # *
22
- # * All comments concerning this program package may be sent to the
23
- # * e-mail address 'scipion@cnb.csic.es'
24
- # *
25
- # **************************************************************************
26
- """
27
- This module contains the classes required for the workflow
28
- execution and tracking like: Step and Protocol
29
- """
30
-
31
- from .protocol import *
32
- from .executor import *
33
- from .constants import *
34
- from .params import *
35
-
36
- from .launch import *
37
-
38
- from .hosts import HostConfig
@@ -1,48 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # **************************************************************************
3
- # *
4
- # * Authors: Yaiza Rancel (cyrancel@cnb.csic.es)
5
- # *
6
- # * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
7
- # *
8
- # * This program is free software; you can redistribute it and/or modify
9
- # * it under the terms of the GNU General Public License as published by
10
- # * the Free Software Foundation; either version 3 of the License, or
11
- # * (at your option) any later version.
12
- # *
13
- # * This program is distributed in the hope that it will be useful,
14
- # * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # * GNU General Public License for more details.
17
- # *
18
- # * You should have received a copy of the GNU General Public License
19
- # * along with this program; if not, write to the Free Software
20
- # * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21
- # * 02111-1307 USA
22
- # *
23
- # * All comments concerning this program package may be sent to the
24
- # * e-mail address 'scipion@cnb.csic.es'
25
- # *
26
- # **************************************************************************
27
- """
28
- @article{delaRosaTrevin201693,
29
- title = "Scipion: A software framework toward integration, reproducibility and validation in 3D electron microscopy ",
30
- journal = "Journal of Structural Biology",
31
- volume = "195",
32
- number = "1",
33
- pages = "93 - 99",
34
- year = "2016",
35
- note = "",
36
- issn = "1047-8477",
37
- doi = "http://doi.org/10.1016/j.jsb.2016.04.010",
38
- url = "http://www.sciencedirect.com/science/article/pii/S104784771630079X",
39
- author = "J.M. de la Rosa-Trevín and A. Quintana and L. del Cano and A. Zaldívar and I. Foche and J. Gutiérrez and J. Gómez-Blanco and J. Burguet-Castell and J. Cuenca-Alba and V. Abrishami and J. Vargas and J. Otón and G. Sharov and J.L. Vilas and J. Navas and P. Conesa and M. Kazemi and R. Marabini and C.O.S. Sorzano and J.M. Carazo",
40
- keywords = "Electron microscopy",
41
- keywords = "Single particle analysis",
42
- keywords = "Image processing",
43
- keywords = "Software package",
44
- keywords = "Workflows",
45
- keywords = "Reproducibility "
46
- }
47
- """
48
-
@@ -1,87 +0,0 @@
1
- # **************************************************************************
2
- # *
3
- # * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
4
- # *
5
- # * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6
- # *
7
- # * This program is free software; you can redistribute it and/or modify
8
- # * it under the terms of the GNU General Public License as published by
9
- # * the Free Software Foundation; either version 3 of the License, or
10
- # * (at your option) any later version.
11
- # *
12
- # * This program is distributed in the hope that it will be useful,
13
- # * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # * GNU General Public License for more details.
16
- # *
17
- # * You should have received a copy of the GNU General Public License
18
- # * along with this program; if not, write to the Free Software
19
- # * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20
- # * 02111-1307 USA
21
- # *
22
- # * All comments concerning this program package may be sent to the
23
- # * e-mail address 'scipion@cnb.csic.es'
24
- # *
25
- # **************************************************************************
26
- """
27
- This module contains the constants shared by the workflow execution
28
- and tracking code: run statuses, execution modes, GPU params and sizes
29
- """
30
-
31
# ------------------ Constants values -----------------------------------------

# --- Run status -------------------------------------------------------------
# Every status a protocol run can report; mostly used to monitor progress.
STATUS_SAVED = "saved"              # parameters stored for a later run
STATUS_LAUNCHED = "launched"        # sent to the queue system (protocols only)
STATUS_NEW = "new"
STATUS_RUNNING = "running"          # executing right now
STATUS_FAILED = "failed"            # it ran and ended with an error
STATUS_FINISHED = "finished"        # it ran and ended successfully
STATUS_ABORTED = "aborted"
STATUS_INTERACTIVE = "interactive"  # blocked until the user interacts
STATUS_WAITING = "waiting"          # streaming only
STATUS_SCHEDULED = "scheduled"      # scheduled, but not ready to run yet

# Statuses grouped here as "active".
ACTIVE_STATUS = [
    STATUS_LAUNCHED,
    STATUS_RUNNING,
    STATUS_INTERACTIVE,
    STATUS_SCHEDULED,
]

# --- Execution modes --------------------------------------------------------
MODE_RESUME = 0    # start at the first changed step, skipping unchanged ones
MODE_RESTART = 1   # start over, deleting all previous results
MODE_CONTINUE = 2  # continue from a specific step; DEPRECATED, rarely used
# JMRT: the user-facing label for MODE_RESUME is 'Continue' (more intuitive
# than 'Resume'); the deprecated MODE_CONTINUE has no entry here.
MODE_CHOICES = ('Continue', 'Restart')

# --- Scheduling -------------------------------------------------------------
# Initial sleeping time (in seconds) before launching a scheduled protocol.
# It is multiplied by the level of the protocol inside the workflow.
INITIAL_SLEEP_TIME = 30

# Maximum time (in seconds) a scheduled protocol waits between checks of
# whether it can be launched.
MAX_SLEEP_TIME = 120

# --- Steps execution mode ---------------------------------------------------
STEPS_SERIAL = 0    # one step after another (a step may still be an MPI program)
STEPS_PARALLEL = 1  # several steps at once, through threads

# --- Expertise level of the input params (protocol form) --------------------
LEVEL_NORMAL = 0
LEVEL_ADVANCED = 1
LEVEL_CHOICES = ('Normal', 'Advanced')

# --- GPU processing ---------------------------------------------------------
USE_GPU = 'useGpu'    # param name
GPU_LIST = 'gpuList'  # param name
VOID_GPU = 99         # GPU id used as a "void" placeholder in a GPU list

# --- Job management ---------------------------------------------------------
UNKNOWN_JOBID = -1

# --- File sizes (bytes) -----------------------------------------------------
SIZE_1KB = 1024
SIZE_1MB = SIZE_1KB ** 2
SIZE_1GB = SIZE_1KB ** 3
SIZE_1TB = SIZE_1KB ** 4
@@ -1,455 +0,0 @@
1
- # **************************************************************************
2
- # *
3
- # * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es)
4
- # *
5
- # * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC
6
- # *
7
- # * This program is free software; you can redistribute it and/or modify
8
- # * it under the terms of the GNU General Public License as published by
9
- # * the Free Software Foundation; either version 3 of the License, or
10
- # * (at your option) any later version.
11
- # *
12
- # * This program is distributed in the hope that it will be useful,
13
- # * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # * GNU General Public License for more details.
16
- # *
17
- # * You should have received a copy of the GNU General Public License
18
- # * along with this program; if not, write to the Free Software
19
- # * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20
- # * 02111-1307 USA
21
- # *
22
- # * All comments concerning this program package may be sent to the
23
- # * e-mail address 'scipion@cnb.csic.es'
24
- # *
25
- # **************************************************************************
26
- """
27
- This module has the classes for execution of protocol steps.
28
- The basic one will run steps, one by one, after completion.
29
- There is one based on threads to execute steps in parallel
30
- using different threads and the last one with MPI processes.
31
- """
32
-
33
- import logging
34
- logger = logging.getLogger(__name__)
35
- import time
36
- import datetime
37
- import threading
38
- import os
39
-
40
- import pyworkflow.utils.process as process
41
- from pyworkflow.utils.path import getParentFolder, removeExt
42
- from . import constants as cts
43
-
44
- from .launch import _submit, UNKNOWN_JOBID, _checkJobStatus
45
-
46
-
47
class StepExecutor:
    """ Executes the steps of a protocol one at a time, in the calling thread. """

    def __init__(self, hostConfig, **kwargs):
        """
        :param hostConfig: host configuration forwarded to every job launched
        :param kwargs: only the GPU list keyword (cts.GPU_LIST) is read here
        """
        self.hostConfig = hostConfig
        self.gpuList = kwargs.get(cts.GPU_LIST, None)
        self.protocol = None

    def getGpuList(self):
        """ GPU list given at construction time (None when not provided). """
        return self.gpuList

    def setProtocol(self, protocol):
        """ Keep a reference to the protocol whose steps are being executed. """
        self.protocol = protocol

    def runJob(self, log, programName, params,
               numberOfMpi=1, numberOfThreads=1,
               env=None, cwd=None, executable=None):
        """ Delegate to process.runJob, adding this executor's host
        configuration and GPU list to the call.
        """
        process.runJob(log, programName, params,
                       numberOfMpi, numberOfThreads,
                       self.hostConfig,
                       env=env, cwd=cwd,
                       gpuList=self.getGpuList(),
                       executable=executable)

    def _getRunnable(self, steps, n=1):
        """ Collect up to n steps that can be started right now.

        A step qualifies when its status is 'new', every prerequisite has
        finished and _isStepRunnable accepts it. Prerequisites are used as
        1-based positions in ``steps``.

        :return: list of steps (empty when nothing is ready)
        """
        ready = []

        for candidate in steps:
            if candidate.getStatus() != cts.STATUS_NEW:
                continue
            if not all(steps[i - 1].isFinished()
                       for i in candidate._prerequisites):
                continue
            if not self._isStepRunnable(candidate):
                continue

            ready.append(candidate)
            if len(ready) == n:
                break

        return ready

    def _isStepRunnable(self, step):
        """ Hook for subclasses that need extra conditions; accepts any step. """
        return True

    def _arePending(self, steps):
        """ True when at least one step is running or waiting, i.e. something
        may still change and unblock other steps.
        """
        for step in steps:
            if step.isRunning() or step.isWaiting():
                return True
        return False

    def runSteps(self, steps,
                 stepStartedCallback,
                 stepFinishedCallback,
                 stepsCheckCallback,
                 stepsCheckSecs=3):
        """ Run the steps serially until none is runnable or pending.

        The loop mirrors the parallel executor so that the dependency graph
        and the streaming case are honoured here too.

        :param steps: list of steps to run
        :param stepStartedCallback: called with the step right before running it
        :param stepFinishedCallback: called with the step after running it;
            a falsy return value stops the execution
        :param stepsCheckCallback: called with no arguments, at most once
            every stepsCheckSecs seconds and once more at the end
        :param stepsCheckSecs: seconds between stepsCheckCallback calls
        """
        checkInterval = datetime.timedelta(seconds=stepsCheckSecs)
        lastCheck = datetime.datetime.now()

        while True:
            ready = self._getRunnable(steps)

            if ready:
                current = ready[0]
                current.setRunning()
                stepStartedCallback(current)
                current.run()

                if not stepFinishedCallback(current):
                    break

            elif self._arePending(steps):
                # Nothing to start yet, but something is still running or
                # waiting: give it some time before looking again.
                time.sleep(0.5)
            else:
                # Nothing runnable, running or waiting: we are done.
                break

            now = datetime.datetime.now()
            if now - lastCheck > checkInterval:
                stepsCheckCallback()
                lastCheck = now

        stepsCheckCallback()  # one last check to finalize stuff
143
-
144
-
145
class StepThread(threading.Thread):
    """ Thread to run Steps in parallel. """
    def __init__(self, step, lock):
        """
        :param step: step to execute; its object id becomes this thread's thId
        :param lock: lock held while the final status of the step is written
        """
        threading.Thread.__init__(self)
        self.thId = step.getObjId()
        self.step = step
        self.lock = lock

    def run(self):
        """ Execute the step and record its final status under the lock.

        The step is marked as finished only when step._run() returns
        normally. Any exception marks it as failed; exits that are not
        subclasses of Exception (e.g. SystemExit) are recorded as failures
        too and then re-raised.
        """
        error = None
        try:
            self.step._run()  # not self.step.run() , to avoid race conditions
        except Exception as e:
            error = str(e)
            logger.error("Couldn't run the code in a thread." , exc_info=e)
        except BaseException as e:
            # SystemExit & co. are not an Exception: without this branch the
            # step would be flagged as finished although it never completed.
            error = str(e) or type(e).__name__
            raise
        finally:
            with self.lock:
                if error is None:
                    self.step.setFinished()
                else:
                    self.step.setFailed(error)
166
-
167
-
168
class ThreadStepExecutor(StepExecutor):
    """ Run steps in parallel using threads.

    GPUs are tracked in ``self.gpuDict``, which maps a key to a list of GPU
    ids (a "slot"). A negative key marks a free slot; once a slot is booked
    it is re-keyed with the id of the step that holds it.
    """
    def __init__(self, hostConfig, nThreads, **kwargs):
        """
        :param hostConfig: host configuration forwarded to every job launched
        :param nThreads: number of steps that may run at the same time
        :param kwargs: passed to StepExecutor (GPU list keyword)
        """
        StepExecutor.__init__(self, hostConfig, **kwargs)
        self.numberOfProcs = nThreads
        # If the gpuList was specified, we need to distribute GPUs among
        # all the threads
        self.gpuDict = {}

        self._assignGPUperNode()

    def _assignGPUperNode(self):
        """ Split self.gpuList into free GPU slots stored in self.gpuDict.

        With more GPUs than threads, each thread gets a contiguous chunk of
        the list (the first chunks take one extra GPU while the division has
        a remainder). Otherwise every GPU becomes a slot on its own. GPUs
        equal to cts.VOID_GPU never end up in a slot.
        """
        # Nothing to distribute when no GPUs were given
        if not self.gpuList:
            return

        nThreads = self.numberOfProcs
        nGpu = len(self.gpuList)

        if nGpu > nThreads:
            # GPUs per thread (floored) and how many threads get one more.
            # E.g. GPUs 0 1 2 and 2 threads: node 1 -> [0, 1], node 2 -> [2]
            step, spare = divmod(nGpu, nThreads)
            fromPos = 0

            # Nodes: one per concurrent step, numbered from 1
            for node in range(1, nThreads + 1):
                extraGpu = 1 if spare > 0 else 0
                toPos = fromPos + step + extraGpu
                gpusForNode = list(self.gpuList[fromPos:toPos])

                newGpusForNode = self.cleanVoidGPUs(gpusForNode)
                if len(newGpusForNode) == 0:
                    logger.info("Gpu slot cancelled: all were null Gpus -> %s" % gpusForNode)
                else:
                    logger.info("GPUs %s assigned to node %s" % (newGpusForNode, node))
                    self.gpuDict[-node] = newGpusForNode

                fromPos = toPos
                spare -= 1

        else:
            # The GPU list is not repeated to reach nThreads items anymore
            if nThreads > nGpu:
                logger.warning("GPUs are no longer extended. If you want all GPUs to match threads repeat as many "
                               "GPUs as threads.")

            for index, gpu in enumerate(self.gpuList):

                if gpu == cts.VOID_GPU:
                    logger.info("Void GPU (%s) found in the list. Skipping the slot." % cts.VOID_GPU)
                else:
                    logger.info("GPU slot for gpu %s." % gpu)
                    # Any negative number in the key means a free gpu slot. can't be 0!
                    self.gpuDict[-index-1] = [gpu]

    def cleanVoidGPUs(self, gpuList):
        """ Return a new list with the items of gpuList that are not
        cts.VOID_GPU, logging every void one found.
        """
        newGPUList = []
        for gpuid in gpuList:
            if gpuid == cts.VOID_GPU:
                logger.info("Void GPU detected in %s" % gpuList)
            else:
                newGPUList.append(gpuid)
        return newGPUList

    def getGpuList(self):
        """ Return the GPU list assigned to current thread
        or empty list if not using GPUs.

        The current thread must expose a ``thId`` attribute (StepThread
        does); that id is the key looked up, or booked, in self.gpuDict.
        """
        nodeId = threading.current_thread().thId

        # Does this id already hold a slot?
        if nodeId in self.gpuDict:
            gpus = self.gpuDict.get(nodeId)
            logger.info("Reusing GPUs (%s) slot for %s" % (gpus, nodeId))
            return gpus

        gpus = self.getFreeGpuSlot(nodeId)
        if gpus is None:
            logger.warning("Step on node %s is requesting GPUs but there isn't any available. Review configuration of threads/GPUs. Returning and empty list." % nodeId)
            return []

        return gpus

    def getFreeGpuSlot(self, stepId=None):
        """ Returns a free gpu slot available or None. If stepId is passed
        the slot is also reserved for it.

        :param stepId: id to book the slot for; None just peeks at the slot
        :return: list of GPU ids of the first free slot found, or None
        """
        for node in self.gpuDict.keys():
            # Negative key: this is a free slot
            if node < 0:
                gpus = self.gpuDict[node]

                if stepId is not None:
                    # Book it. Mutating the dict is safe here because we
                    # return right away, without iterating any further.
                    self.gpuDict.pop(node)
                    self.gpuDict[stepId] = gpus
                    logger.info("GPUs %s assigned to step %s" % (gpus, stepId))
                else:
                    logger.info("Free gpu slot found at %s" % node)
                return gpus

        return None

    def freeGpusSlot(self, node):
        """ Release the GPU slot booked under ``node`` (a step id), if any.

        The slot goes back to the pool under a negative key. -node is used
        when available; otherwise the next unused negative key is taken, so
        releasing a slot never overwrites another slot that is still free.
        """
        gpus = self.gpuDict.get(node, None)

        # Some nodes/threads do not use gpus so may not be booked and not in the dictionary
        if gpus is not None:
            self.gpuDict.pop(node)

            # Free slots must have a negative key that nobody else is using
            freeKey = min(-node, -1)
            while freeKey in self.gpuDict:
                freeKey -= 1

            self.gpuDict[freeKey] = gpus
            logger.info("GPUs %s freed from step %s" % (gpus, node))
        else:
            logger.debug("step id %s not found in GPU slots" % node)

    def _isStepRunnable(self, step):
        """ Overwrite this method to check GPUs availability.

        When a GPU list was given and the step needs GPUs, a free slot is
        booked for the step here; the step is rejected if none is free.
        """
        if self.gpuList and step.needsGPU() and self.getFreeGpuSlot(step.getObjId()) is None:
            logger.info("Can't run step %s. Needs gpus and there are no free gpu slots" % step)
            return False

        return True

    def runSteps(self, steps,
                 stepStartedCallback,
                 stepFinishedCallback,
                 stepsCheckCallback,
                 stepsCheckSecs=5):
        """
        Creates threads and synchronize the steps execution.

        :param steps: list of steps to run
        :param stepStartedCallback: called with each step right before its thread starts
        :param stepFinishedCallback: called with each step once it is no longer
            running; a falsy return value stops the execution
        :param stepsCheckCallback: callback to check if there are new steps to add (streaming)
        :param stepsCheckSecs: seconds between stepsCheckCallback calls

        """

        delta = datetime.timedelta(seconds=stepsCheckSecs)
        lastCheck = datetime.datetime.now()

        sharedLock = threading.Lock()

        runningSteps = {}  # currently running step in each node ({node: step})
        freeNodes = list(range(1, self.numberOfProcs+1))  # available nodes to send jobs
        logger.info("Execution threads: %s" % freeNodes)
        logger.info("Running steps using %s threads. 1 thread is used for this main proccess." % self.numberOfProcs)

        while True:
            # See which of the runningSteps are not really running anymore.
            # Update them and freeNodes, and call final callback for step.
            with sharedLock:
                nodesFinished = [node for node, step in runningSteps.items()
                                 if not step.isRunning()]
            doContinue = True
            for node in nodesFinished:
                step = runningSteps.pop(node)  # remove entry from runningSteps
                freeNodes.append(node)  # the node is available now
                self.freeGpusSlot(step.getObjId())
                # Notify steps termination and check if we should continue
                doContinue = stepFinishedCallback(step)
                if not doContinue:
                    break

            if not doContinue:
                break

            anyLaunched = False
            # If there are available nodes, send next runnable step.
            with sharedLock:
                if freeNodes:
                    runnableSteps = self._getRunnable(steps, len(freeNodes))

                    for step in runnableSteps:
                        # We found a step to work in, so let's start a new
                        # thread to do the job and book it.
                        anyLaunched = True
                        step.setRunning()
                        stepStartedCallback(step)
                        node = freeNodes.pop(0)  # take an available node
                        runningSteps[node] = step
                        logger.debug("Running step %s on node %s" % (step, node))
                        t = StepThread(step, sharedLock)
                        # won't keep process up if main thread ends
                        t.daemon = True
                        t.start()
                anyPending = self._arePending(steps)

            if not anyLaunched:
                if anyPending:  # nothing launched but something still going on
                    time.sleep(3)
                else:
                    break  # yeah, we are done, either failed or finished :)

            now = datetime.datetime.now()
            if now - lastCheck > delta:
                stepsCheckCallback()
                lastCheck = now

        stepsCheckCallback()

        # Wait for all threads now.
        # NOTE(review): this joins every live thread of the process, not only
        # the StepThreads started above -- confirm that is intended.
        for t in threading.enumerate():
            if t is not threading.current_thread():
                t.join()
387
-
388
-
389
class QueueStepExecutor(ThreadStepExecutor):
    """ Thread based executor whose jobs are submitted to the queue system
    of the host instead of being run directly.
    """
    def __init__(self, hostConfig, submitDict, nThreads, **kwargs):
        """
        :param hostConfig: host configuration, used to submit and poll jobs
        :param submitDict: submission values; they override the queue
            defaults of hostConfig on every runJob call
        :param nThreads: number of steps that may run at the same time
        :param kwargs: passed to ThreadStepExecutor (GPU list keyword)
        """
        ThreadStepExecutor.__init__(self, hostConfig, nThreads, **kwargs)
        self.submitDict = submitDict
        # Command counter per thread
        self.threadCommands = {}

        # NOTE(review): nothing visible in this module reads runJobs --
        # confirm whether it is still needed.
        if nThreads > 1:
            self.runJobs = ThreadStepExecutor.runSteps
        else:
            self.runJobs = StepExecutor.runSteps

        self.renameGpuIds()

    def renameGpuIds(self):
        """ Reorganize the gpus ids starting from 0 since the queue engine is the one assigning them.
        https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars """
        for gpuList in self.gpuDict.values():
            # In place, so the list objects already stored stay the same
            gpuList[:] = range(len(gpuList))

        logger.debug("Updated gpus ids rebase starting from 0: %s per thread" %self.gpuDict)

    def getThreadJobId(self, stepId):
        """ Returns the job id extension assigned to each thread/step:
        a counter per stepId that starts at 1 and grows on every call.
        """
        self.threadCommands[stepId] = self.threadCommands.get(stepId, 0) + 1

        return self.threadCommands[stepId]

    def runJob(self, log, programName, params, numberOfMpi=1, numberOfThreads=1, env=None, cwd=None, executable=None):
        """ Submit the command to the queue and block until the queue stops
        reporting the job as running.

        Must be called from a thread exposing ``thId`` (StepThread does):
        that id and a per-thread counter make the job name, script and logs
        unique. log, numberOfThreads and executable are accepted for
        signature compatibility but not used here.

        :return: cts.STATUS_RUNNING (the value is not updated after the wait)
        :raises Exception: when the submission returns no valid job id
        """
        threadId = threading.current_thread().thId
        submitDict = dict(self.hostConfig.getQueuesDefault())
        submitDict.update(self.submitDict)
        submitDict['JOB_COMMAND'] = process.buildRunCommand(programName, params, numberOfMpi,
                                                            self.hostConfig, env,
                                                            gpuList=self.getGpuList())
        threadJobId = self.getThreadJobId(threadId)
        subthreadId = '-%s-%s' % (threadId, threadJobId)
        submitDict['JOB_NAME'] = submitDict['JOB_NAME'] + subthreadId
        submitDict['JOB_SCRIPT'] = os.path.abspath(removeExt(submitDict['JOB_SCRIPT']) + subthreadId + ".job")
        submitDict['JOB_LOGS'] = os.path.join(getParentFolder(submitDict['JOB_SCRIPT']), submitDict['JOB_NAME'])

        jobid = _submit(self.hostConfig, submitDict, cwd, env)

        # Validate before registering: a failed submission must not leave an
        # invalid id among the active jobs of the protocol.
        if (jobid is None) or (jobid == UNKNOWN_JOBID):
            logger.info("jobId is none therefore we set it to fail")
            raise Exception("Failed to submit to queue.")

        self.protocol.appendJobId(jobid)  # append active jobs
        self.protocol._store(self.protocol._jobId)

        status = cts.STATUS_RUNNING
        wait = 3

        # Check status while job running, sleeping a bit longer each time
        # (the delay stops growing once it reaches 300 seconds).
        # REVIEW this to minimize the overhead in time put by this delay check
        while _checkJobStatus(self.hostConfig, jobid) == cts.STATUS_RUNNING:
            time.sleep(wait)
            if wait < 300:
                wait += 3

        self.protocol.removeJobId(jobid)  # After completion, remove inactive jobs.
        self.protocol._store(self.protocol._jobId)

        return status