siliconcompiler 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as published in their respective public registries.
- siliconcompiler/__init__.py +2 -0
- siliconcompiler/_metadata.py +1 -1
- siliconcompiler/apps/sc_issue.py +5 -3
- siliconcompiler/apps/sc_remote.py +0 -17
- siliconcompiler/apps/utils/replay.py +5 -5
- siliconcompiler/checklist.py +1 -1
- siliconcompiler/core.py +39 -48
- siliconcompiler/data/templates/replay/replay.sh.j2 +18 -1
- siliconcompiler/dependencyschema.py +392 -0
- siliconcompiler/design.py +664 -0
- siliconcompiler/flowgraph.py +32 -1
- siliconcompiler/metric.py +19 -0
- siliconcompiler/package/__init__.py +383 -223
- siliconcompiler/package/git.py +75 -77
- siliconcompiler/package/github.py +70 -97
- siliconcompiler/package/https.py +77 -93
- siliconcompiler/packageschema.py +260 -0
- siliconcompiler/pdk.py +2 -2
- siliconcompiler/record.py +57 -5
- siliconcompiler/remote/client.py +61 -13
- siliconcompiler/remote/server.py +109 -64
- siliconcompiler/report/dashboard/cli/board.py +1 -2
- siliconcompiler/scheduler/__init__.py +3 -1375
- siliconcompiler/scheduler/docker.py +268 -0
- siliconcompiler/scheduler/run_node.py +20 -19
- siliconcompiler/scheduler/scheduler.py +308 -0
- siliconcompiler/scheduler/schedulernode.py +934 -0
- siliconcompiler/scheduler/slurm.py +147 -163
- siliconcompiler/scheduler/taskscheduler.py +39 -52
- siliconcompiler/schema/__init__.py +3 -3
- siliconcompiler/schema/baseschema.py +256 -11
- siliconcompiler/schema/editableschema.py +4 -0
- siliconcompiler/schema/journal.py +210 -0
- siliconcompiler/schema/namedschema.py +31 -2
- siliconcompiler/schema/parameter.py +14 -1
- siliconcompiler/schema/parametervalue.py +1 -34
- siliconcompiler/schema/schema_cfg.py +211 -350
- siliconcompiler/tool.py +139 -37
- siliconcompiler/tools/_common/__init__.py +14 -11
- siliconcompiler/tools/builtin/concatenate.py +2 -2
- siliconcompiler/tools/builtin/verify.py +1 -2
- siliconcompiler/tools/openroad/scripts/common/procs.tcl +27 -25
- siliconcompiler/tools/slang/__init__.py +3 -2
- siliconcompiler/tools/vpr/route.py +69 -0
- siliconcompiler/tools/yosys/sc_synth_asic.tcl +0 -4
- siliconcompiler/toolscripts/_tools.json +13 -8
- siliconcompiler/toolscripts/ubuntu22/install-klayout.sh +4 -0
- siliconcompiler/toolscripts/ubuntu24/install-klayout.sh +4 -0
- siliconcompiler/utils/__init__.py +2 -23
- siliconcompiler/utils/flowgraph.py +5 -5
- siliconcompiler/utils/logging.py +2 -1
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/METADATA +8 -6
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/RECORD +57 -52
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/WHEEL +1 -1
- siliconcompiler/scheduler/docker_runner.py +0 -254
- siliconcompiler/schema/journalingschema.py +0 -238
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/entry_points.txt +0 -0
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/licenses/LICENSE +0 -0
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.34.0.dist-info}/top_level.txt +0 -0
siliconcompiler/scheduler/docker.py (new file, +268 lines)

```diff
@@ -0,0 +1,268 @@
+import docker
+import os
+import shlex
+import sys
+
+from pathlib import Path
+
+from siliconcompiler.package import RemoteResolver
+from siliconcompiler.utils import default_email_credentials_file
+from siliconcompiler.scheduler.schedulernode import SchedulerNode
+
+
+def get_image(chip, step, index):
+    from siliconcompiler import __version__
+
+    queue = chip.get('option', 'scheduler', 'queue', step=step, index=index)
+    if queue:
+        return queue
+
+    return os.getenv(
+        'SC_DOCKER_IMAGE',
+        f'ghcr.io/siliconcompiler/sc_runner:v{__version__}')
+
+
+def get_volumes_directories(chip, cache_dir, workdir, step, index):
+    all_dirs = set()
+    # Collect files
+    for key in chip.allkeys():
+        sc_type = chip.get(*key, field='type')
+
+        if 'file' in sc_type or 'dir' in sc_type:
+            cstep = step
+            cindex = index
+
+            if chip.get(*key, field='pernode').is_never():
+                cstep = None
+                cindex = None
+
+            files = chip.find_files(*key, step=cstep, index=cindex, missing_ok=True)
+            if files:
+                if not isinstance(files, list):
+                    files = [files]
+                for path in files:
+                    if path is None:
+                        continue
+                    if 'file' in sc_type:
+                        all_dirs.add(os.path.dirname(path))
+                    else:
+                        all_dirs.add(path)
+
+    # Collect caches
+    for resolver in chip.get('package', field="schema").get_resolvers().values():
+        all_dirs.add(resolver())
+
+    all_dirs = [
+        Path(cache_dir),
+        Path(workdir),
+        Path(chip.scroot),
+        *[Path(path) for path in all_dirs]]
+
+    pruned_dirs = all_dirs.copy()
+    for base_path in all_dirs:
+        if base_path not in pruned_dirs:
+            continue
+
+        new_pruned_dirs = [base_path]
+        for check_path in pruned_dirs:
+            if base_path == check_path:
+                continue
+
+            if base_path not in check_path.parents:
+                new_pruned_dirs.append(check_path)
+        pruned_dirs = new_pruned_dirs
+
+    pruned_dirs = set(pruned_dirs)
+
+    builddir = chip.find_files('option', 'builddir')
+
+    rw_volumes = set()
+
+    for path in pruned_dirs:
+        for rw_allow in (Path(builddir), Path(workdir), Path(cache_dir)):
+            if path == rw_allow or path in rw_allow.parents:
+                rw_volumes.add(path)
+
+    ro_volumes = pruned_dirs.difference(rw_volumes)
+
+    return rw_volumes, ro_volumes
+
+
+class DockerSchedulerNode(SchedulerNode):
+    def __init__(self, chip, step, index, replay=False):
+        super().__init__(chip, step, index, replay=replay)
+
+        self.__queue = get_image(self.chip, self.step, self.index)
+
+    @property
+    def queue(self):
+        return self.__queue
+
+    @staticmethod
+    def init(chip):
+        if sys.platform == 'win32':
+            # this avoids the issue of different file system types
+            chip.logger.error('Setting copy field to true for docker run on Windows')
+            for key in chip.allkeys():
+                if key[0] == 'history':
+                    continue
+                sc_type = chip.get(*key, field='type')
+                if 'dir' in sc_type or 'file' in sc_type:
+                    chip.set(*key, True, field='copy')
+            chip.collect()
+
+    def run(self):
+        try:
+            client = docker.from_env()
+            client.version()
+        except (docker.errors.DockerException, docker.errors.APIError) as e:
+            self.logger.error(f'Unable to connect to docker: {e}')
+            self.halt()
+
+        is_windows = sys.platform == 'win32'
+
+        workdir = self.chip.getworkdir()
+        start_cwd = os.getcwd()
+
+        # Remove handlers from logger
+        for handler in self.logger.handlers.copy():
+            self.logger.removeHandler(handler)
+
+        # Reinit logger
+        self.chip._init_logger(step=self.step, index=self.index, in_run=True)
+
+        # Change working directory since the run may delete this folder
+        os.makedirs(workdir, exist_ok=True)
+        os.chdir(workdir)
+
+        image_name = get_image(self.chip, self.step, self.index)
+
+        # Pull image if needed
+        try:
+            image = client.images.get(image_name)
+        except docker.errors.ImageNotFound:
+            # Needs a lock to avoid downloading a bunch in parallel
+            image_repo, image_tag = image_name.split(':')
+            self.logger.info(f'Pulling docker image {image_name}')
+            try:
+                image = client.images.pull(image_repo, tag=image_tag)
+            except docker.errors.APIError as e:
+                self.logger.error(f'Unable to pull image: {e}')
+                image_src = image_repo.split('/')[0]
+                self.logger.error(f" if you are logged into {image_src} with expired credentials, "
+                                  f"please use 'docker logout {image_src}'")
+                self.halt()
+
+        email_file = default_email_credentials_file()
+        if is_windows:
+            # Hack to get around manifest merging
+            self.chip.set('option', 'cachedir', None)
+            cache_dir = '/sc_cache'
+            cwd = '/sc_docker'
+            builddir = f'{cwd}/build'
+
+            local_cfg = os.path.join(start_cwd, 'sc_docker.json')
+            job = self.chip.get('option', 'jobname')
+            cfg = f'{builddir}/{self.chip.design}/{job}/{self.step}/{self.index}/sc_docker.json'
+
+            user = None
+
+            volumes = [
+                f"{self.chip.cwd}:{cwd}:rw",
+                f"{RemoteResolver.determine_cache_dir(self.chip)}:{cache_dir}:rw"
+            ]
+            self.logger.debug(f'Volumes: {volumes}')
+
+            env = {}
+
+            if os.path.exists(email_file):
+                env["HOME"] = "/sc_home"
+
+                volumes.append(f'{os.path.dirname(email_file)}:/sc_home/.sc:ro')
+        else:
+            cache_dir = RemoteResolver.determine_cache_dir(self.chip)
+            cwd = self.chip.cwd
+            builddir = self.chip.find_files('option', 'builddir')
+
+            local_cfg = os.path.abspath('sc_docker.json')
+            cfg = local_cfg
+
+            user = os.getuid()
+
+            rw_volumes, ro_volumes = get_volumes_directories(
+                self.chip, cache_dir, workdir, self.step, self.index)
+            volumes = [
+                *[
+                    f'{path}:{path}:rw' for path in rw_volumes
+                ],
+                *[
+                    f'{path}:{path}:ro' for path in ro_volumes
+                ]
+            ]
+            self.logger.debug(f'Read write volumes: {rw_volumes}')
+            self.logger.debug(f'Read only volumes: {ro_volumes}')
+
+            env = {}
+            if os.path.exists(email_file):
+                env["HOME"] = "/sc_home"
+
+                volumes.append(f'{os.path.dirname(email_file)}:/sc_home/.sc:ro')
+
+        container = None
+        try:
+            container = client.containers.run(
+                image.id,
+                volumes=volumes,
+                labels=[
+                    "siliconcompiler",
+                    f"sc_node:{self.chip.design}:{self.step}{self.index}"
+                ],
+                user=user,
+                detach=True,
+                tty=True,
+                auto_remove=True,
+                environment=env)
+
+            # Write manifest to make it available to the docker runner
+            self.chip.write_manifest(local_cfg)
+
+            cachemap = []
+            for package, resolver in self.chip.get(
+                    'package', field="schema").get_resolvers().items():
+                cachemap.append(f'{package}:{resolver()}')
+
+            self.logger.info('Running in docker container: '
+                             f'{container.name} ({container.short_id})')
+            args = [
+                '-cfg', cfg,
+                '-cwd', cwd,
+                '-builddir', str(builddir),
+                '-cachedir', str(cache_dir),
+                '-step', self.step,
+                '-index', self.index,
+                '-unset_scheduler'
+            ]
+            if not is_windows and cachemap:
+                args.append('-cachemap')
+                args.extend(cachemap)
+            cmd = f'python3 -m siliconcompiler.scheduler.run_node {shlex.join(args)}'
+            exec_handle = client.api.exec_create(container.name, cmd)
+            stream = client.api.exec_start(exec_handle, stream=True)
+
+            # Print the log
+            for chunk in stream:
+                for line in chunk.decode().splitlines():
+                    print(line)
+
+            if client.api.exec_inspect(exec_handle['Id']).get('ExitCode') != 0:
+                self.halt()
+        finally:
+            # Ensure we clean up containers
+            if container:
+                try:
+                    container.stop()
+                except docker.errors.APIError:
+                    self.logger.error(f'Failed to stop docker container: {container.name}')

+            # Restore working directory
+            os.chdir(start_cwd)
```
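The Docker path is opted into per node through the scheduler options that `get_image()` and the new `Scheduler` class (later in this diff) read. A minimal usage sketch, not part of the diff; the design name and image tag are illustrative:

```python
# Hypothetical opt-in to the new Docker scheduler. The schema key names
# come from the code above; everything else is illustrative.
from siliconcompiler import Chip

chip = Chip('heartbeat')  # illustrative design name

# Routes nodes to DockerSchedulerNode (see scheduler.py later in this diff)
chip.set('option', 'scheduler', 'name', 'docker')

# Optional image override; otherwise get_image() falls back to the
# SC_DOCKER_IMAGE environment variable, then ghcr.io/siliconcompiler/sc_runner:v<version>
chip.set('option', 'scheduler', 'queue', 'ghcr.io/siliconcompiler/sc_runner:latest')
```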
siliconcompiler/scheduler/run_node.py (+20 -19; several removed lines are truncated in the upstream diff view)

```diff
@@ -4,9 +4,10 @@ import argparse
 import os
 import sys
 import tarfile
+import os.path
+
 from siliconcompiler import Chip, Schema
-from siliconcompiler.
-from siliconcompiler.scheduler import _runtask, _executenode
+from siliconcompiler.scheduler.schedulernode import SchedulerNode
 from siliconcompiler import __version__
 
 
@@ -38,16 +39,12 @@ def main():
                                         field='shorthelp'))
     parser.add_argument('-cachedir',
                         metavar='<directory>',
-                        required=True,
                         help=schema.get('option', 'cachedir',
                                         field='shorthelp'))
     parser.add_argument('-cachemap',
                         metavar='<package>:<directory>',
                         nargs='+',
                         help='Map of caches to prepopulate runner with')
-    parser.add_argument('-fetch_cache',
-                        action='store_true',
-                        help='Allow for cache downloads')
     parser.add_argument('-step',
                         required=True,
                         metavar='<step>',
@@ -72,12 +69,15 @@ def main():
     parser.add_argument('-unset_scheduler',
                         action='store_true',
                         help='Unset scheduler to ensure local run')
+    parser.add_argument('-replay',
+                        action='store_true',
+                        help='Running as replay')
     args = parser.parse_args()
 
     # Change to working directory to allow rel path to be build dir
     # this avoids needing to deal with the job hash on the client
     # side
-    os.chdir(args.cwd)
+    os.chdir(os.path.abspath(args.cwd))
 
     # Create the Chip object.
     chip = Chip('<design>')
@@ -86,8 +86,10 @@ def main():
     # setup work directory
     chip.set('arg', 'step', args.step)
     chip.set('arg', 'index', args.index)
-    chip.set('option', 'builddir', args.builddir)
-
+    chip.set('option', 'builddir', os.path.abspath(args.builddir))
+
+    if args.cachedir:
+        chip.set('option', 'cachedir', os.path.abspath(args.cachedir))
 
     if args.remoteid:
         chip.set('record', 'remoteid', args.remoteid)
@@ -98,27 +100,26 @@ def main():
             chip.unset('option', 'scheduler', 'name', step=step, index=index)
 
     # Init logger to ensure consistent view
-    chip._init_logger(step=
-                      index=
+    chip._init_logger(step=args.step,
+                      index=args.index,
                       in_run=True)
 
     if args.cachemap:
         for cachepair in args.cachemap:
             package, path = cachepair.split(':')
-            chip.
+            chip.get("package", field="schema")._set_cache(package, path)
 
     # Populate cache
-    for
-
+    for resolver in chip.get('package', field='schema').get_resolvers().values():
+        resolver()
 
     # Run the task.
     error = True
     try:
-
-
-
-
-        _executenode)
+        SchedulerNode(chip,
+                      args.step,
+                      args.index,
+                      replay=args.replay).run()
         error = False
 
     finally:
```
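These flags are exactly what `DockerSchedulerNode.run()` assembles when it execs this module inside the container. A sketch of that construction, assuming the Windows-branch mount points from `docker.py` (`/sc_docker`, `/sc_cache`); the design, job, step, and index values are placeholders:

```python
# Sketch of the exec command built in docker.py (all values are placeholders).
import shlex

args = [
    '-cfg', '/sc_docker/build/mydesign/job0/syn/0/sc_docker.json',
    '-cwd', '/sc_docker',
    '-builddir', '/sc_docker/build',
    '-cachedir', '/sc_cache',
    '-step', 'syn',
    '-index', '0',
    '-unset_scheduler',  # strips scheduler settings so the in-container run stays local
]
cmd = f'python3 -m siliconcompiler.scheduler.run_node {shlex.join(args)}'
```

Since `run_node.py` now calls `os.path.abspath()` on `-cwd`, `-builddir`, and `-cachedir`, relative paths are also safe here.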
siliconcompiler/scheduler/scheduler.py (new file, +308 lines)

```diff
@@ -0,0 +1,308 @@
+import os
+import re
+import shutil
+import sys
+
+import os.path
+
+from siliconcompiler import Schema
+from siliconcompiler import NodeStatus
+from siliconcompiler.schema import Journal
+from siliconcompiler.flowgraph import RuntimeFlowgraph
+from siliconcompiler.scheduler.schedulernode import SchedulerNode
+from siliconcompiler.scheduler.slurm import SlurmSchedulerNode
+from siliconcompiler.scheduler.docker import DockerSchedulerNode
+from siliconcompiler.scheduler.taskscheduler import TaskScheduler
+
+from siliconcompiler import utils
+from siliconcompiler.scheduler import send_messages
+
+
+class Scheduler:
+    def __init__(self, chip):
+        self.__chip = chip
+        self.__logger = self.__chip.logger
+
+        flow = self.__chip.get("option", "flow")
+        if not flow:
+            raise ValueError("flow must be specified")
+
+        if flow not in self.__chip.getkeys("flowgraph"):
+            raise ValueError("flow is not defined")
+
+        self.__flow = self.__chip.schema.get("flowgraph", flow, field="schema")
+        from_steps = self.__chip.get('option', 'from')
+        to_steps = self.__chip.get('option', 'to')
+        prune_nodes = self.__chip.get('option', 'prune')
+
+        if not self.__flow.validate(logger=self.__logger):
+            raise ValueError(f"{self.__flow.name()} flowgraph contains errors and cannot be run.")
+        if not RuntimeFlowgraph.validate(
+                self.__flow,
+                from_steps=from_steps,
+                to_steps=to_steps,
+                prune_nodes=prune_nodes,
+                logger=chip.logger):
+            raise ValueError(f"{self.__flow.name()} flowgraph contains errors and cannot be run.")
+
+        self.__flow_runtime = RuntimeFlowgraph(
+            self.__flow,
+            from_steps=from_steps,
+            to_steps=to_steps,
+            prune_nodes=self.__chip.get('option', 'prune'))
+
+        self.__flow_runtime_no_prune = RuntimeFlowgraph(
+            self.__flow,
+            from_steps=from_steps,
+            to_steps=to_steps)
+
+        self.__flow_load_runtime = RuntimeFlowgraph(
+            self.__flow,
+            to_steps=from_steps,
+            prune_nodes=prune_nodes)
+
+        self.__flow_something = RuntimeFlowgraph(
+            self.__flow,
+            from_steps=set([step for step, _ in self.__flow.get_entry_nodes()]),
+            prune_nodes=prune_nodes)
+
+        self.__record = self.__chip.schema.get("record", field="schema")
+        self.__metrics = self.__chip.schema.get("metric", field="schema")
+
+        self.__tasks = {}
+
+    def __print_status(self, header):
+        self.__logger.debug(f"#### {header}")
+        for step, index in self.__flow.get_nodes():
+            self.__logger.debug(f"({step}, {index}) -> "
+                                f"{self.__record.get('status', step=step, index=index)}")
+        self.__logger.debug("####")
+
+    def check_manifest(self):
+        self.__logger.info("Checking manifest before running.")
+        return self.__chip.check_manifest()
+
+    def run_core(self):
+        self.__record.record_python_packages()
+
+        task_scheduler = TaskScheduler(self.__chip, self.__tasks)
+        task_scheduler.run()
+        task_scheduler.check()
+
+    def run(self):
+        self.__run_setup()
+        self.configure_nodes()
+
+        # Check validity of setup
+        if not self.check_manifest():
+            raise RuntimeError("check_manifest() failed")
+
+        self.run_core()
+
+        # Store run in history
+        self.__chip.schema.record_history()
+
+        # Record final manifest
+        filepath = os.path.join(self.__chip.getworkdir(), f"{self.__chip.design}.pkg.json")
+        self.__chip.write_manifest(filepath)
+
+        send_messages.send(self.__chip, 'summary', None, None)
+
+    def __mark_pending(self, step, index):
+        if (step, index) not in self.__flow_runtime.get_nodes():
+            return
+
+        self.__record.set('status', NodeStatus.PENDING, step=step, index=index)
+        for next_step, next_index in self.__flow_runtime.get_nodes_starting_at(step, index):
+            if self.__record.get('status', step=next_step, index=next_index) == NodeStatus.SKIPPED:
+                continue
+
+            # Mark following steps as pending
+            self.__record.set('status', NodeStatus.PENDING, step=next_step, index=next_index)
+
+    def __run_setup(self):
+        self.__check_display()
+
+        org_jobname = self.__chip.get('option', 'jobname')
+        copy_prev_job = self.__increment_job_name()
+
+        # Create tasks
+        copy_from_nodes = set(self.__flow_load_runtime.get_nodes()).difference(
+            self.__flow_runtime.get_entry_nodes())
+        for step, index in self.__flow.get_nodes():
+            node_cls = SchedulerNode
+
+            node_scheduler = self.__chip.get('option', 'scheduler', 'name', step=step, index=index)
+            if node_scheduler == 'slurm':
+                node_cls = SlurmSchedulerNode
+            elif node_scheduler == 'docker':
+                node_cls = DockerSchedulerNode
+            self.__tasks[(step, index)] = node_cls(self.__chip, step, index)
+            if self.__flow.get(step, index, "tool") == "builtin":
+                self.__tasks[(step, index)].set_builtin()
+
+            if copy_prev_job and (step, index) in copy_from_nodes:
+                self.__tasks[(step, index)].copy_from(org_jobname)
+
+        if copy_prev_job:
+            # Copy collection directory
+            copy_from = self.__chip._getcollectdir(jobname=org_jobname)
+            copy_to = self.__chip._getcollectdir()
+            if os.path.exists(copy_from):
+                shutil.copytree(copy_from, copy_to,
+                                dirs_exist_ok=True,
+                                copy_function=utils.link_copy)
+
+        self.__clean_build_dir()
+        self.__reset_flow_nodes()
+
+    def __reset_flow_nodes(self):
+        # Reset record
+        for step, index in self.__flow.get_nodes():
+            self.__record.clear(step, index, keep=['remoteid', 'status', 'pythonpackage'])
+            self.__record.set('status', NodeStatus.PENDING, step=step, index=index)
+
+        # Reset metrics
+        for step, index in self.__flow.get_nodes():
+            self.__metrics.clear(step, index)
+
+    def __clean_build_dir(self):
+        if self.__record.get('remoteid'):
+            return
+
+        if self.__chip.get('option', 'clean') and not self.__chip.get('option', 'from'):
+            # If no step or nodes to start from were specified, the whole flow is being run
+            # start-to-finish. Delete the build dir to clear stale results.
+            cur_job_dir = self.__chip.getworkdir()
+            if os.path.isdir(cur_job_dir):
+                shutil.rmtree(cur_job_dir)
+
+    def configure_nodes(self):
+        from_nodes = []
+        extra_setup_nodes = {}
+
+        journal = Journal.access(self.__chip.schema)
+        journal.start()
+
+        self.__print_status("Start")
+
+        if self.__chip.get('option', 'clean'):
+            if self.__chip.get("option", "from"):
+                from_nodes = self.__flow_runtime.get_entry_nodes()
+            load_nodes = self.__flow.get_nodes()
+        else:
+            if self.__chip.get("option", "from"):
+                from_nodes = self.__flow_runtime.get_entry_nodes()
+            load_nodes = self.__flow_load_runtime.get_nodes()
+
+        # Collect previous run information
+        for step, index in self.__flow.get_nodes():
+            if (step, index) not in load_nodes:
+                # Node not marked for loading
+                continue
+            if (step, index) in from_nodes:
+                # Node will be run so no need to load
+                continue
+
+            manifest = os.path.join(self.__chip.getworkdir(step=step, index=index),
+                                    'outputs',
+                                    f'{self.__chip.design}.pkg.json')
+            if os.path.exists(manifest):
+                # ensure we setup these nodes again
+                try:
+                    extra_setup_nodes[(step, index)] = Schema.from_manifest(filepath=manifest)
+                except Exception:
+                    pass
+
+        # Setup tools for all nodes to run
+        for layer_nodes in self.__flow.get_execution_order():
+            for step, index in layer_nodes:
+                node_kept = self.__tasks[(step, index)].setup()
+                if not node_kept and (step, index) in extra_setup_nodes:
+                    # remove from previous node data
+                    del extra_setup_nodes[(step, index)]
+
+                if (step, index) in extra_setup_nodes:
+                    schema = extra_setup_nodes[(step, index)]
+                    node_status = None
+                    try:
+                        node_status = schema.get('record', 'status', step=step, index=index)
+                    except:  # noqa E722
+                        pass
+                    if node_status:
+                        # Forward old status
+                        self.__record.set('status', node_status, step=step, index=index)
+
+        self.__print_status("After setup")
+
+        # Check for modified information
+        for layer_nodes in self.__flow.get_execution_order():
+            for step, index in layer_nodes:
+                # Only look at successful nodes
+                if self.__record.get("status", step=step, index=index) != NodeStatus.SUCCESS:
+                    continue
+
+                if self.__tasks[(step, index)].requires_run():
+                    # This node must be run
+                    self.__mark_pending(step, index)
+                elif (step, index) in extra_setup_nodes:
+                    # import old information
+                    Journal.access(extra_setup_nodes[(step, index)]).replay(self.__chip.schema)
+
+        self.__print_status("After requires run")
+
+        # Ensure all nodes are marked as pending if needed
+        for layer_nodes in self.__flow_runtime.get_execution_order():
+            for step, index in layer_nodes:
+                status = self.__record.get("status", step=step, index=index)
+                if NodeStatus.is_waiting(status) or NodeStatus.is_error(status):
+                    self.__mark_pending(step, index)
+
+        self.__print_status("After ensure")
+
+        self.__chip.write_manifest(os.path.join(self.__chip.getworkdir(),
+                                                f"{self.__chip.get('design')}.pkg.json"))
+        journal.stop()
+
+        # Clean nodes marked pending
+        for step, index in self.__flow_runtime.get_nodes():
+            if NodeStatus.is_waiting(self.__record.get('status', step=step, index=index)):
+                self.__tasks[(step, index)].clean_directory()
+
+    def __check_display(self):
+        '''
+        Automatically disable display for Linux systems without desktop environment
+        '''
+
+        if not self.__chip.get('option', 'nodisplay') and sys.platform == 'linux' \
+                and 'DISPLAY' not in os.environ and 'WAYLAND_DISPLAY' not in os.environ:
+            self.__logger.warning('Environment variable $DISPLAY or $WAYLAND_DISPLAY not set')
+            self.__logger.warning("Setting [option,nodisplay] to True")
+            self.__chip.set('option', 'nodisplay', True)
+
+    def __increment_job_name(self):
+        '''
+        Auto-update jobname if [option,jobincr] is True
+        Do this before initializing logger so that it picks up correct jobname
+        '''
+
+        if not self.__chip.get('option', 'clean'):
+            return False
+        if not self.__chip.get('option', 'jobincr'):
+            return False
+
+        workdir = self.__chip.getworkdir()
+        if os.path.isdir(workdir):
+            # Strip off digits following jobname, if any
+            stem = self.__chip.get('option', 'jobname').rstrip('0123456789')
+
+            dir_check = re.compile(fr'{stem}(\d+)')
+
+            jobid = 0
+            for job in os.listdir(os.path.dirname(workdir)):
+                m = dir_check.match(job)
+                if m:
+                    jobid = max(jobid, int(m.group(1)))
+            self.__chip.set('option', 'jobname', f'{stem}{jobid + 1}')
+            return True
+        return False
```