siliconcompiler 0.33.2__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- siliconcompiler/__init__.py +2 -0
- siliconcompiler/_metadata.py +1 -1
- siliconcompiler/apps/_common.py +1 -1
- siliconcompiler/apps/sc.py +1 -1
- siliconcompiler/apps/sc_issue.py +6 -4
- siliconcompiler/apps/sc_remote.py +3 -20
- siliconcompiler/apps/sc_show.py +2 -2
- siliconcompiler/apps/utils/replay.py +4 -4
- siliconcompiler/checklist.py +202 -1
- siliconcompiler/core.py +62 -293
- siliconcompiler/data/templates/email/general.j2 +3 -3
- siliconcompiler/data/templates/email/summary.j2 +1 -1
- siliconcompiler/data/templates/issue/README.txt +1 -1
- siliconcompiler/data/templates/report/sc_report.j2 +7 -7
- siliconcompiler/dependencyschema.py +392 -0
- siliconcompiler/design.py +758 -0
- siliconcompiler/flowgraph.py +79 -13
- siliconcompiler/optimizer/vizier.py +2 -2
- siliconcompiler/package/__init__.py +383 -223
- siliconcompiler/package/git.py +75 -77
- siliconcompiler/package/github.py +70 -97
- siliconcompiler/package/https.py +77 -93
- siliconcompiler/packageschema.py +260 -0
- siliconcompiler/pdk.py +5 -5
- siliconcompiler/remote/client.py +33 -15
- siliconcompiler/remote/server.py +2 -2
- siliconcompiler/report/dashboard/cli/__init__.py +6 -6
- siliconcompiler/report/dashboard/cli/board.py +4 -4
- siliconcompiler/report/dashboard/web/components/__init__.py +5 -5
- siliconcompiler/report/dashboard/web/components/flowgraph.py +4 -4
- siliconcompiler/report/dashboard/web/components/graph.py +2 -2
- siliconcompiler/report/dashboard/web/state.py +1 -1
- siliconcompiler/report/dashboard/web/utils/__init__.py +5 -5
- siliconcompiler/report/html_report.py +1 -1
- siliconcompiler/report/report.py +4 -4
- siliconcompiler/report/summary_table.py +2 -2
- siliconcompiler/report/utils.py +5 -5
- siliconcompiler/scheduler/__init__.py +3 -1382
- siliconcompiler/scheduler/docker.py +263 -0
- siliconcompiler/scheduler/run_node.py +10 -21
- siliconcompiler/scheduler/scheduler.py +311 -0
- siliconcompiler/scheduler/schedulernode.py +944 -0
- siliconcompiler/scheduler/send_messages.py +3 -3
- siliconcompiler/scheduler/slurm.py +149 -163
- siliconcompiler/scheduler/taskscheduler.py +45 -57
- siliconcompiler/schema/__init__.py +3 -3
- siliconcompiler/schema/baseschema.py +234 -11
- siliconcompiler/schema/editableschema.py +4 -0
- siliconcompiler/schema/journal.py +210 -0
- siliconcompiler/schema/namedschema.py +55 -2
- siliconcompiler/schema/parameter.py +14 -1
- siliconcompiler/schema/parametervalue.py +1 -34
- siliconcompiler/schema/schema_cfg.py +210 -349
- siliconcompiler/tool.py +412 -148
- siliconcompiler/tools/__init__.py +2 -0
- siliconcompiler/tools/builtin/_common.py +5 -5
- siliconcompiler/tools/builtin/concatenate.py +7 -7
- siliconcompiler/tools/builtin/minimum.py +4 -4
- siliconcompiler/tools/builtin/mux.py +4 -4
- siliconcompiler/tools/builtin/nop.py +4 -4
- siliconcompiler/tools/builtin/verify.py +8 -9
- siliconcompiler/tools/execute/exec_input.py +1 -1
- siliconcompiler/tools/genfasm/genfasm.py +1 -6
- siliconcompiler/tools/openroad/_apr.py +5 -1
- siliconcompiler/tools/openroad/antenna_repair.py +1 -1
- siliconcompiler/tools/openroad/macro_placement.py +1 -1
- siliconcompiler/tools/openroad/power_grid.py +1 -1
- siliconcompiler/tools/openroad/scripts/common/procs.tcl +32 -25
- siliconcompiler/tools/opensta/timing.py +26 -3
- siliconcompiler/tools/slang/__init__.py +2 -2
- siliconcompiler/tools/surfer/__init__.py +0 -0
- siliconcompiler/tools/surfer/show.py +53 -0
- siliconcompiler/tools/surfer/surfer.py +30 -0
- siliconcompiler/tools/vpr/route.py +82 -0
- siliconcompiler/tools/vpr/vpr.py +23 -6
- siliconcompiler/tools/yosys/__init__.py +1 -1
- siliconcompiler/tools/yosys/scripts/procs.tcl +143 -0
- siliconcompiler/tools/yosys/{sc_synth_asic.tcl → scripts/sc_synth_asic.tcl} +4 -0
- siliconcompiler/tools/yosys/{sc_synth_fpga.tcl → scripts/sc_synth_fpga.tcl} +24 -77
- siliconcompiler/tools/yosys/syn_fpga.py +14 -0
- siliconcompiler/toolscripts/_tools.json +9 -13
- siliconcompiler/toolscripts/rhel9/install-vpr.sh +0 -2
- siliconcompiler/toolscripts/ubuntu22/install-surfer.sh +33 -0
- siliconcompiler/toolscripts/ubuntu24/install-surfer.sh +33 -0
- siliconcompiler/utils/__init__.py +4 -24
- siliconcompiler/utils/flowgraph.py +29 -28
- siliconcompiler/utils/issue.py +23 -29
- siliconcompiler/utils/logging.py +37 -7
- siliconcompiler/utils/showtools.py +6 -1
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/METADATA +16 -25
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/RECORD +98 -91
- siliconcompiler/scheduler/docker_runner.py +0 -254
- siliconcompiler/schema/journalingschema.py +0 -242
- siliconcompiler/tools/yosys/procs.tcl +0 -71
- siliconcompiler/toolscripts/rhel9/install-yosys-parmys.sh +0 -68
- siliconcompiler/toolscripts/ubuntu22/install-yosys-parmys.sh +0 -68
- siliconcompiler/toolscripts/ubuntu24/install-yosys-parmys.sh +0 -68
- /siliconcompiler/tools/yosys/{sc_lec.tcl → scripts/sc_lec.tcl} +0 -0
- /siliconcompiler/tools/yosys/{sc_screenshot.tcl → scripts/sc_screenshot.tcl} +0 -0
- /siliconcompiler/tools/yosys/{syn_strategies.tcl → scripts/syn_strategies.tcl} +0 -0
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/WHEEL +0 -0
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/entry_points.txt +0 -0
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/licenses/LICENSE +0 -0
- {siliconcompiler-0.33.2.dist-info → siliconcompiler-0.34.1.dist-info}/top_level.txt +0 -0
siliconcompiler/scheduler/send_messages.py

@@ -66,7 +66,7 @@ def send(chip, msg_type, step, index):
     msg = MIMEMultipart()

     if step and index:
-        subject = f'SiliconCompiler : {chip.design} | {jobname} | {step}{index} | {msg_type}'
+        subject = f'SiliconCompiler : {chip.design} | {jobname} | {step} | {index} | {msg_type}'
     else:
         subject = f'SiliconCompiler : {chip.design} | {jobname} | {msg_type}'

@@ -92,7 +92,7 @@ def send(chip, msg_type, step, index):
         msg.attach(img_attach)

     runtime = RuntimeFlowgraph(
-        chip.
+        chip.get("flowgraph", flow, field='schema'),
         from_steps=chip.get('option', 'from'),
         to_steps=chip.get('option', 'to'),
         prune_nodes=chip.get('option', 'prune'))

@@ -110,7 +110,7 @@ def send(chip, msg_type, step, index):
             metric_keys=metrics_to_show)
     else:
         # Attach logs
-        for log in (f'sc_{step}{index}.log', f'{step}.log'):
+        for log in (f'sc_{step}_{index}.log', f'{step}.log'):
             log_file = f'{chip.getworkdir(step=step, index=index)}/{log}'
             if os.path.exists(log_file):
                 with sc_open(log_file) as f:
siliconcompiler/scheduler/slurm.py

@@ -5,171 +5,157 @@ import stat
 import uuid
 import json
 import shutil
-from siliconcompiler import utils, SiliconCompilerError
-from siliconcompiler.package import get_cache_path
-from siliconcompiler.flowgraph import RuntimeFlowgraph
-
-# Full list of Slurm states, split into 'active' and 'inactive' categories.
-# Many of these do not apply to a minimal configuration, but we'll track them all.
-# https://slurm.schedmd.com/squeue.html#SECTION_JOB-STATE-CODES
-SLURM_ACTIVE_STATES = [
-    'RUNNING',
-    'PENDING',
-    'CONFIGURING',
-    'COMPLETING',
-    'SIGNALING',
-    'STAGE_OUT',
-    'RESIZING',
-    'REQUEUED',
-]
-SLURM_INACTIVE_STATES = [
-    'BOOT_FAIL',
-    'CANCELLED',
-    'COMPLETED',
-    'DEADLINE',
-    'FAILED',
-    'NODE_FAIL',
-    'OUT_OF_MEMORY',
-    'PREEMPTED',
-    'RESV_DEL_HOLD',
-    'REQUEUE_FED',
-    'REQUEUE_HOLD',
-    'REVOKED',
-    'SPECIAL_EXIT',
-    'STOPPED',
-    'SUSPENDED',
-    'TIMEOUT',
-]
-
-
-###########################################################################
-def get_configuration_directory(chip):
-    '''
-    Helper function to get the configuration directory for the scheduler
-    '''
-
-    return f'{chip.getworkdir()}/configs'
-
-
-def init(chip):
-    if os.path.exists(chip._getcollectdir()):
-        # nothing to do
-        return
-
-    collect = False
-    flow = chip.get('option', 'flow')
-    entry_nodes = chip.schema.get("flowgraph", flow, field="schema").get_entry_nodes()
-
-    runtime = RuntimeFlowgraph(
-        chip.schema.get("flowgraph", flow, field='schema'),
-        from_steps=chip.get('option', 'from'),
-        to_steps=chip.get('option', 'to'),
-        prune_nodes=chip.get('option', 'prune'))
-
-    for (step, index) in runtime.get_nodes():
-        if (step, index) in entry_nodes:
-            collect = True
-
-    if collect:
-        chip.collect()
-
-
-###########################################################################
-def _defernode(chip, step, index, replay):
-    '''
-    Helper method to run an individual step on a slurm cluster.
-
-    Blocks until the compute node
-    finishes processing this step, and it sets the active/error bits.
-    '''
-
-    # Determine which HPC job scheduler being used.
-    scheduler_type = chip.get('option', 'scheduler', 'name', step=step, index=index)
-
-    if scheduler_type != 'slurm':
-        raise ValueError(f'{scheduler_type} is not a supported scheduler')
-
-    if not check_slurm():
-        raise SiliconCompilerError('slurm is not available or installed on this machine', chip=chip)
-
-    # Determine which cluster parititon to use. (Default value can be overridden on per-step basis)
-    partition = chip.get('option', 'scheduler', 'queue', step=step, index=index)
-    if not partition:
-        partition = _get_slurm_partition()
-
-    # Get the temporary UID associated with this job run.
-    job_hash = chip.get('record', 'remoteid')
-    if not job_hash:
-        # Generate a new uuid since it was not set
-        job_hash = uuid.uuid4().hex
-
-    job_name = f'{job_hash}_{step}{index}'
-
-    # Write out the current schema for the compute node to pick up.
-    cfg_dir = get_configuration_directory(chip)
-    cfg_file = f'{cfg_dir}/{step}{index}.json'
-    log_file = f'{cfg_dir}/{step}{index}.log'
-    script_file = f'{cfg_dir}/{step}{index}.sh'
-    os.makedirs(cfg_dir, exist_ok=True)
-
-    chip.set('option', 'scheduler', 'name', None, step=step, index=index)
-    chip.write_manifest(cfg_file)
-
-    # Allow user-defined compute node execution script if it already exists on the filesystem.
-    # Otherwise, create a minimal script to run the task using the SiliconCompiler CLI.
-    if not os.path.isfile(script_file):
-        with open(script_file, 'w') as sf:
-            sf.write(utils.get_file_template('slurm/run.sh').render(
-                cfg_file=shlex.quote(cfg_file),
-                build_dir=shlex.quote(chip.get("option", "builddir")),
-                step=shlex.quote(step),
-                index=shlex.quote(index),
-                cachedir=shlex.quote(get_cache_path(chip))
-            ))
-
-    # This is Python for: `chmod +x [script_path]`
-    os.chmod(script_file,
-             os.stat(script_file).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
-
-    schedule_cmd = ['srun',
-                    '--exclusive',
-                    '--partition', partition,
-                    '--chdir', chip.cwd,
-                    '--job-name', job_name,
-                    '--output', log_file]
-
-    # Only delay the starting time if the 'defer' Schema option is specified.
-    defer_time = chip.get('option', 'scheduler', 'defer', step=step, index=index)
-    if defer_time:
-        schedule_cmd.extend(['--begin', defer_time])
-
-    schedule_cmd.append(script_file)

-
-    # TODO: output should be fed to log, and stdout if quiet = False
-    step_result = subprocess.Popen(schedule_cmd,
-                                   stdout=subprocess.PIPE,
-                                   stderr=subprocess.STDOUT)
+import os.path

-
-
-
-
-
-
-def _get_slurm_partition():
-    partitions = subprocess.run(['sinfo', '--json'],
-                                stdout=subprocess.PIPE,
-                                stderr=subprocess.STDOUT)
-
-    if partitions.returncode != 0:
-        raise RuntimeError('Unable to determine partitions in slurm')
-
-    sinfo = json.loads(partitions.stdout.decode())
+from siliconcompiler import utils
+from siliconcompiler.package import RemoteResolver
+from siliconcompiler.flowgraph import RuntimeFlowgraph
+from siliconcompiler.scheduler.schedulernode import SchedulerNode

-    # Return the first listed partition
-    return sinfo['nodes'][0]['partitions'][0]

+class SlurmSchedulerNode(SchedulerNode):
+    def __init__(self, chip, step, index, replay=False):
+        super().__init__(chip, step, index, replay=replay)

-
-
+        # Get the temporary UID associated with this job run.
+        self.__job_hash = chip.get('record', 'remoteid')
+        if not self.__job_hash:
+            # Generate a new uuid since it was not set
+            self.__job_hash = uuid.uuid4().hex
+
+    @property
+    def jobhash(self):
+        return self.__job_hash
+
+    @staticmethod
+    def init(chip):
+        if os.path.exists(chip._getcollectdir()):
+            # nothing to do
+            return
+
+        collect = False
+        flow = chip.get('option', 'flow')
+        entry_nodes = chip.get("flowgraph", flow, field="schema").get_entry_nodes()
+
+        runtime = RuntimeFlowgraph(
+            chip.get("flowgraph", flow, field='schema'),
+            from_steps=chip.get('option', 'from'),
+            to_steps=chip.get('option', 'to'),
+            prune_nodes=chip.get('option', 'prune'))
+
+        for (step, index) in runtime.get_nodes():
+            if (step, index) in entry_nodes:
+                collect = True
+
+        if collect:
+            chip.collect()
+
+    @property
+    def is_local(self):
+        return False
+
+    @staticmethod
+    def get_configuration_directory(chip):
+        '''
+        Helper function to get the configuration directory for the scheduler
+        '''
+
+        return os.path.join(chip.getworkdir(), 'sc_configs')
+
+    @staticmethod
+    def get_job_name(jobhash, step, index):
+        return f'{jobhash}_{step}_{index}'
+
+    @staticmethod
+    def get_runtime_file_name(jobhash, step, index, ext):
+        return f"{SlurmSchedulerNode.get_job_name(jobhash, step, index)}.{ext}"
+
+    @staticmethod
+    def get_slurm_partition():
+        partitions = subprocess.run(['sinfo', '--json'],
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.STDOUT)
+
+        if partitions.returncode != 0:
+            raise RuntimeError('Unable to determine partitions in slurm')
+
+        sinfo = json.loads(partitions.stdout.decode())
+
+        # Return the first listed partition
+        return sinfo['nodes'][0]['partitions'][0]
+
+    def run(self):
+        '''
+        Helper method to run an individual step on a slurm cluster.
+
+        Blocks until the compute node
+        finishes processing this step, and it sets the active/error bits.
+        '''
+
+        self._init_run_logger()
+
+        if shutil.which('sinfo') is None:
+            raise RuntimeError('slurm is not available or installed on this machine')
+
+        # Determine which cluster parititon to use.
+        partition = self.chip.get('option', 'scheduler', 'queue', step=self.step, index=self.index)
+        if not partition:
+            partition = SlurmSchedulerNode.get_slurm_partition()
+
+        # Write out the current schema for the compute node to pick up.
+        cfg_dir = SlurmSchedulerNode.get_configuration_directory(self.chip)
+        os.makedirs(cfg_dir, exist_ok=True)
+
+        cfg_file = os.path.join(cfg_dir, SlurmSchedulerNode.get_runtime_file_name(
+            self.__job_hash, self.step, self.index, "pkg.json"))
+        log_file = os.path.join(cfg_dir, SlurmSchedulerNode.get_runtime_file_name(
+            self.__job_hash, self.step, self.index, "log"))
+        script_file = os.path.join(cfg_dir, SlurmSchedulerNode.get_runtime_file_name(
+            self.__job_hash, self.step, self.index, "sh"))
+
+        # Remove scheduler as this is now a local run
+        self.chip.set('option', 'scheduler', 'name', None, step=self.step, index=self.index)
+        self.chip.write_manifest(cfg_file)
+
+        # Allow user-defined compute node execution script if it already exists on the filesystem.
+        # Otherwise, create a minimal script to run the task using the SiliconCompiler CLI.
+        if not os.path.isfile(script_file):
+            with open(script_file, 'w') as sf:
+                sf.write(utils.get_file_template('slurm/run.sh').render(
+                    cfg_file=shlex.quote(cfg_file),
+                    build_dir=shlex.quote(self.chip.get("option", "builddir")),
+                    step=shlex.quote(self.step),
+                    index=shlex.quote(self.index),
+                    cachedir=shlex.quote(str(RemoteResolver.determine_cache_dir(self.chip)))
+                ))
+
+        # This is Python for: `chmod +x [script_path]`
+        os.chmod(script_file,
+                 os.stat(script_file).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+        schedule_cmd = ['srun',
+                        '--exclusive',
+                        '--partition', partition,
+                        '--chdir', self.chip.cwd,
+                        '--job-name', SlurmSchedulerNode.get_job_name(self.__job_hash,
+                                                                      self.step, self.index),
+                        '--output', log_file]
+
+        # Only delay the starting time if the 'defer' Schema option is specified.
+        defer_time = self.chip.get('option', 'scheduler', 'defer', step=self.step, index=self.index)
+        if defer_time:
+            schedule_cmd.extend(['--begin', defer_time])
+
+        schedule_cmd.append(script_file)
+
+        # Run the 'srun' command, and track its output.
+        # TODO: output should be fed to log, and stdout if quiet = False
+        step_result = subprocess.Popen(schedule_cmd,
+                                       stdout=subprocess.PIPE,
+                                       stderr=subprocess.STDOUT)
+
+        # Wait for the subprocess call to complete. It should already be done,
+        # as it has closed its output stream. But if we don't call '.wait()',
+        # the '.returncode' value will not be set correctly.
+        step_result.wait()
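For orientation, a minimal sketch of how the rewritten slurm support might be exercised on its own, assuming a fully configured `chip` object; in practice these nodes are created and run by the scheduler machinery, and the step/index names below are illustrative:

    from siliconcompiler.scheduler.slurm import SlurmSchedulerNode

    # Collect entry-node inputs once per run (mirrors what the scheduler does).
    SlurmSchedulerNode.init(chip)

    # Wrap one flowgraph node (hypothetical step 'syn', index '0') and submit it
    # through srun; run() blocks until the compute node finishes.
    node = SlurmSchedulerNode(chip, 'syn', '0')
    node.run()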
siliconcompiler/scheduler/taskscheduler.py

@@ -11,11 +11,8 @@ from siliconcompiler import SiliconCompilerError
 from siliconcompiler import utils
 from siliconcompiler.flowgraph import RuntimeFlowgraph

-from siliconcompiler.schema import
+from siliconcompiler.schema import Journal

-from siliconcompiler.scheduler import slurm
-from siliconcompiler.scheduler import docker_runner
-from siliconcompiler.tools._common import get_tool_task
 from siliconcompiler.utils.logging import SCBlankLoggerFormatter


@@ -33,9 +30,10 @@ class TaskScheduler:
             raise ValueError(f"{hook} is not a valid callback")
         TaskScheduler.__callbacks[hook] = func

-    def __init__(self, chip):
+    def __init__(self, chip, tasks):
         self.__chip = chip
         self.__logger = self.__chip.logger
+        self.__logger_console_handler = self.__chip._logger_console
         self.__schema = self.__chip.schema
         self.__flow = self.__schema.get("flowgraph", self.__chip.get('option', 'flow'),
                                         field="schema")

@@ -50,17 +48,21 @@ class TaskScheduler:
         # clip max parallel jobs to 1 <= jobs <= max_cores
         self.__max_parallel_run = max(1, min(self.__max_parallel_run, self.__max_cores))

+        self.__runtime_flow = RuntimeFlowgraph(
+            self.__flow,
+            from_steps=self.__chip.get('option', 'from'),
+            to_steps=self.__chip.get('option', 'to'),
+            prune_nodes=self.__chip.get('option', 'prune'))
+
         self.__log_queue = multiprocessing.Queue(-1)

         self.__nodes = {}
         self.__startTimes = {}
         self.__dwellTime = 0.1

-        self.__create_nodes()
-
-    def __create_nodes(self):
-        from siliconcompiler.scheduler import _executenode, _runtask
+        self.__create_nodes(tasks)

+    def __create_nodes(self, tasks):
         runtime = RuntimeFlowgraph(
             self.__flow,
             from_steps=set([step for step, _ in self.__flow.get_entry_nodes()]),

@@ -68,63 +70,34 @@ class TaskScheduler:

         init_funcs = set()

-
-            self.__flow,
-            from_steps=self.__chip.get('option', 'from'),
-            to_steps=self.__chip.get('option', 'to'),
-            prune_nodes=self.__chip.get('option', 'prune'))
-
-        for step, index in runtime_flow.get_nodes():
+        for step, index in self.__runtime_flow.get_nodes():
             if self.__record.get('status', step=step, index=index) != NodeStatus.PENDING:
                 continue

-
-
-                step=step, index=index)
+            with tasks[(step, index)].runtime():
+                threads = tasks[(step, index)].threads
             if not threads:
                 threads = self.__max_threads
             threads = max(1, min(threads, self.__max_threads))

             task = {
-                "name": f"{step}{index}",
+                "name": f"{step}/{index}",
                 "inputs": runtime.get_node_inputs(step, index, record=self.__record),
                 "proc": None,
-                "child_pipe": None,
                 "parent_pipe": None,
-                "local": False,
-                "tool": tool_name,
-                "task": task_name,
                 "threads": threads,
                 "running": False,
                 "manifest": os.path.join(self.__chip.getworkdir(step=step, index=index),
                                          'outputs',
-                                         f'{self.__chip.design}.pkg.json')
+                                         f'{self.__chip.design}.pkg.json'),
+                "node": tasks[(step, index)]
             }

-
-
-            node_scheduler = self.__chip.get('option', 'scheduler', 'name', step=step, index=index)
-            if node_scheduler == 'slurm':
-                # Defer job to compute node
-                # If the job is configured to run on a cluster, collect the schema
-                # and send it to a compute node for deferred execution.
-                init_funcs.add(slurm.init)
-                exec_func = slurm._defernode
-            elif node_scheduler == 'docker':
-                # Run job in docker
-                init_funcs.add(docker_runner.init)
-                exec_func = docker_runner.run
-                task["local"] = True
-            else:
-                task["local"] = True
-
-            task["parent_pipe"], task["child_pipe"] = multiprocessing.Pipe()
-            task["proc"] = multiprocessing.Process(
-                target=_runtask,
-                args=(self.__chip, self.__flow.name(), step, index, exec_func),
-                kwargs={"pipe": task["child_pipe"],
-                        "queue": self.__log_queue})
+            task["parent_pipe"], pipe = multiprocessing.Pipe()
+            task["node"].set_queue(pipe, self.__log_queue)

+            task["proc"] = multiprocessing.Process(target=task["node"].run)
+            init_funcs.add(task["node"].init)
             self.__nodes[(step, index)] = task

         # Call preprocessing for schedulers

@@ -136,9 +109,9 @@ class TaskScheduler:
         multiprocessing.freeze_support()

         # Handle logs across threads
-        log_listener = QueueListener(self.__log_queue, self.
-        console_format = self.
-        self.
+        log_listener = QueueListener(self.__log_queue, self.__logger_console_handler)
+        console_format = self.__logger_console_handler.formatter
+        self.__logger_console_handler.setFormatter(SCBlankLoggerFormatter())
         log_listener.start()

         # Update dashboard before run begins

@@ -158,7 +131,7 @@ class TaskScheduler:

         # Cleanup logger
         log_listener.stop()
-        self.
+        self.__logger_console_handler.setFormatter(console_format)

     def __run_loop(self):
         self.__startTimes = {None: time.time()}

@@ -217,13 +190,17 @@ class TaskScheduler:
             self.__logger.debug(f'{info["name"]} is complete merging: {manifest}')

             if os.path.exists(manifest):
-
+                Journal.replay_file(self.__schema, manifest)
+                # TODO: once tool is fixed this can go away
+                self.__schema.unset("arg", "step")
+                self.__schema.unset("arg", "index")

             if info["parent_pipe"] and info["parent_pipe"].poll(1):
                 try:
                     packages = info["parent_pipe"].recv()
                     if isinstance(packages, dict):
-
+                        for package, path in packages.items():
+                            self.__chip.get("package", field="schema")._set_cache(package, path)
                 except: # noqa E722
                     pass

@@ -249,7 +226,7 @@ class TaskScheduler:
     def __allow_start(self, node):
         info = self.__nodes[node]

-        if not info["
+        if not info["node"].is_local:
             # using a different scheduler, so allow
             return True

@@ -286,7 +263,7 @@ class TaskScheduler:
             if not NodeStatus.is_done(in_status):
                 ready = False
                 break
-            if NodeStatus.is_error(in_status) and info["
+            if NodeStatus.is_error(in_status) and not info["node"].is_builtin:
                 # Fail if any dependency failed for non-builtin task
                 self.__record.set("status", NodeStatus.ERROR, step=step, index=index)

@@ -295,7 +272,7 @@ class TaskScheduler:
             any_success = any([status == NodeStatus.SUCCESS for status in inputs])
         else:
             any_success = True
-        if ready and info["
+        if ready and info["node"].is_builtin and not any_success:
             self.__record.set("status", NodeStatus.ERROR, step=step, index=index)

         if self.__record.get('status', step=step, index=index) == NodeStatus.ERROR:

@@ -318,3 +295,14 @@ class TaskScheduler:
             info["proc"].start()

         return changed
+
+    def check(self):
+        exit_steps = set([step for step, _ in self.__runtime_flow.get_exit_nodes()])
+        completed_steps = set([step for step, _ in
+                               self.__runtime_flow.get_completed_nodes(record=self.__record)])
+
+        unreached = set(exit_steps).difference(completed_steps)
+
+        if unreached:
+            raise RuntimeError(
+                f'These final steps could not be reached: {", ".join(sorted(unreached))}')
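The constructor change above means callers now hand TaskScheduler a prebuilt mapping from (step, index) to scheduler node objects instead of letting it build them internally. A rough sketch of the new contract, assuming plain SchedulerNode instances and a hypothetical `pending_nodes` iterable; the real wiring presumably lives in the new siliconcompiler/scheduler/scheduler.py module, so the snippet is only illustrative:

    from siliconcompiler.scheduler.schedulernode import SchedulerNode
    from siliconcompiler.scheduler.taskscheduler import TaskScheduler

    # Hypothetical wiring: one node object per pending flowgraph node.
    tasks = {(step, index): SchedulerNode(chip, step, index)
             for step, index in pending_nodes}

    scheduler = TaskScheduler(chip, tasks)  # was TaskScheduler(chip) in 0.33.x
    # ... the scheduler's run loop executes the nodes ...
    scheduler.check()  # raises RuntimeError if any exit step was never reached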
siliconcompiler/schema/__init__.py

@@ -1,9 +1,9 @@
 from .parameter import Parameter, Scope, PerNode
+from .journal import Journal
 from .safeschema import SafeSchema
 from .editableschema import EditableSchema
 from .baseschema import BaseSchema
 from .cmdlineschema import CommandLineSchema
-from .journalingschema import JournalingSchema
 from .namedschema import NamedSchema
 from .packageschema import PackageSchema

@@ -15,10 +15,10 @@ __all__ = [
     "SafeSchema",
     "EditableSchema",
     "CommandLineSchema",
-    "JournalingSchema",
     "NamedSchema",
     "PackageSchema",
     "Parameter",
     "Scope",
-    "PerNode"
+    "PerNode",
+    "Journal"
 ]
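With JournalingSchema removed, journaling is exposed through the new Journal class; the taskscheduler hunk above replays a finished node's journaled manifest into the live schema. A minimal sketch of that call, assuming an in-memory schema object and an illustrative manifest path:

    from siliconcompiler.schema import Journal

    # Replay journaled schema edits recorded in a node's output manifest
    # into `schema` (the path shown is illustrative of the layout used
    # in taskscheduler.py).
    manifest = 'build/<design>/job0/syn/0/outputs/<design>.pkg.json'
    Journal.replay_file(schema, manifest)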