siliconcompiler 0.35.2__py3-none-any.whl → 0.35.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siliconcompiler/_metadata.py +1 -1
- siliconcompiler/apps/sc_issue.py +18 -2
- siliconcompiler/apps/smake.py +106 -100
- siliconcompiler/checklist.py +2 -1
- siliconcompiler/constraints/asic_component.py +49 -11
- siliconcompiler/constraints/asic_floorplan.py +23 -21
- siliconcompiler/constraints/asic_pins.py +55 -17
- siliconcompiler/constraints/asic_timing.py +53 -22
- siliconcompiler/constraints/fpga_timing.py +5 -6
- siliconcompiler/data/templates/replay/replay.sh.j2 +27 -14
- siliconcompiler/flowgraph.py +418 -129
- siliconcompiler/library.py +5 -4
- siliconcompiler/package/__init__.py +17 -6
- siliconcompiler/package/https.py +10 -5
- siliconcompiler/project.py +92 -33
- siliconcompiler/remote/client.py +17 -6
- siliconcompiler/scheduler/docker.py +24 -25
- siliconcompiler/scheduler/scheduler.py +284 -121
- siliconcompiler/scheduler/schedulernode.py +196 -90
- siliconcompiler/scheduler/slurm.py +113 -29
- siliconcompiler/scheduler/taskscheduler.py +0 -7
- siliconcompiler/schema/__init__.py +3 -2
- siliconcompiler/schema/_metadata.py +1 -1
- siliconcompiler/schema/baseschema.py +205 -93
- siliconcompiler/schema/editableschema.py +29 -0
- siliconcompiler/schema/namedschema.py +21 -13
- siliconcompiler/schema/parametervalue.py +14 -2
- siliconcompiler/schema/safeschema.py +18 -7
- siliconcompiler/schema_support/dependencyschema.py +4 -3
- siliconcompiler/schema_support/option.py +82 -1
- siliconcompiler/schema_support/pathschema.py +14 -15
- siliconcompiler/schema_support/record.py +5 -4
- siliconcompiler/targets/asap7_demo.py +4 -1
- siliconcompiler/tool.py +56 -29
- siliconcompiler/tools/builtin/__init__.py +2 -0
- siliconcompiler/tools/builtin/filter.py +8 -1
- siliconcompiler/tools/builtin/importfiles.py +2 -0
- siliconcompiler/tools/klayout/__init__.py +3 -0
- siliconcompiler/tools/klayout/scripts/klayout_convert_drc_db.py +1 -0
- siliconcompiler/tools/klayout/scripts/klayout_export.py +1 -0
- siliconcompiler/tools/klayout/scripts/klayout_operations.py +1 -0
- siliconcompiler/tools/klayout/scripts/klayout_show.py +2 -1
- siliconcompiler/tools/klayout/scripts/klayout_utils.py +3 -4
- siliconcompiler/tools/klayout/show.py +17 -5
- siliconcompiler/tools/openroad/__init__.py +27 -1
- siliconcompiler/tools/openroad/_apr.py +81 -4
- siliconcompiler/tools/openroad/clock_tree_synthesis.py +1 -0
- siliconcompiler/tools/openroad/global_placement.py +1 -0
- siliconcompiler/tools/openroad/init_floorplan.py +116 -7
- siliconcompiler/tools/openroad/power_grid_analysis.py +174 -0
- siliconcompiler/tools/openroad/repair_design.py +1 -0
- siliconcompiler/tools/openroad/repair_timing.py +1 -0
- siliconcompiler/tools/openroad/scripts/apr/preamble.tcl +1 -1
- siliconcompiler/tools/openroad/scripts/apr/sc_init_floorplan.tcl +42 -4
- siliconcompiler/tools/openroad/scripts/apr/sc_irdrop.tcl +146 -0
- siliconcompiler/tools/openroad/scripts/apr/sc_repair_design.tcl +1 -1
- siliconcompiler/tools/openroad/scripts/apr/sc_write_data.tcl +4 -6
- siliconcompiler/tools/openroad/scripts/common/procs.tcl +1 -1
- siliconcompiler/tools/openroad/scripts/common/reports.tcl +1 -1
- siliconcompiler/tools/openroad/scripts/rcx/sc_rcx_bench.tcl +2 -4
- siliconcompiler/tools/opensta/__init__.py +1 -1
- siliconcompiler/tools/opensta/scripts/sc_timing.tcl +17 -12
- siliconcompiler/tools/vivado/scripts/sc_bitstream.tcl +11 -0
- siliconcompiler/tools/vivado/scripts/sc_place.tcl +11 -0
- siliconcompiler/tools/vivado/scripts/sc_route.tcl +11 -0
- siliconcompiler/tools/vivado/scripts/sc_syn_fpga.tcl +10 -0
- siliconcompiler/tools/vpr/__init__.py +28 -0
- siliconcompiler/tools/yosys/prepareLib.py +7 -2
- siliconcompiler/tools/yosys/scripts/sc_screenshot.tcl +1 -1
- siliconcompiler/tools/yosys/scripts/sc_synth_asic.tcl +40 -4
- siliconcompiler/tools/yosys/scripts/sc_synth_fpga.tcl +15 -5
- siliconcompiler/tools/yosys/syn_asic.py +62 -2
- siliconcompiler/tools/yosys/syn_fpga.py +8 -0
- siliconcompiler/toolscripts/_tools.json +6 -6
- siliconcompiler/utils/__init__.py +243 -51
- siliconcompiler/utils/curation.py +89 -56
- siliconcompiler/utils/issue.py +6 -1
- siliconcompiler/utils/multiprocessing.py +35 -2
- siliconcompiler/utils/paths.py +21 -0
- siliconcompiler/utils/settings.py +141 -0
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/METADATA +5 -4
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/RECORD +86 -83
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/WHEEL +0 -0
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/entry_points.txt +0 -0
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/licenses/LICENSE +0 -0
- {siliconcompiler-0.35.2.dist-info → siliconcompiler-0.35.4.dist-info}/top_level.txt +0 -0
siliconcompiler/scheduler/scheduler.py:

@@ -1,5 +1,6 @@
 import io
 import logging
+import multiprocessing
 import os
 import re
 import shutil
@@ -9,7 +10,9 @@ import traceback
 
 import os.path
 
-from
+from datetime import datetime
+
+from typing import Union, Dict, Optional, Tuple, List, Set, TYPE_CHECKING
 
 from siliconcompiler import NodeStatus
 from siliconcompiler.schema import Journal
@@ -18,7 +21,7 @@ from siliconcompiler.scheduler import SchedulerNode
 from siliconcompiler.scheduler import SlurmSchedulerNode
 from siliconcompiler.scheduler import DockerSchedulerNode
 from siliconcompiler.scheduler import TaskScheduler
-from siliconcompiler.scheduler.schedulernode import SchedulerFlowReset
+from siliconcompiler.scheduler.schedulernode import SchedulerFlowReset, SchedulerNodeReset
 from siliconcompiler.tool import TaskExecutableNotFound, TaskExecutableNotReceived
 
 from siliconcompiler import utils
@@ -26,6 +29,7 @@ from siliconcompiler.utils.logging import SCLoggerFormatter
 from siliconcompiler.utils.multiprocessing import MPManager
 from siliconcompiler.scheduler import send_messages, SCRuntimeError
 from siliconcompiler.utils.paths import collectiondir, jobdir, workdir
+from siliconcompiler.utils.curation import collect
 
 if TYPE_CHECKING:
     from siliconcompiler.project import Project
@@ -58,7 +62,7 @@ class Scheduler:
             SCRuntimeError: If the specified flow is not defined or fails validation.
         """
         self.__project = project
-        self.__logger: logging.Logger = project.logger
+        self.__logger: logging.Logger = project.logger.getChild("scheduler")
         self.__name = project.name
 
         flow = self.__project.get("option", "flow")
@@ -97,13 +101,35 @@ class Scheduler:
         self.__record: "RecordSchema" = self.__project.get("record", field="schema")
         self.__metrics: "MetricSchema" = self.__project.get("metric", field="schema")
 
-        self.__tasks = {}
+        self.__tasks: Dict[Tuple[str, str], SchedulerNode] = {}
+        self.__skippedtasks: Set[Tuple[str, str]] = set()
 
         # Create dummy handler
         self.__joblog_handler = logging.NullHandler()
         self.__org_job_name = self.__project.get("option", "jobname")
         self.__logfile = None
 
+        # Create tasks
+        for step, index in self.__flow.get_nodes():
+            node_cls = SchedulerNode
+
+            node_scheduler = self.__project.get('option', 'scheduler', 'name',
+                                                step=step, index=index)
+            if node_scheduler == 'slurm':
+                node_cls = SlurmSchedulerNode
+            elif node_scheduler == 'docker':
+                node_cls = DockerSchedulerNode
+            self.__tasks[(step, index)] = node_cls(self.__project, step, index)
+            if self.__flow.get(step, index, "tool") == "builtin":
+                self.__tasks[(step, index)].set_builtin()
+
+    @property
+    def manifest(self) -> str:
+        """
+        Returns the path to the job manifest
+        """
+        return os.path.join(jobdir(self.__project), f"{self.__name}.pkg.json")
+
     @property
     def log(self) -> Union[None, str]:
         """
@@ -131,7 +157,7 @@ class Scheduler:
         Args:
             header (str): A header message to print before the status list.
         """
-        self.__logger.debug(f"#### {header}")
+        self.__logger.debug(f"#### {header} : {datetime.now().strftime('%H:%M:%S')}")
         for step, index in self.__flow.get_nodes():
             self.__logger.debug(f"({step}, {index}) -> "
                                 f"{self.__record.get('status', step=step, index=index)}")
@@ -260,6 +286,9 @@ class Scheduler:
             if not self.check_manifest():
                 raise SCRuntimeError("check_manifest() failed")
 
+            # Initialize schedulers
+            self.__init_schedulers()
+
             self.__run_setup()
             self.configure_nodes()
 
@@ -278,16 +307,23 @@ class Scheduler:
             if not self.__check_flowgraph_io():
                 raise SCRuntimeError("Flowgraph file IO constrains errors")
 
-
+            # Collect files for remote runs
+            if self.__check_collect_files():
+                collect(self.project)
 
-
-
+            try:
+                self.run_core()
+            except SCRuntimeError as e:
+                raise e
 
-
-
-
+            finally:
+                # Store run in history
+                self.__project._record_history()
 
-
+                # Record final manifest
+                self.__project.write_manifest(self.manifest)
+
+                send_messages.send(self.__project, 'summary', None, None)
         finally:
             if self.__joblog_handler is not None:
                 self.__logger.removeHandler(self.__joblog_handler)
@@ -310,50 +346,10 @@ class Scheduler:
         error = False
 
         for (step, index) in self.__flow_runtime.get_nodes():
-
-            check_file_access = not self.__project.option.get_remote() and scheduler is None
-
-            node = SchedulerNode(self.__project, step, index)
-            requires = []
-            with node.runtime():
-                requires = node.task.get('require')
-
-            for item in sorted(set(requires)):
-                keypath = item.split(',')
-                if not self.__project.valid(*keypath):
-                    self.__logger.error(f'Cannot resolve required keypath [{",".join(keypath)}] '
-                                        f'for {step}/{index}.')
-                    error = True
-                    continue
+            node = self.__tasks[(step, index)]
 
-
-
-                if param.get(field='pernode').is_never():
-                    check_step, check_index = None, None
-
-                if not param.has_value(step=check_step, index=check_index):
-                    self.__logger.error('No value set for required keypath '
-                                        f'[{",".join(keypath)}] for {step}/{index}.')
-                    error = True
-                    continue
-
-                paramtype = param.get(field='type')
-                if check_file_access and (('file' in paramtype) or ('dir' in paramtype)):
-                    abspath = self.__project.find_files(*keypath,
-                                                        missing_ok=True,
-                                                        step=check_step, index=check_index)
-
-                    unresolved_paths = param.get(step=check_step, index=check_index)
-                    if not isinstance(abspath, list):
-                        abspath = [abspath]
-                        unresolved_paths = [unresolved_paths]
-
-                    for path, setpath in zip(abspath, unresolved_paths):
-                        if path is None:
-                            self.__logger.error(f'Cannot resolve path {setpath} in '
-                                                f'required file keypath [{",".join(keypath)}] '
-                                                f'for {step}/{index}.')
-                            error = True
+            error |= not node.check_required_values()
+            error |= not node.check_required_paths()
 
         return not error
 
@@ -372,6 +368,8 @@ class Scheduler:
         nodes = self.__flow_runtime.get_nodes()
         error = False
 
+        manifest_name = os.path.basename(self.manifest)
+
         for (step, index) in nodes:
             # Get files we receive from input nodes.
             in_nodes = self.__flow_runtime.get_node_inputs(step, index, record=self.__record)
@@ -396,21 +394,17 @@ class Scheduler:
                     inputs = []
                     continue
 
-
-                manifest = f'{design}.pkg.json'
-                inputs = [inp for inp in os.listdir(in_step_out_dir) if inp != manifest]
+                inputs = [inp for inp in os.listdir(in_step_out_dir) if inp != manifest_name]
             else:
                 in_tool = self.__flow.get(in_step, in_index, "tool")
                 in_task = self.__flow.get(in_step, in_index, "task")
                 in_task_class = self.__project.get("tool", in_tool, "task", in_task,
                                                    field="schema")
 
-                with in_task_class.runtime(
-                        in_step, in_index)) as task:
+                with in_task_class.runtime(self.__tasks[(in_step, in_index)]) as task:
                     inputs = task.get_output_files()
 
-            with task_class.runtime(
-                    step, index)) as task:
+            with task_class.runtime(self.__tasks[(step, index)]) as task:
                 for inp in inputs:
                     node_inp = task.compute_input_file_node_name(inp, in_step, in_index)
                     if node_inp in requirements:
@@ -446,7 +440,7 @@ class Scheduler:
 
         self.__record.set('status', NodeStatus.PENDING, step=step, index=index)
         for next_step, next_index in self.__flow_runtime.get_nodes_starting_at(step, index):
-            if
+            if (next_step, next_index) in self.__skippedtasks:
                 continue
 
             # Mark following steps as pending
@@ -466,18 +460,6 @@ class Scheduler:
         copy_from_nodes = set(self.__flow_load_runtime.get_nodes()).difference(
             self.__flow_runtime.get_entry_nodes())
         for step, index in self.__flow.get_nodes():
-            node_cls = SchedulerNode
-
-            node_scheduler = self.__project.get('option', 'scheduler', 'name',
-                                                step=step, index=index)
-            if node_scheduler == 'slurm':
-                node_cls = SlurmSchedulerNode
-            elif node_scheduler == 'docker':
-                node_cls = DockerSchedulerNode
-            self.__tasks[(step, index)] = node_cls(self.__project, step, index)
-            if self.__flow.get(step, index, "tool") == "builtin":
-                self.__tasks[(step, index)].set_builtin()
-
             if self.__org_job_name and (step, index) in copy_from_nodes:
                 self.__tasks[(step, index)].copy_from(self.__org_job_name)
 
@@ -581,33 +563,28 @@ class Scheduler:
             if NodeStatus.is_waiting(self.__record.get('status', step=step, index=index)):
                 with self.__tasks[(step, index)].runtime():
                     self.__tasks[(step, index)].clean_directory()
+                parent_dir = os.path.dirname(self.__tasks[(step, index)].workdir)
+                if os.path.exists(parent_dir) and len(os.listdir(parent_dir)) == 0:
+                    # Step directory is empty so safe to remove
+                    os.rmdir(parent_dir)
 
-    def
-        """
-        Prepare and configure all flow nodes before execution, including loading prior run state,
-        running per-node setup, and marking nodes that require rerun.
+    def __configure_collect_previous_information(self) -> Dict[Tuple[str, str], "Project"]:
+        """Collects information from previous runs for nodes that won't be re-executed.
 
-        This method
-
-
-
-
-
-
-
-        as pending.
-        - Persists the resulting manifest for the current job before returning.
+        This method identifies nodes that are marked for loading (not cleaning) and
+        are not part of the current 'from' execution path. For each of these
+        nodes, it attempts to load its manifest from a previous run.
+
+        Returns:
+            Dict[Tuple[str, str], "Project"]: A dictionary mapping (step, index)
+                tuples to their corresponding loaded Project objects from
+                previous runs.
         """
         from siliconcompiler import Project
+        self.__print_status("Start - collect")
 
-        from_nodes = []
         extra_setup_nodes = {}
-
-        journal = Journal.access(self.__project)
-        journal.start()
-
-        self.__print_status("Start")
-
+        from_nodes = []
         if self.__project.get('option', 'clean'):
             if self.__project.get("option", "from"):
                 from_nodes = self.__flow_runtime.get_entry_nodes()
@@ -626,25 +603,49 @@ class Scheduler:
                 # Node will be run so no need to load
                 continue
 
-            manifest =
-                                    'outputs',
-                                    f'{self.__name}.pkg.json')
+            manifest = self.__tasks[(step, index)].get_manifest()
             if os.path.exists(manifest):
                 # ensure we setup these nodes again
                 try:
                     extra_setup_nodes[(step, index)] = Project.from_manifest(filepath=manifest)
-                except Exception:
+                except Exception as e:
+                    self.__logger.debug(f"Reading {manifest} caused: {e}")
                     pass
 
+        self.__print_status("End - collect")
+
+        return extra_setup_nodes
+
+    def __configure_run_setup(self, extra_setup_nodes: Dict[Tuple[str, str], "Project"]) -> None:
+        """Runs the setup() method for all flow nodes and forwards previous status.
+
+        This method iterates through all nodes in execution order and calls
+        their respective `setup()` methods.
+
+        It also uses the `extra_setup_nodes` to:
+        1. Prune nodes from `extra_setup_nodes` if their `setup()` method
+           returns False (indicating the node is no longer valid).
+        2. Forward the 'status' from a valid, previously-run node (found in
+           `extra_setup_nodes`) into the current job's records.
+
+        Args:
+            extra_setup_nodes (Dict[Tuple[str, str], "Project"]): A dictionary
+                of loaded Project objects from previous runs. This dictionary
+                may be modified in-place (nodes may be removed).
+        """
+        self.__print_status("Start - setup")
         # Setup tools for all nodes to run
         for layer_nodes in self.__flow.get_execution_order():
             for step, index in layer_nodes:
                 with self.__tasks[(step, index)].runtime():
                     node_kept = self.__tasks[(step, index)].setup()
+                    if not node_kept:
+                        self.__skippedtasks.add((step, index))
                 if not node_kept and (step, index) in extra_setup_nodes:
                     # remove from previous node data
                     del extra_setup_nodes[(step, index)]
 
+                # Copy in old status information, this will be overwritten if needed
                 if (step, index) in extra_setup_nodes:
                     schema = extra_setup_nodes[(step, index)]
                     node_status = None
@@ -655,28 +656,155 @@ class Scheduler:
                     if node_status:
                         # Forward old status
                         self.__record.set('status', node_status, step=step, index=index)
+        self.__print_status("End - setup")
+
+    @staticmethod
+    def _configure_run_required(task: SchedulerNode) \
+            -> Optional[Union[SchedulerFlowReset, SchedulerNodeReset]]:
+        """
+        Helper method to run requires_run() with threads.
+        """
+        with task.runtime():
+            try:
+                task.requires_run()
+            except (SchedulerFlowReset, SchedulerNodeReset) as e:
+                return e
+        return None
+
+    def __configure_check_run_required(self) -> List[Tuple[str, str]]:
+        """Checks which nodes require a re-run and which can be replayed.
+
+        This method iterates through all nodes that are currently marked as
+        'SUCCESS' (typically from a previous run). It calls `requires_run()`
+        on each to determine if inputs, parameters, or other dependencies
+        have changed.
+
+        - If `requires_run()` is True, the node is marked as 'pending' (and
+          will be re-executed).
+        - If `requires_run()` is False, the node is added to the 'replay' list,
+          indicating its previous results can be reused.
+
+        Returns:
+            List[Tuple[str, str]]: A list of (step, index) tuples for nodes
+                that do *not* require a re-run and whose results can be
+                replayed from the journal.
+        """
+        self.__print_status("Start - check")
+
+        replay: List[Tuple[str, str]] = []
 
-
+        nodes: List[Tuple[str, str]] = []
+
+        def filter_nodes(nodes: List[Tuple[str, str]]) -> None:
+            for step, index in tuple(nodes):
+                # Only look at successful nodes
+                if self.__record.get("status", step=step, index=index) != NodeStatus.SUCCESS:
+                    nodes.remove((step, index))
+
+        def create_node_group(nodes: List[Tuple[str, str]], size: int) -> List[Tuple[str, str]]:
+            group = []
+            for _ in range(size):
+                if nodes:
+                    group.append(nodes.pop(0))
+            return group
+
+        # Collect initial list of nodes to process
+        for layer_nodes in self.__flow.get_execution_order():
+            nodes.extend(layer_nodes)
+
+        # Determine pool size
+        cores = utils.get_cores()
+        pool_size = self.project.option.scheduler.get_maxthreads() or cores
+        pool_size = max(1, min(cores, pool_size))
+
+        # Limit based on number of nodes if less than number of cores
+        filter_nodes(nodes)
+        if not nodes:
+            # No nodes left so just return
+            return []
+
+        pool_size = min(pool_size, len(nodes))
+
+        self.__logger.debug(f"Check pool size: {pool_size}")
+
+        # Call this in case this was invoked without __main__
+        multiprocessing.freeze_support()
+
+        with multiprocessing.get_context("spawn").Pool(pool_size) as pool:
+            while True:
+                # Filter nodes
+                filter_nodes(nodes)
+
+                # Generate a group of nodes to run
+                group = create_node_group(nodes, pool_size)
+                self.__logger.debug(f"Group to check: {group}")
+                if not group:
+                    # Group is empty
+                    break
+
+                tasks = [self.__tasks[(step, index)] for step, index in group]
+                # Suppress excess info messages during checks
+                cur_level = self.project.logger.level
+                self.project.logger.setLevel(logging.WARNING)
+                try:
+                    runcheck = pool.map(Scheduler._configure_run_required, tasks)
+                finally:
+                    self.project.logger.setLevel(cur_level)
+
+                for node, runrequired in zip(group, runcheck):
+                    if self.__record.get("status", step=node[0], index=node[1]) != \
+                            NodeStatus.SUCCESS:
+                        continue
+
+                    self.__logger.debug(f" Result: {node} -> {runrequired}")
+
+                    if runrequired is not None:
+                        runrequired.log(self.__logger)
+
+                        if isinstance(runrequired, SchedulerFlowReset):
+                            raise runrequired from None
+
+                        # This node must be run
+                        self.__mark_pending(*node)
+                    else:
+                        # import old information
+                        replay.append(node)
+
+        self.__print_status("End - check")
+
+        return replay
+
+    def configure_nodes(self) -> None:
+        """
+        Prepare and configure all flow nodes before execution, including loading prior run state,
+        running per-node setup, and marking nodes that require rerun.
+
+        This method:
+        - Loads available node manifests from previous jobs and uses them to populate setup data
+          where appropriate.
+        - Runs each node's setup routine to initialize tools and runtime state.
+        - For nodes whose parameters or inputs have changed, marks them and all downstream nodes
+          as pending so they will be re-executed.
+        - Replays preserved journaled results for nodes that remain valid to reuse previous outputs.
+        - On a SchedulerFlowReset, forces a full build-directory recheck and marks every node
+          as pending.
+        - Persists the resulting manifest for the current job before returning.
+        """
+        journal = Journal.access(self.__project)
+        journal.start()
+
+        extra_setup_nodes = self.__configure_collect_previous_information()
+
+        self.__configure_run_setup(extra_setup_nodes)
 
         # Check for modified information
         try:
-            replay =
-            for layer_nodes in self.__flow.get_execution_order():
-                for step, index in layer_nodes:
-                    # Only look at successful nodes
-                    if self.__record.get("status", step=step, index=index) != NodeStatus.SUCCESS:
-                        continue
+            replay = self.__configure_check_run_required()
 
-                    with self.__tasks[(step, index)].runtime():
-                        if self.__tasks[(step, index)].requires_run():
-                            # This node must be run
-                            self.__mark_pending(step, index)
-                        elif (step, index) in extra_setup_nodes:
-                            # import old information
-                            replay.append((step, index))
             # Replay previous information
             for step, index in replay:
-
+                if (step, index) in extra_setup_nodes:
+                    Journal.access(extra_setup_nodes[(step, index)]).replay(self.__project)
         except SchedulerFlowReset:
             # Mark all nodes as pending
             self.__clean_build_dir_full(recheck=True)
@@ -684,7 +812,7 @@ class Scheduler:
             for step, index in self.__flow.get_nodes():
                 self.__mark_pending(step, index)
 
-        self.__print_status("
+        self.__print_status("Before ensure")
 
         # Ensure all nodes are marked as pending if needed
         for layer_nodes in self.__flow_runtime.get_execution_order():
@@ -693,11 +821,12 @@ class Scheduler:
                 if NodeStatus.is_waiting(status) or NodeStatus.is_error(status):
                     self.__mark_pending(step, index)
 
-        self.__print_status("
+        self.__print_status("FINAL")
+
+        # Write configured manifest
+        os.makedirs(os.path.dirname(self.manifest), exist_ok=True)
+        self.__project.write_manifest(self.manifest)
 
-        os.makedirs(jobdir(self.__project), exist_ok=True)
-        self.__project.write_manifest(os.path.join(jobdir(self.__project),
-                                                   f"{self.__name}.pkg.json"))
         journal.stop()
 
     def __check_display(self) -> None:
@@ -744,6 +873,8 @@ class Scheduler:
             if m:
                 jobid = max(jobid, int(m.group(1)))
             self.__project.set('option', 'jobname', f'{stem}{jobid + 1}')
+            for task in self.__tasks.values():
+                task._update_job()
             return True
         return False
 
@@ -788,7 +919,7 @@ class Scheduler:
             if self.__project.option.scheduler.get_name(step=step, index=index) is not None:
                 continue
 
-            node =
+            node = self.__tasks[(step, index)]
             with node.runtime():
                 try:
                     exe = node.get_exe_path()
@@ -816,3 +947,35 @@ class Scheduler:
             os.chdir(cwd)
 
         return not error
+
+    def __check_collect_files(self) -> bool:
+        """
+        Iterates through all tasks in the scheduler, and checks if the there
+        are files or directories that need to be collected
+
+        Returns:
+            bool: True if there is something to be collected, False otherwise.
+        """
+        do_collect = False
+        for task in self.__tasks.values():
+            if task.mark_copy():
+                do_collect = True
+
+        return do_collect
+
+    def __init_schedulers(self) -> None:
+        """
+        Collect and invoke unique initialization callbacks from all task schedulers.
+
+        This method gathers init functions from all SchedulerNode instances, deduplicates them
+        (since multiple tasks may share the same scheduler class), and invokes each once to
+        perform early validation (e.g., checking Docker/Slurm availability).
+        """
+        self.__logger.debug("Collecting unique scheduler initialization callbacks")
+        init_funcs = set()
+        for step, index in self.__flow_runtime.get_nodes():
+            init_funcs.add(self.__tasks[(step, index)].init)
+
+        for init in sorted(init_funcs, key=lambda func: func.__qualname__):
+            self.__logger.debug(f"Initializing scheduler: {init.__qualname__}")
+            init(self.__project)