siliconcompiler 0.33.1__py3-none-any.whl → 0.33.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siliconcompiler/_metadata.py +1 -1
- siliconcompiler/apps/utils/replay.py +5 -5
- siliconcompiler/core.py +5 -1
- siliconcompiler/data/templates/replay/replay.sh.j2 +18 -1
- siliconcompiler/metric.py +19 -0
- siliconcompiler/package/git.py +1 -1
- siliconcompiler/record.py +57 -5
- siliconcompiler/remote/client.py +47 -11
- siliconcompiler/remote/server.py +109 -64
- siliconcompiler/report/dashboard/cli/board.py +0 -1
- siliconcompiler/scheduler/__init__.py +12 -5
- siliconcompiler/scheduler/run_node.py +12 -5
- siliconcompiler/schema/baseschema.py +25 -4
- siliconcompiler/schema/journalingschema.py +4 -0
- siliconcompiler/schema/schema_cfg.py +1 -1
- siliconcompiler/tool.py +79 -18
- siliconcompiler/tools/_common/__init__.py +14 -11
- siliconcompiler/tools/slang/__init__.py +3 -2
- siliconcompiler/tools/yosys/sc_synth_asic.tcl +0 -4
- siliconcompiler/toolscripts/_tools.json +12 -7
- siliconcompiler/toolscripts/ubuntu22/install-klayout.sh +4 -0
- siliconcompiler/toolscripts/ubuntu24/install-klayout.sh +4 -0
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/METADATA +5 -4
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/RECORD +28 -28
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/WHEEL +1 -1
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/entry_points.txt +0 -0
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/licenses/LICENSE +0 -0
- {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/top_level.txt +0 -0
siliconcompiler/_metadata.py
CHANGED
siliconcompiler/apps/utils/replay.py
CHANGED
|
@@ -18,6 +18,7 @@ import siliconcompiler
|
|
|
18
18
|
from siliconcompiler.apps._common import UNSET_DESIGN
|
|
19
19
|
from siliconcompiler import SiliconCompilerError
|
|
20
20
|
from siliconcompiler import utils
|
|
21
|
+
from siliconcompiler.record import RecordTime
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
def make_bytes(data):
|
|
@@ -127,11 +128,10 @@ def main():
|
|
|
127
128
|
path = os.path.abspath(args['file'])
|
|
128
129
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
129
130
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
starttime = min(starttimes).strftime('%Y-%m-%d %H:%M:%S')
|
|
131
|
+
record_schema = chip.schema.get('history', jobname, 'record', field="schema")
|
|
132
|
+
starttime = datetime.fromtimestamp(
|
|
133
|
+
record_schema.get_earliest_time(RecordTime.START)).strftime(
|
|
134
|
+
'%Y-%m-%d %H:%M:%S')
|
|
135
135
|
|
|
136
136
|
with io.StringIO() as fd:
|
|
137
137
|
fd.write(utils.get_file_template('replay/requirements.txt').render(
|
siliconcompiler/core.py
CHANGED
|
@@ -1729,7 +1729,11 @@ class Chip:
|
|
|
1729
1729
|
error = True
|
|
1730
1730
|
self.logger.error(f'No executable or run() function specified for {tool}/{task}')
|
|
1731
1731
|
|
|
1732
|
-
|
|
1732
|
+
runtime_full = RuntimeFlowgraph(
|
|
1733
|
+
self.schema.get("flowgraph", flow, field='schema'),
|
|
1734
|
+
to_steps=self.get('option', 'to'),
|
|
1735
|
+
prune_nodes=self.get('option', 'prune'))
|
|
1736
|
+
if not error and not _check_flowgraph_io(self, nodes=runtime_full.get_nodes()):
|
|
1733
1737
|
error = True
|
|
1734
1738
|
|
|
1735
1739
|
return not error
|
siliconcompiler/data/templates/replay/replay.sh.j2
CHANGED
|
@@ -9,6 +9,7 @@ CD_WORK="{{ work_dir }}"
|
|
|
9
9
|
PRINT=""
|
|
10
10
|
CMDPREFIX=""
|
|
11
11
|
SKIPEXPORT=0
|
|
12
|
+
DONODE={{ node_only }}
|
|
12
13
|
while [[ $# -gt 0 ]]; do
|
|
13
14
|
case $1 in
|
|
14
15
|
--which)
|
|
@@ -40,6 +41,11 @@ while [[ $# -gt 0 ]]; do
|
|
|
40
41
|
shift
|
|
41
42
|
shift
|
|
42
43
|
;;
|
|
44
|
+
--node)
|
|
45
|
+
DONODE=1
|
|
46
|
+
shift
|
|
47
|
+
shift
|
|
48
|
+
;;
|
|
43
49
|
-h|--help)
|
|
44
50
|
echo "Usage: $0"
|
|
45
51
|
echo " Options:"
|
|
@@ -49,7 +55,8 @@ while [[ $# -gt 0 ]]; do
|
|
|
49
55
|
echo " --command print the execution command"
|
|
50
56
|
echo " --skipcd do not change directory into replay directory"
|
|
51
57
|
echo " --skipexports do not export environmental variables"
|
|
52
|
-
echo " --cmdprefix <cmd> prefix to add to the replay command, such as
|
|
58
|
+
echo " --cmdprefix <cmd> prefix to add to the replay command, such as dgb"
|
|
59
|
+
echo " --node execute entire node"
|
|
53
60
|
echo " -h,--help print this help"
|
|
54
61
|
exit 0
|
|
55
62
|
;;
|
|
@@ -87,6 +94,16 @@ case $PRINT in
|
|
|
87
94
|
;;
|
|
88
95
|
esac
|
|
89
96
|
|
|
97
|
+
if [ $DONODE == 1 ]; then
|
|
98
|
+
python3 -m siliconcompiler.scheduler.run_node \
|
|
99
|
+
-cfg "{{ cfg_file }}" \
|
|
100
|
+
-builddir "${PWD}/../../../../" \
|
|
101
|
+
-step "{{ step }}" \
|
|
102
|
+
-index "{{ index }}" \
|
|
103
|
+
-cwd "$PWD" \
|
|
104
|
+
-replay
|
|
105
|
+
{% if cmds|length > 0 %}else
|
|
90
106
|
# Command execution
|
|
91
107
|
$CMDPREFIX \{% for cmd in cmds %}
|
|
92
108
|
{% if not loop.first %} {% endif %}{{ cmd }}{% if not loop.last %} \{% endif %}{% endfor %}
|
|
109
|
+
{% endif %}fi
|
siliconcompiler/metric.py
CHANGED
|
@@ -44,6 +44,25 @@ class MetricSchema(BaseSchema):
|
|
|
44
44
|
|
|
45
45
|
return self.set(metric, value, step=step, index=str(index))
|
|
46
46
|
|
|
47
|
+
def record_tasktime(self, step, index, record):
|
|
48
|
+
"""
|
|
49
|
+
Record the task time for this node
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
step (str): step to record
|
|
53
|
+
index (str/int): index to record
|
|
54
|
+
record (:class:`RecordSchema`): record to lookup data in
|
|
55
|
+
"""
|
|
56
|
+
start_time, end_time = [
|
|
57
|
+
record.get_recorded_time(step, index, RecordTime.START),
|
|
58
|
+
record.get_recorded_time(step, index, RecordTime.END)
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
if start_time is None or end_time is None:
|
|
62
|
+
return False
|
|
63
|
+
|
|
64
|
+
return self.record(step, index, "tasktime", end_time-start_time, unit="s")
|
|
65
|
+
|
|
47
66
|
def record_totaltime(self, step, index, flow, record):
|
|
48
67
|
"""
|
|
49
68
|
Record the total time for this node
|
siliconcompiler/package/git.py
CHANGED
siliconcompiler/record.py
CHANGED
|
@@ -30,13 +30,25 @@ class RecordTool(Enum):
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class RecordSchema(BaseSchema):
|
|
33
|
-
__TIMEFORMAT = "%Y-%m-%d %H:%M:%S"
|
|
33
|
+
__TIMEFORMAT = "%Y-%m-%d %H:%M:%S.%f"
|
|
34
34
|
|
|
35
35
|
def __init__(self):
|
|
36
36
|
super().__init__()
|
|
37
37
|
|
|
38
38
|
schema_record(self)
|
|
39
39
|
|
|
40
|
+
def _from_dict(self, manifest, keypath, version=None):
|
|
41
|
+
ret = super()._from_dict(manifest, keypath, version)
|
|
42
|
+
|
|
43
|
+
# Correct for change specification
|
|
44
|
+
if version and version < (0, 50, 4):
|
|
45
|
+
for timekey in RecordTime:
|
|
46
|
+
start_param = self.get(timekey.value, field=None)
|
|
47
|
+
for value, step, index in start_param.getvalues():
|
|
48
|
+
start_param.set(f"{value}.000000", step=step, index=index)
|
|
49
|
+
|
|
50
|
+
return ret
|
|
51
|
+
|
|
40
52
|
def clear(self, step, index, keep=None):
|
|
41
53
|
'''
|
|
42
54
|
Clear all saved metrics for a given step and index
|
|
@@ -275,6 +287,44 @@ class RecordSchema(BaseSchema):
|
|
|
275
287
|
record_time+"+0000",
|
|
276
288
|
RecordSchema.__TIMEFORMAT+"%z").timestamp()
|
|
277
289
|
|
|
290
|
+
def get_earliest_time(self, type):
|
|
291
|
+
'''
|
|
292
|
+
Returns the earliest recorded time.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
type (:class:`RecordTime`): type of time to record
|
|
296
|
+
'''
|
|
297
|
+
type = RecordTime(type)
|
|
298
|
+
record_param = self.get(type.value, field=None)
|
|
299
|
+
|
|
300
|
+
times = set()
|
|
301
|
+
for _, step, index in record_param.getvalues():
|
|
302
|
+
times.add(self.get_recorded_time(step, index, type))
|
|
303
|
+
|
|
304
|
+
if not times:
|
|
305
|
+
return None
|
|
306
|
+
|
|
307
|
+
return min(times)
|
|
308
|
+
|
|
309
|
+
def get_latest_time(self, type):
|
|
310
|
+
'''
|
|
311
|
+
Returns the last recorded time.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
type (:class:`RecordTime`): type of time to record
|
|
315
|
+
'''
|
|
316
|
+
type = RecordTime(type)
|
|
317
|
+
record_param = self.get(type.value, field=None)
|
|
318
|
+
|
|
319
|
+
times = set()
|
|
320
|
+
for _, step, index in record_param.getvalues():
|
|
321
|
+
times.add(self.get_recorded_time(step, index, type))
|
|
322
|
+
|
|
323
|
+
if not times:
|
|
324
|
+
return None
|
|
325
|
+
|
|
326
|
+
return max(times)
|
|
327
|
+
|
|
278
328
|
def record_tool(self, step, index, info, type):
|
|
279
329
|
'''
|
|
280
330
|
Record information about the tool used during this record.
|
|
@@ -320,11 +370,13 @@ def schema_record(schema):
|
|
|
320
370
|
'x86_64',
|
|
321
371
|
'(x86_64, rv64imafdc)'],
|
|
322
372
|
'starttime': ['start time',
|
|
323
|
-
'\"2021-09-06 12:20:20\"',
|
|
324
|
-
'Time is
|
|
373
|
+
'\"2021-09-06 12:20:20.000000\"',
|
|
374
|
+
'Time is recorded with the format YYYY-MM-DD HR:MIN:SEC.MICROSEC for '
|
|
375
|
+
'UTC'],
|
|
325
376
|
'endtime': ['end time',
|
|
326
|
-
'\"2021-09-06 12:20:20\"',
|
|
327
|
-
'Time is
|
|
377
|
+
'\"2021-09-06 12:20:20.000000\"',
|
|
378
|
+
'Time is recorded with the format YYYY-MM-DD HR:MIN:SEC.MICROSEC for '
|
|
379
|
+
'UTC'],
|
|
328
380
|
'region': ['cloud region',
|
|
329
381
|
'\"US Gov Boston\"',
|
|
330
382
|
"""Recommended naming methodology:
|
siliconcompiler/remote/client.py
CHANGED
|
@@ -10,10 +10,12 @@ import tarfile
|
|
|
10
10
|
import tempfile
|
|
11
11
|
import multiprocessing
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
import os.path
|
|
14
|
+
|
|
15
|
+
from siliconcompiler import utils, SiliconCompilerError
|
|
14
16
|
from siliconcompiler import NodeStatus as SCNodeStatus
|
|
15
17
|
from siliconcompiler._metadata import default_server
|
|
16
|
-
from siliconcompiler.remote import JobStatus
|
|
18
|
+
from siliconcompiler.remote import JobStatus, NodeStatus
|
|
17
19
|
from siliconcompiler.report.dashboard import DashboardType
|
|
18
20
|
from siliconcompiler.flowgraph import RuntimeFlowgraph
|
|
19
21
|
from siliconcompiler.schema import JournalingSchema
|
|
@@ -310,19 +312,29 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
310
312
|
try:
|
|
311
313
|
# Decode response JSON, if possible.
|
|
312
314
|
job_info = json.loads(info['message'])
|
|
313
|
-
|
|
314
|
-
|
|
315
|
+
if "null" in job_info:
|
|
316
|
+
job_info[None] = job_info["null"]
|
|
317
|
+
del job_info["null"]
|
|
318
|
+
except json.JSONDecodeError:
|
|
319
|
+
self.__logger.warning(f"Job is still running: {info['message']}")
|
|
315
320
|
return completed, starttimes, True
|
|
316
321
|
|
|
317
322
|
nodes_to_log = {}
|
|
318
323
|
for node, node_info in job_info.items():
|
|
319
324
|
status = node_info['status']
|
|
320
|
-
|
|
325
|
+
|
|
326
|
+
if status == NodeStatus.UPLOADED:
|
|
327
|
+
status = SCNodeStatus.PENDING
|
|
321
328
|
|
|
322
329
|
if SCNodeStatus.is_done(status):
|
|
323
330
|
# collect completed
|
|
324
331
|
completed.append(node)
|
|
325
332
|
|
|
333
|
+
if not node:
|
|
334
|
+
continue
|
|
335
|
+
|
|
336
|
+
nodes_to_log.setdefault(status, []).append((node, node_info))
|
|
337
|
+
|
|
326
338
|
if self.__node_information and node in self.__node_information:
|
|
327
339
|
self.__chip.set('record', 'status', status,
|
|
328
340
|
step=self.__node_information[node]["step"],
|
|
@@ -580,6 +592,22 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
580
592
|
raise SiliconCompilerError('Job canceled by user keyboard interrupt')
|
|
581
593
|
|
|
582
594
|
def __import_run_manifests(self, starttimes):
|
|
595
|
+
if not self.__setup_information_loaded:
|
|
596
|
+
if self.__setup_information_fetched:
|
|
597
|
+
manifest = os.path.join(self.__chip.getworkdir(), f'{self.__chip.design}.pkg.json')
|
|
598
|
+
if os.path.exists(manifest):
|
|
599
|
+
try:
|
|
600
|
+
JournalingSchema(self.__chip.schema).read_journal(manifest)
|
|
601
|
+
self.__setup_information_loaded = True
|
|
602
|
+
changed = True
|
|
603
|
+
except: # noqa E722
|
|
604
|
+
# Import may fail if file is still getting written
|
|
605
|
+
pass
|
|
606
|
+
|
|
607
|
+
if not self.__setup_information_loaded:
|
|
608
|
+
# Dont do anything until this has been loaded
|
|
609
|
+
return
|
|
610
|
+
|
|
583
611
|
changed = False
|
|
584
612
|
for _, node_info in self.__node_information.items():
|
|
585
613
|
if node_info["imported"]:
|
|
@@ -599,7 +627,7 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
599
627
|
pass
|
|
600
628
|
elif self.__chip.get('record', 'status',
|
|
601
629
|
step=node_info["step"], index=node_info["index"]) \
|
|
602
|
-
==
|
|
630
|
+
== SCNodeStatus.SKIPPED:
|
|
603
631
|
node_info["imported"] = True
|
|
604
632
|
changed = True
|
|
605
633
|
|
|
@@ -618,6 +646,9 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
618
646
|
check_info = self.__check()
|
|
619
647
|
self.__check_interval = check_info['progress_interval']
|
|
620
648
|
|
|
649
|
+
self.__setup_information_fetched = False
|
|
650
|
+
self.__setup_information_loaded = False
|
|
651
|
+
|
|
621
652
|
self.__node_information = {}
|
|
622
653
|
runtime = RuntimeFlowgraph(
|
|
623
654
|
self.__chip.schema.get("flowgraph", self.__chip.get('option', 'flow'), field='schema'),
|
|
@@ -666,6 +697,11 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
666
697
|
# Update dashboard if active
|
|
667
698
|
self.__chip._dash.update_manifest({"starttimes": starttimes})
|
|
668
699
|
|
|
700
|
+
if None in completed:
|
|
701
|
+
completed.remove(None)
|
|
702
|
+
if not self.__setup_information_fetched:
|
|
703
|
+
self.__schedule_fetch_result(None)
|
|
704
|
+
|
|
669
705
|
nodes_to_fetch = []
|
|
670
706
|
for node in completed:
|
|
671
707
|
if not self.__node_information[node]["fetched"]:
|
|
@@ -681,7 +717,6 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
681
717
|
for node, node_info in self.__node_information.items():
|
|
682
718
|
if not node_info["fetched"]:
|
|
683
719
|
self.__schedule_fetch_result(node)
|
|
684
|
-
self.__schedule_fetch_result(node)
|
|
685
720
|
|
|
686
721
|
self._finalize_loop()
|
|
687
722
|
|
|
@@ -700,11 +735,12 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
|
|
|
700
735
|
self.__import_run_manifests({})
|
|
701
736
|
|
|
702
737
|
def __schedule_fetch_result(self, node):
|
|
703
|
-
|
|
738
|
+
if node:
|
|
739
|
+
self.__node_information[node]["fetched"] = True
|
|
740
|
+
self.__logger.info(f' {node}')
|
|
741
|
+
else:
|
|
742
|
+
self.__setup_information_fetched = True
|
|
704
743
|
self.__download_pool.apply_async(Client._fetch_result, (self, node))
|
|
705
|
-
if node is None:
|
|
706
|
-
node = 'final result'
|
|
707
|
-
self.__logger.info(f' {node}')
|
|
708
744
|
|
|
709
745
|
def _fetch_result(self, node):
|
|
710
746
|
'''
|
siliconcompiler/remote/server.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# Copyright 2020 Silicon Compiler Authors. All Rights Reserved.
|
|
2
2
|
|
|
3
3
|
from aiohttp import web
|
|
4
|
+
import copy
|
|
4
5
|
import threading
|
|
5
6
|
import json
|
|
6
7
|
import logging as log
|
|
@@ -12,7 +13,8 @@ import sys
|
|
|
12
13
|
import fastjsonschema
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from fastjsonschema import JsonSchemaException
|
|
15
|
-
|
|
16
|
+
|
|
17
|
+
import os.path
|
|
16
18
|
|
|
17
19
|
from siliconcompiler import Chip, Schema
|
|
18
20
|
from siliconcompiler.schema import utils as schema_utils
|
|
@@ -20,8 +22,10 @@ from siliconcompiler._metadata import version as sc_version
|
|
|
20
22
|
from siliconcompiler.schema import SCHEMA_VERSION as sc_schema_version
|
|
21
23
|
from siliconcompiler.remote.schema import ServerSchema
|
|
22
24
|
from siliconcompiler.remote import banner, JobStatus
|
|
23
|
-
from siliconcompiler
|
|
25
|
+
from siliconcompiler import NodeStatus as SCNodeStatus
|
|
26
|
+
from siliconcompiler.remote import NodeStatus
|
|
24
27
|
from siliconcompiler.flowgraph import RuntimeFlowgraph
|
|
28
|
+
from siliconcompiler.scheduler.taskscheduler import TaskScheduler
|
|
25
29
|
|
|
26
30
|
|
|
27
31
|
# Compile validation code for API request bodies.
|
|
@@ -81,7 +85,61 @@ class Server:
|
|
|
81
85
|
self.schema = ServerSchema()
|
|
82
86
|
|
|
83
87
|
# Set up a dictionary to track running jobs.
|
|
88
|
+
self.sc_jobs_lock = threading.Lock()
|
|
84
89
|
self.sc_jobs = {}
|
|
90
|
+
self.sc_chip_lookup = {}
|
|
91
|
+
|
|
92
|
+
# Register callbacks
|
|
93
|
+
TaskScheduler.register_callback("pre_run", self.__run_start)
|
|
94
|
+
TaskScheduler.register_callback("pre_node", self.__node_start)
|
|
95
|
+
TaskScheduler.register_callback("post_node", self.__node_end)
|
|
96
|
+
|
|
97
|
+
def __run_start(self, chip):
|
|
98
|
+
flow = chip.get("option", "flow")
|
|
99
|
+
nodes = chip.schema.get("flowgraph", flow, field="schema").get_nodes()
|
|
100
|
+
|
|
101
|
+
with self.sc_jobs_lock:
|
|
102
|
+
job_hash = self.sc_chip_lookup[chip]["jobhash"]
|
|
103
|
+
|
|
104
|
+
start_tar = os.path.join(self.nfs_mount, job_hash, f'{job_hash}_None.tar.gz')
|
|
105
|
+
start_status = NodeStatus.SUCCESS
|
|
106
|
+
with tarfile.open(start_tar, "w:gz") as tf:
|
|
107
|
+
start_manifest = os.path.join(chip.getworkdir(), f"{chip.design}.pkg.json")
|
|
108
|
+
tf.add(start_manifest, arcname=os.path.relpath(start_manifest, self.nfs_mount))
|
|
109
|
+
|
|
110
|
+
with self.sc_jobs_lock:
|
|
111
|
+
job_name = self.sc_chip_lookup[chip]["name"]
|
|
112
|
+
|
|
113
|
+
self.sc_jobs[job_name][None]["status"] = start_status
|
|
114
|
+
|
|
115
|
+
for step, index in nodes:
|
|
116
|
+
name = f"{step}{index}"
|
|
117
|
+
if name not in self.sc_jobs[job_name]:
|
|
118
|
+
continue
|
|
119
|
+
self.sc_jobs[job_name][name]["status"] = \
|
|
120
|
+
chip.get('record', 'status', step=step, index=index)
|
|
121
|
+
|
|
122
|
+
def __node_start(self, chip, step, index):
|
|
123
|
+
with self.sc_jobs_lock:
|
|
124
|
+
job_name = self.sc_chip_lookup[chip]["name"]
|
|
125
|
+
self.sc_jobs[job_name][f"{step}{index}"]["status"] = NodeStatus.RUNNING
|
|
126
|
+
|
|
127
|
+
def __node_end(self, chip, step, index):
|
|
128
|
+
with self.sc_jobs_lock:
|
|
129
|
+
job_hash = self.sc_chip_lookup[chip]["jobhash"]
|
|
130
|
+
job_name = self.sc_chip_lookup[chip]["name"]
|
|
131
|
+
|
|
132
|
+
chip = copy.deepcopy(chip)
|
|
133
|
+
chip.cwd = os.path.join(chip.get('option', 'builddir'), '..')
|
|
134
|
+
with tarfile.open(os.path.join(self.nfs_mount,
|
|
135
|
+
job_hash,
|
|
136
|
+
f'{job_hash}_{step}{index}.tar.gz'),
|
|
137
|
+
mode='w:gz') as tf:
|
|
138
|
+
chip._archive_node(tf, step=step, index=index, include="*")
|
|
139
|
+
|
|
140
|
+
with self.sc_jobs_lock:
|
|
141
|
+
self.sc_jobs[job_name][f"{step}{index}"]["status"] = \
|
|
142
|
+
chip.get('record', 'status', step=step, index=index)
|
|
85
143
|
|
|
86
144
|
def run(self):
|
|
87
145
|
if not os.path.exists(self.nfs_mount):
|
|
@@ -226,9 +284,6 @@ class Server:
|
|
|
226
284
|
# Remove 'remote' JSON config value to run locally on compute node.
|
|
227
285
|
chip.set('option', 'remote', False)
|
|
228
286
|
|
|
229
|
-
# Write JSON config to shared compute storage.
|
|
230
|
-
os.makedirs(os.path.join(job_root, 'configs'), exist_ok=True)
|
|
231
|
-
|
|
232
287
|
# Run the job with the configured clustering option. (Non-blocking)
|
|
233
288
|
job_proc = threading.Thread(target=self.remote_sc,
|
|
234
289
|
args=[
|
|
@@ -258,31 +313,13 @@ class Server:
|
|
|
258
313
|
job_hash = job_params['job_hash']
|
|
259
314
|
node = job_params['node'] if 'node' in job_params else None
|
|
260
315
|
|
|
261
|
-
resp = web.StreamResponse(
|
|
262
|
-
status=200,
|
|
263
|
-
reason='OK',
|
|
264
|
-
headers={
|
|
265
|
-
'Content-Type': 'application/x-tar',
|
|
266
|
-
'Content-Disposition': f'attachment; filename="{job_hash}_{node}.tar.gz"'
|
|
267
|
-
},
|
|
268
|
-
)
|
|
269
|
-
await resp.prepare(request)
|
|
270
|
-
|
|
271
316
|
zipfn = os.path.join(self.nfs_mount, job_hash, f'{job_hash}_{node}.tar.gz')
|
|
272
|
-
if not
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
tarinfo = tarfile.TarInfo(f'{job_hash}/done')
|
|
277
|
-
tarinfo.size = metadata_file.getbuffer().nbytes
|
|
278
|
-
tar.addfile(tarinfo=tarinfo, fileobj=metadata_file)
|
|
317
|
+
if not os.path.exists(zipfn):
|
|
318
|
+
return web.json_response(
|
|
319
|
+
{'message': 'Could not find results for the requested job/node.'},
|
|
320
|
+
status=404)
|
|
279
321
|
|
|
280
|
-
|
|
281
|
-
await resp.write(zipf.read())
|
|
282
|
-
|
|
283
|
-
await resp.write_eof()
|
|
284
|
-
|
|
285
|
-
return resp
|
|
322
|
+
return web.FileResponse(zipfn)
|
|
286
323
|
|
|
287
324
|
####################
|
|
288
325
|
async def handle_delete_job(self, request):
|
|
@@ -300,9 +337,10 @@ class Server:
|
|
|
300
337
|
job_hash = job_params['job_hash']
|
|
301
338
|
|
|
302
339
|
# Determine if the job is running.
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
340
|
+
with self.sc_jobs_lock:
|
|
341
|
+
for job in self.sc_jobs:
|
|
342
|
+
if job_hash in job:
|
|
343
|
+
return self.__response("Error: job is still running.", status=400)
|
|
306
344
|
|
|
307
345
|
# Delete job hash directory, only if it exists.
|
|
308
346
|
# TODO: This assumes no malicious input.
|
|
@@ -342,16 +380,17 @@ class Server:
|
|
|
342
380
|
|
|
343
381
|
# Determine if the job is running.
|
|
344
382
|
# TODO: Return information about individual flowgraph nodes.
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
383
|
+
with self.sc_jobs_lock:
|
|
384
|
+
if jobname in self.sc_jobs:
|
|
385
|
+
resp = {
|
|
386
|
+
'status': JobStatus.RUNNING,
|
|
387
|
+
'message': self.sc_jobs[jobname],
|
|
388
|
+
}
|
|
389
|
+
else:
|
|
390
|
+
resp = {
|
|
391
|
+
'status': JobStatus.COMPLETED,
|
|
392
|
+
'message': 'Job has no running steps.',
|
|
393
|
+
}
|
|
355
394
|
return web.json_response(resp)
|
|
356
395
|
|
|
357
396
|
####################
|
|
@@ -402,18 +441,39 @@ class Server:
|
|
|
402
441
|
# Assemble core job parameters.
|
|
403
442
|
job_hash = chip.get('record', 'remoteid')
|
|
404
443
|
|
|
444
|
+
runtime = RuntimeFlowgraph(
|
|
445
|
+
chip.schema.get("flowgraph", chip.get('option', 'flow'), field='schema'),
|
|
446
|
+
from_steps=chip.get('option', 'from'),
|
|
447
|
+
to_steps=chip.get('option', 'to'),
|
|
448
|
+
prune_nodes=chip.get('option', 'prune'))
|
|
449
|
+
|
|
450
|
+
nodes = {}
|
|
451
|
+
nodes[None] = {
|
|
452
|
+
"status": SCNodeStatus.PENDING
|
|
453
|
+
}
|
|
454
|
+
for step, index in runtime.get_nodes():
|
|
455
|
+
status = chip.get('record', 'status', step=step, index=index)
|
|
456
|
+
if not status:
|
|
457
|
+
status = SCNodeStatus.PENDING
|
|
458
|
+
if SCNodeStatus.is_done(status):
|
|
459
|
+
status = NodeStatus.UPLOADED
|
|
460
|
+
nodes[f"{step}{index}"] = {
|
|
461
|
+
"status": status
|
|
462
|
+
}
|
|
463
|
+
|
|
405
464
|
# Mark the job run as busy.
|
|
406
465
|
sc_job_name = self.job_name(username, job_hash)
|
|
407
|
-
self.
|
|
466
|
+
with self.sc_jobs_lock:
|
|
467
|
+
self.sc_chip_lookup[chip] = {
|
|
468
|
+
"name": sc_job_name,
|
|
469
|
+
"jobhash": job_hash
|
|
470
|
+
}
|
|
471
|
+
self.sc_jobs[sc_job_name] = nodes
|
|
408
472
|
|
|
409
473
|
build_dir = os.path.join(self.nfs_mount, job_hash)
|
|
410
474
|
chip.set('option', 'builddir', build_dir)
|
|
411
475
|
chip.set('option', 'remote', False)
|
|
412
476
|
|
|
413
|
-
job_cfg_dir = get_configuration_directory(chip)
|
|
414
|
-
os.makedirs(job_cfg_dir, exist_ok=True)
|
|
415
|
-
chip.write_manifest(f"{job_cfg_dir}/chip{chip.get('option', 'jobname')}.json")
|
|
416
|
-
|
|
417
477
|
if self.get('option', 'cluster') == 'slurm':
|
|
418
478
|
# Run the job with slurm clustering.
|
|
419
479
|
chip.set('option', 'scheduler', 'name', 'slurm')
|
|
@@ -421,25 +481,10 @@ class Server:
|
|
|
421
481
|
# Run the job.
|
|
422
482
|
chip.run()
|
|
423
483
|
|
|
424
|
-
# Archive each task.
|
|
425
|
-
runtime = RuntimeFlowgraph(
|
|
426
|
-
chip.schema.get("flowgraph", chip.get('option', 'flow'), field='schema'),
|
|
427
|
-
from_steps=chip.get('option', 'from'),
|
|
428
|
-
to_steps=chip.get('option', 'to'),
|
|
429
|
-
prune_nodes=chip.get('option', 'prune'))
|
|
430
|
-
for (step, index) in runtime.get_nodes():
|
|
431
|
-
chip.cwd = os.path.join(chip.get('option', 'builddir'), '..')
|
|
432
|
-
tf = tarfile.open(os.path.join(self.nfs_mount,
|
|
433
|
-
job_hash,
|
|
434
|
-
f'{job_hash}_{step}{index}.tar.gz'),
|
|
435
|
-
mode='w:gz')
|
|
436
|
-
chip._archive_node(tf, step=step, index=index)
|
|
437
|
-
tf.close()
|
|
438
|
-
|
|
439
|
-
# (Email notifications can be sent here using your preferred API)
|
|
440
|
-
|
|
441
484
|
# Mark the job hash as being done.
|
|
442
|
-
self.
|
|
485
|
+
with self.sc_jobs_lock:
|
|
486
|
+
self.sc_jobs.pop(sc_job_name)
|
|
487
|
+
self.sc_chip_lookup.pop(chip)
|
|
443
488
|
|
|
444
489
|
####################
|
|
445
490
|
def __auth_password(self, username, password):
|
siliconcompiler/report/dashboard/cli/board.py
CHANGED
|
@@ -751,7 +751,6 @@ class Board(metaclass=BoardSingleton):
|
|
|
751
751
|
|
|
752
752
|
runtime_flow = RuntimeFlowgraph(
|
|
753
753
|
chip.schema.get("flowgraph", flow, field='schema'),
|
|
754
|
-
args=(chip.get('arg', 'step'), chip.get('arg', 'index')),
|
|
755
754
|
to_steps=chip.get('option', 'to'),
|
|
756
755
|
prune_nodes=chip.get('option', 'prune'))
|
|
757
756
|
record = chip.schema.get("record", field='schema')
|
siliconcompiler/scheduler/__init__.py
CHANGED
|
@@ -152,6 +152,9 @@ def _local_process(chip, flow):
|
|
|
152
152
|
from_nodes = []
|
|
153
153
|
extra_setup_nodes = {}
|
|
154
154
|
|
|
155
|
+
chip.schema = JournalingSchema(chip.schema)
|
|
156
|
+
chip.schema.start_journal()
|
|
157
|
+
|
|
155
158
|
if chip.get('option', 'clean') or not chip.get('option', 'from'):
|
|
156
159
|
load_nodes = list(chip.schema.get("flowgraph", flow, field="schema").get_nodes())
|
|
157
160
|
else:
|
|
@@ -236,8 +239,7 @@ def _local_process(chip, flow):
|
|
|
236
239
|
mark_pending(step, index)
|
|
237
240
|
elif (step, index) in extra_setup_nodes:
|
|
238
241
|
# import old information
|
|
239
|
-
|
|
240
|
-
schema=extra_setup_nodes[(step, index)])
|
|
242
|
+
chip.schema.import_journal(schema=extra_setup_nodes[(step, index)])
|
|
241
243
|
|
|
242
244
|
# Ensure pending nodes cause following nodes to be run
|
|
243
245
|
for step, index in nodes:
|
|
@@ -250,6 +252,10 @@ def _local_process(chip, flow):
|
|
|
250
252
|
if chip.get('record', 'status', step=step, index=index) == NodeStatus.PENDING:
|
|
251
253
|
clean_node_dir(chip, step, index)
|
|
252
254
|
|
|
255
|
+
chip.write_manifest(os.path.join(chip.getworkdir(), f"{chip.get('design')}.pkg.json"))
|
|
256
|
+
chip.schema.stop_journal()
|
|
257
|
+
chip.schema = chip.schema.get_base_schema()
|
|
258
|
+
|
|
253
259
|
# Check validity of setup
|
|
254
260
|
chip.logger.info("Checking manifest before running.")
|
|
255
261
|
check_ok = chip.check_manifest()
|
|
@@ -604,9 +610,10 @@ def _executenode(chip, step, index, replay):
|
|
|
604
610
|
send_messages.send(chip, "begin", step, index)
|
|
605
611
|
|
|
606
612
|
try:
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
613
|
+
if not replay:
|
|
614
|
+
task_class.generate_replay_script(
|
|
615
|
+
os.path.join(workdir, "replay.sh"),
|
|
616
|
+
workdir)
|
|
610
617
|
ret_code = task_class.run_task(
|
|
611
618
|
workdir,
|
|
612
619
|
chip.get('option', 'quiet', step=step, index=index),
|