siliconcompiler 0.33.1__py3-none-any.whl → 0.33.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. siliconcompiler/_metadata.py +1 -1
  2. siliconcompiler/apps/utils/replay.py +5 -5
  3. siliconcompiler/core.py +5 -1
  4. siliconcompiler/data/templates/replay/replay.sh.j2 +18 -1
  5. siliconcompiler/metric.py +19 -0
  6. siliconcompiler/package/git.py +1 -1
  7. siliconcompiler/record.py +57 -5
  8. siliconcompiler/remote/client.py +47 -11
  9. siliconcompiler/remote/server.py +109 -64
  10. siliconcompiler/report/dashboard/cli/board.py +0 -1
  11. siliconcompiler/scheduler/__init__.py +12 -5
  12. siliconcompiler/scheduler/run_node.py +12 -5
  13. siliconcompiler/schema/baseschema.py +25 -4
  14. siliconcompiler/schema/journalingschema.py +4 -0
  15. siliconcompiler/schema/schema_cfg.py +1 -1
  16. siliconcompiler/tool.py +79 -18
  17. siliconcompiler/tools/_common/__init__.py +14 -11
  18. siliconcompiler/tools/slang/__init__.py +3 -2
  19. siliconcompiler/tools/yosys/sc_synth_asic.tcl +0 -4
  20. siliconcompiler/toolscripts/_tools.json +12 -7
  21. siliconcompiler/toolscripts/ubuntu22/install-klayout.sh +4 -0
  22. siliconcompiler/toolscripts/ubuntu24/install-klayout.sh +4 -0
  23. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/METADATA +5 -4
  24. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/RECORD +28 -28
  25. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/WHEEL +1 -1
  26. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/entry_points.txt +0 -0
  27. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/licenses/LICENSE +0 -0
  28. {siliconcompiler-0.33.1.dist-info → siliconcompiler-0.33.2.dist-info}/top_level.txt +0 -0
siliconcompiler/_metadata.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # Version number following semver standard.
2
- version = '0.33.1'
2
+ version = '0.33.2'
3
3
 
4
4
  # Default server address for remote runs, if unspecified.
5
5
  default_server = 'https://server.siliconcompiler.com'
siliconcompiler/apps/utils/replay.py CHANGED
@@ -18,6 +18,7 @@ import siliconcompiler
18
18
  from siliconcompiler.apps._common import UNSET_DESIGN
19
19
  from siliconcompiler import SiliconCompilerError
20
20
  from siliconcompiler import utils
21
+ from siliconcompiler.record import RecordTime
21
22
 
22
23
 
23
24
  def make_bytes(data):
@@ -127,11 +128,10 @@ def main():
127
128
  path = os.path.abspath(args['file'])
128
129
  os.makedirs(os.path.dirname(path), exist_ok=True)
129
130
 
130
- starttimes = set()
131
- for starttime, step, index in chip.schema.get('history', jobname, 'record', 'starttime',
132
- field=None).getvalues():
133
- starttimes.add(datetime.strptime(starttime, '%Y-%m-%d %H:%M:%S'))
134
- starttime = min(starttimes).strftime('%Y-%m-%d %H:%M:%S')
131
+ record_schema = chip.schema.get('history', jobname, 'record', field="schema")
132
+ starttime = datetime.fromtimestamp(
133
+ record_schema.get_earliest_time(RecordTime.START)).strftime(
134
+ '%Y-%m-%d %H:%M:%S')
135
135
 
136
136
  with io.StringIO() as fd:
137
137
  fd.write(utils.get_file_template('replay/requirements.txt').render(
siliconcompiler/core.py CHANGED
@@ -1729,7 +1729,11 @@ class Chip:
1729
1729
  error = True
1730
1730
  self.logger.error(f'No executable or run() function specified for {tool}/{task}')
1731
1731
 
1732
- if not error and not _check_flowgraph_io(self, nodes=nodes):
1732
+ runtime_full = RuntimeFlowgraph(
1733
+ self.schema.get("flowgraph", flow, field='schema'),
1734
+ to_steps=self.get('option', 'to'),
1735
+ prune_nodes=self.get('option', 'prune'))
1736
+ if not error and not _check_flowgraph_io(self, nodes=runtime_full.get_nodes()):
1733
1737
  error = True
1734
1738
 
1735
1739
  return not error
siliconcompiler/data/templates/replay/replay.sh.j2 CHANGED
@@ -9,6 +9,7 @@ CD_WORK="{{ work_dir }}"
9
9
  PRINT=""
10
10
  CMDPREFIX=""
11
11
  SKIPEXPORT=0
12
+ DONODE={{ node_only }}
12
13
  while [[ $# -gt 0 ]]; do
13
14
  case $1 in
14
15
  --which)
@@ -40,6 +41,11 @@ while [[ $# -gt 0 ]]; do
40
41
  shift
41
42
  shift
42
43
  ;;
44
+ --node)
45
+ DONODE=1
46
+ shift
47
+ shift
48
+ ;;
43
49
  -h|--help)
44
50
  echo "Usage: $0"
45
51
  echo " Options:"
@@ -49,7 +55,8 @@ while [[ $# -gt 0 ]]; do
49
55
  echo " --command print the execution command"
50
56
  echo " --skipcd do not change directory into replay directory"
51
57
  echo " --skipexports do not export environmental variables"
52
- echo " --cmdprefix <cmd> prefix to add to the replay command, such as gdb"
58
+ echo " --cmdprefix <cmd> prefix to add to the replay command, such as dgb"
59
+ echo " --node execute entire node"
53
60
  echo " -h,--help print this help"
54
61
  exit 0
55
62
  ;;
@@ -87,6 +94,16 @@ case $PRINT in
87
94
  ;;
88
95
  esac
89
96
 
97
+ if [ $DONODE == 1 ]; then
98
+ python3 -m siliconcompiler.scheduler.run_node \
99
+ -cfg "{{ cfg_file }}" \
100
+ -builddir "${PWD}/../../../../" \
101
+ -step "{{ step }}" \
102
+ -index "{{ index }}" \
103
+ -cwd "$PWD" \
104
+ -replay
105
+ {% if cmds|length > 0 %}else
90
106
  # Command execution
91
107
  $CMDPREFIX \{% for cmd in cmds %}
92
108
  {% if not loop.first %} {% endif %}{{ cmd }}{% if not loop.last %} \{% endif %}{% endfor %}
109
+ {% endif %}fi
siliconcompiler/metric.py CHANGED
@@ -44,6 +44,25 @@ class MetricSchema(BaseSchema):
44
44
 
45
45
  return self.set(metric, value, step=step, index=str(index))
46
46
 
47
+ def record_tasktime(self, step, index, record):
48
+ """
49
+ Record the task time for this node
50
+
51
+ Args:
52
+ step (str): step to record
53
+ index (str/int): index to record
54
+ record (:class:`RecordSchema`): record to lookup data in
55
+ """
56
+ start_time, end_time = [
57
+ record.get_recorded_time(step, index, RecordTime.START),
58
+ record.get_recorded_time(step, index, RecordTime.END)
59
+ ]
60
+
61
+ if start_time is None or end_time is None:
62
+ return False
63
+
64
+ return self.record(step, index, "tasktime", end_time-start_time, unit="s")
65
+
47
66
  def record_totaltime(self, step, index, flow, record):
48
67
  """
49
68
  Record the total time for this node
siliconcompiler/package/git.py CHANGED
@@ -81,4 +81,4 @@ def clone_from_git(chip, package, path, ref, url, data_path):
81
81
  chip.logger.info(f'Checking out {ref}')
82
82
  repo.git.checkout(ref)
83
83
  for submodule in repo.submodules:
84
- submodule.update(init=True)
84
+ submodule.update(recursive=True, init=True, force=True)
siliconcompiler/record.py CHANGED
@@ -30,13 +30,25 @@ class RecordTool(Enum):
30
30
 
31
31
 
32
32
  class RecordSchema(BaseSchema):
33
- __TIMEFORMAT = "%Y-%m-%d %H:%M:%S"
33
+ __TIMEFORMAT = "%Y-%m-%d %H:%M:%S.%f"
34
34
 
35
35
  def __init__(self):
36
36
  super().__init__()
37
37
 
38
38
  schema_record(self)
39
39
 
40
+ def _from_dict(self, manifest, keypath, version=None):
41
+ ret = super()._from_dict(manifest, keypath, version)
42
+
43
+ # Correct for change specification
44
+ if version and version < (0, 50, 4):
45
+ for timekey in RecordTime:
46
+ start_param = self.get(timekey.value, field=None)
47
+ for value, step, index in start_param.getvalues():
48
+ start_param.set(f"{value}.000000", step=step, index=index)
49
+
50
+ return ret
51
+
40
52
  def clear(self, step, index, keep=None):
41
53
  '''
42
54
  Clear all saved metrics for a given step and index
@@ -275,6 +287,44 @@ class RecordSchema(BaseSchema):
275
287
  record_time+"+0000",
276
288
  RecordSchema.__TIMEFORMAT+"%z").timestamp()
277
289
 
290
+ def get_earliest_time(self, type):
291
+ '''
292
+ Returns the earliest recorded time.
293
+
294
+ Args:
295
+ type (:class:`RecordTime`): type of time to record
296
+ '''
297
+ type = RecordTime(type)
298
+ record_param = self.get(type.value, field=None)
299
+
300
+ times = set()
301
+ for _, step, index in record_param.getvalues():
302
+ times.add(self.get_recorded_time(step, index, type))
303
+
304
+ if not times:
305
+ return None
306
+
307
+ return min(times)
308
+
309
+ def get_latest_time(self, type):
310
+ '''
311
+ Returns the last recorded time.
312
+
313
+ Args:
314
+ type (:class:`RecordTime`): type of time to record
315
+ '''
316
+ type = RecordTime(type)
317
+ record_param = self.get(type.value, field=None)
318
+
319
+ times = set()
320
+ for _, step, index in record_param.getvalues():
321
+ times.add(self.get_recorded_time(step, index, type))
322
+
323
+ if not times:
324
+ return None
325
+
326
+ return max(times)
327
+
278
328
  def record_tool(self, step, index, info, type):
279
329
  '''
280
330
  Record information about the tool used during this record.
@@ -320,11 +370,13 @@ def schema_record(schema):
320
370
  'x86_64',
321
371
  '(x86_64, rv64imafdc)'],
322
372
  'starttime': ['start time',
323
- '\"2021-09-06 12:20:20\"',
324
- 'Time is reported in the ISO 8601 format YYYY-MM-DD HR:MIN:SEC'],
373
+ '\"2021-09-06 12:20:20.000000\"',
374
+ 'Time is recorded with the format YYYY-MM-DD HR:MIN:SEC.MICROSEC for '
375
+ 'UTC'],
325
376
  'endtime': ['end time',
326
- '\"2021-09-06 12:20:20\"',
327
- 'Time is reported in the ISO 8601 format YYYY-MM-DD HR:MIN:SEC'],
377
+ '\"2021-09-06 12:20:20.000000\"',
378
+ 'Time is recorded with the format YYYY-MM-DD HR:MIN:SEC.MICROSEC for '
379
+ 'UTC'],
328
380
  'region': ['cloud region',
329
381
  '\"US Gov Boston\"',
330
382
  """Recommended naming methodology:
siliconcompiler/remote/client.py CHANGED
@@ -10,10 +10,12 @@ import tarfile
10
10
  import tempfile
11
11
  import multiprocessing
12
12
 
13
- from siliconcompiler import utils, SiliconCompilerError, NodeStatus
13
+ import os.path
14
+
15
+ from siliconcompiler import utils, SiliconCompilerError
14
16
  from siliconcompiler import NodeStatus as SCNodeStatus
15
17
  from siliconcompiler._metadata import default_server
16
- from siliconcompiler.remote import JobStatus
18
+ from siliconcompiler.remote import JobStatus, NodeStatus
17
19
  from siliconcompiler.report.dashboard import DashboardType
18
20
  from siliconcompiler.flowgraph import RuntimeFlowgraph
19
21
  from siliconcompiler.schema import JournalingSchema
@@ -310,19 +312,29 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
310
312
  try:
311
313
  # Decode response JSON, if possible.
312
314
  job_info = json.loads(info['message'])
313
- except json.JSONDecodeError as e:
314
- self.__logger.warning(f"Job is still running: {e}")
315
+ if "null" in job_info:
316
+ job_info[None] = job_info["null"]
317
+ del job_info["null"]
318
+ except json.JSONDecodeError:
319
+ self.__logger.warning(f"Job is still running: {info['message']}")
315
320
  return completed, starttimes, True
316
321
 
317
322
  nodes_to_log = {}
318
323
  for node, node_info in job_info.items():
319
324
  status = node_info['status']
320
- nodes_to_log.setdefault(status, []).append((node, node_info))
325
+
326
+ if status == NodeStatus.UPLOADED:
327
+ status = SCNodeStatus.PENDING
321
328
 
322
329
  if SCNodeStatus.is_done(status):
323
330
  # collect completed
324
331
  completed.append(node)
325
332
 
333
+ if not node:
334
+ continue
335
+
336
+ nodes_to_log.setdefault(status, []).append((node, node_info))
337
+
326
338
  if self.__node_information and node in self.__node_information:
327
339
  self.__chip.set('record', 'status', status,
328
340
  step=self.__node_information[node]["step"],
@@ -580,6 +592,22 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
580
592
  raise SiliconCompilerError('Job canceled by user keyboard interrupt')
581
593
 
582
594
  def __import_run_manifests(self, starttimes):
595
+ if not self.__setup_information_loaded:
596
+ if self.__setup_information_fetched:
597
+ manifest = os.path.join(self.__chip.getworkdir(), f'{self.__chip.design}.pkg.json')
598
+ if os.path.exists(manifest):
599
+ try:
600
+ JournalingSchema(self.__chip.schema).read_journal(manifest)
601
+ self.__setup_information_loaded = True
602
+ changed = True
603
+ except: # noqa E722
604
+ # Import may fail if file is still getting written
605
+ pass
606
+
607
+ if not self.__setup_information_loaded:
608
+ # Dont do anything until this has been loaded
609
+ return
610
+
583
611
  changed = False
584
612
  for _, node_info in self.__node_information.items():
585
613
  if node_info["imported"]:
@@ -599,7 +627,7 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
599
627
  pass
600
628
  elif self.__chip.get('record', 'status',
601
629
  step=node_info["step"], index=node_info["index"]) \
602
- == NodeStatus.SKIPPED:
630
+ == SCNodeStatus.SKIPPED:
603
631
  node_info["imported"] = True
604
632
  changed = True
605
633
 
@@ -618,6 +646,9 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
618
646
  check_info = self.__check()
619
647
  self.__check_interval = check_info['progress_interval']
620
648
 
649
+ self.__setup_information_fetched = False
650
+ self.__setup_information_loaded = False
651
+
621
652
  self.__node_information = {}
622
653
  runtime = RuntimeFlowgraph(
623
654
  self.__chip.schema.get("flowgraph", self.__chip.get('option', 'flow'), field='schema'),
@@ -666,6 +697,11 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
666
697
  # Update dashboard if active
667
698
  self.__chip._dash.update_manifest({"starttimes": starttimes})
668
699
 
700
+ if None in completed:
701
+ completed.remove(None)
702
+ if not self.__setup_information_fetched:
703
+ self.__schedule_fetch_result(None)
704
+
669
705
  nodes_to_fetch = []
670
706
  for node in completed:
671
707
  if not self.__node_information[node]["fetched"]:
@@ -681,7 +717,6 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
681
717
  for node, node_info in self.__node_information.items():
682
718
  if not node_info["fetched"]:
683
719
  self.__schedule_fetch_result(node)
684
- self.__schedule_fetch_result(node)
685
720
 
686
721
  self._finalize_loop()
687
722
 
@@ -700,11 +735,12 @@ service, provided by SiliconCompiler, is not intended to process proprietary IP.
700
735
  self.__import_run_manifests({})
701
736
 
702
737
  def __schedule_fetch_result(self, node):
703
- self.__node_information[node]["fetched"] = True
738
+ if node:
739
+ self.__node_information[node]["fetched"] = True
740
+ self.__logger.info(f' {node}')
741
+ else:
742
+ self.__setup_information_fetched = True
704
743
  self.__download_pool.apply_async(Client._fetch_result, (self, node))
705
- if node is None:
706
- node = 'final result'
707
- self.__logger.info(f' {node}')
708
744
 
709
745
  def _fetch_result(self, node):
710
746
  '''
siliconcompiler/remote/server.py CHANGED
@@ -1,6 +1,7 @@
1
1
  # Copyright 2020 Silicon Compiler Authors. All Rights Reserved.
2
2
 
3
3
  from aiohttp import web
4
+ import copy
4
5
  import threading
5
6
  import json
6
7
  import logging as log
@@ -12,7 +13,8 @@ import sys
12
13
  import fastjsonschema
13
14
  from pathlib import Path
14
15
  from fastjsonschema import JsonSchemaException
15
- import io
16
+
17
+ import os.path
16
18
 
17
19
  from siliconcompiler import Chip, Schema
18
20
  from siliconcompiler.schema import utils as schema_utils
@@ -20,8 +22,10 @@ from siliconcompiler._metadata import version as sc_version
20
22
  from siliconcompiler.schema import SCHEMA_VERSION as sc_schema_version
21
23
  from siliconcompiler.remote.schema import ServerSchema
22
24
  from siliconcompiler.remote import banner, JobStatus
23
- from siliconcompiler.scheduler.slurm import get_configuration_directory
25
+ from siliconcompiler import NodeStatus as SCNodeStatus
26
+ from siliconcompiler.remote import NodeStatus
24
27
  from siliconcompiler.flowgraph import RuntimeFlowgraph
28
+ from siliconcompiler.scheduler.taskscheduler import TaskScheduler
25
29
 
26
30
 
27
31
  # Compile validation code for API request bodies.
@@ -81,7 +85,61 @@ class Server:
81
85
  self.schema = ServerSchema()
82
86
 
83
87
  # Set up a dictionary to track running jobs.
88
+ self.sc_jobs_lock = threading.Lock()
84
89
  self.sc_jobs = {}
90
+ self.sc_chip_lookup = {}
91
+
92
+ # Register callbacks
93
+ TaskScheduler.register_callback("pre_run", self.__run_start)
94
+ TaskScheduler.register_callback("pre_node", self.__node_start)
95
+ TaskScheduler.register_callback("post_node", self.__node_end)
96
+
97
+ def __run_start(self, chip):
98
+ flow = chip.get("option", "flow")
99
+ nodes = chip.schema.get("flowgraph", flow, field="schema").get_nodes()
100
+
101
+ with self.sc_jobs_lock:
102
+ job_hash = self.sc_chip_lookup[chip]["jobhash"]
103
+
104
+ start_tar = os.path.join(self.nfs_mount, job_hash, f'{job_hash}_None.tar.gz')
105
+ start_status = NodeStatus.SUCCESS
106
+ with tarfile.open(start_tar, "w:gz") as tf:
107
+ start_manifest = os.path.join(chip.getworkdir(), f"{chip.design}.pkg.json")
108
+ tf.add(start_manifest, arcname=os.path.relpath(start_manifest, self.nfs_mount))
109
+
110
+ with self.sc_jobs_lock:
111
+ job_name = self.sc_chip_lookup[chip]["name"]
112
+
113
+ self.sc_jobs[job_name][None]["status"] = start_status
114
+
115
+ for step, index in nodes:
116
+ name = f"{step}{index}"
117
+ if name not in self.sc_jobs[job_name]:
118
+ continue
119
+ self.sc_jobs[job_name][name]["status"] = \
120
+ chip.get('record', 'status', step=step, index=index)
121
+
122
+ def __node_start(self, chip, step, index):
123
+ with self.sc_jobs_lock:
124
+ job_name = self.sc_chip_lookup[chip]["name"]
125
+ self.sc_jobs[job_name][f"{step}{index}"]["status"] = NodeStatus.RUNNING
126
+
127
+ def __node_end(self, chip, step, index):
128
+ with self.sc_jobs_lock:
129
+ job_hash = self.sc_chip_lookup[chip]["jobhash"]
130
+ job_name = self.sc_chip_lookup[chip]["name"]
131
+
132
+ chip = copy.deepcopy(chip)
133
+ chip.cwd = os.path.join(chip.get('option', 'builddir'), '..')
134
+ with tarfile.open(os.path.join(self.nfs_mount,
135
+ job_hash,
136
+ f'{job_hash}_{step}{index}.tar.gz'),
137
+ mode='w:gz') as tf:
138
+ chip._archive_node(tf, step=step, index=index, include="*")
139
+
140
+ with self.sc_jobs_lock:
141
+ self.sc_jobs[job_name][f"{step}{index}"]["status"] = \
142
+ chip.get('record', 'status', step=step, index=index)
85
143
 
86
144
  def run(self):
87
145
  if not os.path.exists(self.nfs_mount):
@@ -226,9 +284,6 @@ class Server:
226
284
  # Remove 'remote' JSON config value to run locally on compute node.
227
285
  chip.set('option', 'remote', False)
228
286
 
229
- # Write JSON config to shared compute storage.
230
- os.makedirs(os.path.join(job_root, 'configs'), exist_ok=True)
231
-
232
287
  # Run the job with the configured clustering option. (Non-blocking)
233
288
  job_proc = threading.Thread(target=self.remote_sc,
234
289
  args=[
@@ -258,31 +313,13 @@ class Server:
258
313
  job_hash = job_params['job_hash']
259
314
  node = job_params['node'] if 'node' in job_params else None
260
315
 
261
- resp = web.StreamResponse(
262
- status=200,
263
- reason='OK',
264
- headers={
265
- 'Content-Type': 'application/x-tar',
266
- 'Content-Disposition': f'attachment; filename="{job_hash}_{node}.tar.gz"'
267
- },
268
- )
269
- await resp.prepare(request)
270
-
271
316
  zipfn = os.path.join(self.nfs_mount, job_hash, f'{job_hash}_{node}.tar.gz')
272
- if not node:
273
- with tarfile.open(zipfn, 'w:gz') as tar:
274
- text = "Done"
275
- metadata_file = io.BytesIO(text.encode('ascii'))
276
- tarinfo = tarfile.TarInfo(f'{job_hash}/done')
277
- tarinfo.size = metadata_file.getbuffer().nbytes
278
- tar.addfile(tarinfo=tarinfo, fileobj=metadata_file)
317
+ if not os.path.exists(zipfn):
318
+ return web.json_response(
319
+ {'message': 'Could not find results for the requested job/node.'},
320
+ status=404)
279
321
 
280
- with open(zipfn, 'rb') as zipf:
281
- await resp.write(zipf.read())
282
-
283
- await resp.write_eof()
284
-
285
- return resp
322
+ return web.FileResponse(zipfn)
286
323
 
287
324
  ####################
288
325
  async def handle_delete_job(self, request):
@@ -300,9 +337,10 @@ class Server:
300
337
  job_hash = job_params['job_hash']
301
338
 
302
339
  # Determine if the job is running.
303
- for job in self.sc_jobs:
304
- if job_hash in job:
305
- return self.__response("Error: job is still running.", status=400)
340
+ with self.sc_jobs_lock:
341
+ for job in self.sc_jobs:
342
+ if job_hash in job:
343
+ return self.__response("Error: job is still running.", status=400)
306
344
 
307
345
  # Delete job hash directory, only if it exists.
308
346
  # TODO: This assumes no malicious input.
@@ -342,16 +380,17 @@ class Server:
342
380
 
343
381
  # Determine if the job is running.
344
382
  # TODO: Return information about individual flowgraph nodes.
345
- if jobname in self.sc_jobs:
346
- resp = {
347
- 'status': JobStatus.RUNNING,
348
- 'message': 'Job is currently running on the server.',
349
- }
350
- else:
351
- resp = {
352
- 'status': JobStatus.COMPLETED,
353
- 'message': 'Job has no running steps.',
354
- }
383
+ with self.sc_jobs_lock:
384
+ if jobname in self.sc_jobs:
385
+ resp = {
386
+ 'status': JobStatus.RUNNING,
387
+ 'message': self.sc_jobs[jobname],
388
+ }
389
+ else:
390
+ resp = {
391
+ 'status': JobStatus.COMPLETED,
392
+ 'message': 'Job has no running steps.',
393
+ }
355
394
  return web.json_response(resp)
356
395
 
357
396
  ####################
@@ -402,18 +441,39 @@ class Server:
402
441
  # Assemble core job parameters.
403
442
  job_hash = chip.get('record', 'remoteid')
404
443
 
444
+ runtime = RuntimeFlowgraph(
445
+ chip.schema.get("flowgraph", chip.get('option', 'flow'), field='schema'),
446
+ from_steps=chip.get('option', 'from'),
447
+ to_steps=chip.get('option', 'to'),
448
+ prune_nodes=chip.get('option', 'prune'))
449
+
450
+ nodes = {}
451
+ nodes[None] = {
452
+ "status": SCNodeStatus.PENDING
453
+ }
454
+ for step, index in runtime.get_nodes():
455
+ status = chip.get('record', 'status', step=step, index=index)
456
+ if not status:
457
+ status = SCNodeStatus.PENDING
458
+ if SCNodeStatus.is_done(status):
459
+ status = NodeStatus.UPLOADED
460
+ nodes[f"{step}{index}"] = {
461
+ "status": status
462
+ }
463
+
405
464
  # Mark the job run as busy.
406
465
  sc_job_name = self.job_name(username, job_hash)
407
- self.sc_jobs[sc_job_name] = 'busy'
466
+ with self.sc_jobs_lock:
467
+ self.sc_chip_lookup[chip] = {
468
+ "name": sc_job_name,
469
+ "jobhash": job_hash
470
+ }
471
+ self.sc_jobs[sc_job_name] = nodes
408
472
 
409
473
  build_dir = os.path.join(self.nfs_mount, job_hash)
410
474
  chip.set('option', 'builddir', build_dir)
411
475
  chip.set('option', 'remote', False)
412
476
 
413
- job_cfg_dir = get_configuration_directory(chip)
414
- os.makedirs(job_cfg_dir, exist_ok=True)
415
- chip.write_manifest(f"{job_cfg_dir}/chip{chip.get('option', 'jobname')}.json")
416
-
417
477
  if self.get('option', 'cluster') == 'slurm':
418
478
  # Run the job with slurm clustering.
419
479
  chip.set('option', 'scheduler', 'name', 'slurm')
@@ -421,25 +481,10 @@ class Server:
421
481
  # Run the job.
422
482
  chip.run()
423
483
 
424
- # Archive each task.
425
- runtime = RuntimeFlowgraph(
426
- chip.schema.get("flowgraph", chip.get('option', 'flow'), field='schema'),
427
- from_steps=chip.get('option', 'from'),
428
- to_steps=chip.get('option', 'to'),
429
- prune_nodes=chip.get('option', 'prune'))
430
- for (step, index) in runtime.get_nodes():
431
- chip.cwd = os.path.join(chip.get('option', 'builddir'), '..')
432
- tf = tarfile.open(os.path.join(self.nfs_mount,
433
- job_hash,
434
- f'{job_hash}_{step}{index}.tar.gz'),
435
- mode='w:gz')
436
- chip._archive_node(tf, step=step, index=index)
437
- tf.close()
438
-
439
- # (Email notifications can be sent here using your preferred API)
440
-
441
484
  # Mark the job hash as being done.
442
- self.sc_jobs.pop(sc_job_name)
485
+ with self.sc_jobs_lock:
486
+ self.sc_jobs.pop(sc_job_name)
487
+ self.sc_chip_lookup.pop(chip)
443
488
 
444
489
  ####################
445
490
  def __auth_password(self, username, password):
siliconcompiler/report/dashboard/cli/board.py CHANGED
@@ -751,7 +751,6 @@ class Board(metaclass=BoardSingleton):
751
751
 
752
752
  runtime_flow = RuntimeFlowgraph(
753
753
  chip.schema.get("flowgraph", flow, field='schema'),
754
- args=(chip.get('arg', 'step'), chip.get('arg', 'index')),
755
754
  to_steps=chip.get('option', 'to'),
756
755
  prune_nodes=chip.get('option', 'prune'))
757
756
  record = chip.schema.get("record", field='schema')
siliconcompiler/scheduler/__init__.py CHANGED
@@ -152,6 +152,9 @@ def _local_process(chip, flow):
152
152
  from_nodes = []
153
153
  extra_setup_nodes = {}
154
154
 
155
+ chip.schema = JournalingSchema(chip.schema)
156
+ chip.schema.start_journal()
157
+
155
158
  if chip.get('option', 'clean') or not chip.get('option', 'from'):
156
159
  load_nodes = list(chip.schema.get("flowgraph", flow, field="schema").get_nodes())
157
160
  else:
@@ -236,8 +239,7 @@ def _local_process(chip, flow):
236
239
  mark_pending(step, index)
237
240
  elif (step, index) in extra_setup_nodes:
238
241
  # import old information
239
- JournalingSchema(chip.schema).import_journal(
240
- schema=extra_setup_nodes[(step, index)])
242
+ chip.schema.import_journal(schema=extra_setup_nodes[(step, index)])
241
243
 
242
244
  # Ensure pending nodes cause following nodes to be run
243
245
  for step, index in nodes:
@@ -250,6 +252,10 @@ def _local_process(chip, flow):
250
252
  if chip.get('record', 'status', step=step, index=index) == NodeStatus.PENDING:
251
253
  clean_node_dir(chip, step, index)
252
254
 
255
+ chip.write_manifest(os.path.join(chip.getworkdir(), f"{chip.get('design')}.pkg.json"))
256
+ chip.schema.stop_journal()
257
+ chip.schema = chip.schema.get_base_schema()
258
+
253
259
  # Check validity of setup
254
260
  chip.logger.info("Checking manifest before running.")
255
261
  check_ok = chip.check_manifest()
@@ -604,9 +610,10 @@ def _executenode(chip, step, index, replay):
604
610
  send_messages.send(chip, "begin", step, index)
605
611
 
606
612
  try:
607
- task_class.generate_replay_script(
608
- os.path.join(workdir, "replay.sh"),
609
- workdir)
613
+ if not replay:
614
+ task_class.generate_replay_script(
615
+ os.path.join(workdir, "replay.sh"),
616
+ workdir)
610
617
  ret_code = task_class.run_task(
611
618
  workdir,
612
619
  chip.get('option', 'quiet', step=step, index=index),