experimaestro 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (79)
  1. experimaestro/__init__.py +3 -1
  2. experimaestro/annotations.py +13 -3
  3. experimaestro/cli/filter.py +3 -3
  4. experimaestro/cli/jobs.py +1 -1
  5. experimaestro/commandline.py +3 -7
  6. experimaestro/connectors/__init__.py +22 -10
  7. experimaestro/connectors/local.py +17 -8
  8. experimaestro/connectors/ssh.py +1 -1
  9. experimaestro/core/arguments.py +26 -3
  10. experimaestro/core/objects.py +90 -6
  11. experimaestro/core/objects.pyi +7 -1
  12. experimaestro/core/types.py +33 -2
  13. experimaestro/experiments/cli.py +7 -3
  14. experimaestro/generators.py +6 -1
  15. experimaestro/ipc.py +4 -1
  16. experimaestro/launcherfinder/registry.py +23 -5
  17. experimaestro/launchers/slurm/base.py +47 -9
  18. experimaestro/notifications.py +1 -1
  19. experimaestro/run.py +1 -1
  20. experimaestro/scheduler/base.py +98 -10
  21. experimaestro/scheduler/dynamic_outputs.py +184 -0
  22. experimaestro/scriptbuilder.py +3 -1
  23. experimaestro/server/data/016b4a6cdced82ab3aa1.ttf +0 -0
  24. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  25. experimaestro/server/data/1815e00441357e01619e.ttf +0 -0
  26. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  27. experimaestro/server/data/2463b90d9a316e4e5294.woff2 +0 -0
  28. experimaestro/server/data/2582b0e4bcf85eceead0.ttf +0 -0
  29. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  30. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  31. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  32. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  33. experimaestro/server/data/50701fbb8177c2dde530.ttf +0 -0
  34. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  35. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  36. experimaestro/server/data/878f31251d960bd6266f.woff2 +0 -0
  37. experimaestro/server/data/89999bdf5d835c012025.woff2 +0 -0
  38. experimaestro/server/data/914997e1bdfc990d0897.ttf +0 -0
  39. experimaestro/server/data/b041b1fa4fe241b23445.woff2 +0 -0
  40. experimaestro/server/data/b6879d41b0852f01ed5b.woff2 +0 -0
  41. experimaestro/server/data/c210719e60948b211a12.woff2 +0 -0
  42. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  43. experimaestro/server/data/d75e3fd1eb12e9bd6655.ttf +0 -0
  44. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  45. experimaestro/server/data/favicon.ico +0 -0
  46. experimaestro/server/data/index.css +22963 -0
  47. experimaestro/server/data/index.css.map +1 -0
  48. experimaestro/server/data/index.html +27 -0
  49. experimaestro/server/data/index.js +101770 -0
  50. experimaestro/server/data/index.js.map +1 -0
  51. experimaestro/server/data/login.html +22 -0
  52. experimaestro/server/data/manifest.json +15 -0
  53. experimaestro/sphinx/__init__.py +7 -17
  54. experimaestro/taskglobals.py +7 -2
  55. experimaestro/tests/definitions_types.py +5 -3
  56. experimaestro/tests/launchers/bin/sbatch +34 -7
  57. experimaestro/tests/launchers/bin/srun +5 -0
  58. experimaestro/tests/launchers/common.py +16 -4
  59. experimaestro/tests/restart.py +6 -3
  60. experimaestro/tests/tasks/all.py +16 -10
  61. experimaestro/tests/tasks/foreign.py +2 -4
  62. experimaestro/tests/test_forward.py +5 -5
  63. experimaestro/tests/test_identifier.py +61 -66
  64. experimaestro/tests/test_instance.py +3 -6
  65. experimaestro/tests/test_param.py +40 -22
  66. experimaestro/tests/test_tags.py +5 -11
  67. experimaestro/tests/test_tokens.py +3 -2
  68. experimaestro/tests/test_types.py +17 -14
  69. experimaestro/tests/test_validation.py +48 -91
  70. experimaestro/tokens.py +16 -5
  71. experimaestro/typingutils.py +7 -0
  72. experimaestro/utils/asyncio.py +6 -2
  73. experimaestro/utils/resources.py +7 -3
  74. {experimaestro-1.6.2.dist-info → experimaestro-1.7.0.dist-info}/METADATA +3 -4
  75. experimaestro-1.7.0.dist-info/RECORD +154 -0
  76. {experimaestro-1.6.2.dist-info → experimaestro-1.7.0.dist-info}/WHEEL +1 -1
  77. experimaestro-1.6.2.dist-info/RECORD +0 -122
  78. {experimaestro-1.6.2.dist-info → experimaestro-1.7.0.dist-info}/LICENSE +0 -0
  79. {experimaestro-1.6.2.dist-info → experimaestro-1.7.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  # Configuration registers
2
2
 
3
+ from contextlib import contextmanager
3
4
  from typing import ClassVar, Dict, Optional, Set, Type, Union
4
5
 
5
6
  from pathlib import Path
@@ -7,7 +8,6 @@ import typing
7
8
  from omegaconf import DictConfig, OmegaConf, SCMode
8
9
  import pkg_resources
9
10
  from experimaestro.utils import logger
10
-
11
11
  from .base import ConnectorConfiguration, TokenConfiguration
12
12
  from .specs import HostRequirement
13
13
 
@@ -36,6 +36,16 @@ def load_yaml(schema, path: Path):
36
36
  )
37
37
 
38
38
 
39
+ @contextmanager
40
+ def ensure_enter(fp):
41
+ """Behaves as a resource, whether it is one or not"""
42
+ if hasattr(fp, "__enter__"):
43
+ with fp as _fp:
44
+ yield _fp
45
+ else:
46
+ yield fp
47
+
48
+
39
49
  class LauncherRegistry:
40
50
  INSTANCES: ClassVar[Dict[Path, "LauncherRegistry"]] = {}
41
51
  CURRENT_CONFIG_DIR: ClassVar[Optional[Path]] = None
@@ -78,13 +88,16 @@ class LauncherRegistry:
78
88
 
79
89
  from importlib import util
80
90
 
81
- spec = util.spec_from_file_location("xpm_launchers_conf", launchers_py)
82
- module = util.module_from_spec(spec)
83
- spec.loader.exec_module(module)
91
+ with ensure_enter(launchers_py.__fspath__()) as fp:
92
+ spec = util.spec_from_file_location("xpm_launchers_conf", fp)
93
+ module = util.module_from_spec(spec)
94
+ spec.loader.exec_module(module)
84
95
 
85
96
  self.find_launcher_fn = getattr(module, "find_launcher", None)
86
97
  if self.find_launcher_fn is None:
87
- logger.warn("No find_launcher() function was found in %s", launchers_py)
98
+ logger.warning(
99
+ "No find_launcher() function was found in %s", launchers_py
100
+ )
88
101
 
89
102
  # Read the configuration file
90
103
  self.connectors = load_yaml(
@@ -144,9 +157,14 @@ class LauncherRegistry:
144
157
  specs.append(spec)
145
158
 
146
159
  # Use launcher function
160
+ from experimaestro.launchers import Launcher
161
+
147
162
  if self.find_launcher_fn is not None:
148
163
  for spec in specs:
149
164
  if launcher := self.find_launcher_fn(spec, tags):
165
+ assert isinstance(
166
+ launcher, Launcher
167
+ ), "f{self.find_launcher_fn} did not return a Launcher but {type(launcher)}"
150
168
  return launcher
151
169
 
152
170
  return None
@@ -11,6 +11,7 @@ from typing import (
11
11
  )
12
12
  from experimaestro.connectors.local import LocalConnector
13
13
  import re
14
+ from shlex import quote as shquote
14
15
  from contextlib import contextmanager
15
16
  from dataclasses import dataclass
16
17
  from experimaestro.launcherfinder.registry import (
@@ -235,15 +236,15 @@ class SlurmProcessBuilder(ProcessBuilder):
235
236
  super().__init__()
236
237
  self.launcher = launcher
237
238
 
238
- def start(self) -> BatchSlurmProcess:
239
+ def start(self, task_mode: bool = False) -> BatchSlurmProcess:
239
240
  """Start the process"""
240
241
  builder = self.launcher.connector.processbuilder()
241
- builder.workingDirectory = self.workingDirectory
242
242
  builder.environ = self.launcher.launcherenv
243
243
  builder.detach = False
244
244
 
245
245
  if not self.detach:
246
246
  # Simplest case: we wait for the output
247
+ builder.workingDirectory = self.workingDirectory
247
248
  builder.command = [f"{self.launcher.binpath}/srun"]
248
249
  builder.command.extend(self.launcher.options.args())
249
250
  builder.command.extend(self.command)
@@ -255,11 +256,17 @@ class SlurmProcessBuilder(ProcessBuilder):
255
256
  return builder.start()
256
257
 
257
258
  builder.command = [f"{self.launcher.binpath}/sbatch", "--parsable"]
258
- builder.command.extend(self.launcher.options.args())
259
259
 
260
- addstream(builder.command, "-e", self.stderr)
261
- addstream(builder.command, "-o", self.stdout)
262
- addstream(builder.command, "-i", self.stdin)
260
+ if not task_mode:
261
+ # Use command line parameters when not running a task
262
+ builder.command.extend(self.launcher.options.args())
263
+
264
+ if self.workingDirectory:
265
+ workdir = self.launcher.connector.resolve(self.workingDirectory)
266
+ builder.command.append(f"--chdir={workdir}")
267
+ addstream(builder.command, "-e", self.stderr)
268
+ addstream(builder.command, "-o", self.stdout)
269
+ addstream(builder.command, "-i", self.stdin)
263
270
 
264
271
  builder.command.extend(self.command)
265
272
  logger.info(
@@ -427,12 +434,43 @@ class SlurmLauncher(Launcher):
427
434
 
428
435
  We assume *nix, but should be changed to PythonScriptBuilder when working
429
436
  """
430
- builder = PythonScriptBuilder()
431
- builder.processtype = "slurm"
432
- return builder
437
+ return SlurmScriptBuilder(self)
433
438
 
434
439
  def processbuilder(self) -> SlurmProcessBuilder:
435
440
  """Returns the process builder for this launcher
436
441
 
437
442
  By default, returns the associated connector builder"""
438
443
  return SlurmProcessBuilder(self)
444
+
445
+
446
+ class SlurmScriptBuilder(PythonScriptBuilder):
447
+ def __init__(self, launcher: SlurmLauncher, pythonpath=None):
448
+ super().__init__(pythonpath)
449
+ self.launcher = launcher
450
+ self.processtype = "slurm"
451
+
452
+ def write(self, job):
453
+ py_path = super().write(job)
454
+ main_path = py_path.parent
455
+
456
+ def relpath(path: Path):
457
+ return shquote(self.launcher.connector.resolve(path, main_path))
458
+
459
+ # Writes the sbatch shell script containing all the options
460
+ sh_path = job.jobpath / ("%s.sh" % job.name)
461
+ with sh_path.open("wt") as out:
462
+ out.write("""#!/bin/sh\n\n""")
463
+
464
+ workdir = self.launcher.connector.resolve(main_path)
465
+ out.write(f"#SBATCH --chdir={shquote(workdir)}\n")
466
+ out.write(f"""#SBATCH --error={relpath(job.stderr)}\n""")
467
+ out.write(f"""#SBATCH --output={relpath(job.stdout)}\n""")
468
+
469
+ for arg in self.launcher.options.args():
470
+ out.write(f"""#SBATCH {arg}\n""")
471
+
472
+ # We finish by the call to srun
473
+ out.write(f"""\nsrun ./{relpath(py_path)}\n\n""")
474
+
475
+ self.launcher.connector.setExecutable(sh_path, True)
476
+ return sh_path
@@ -78,7 +78,6 @@ class Reporter(threading.Thread):
78
78
 
79
79
  self.progress_threshold = 0.01
80
80
  self.cv = threading.Condition()
81
- self.start()
82
81
 
83
82
  def stop(self):
84
83
  self.stopping = True
@@ -222,6 +221,7 @@ class Reporter(threading.Thread):
222
221
  taskpath = TaskEnv.instance().taskpath
223
222
  assert taskpath is not None, "Task path is not defined"
224
223
  Reporter.INSTANCE = Reporter(taskpath)
224
+ Reporter.INSTANCE.start()
225
225
  return Reporter.INSTANCE
226
226
 
227
227
 
experimaestro/run.py CHANGED
@@ -140,10 +140,10 @@ class TaskRunner:
140
140
  run(workdir / "params.json")
141
141
 
142
142
  # ... remove the handlers
143
- logger.info("Task ended successfully")
144
143
  remove_signal_handlers(remove_cleanup=False)
145
144
 
146
145
  # Everything went OK
146
+ logger.info("Task ended successfully")
147
147
  sys.exit(0)
148
148
  except Exception:
149
149
  logger.exception("Got exception while running")
@@ -7,7 +7,16 @@ from pathlib import Path
7
7
  from shutil import rmtree
8
8
  import threading
9
9
  import time
10
- from typing import Any, Iterator, List, Optional, Set, TypeVar, Union, TYPE_CHECKING
10
+ from typing import (
11
+ Any,
12
+ Iterator,
13
+ List,
14
+ Optional,
15
+ Set,
16
+ TypeVar,
17
+ Union,
18
+ TYPE_CHECKING,
19
+ )
11
20
  import enum
12
21
  import signal
13
22
  import asyncio
@@ -18,9 +27,10 @@ from experimaestro.scheduler.services import Service
18
27
  from experimaestro.settings import WorkspaceSettings, get_settings
19
28
 
20
29
 
21
- from experimaestro.core.objects import Config, ConfigWalkContext
30
+ from experimaestro.core.objects import Config, ConfigWalkContext, WatchedOutput
22
31
  from experimaestro.utils import logger
23
32
  from experimaestro.locking import Locks, LockError, Lock
33
+ from experimaestro.utils.asyncio import asyncThreadcheck
24
34
  from .workspace import RunMode, Workspace
25
35
  from .dependencies import Dependency, DependencyStatus, Resource
26
36
  import concurrent.futures
@@ -111,7 +121,7 @@ class JobDependency(Dependency):
111
121
 
112
122
 
113
123
  class Job(Resource):
114
- """A job is a resouce that is produced by the execution of some code"""
124
+ """A job is a resource that is produced by the execution of some code"""
115
125
 
116
126
  # Set by the scheduler
117
127
  _readyEvent: Optional[asyncio.Event]
@@ -149,6 +159,11 @@ class Job(Resource):
149
159
  # Dependencies
150
160
  self.dependencies: Set[Dependency] = set() # as target
151
161
 
162
+ # Watched outputs
163
+ self.watched_outputs = {}
164
+ for watched in config.__xpm__.watched_outputs:
165
+ self.watch_output(watched)
166
+
152
167
  # Process
153
168
  self._process = None
154
169
  self.unsatisfied = 0
@@ -160,6 +175,23 @@ class Job(Resource):
160
175
  self._progress: List[LevelInformation] = []
161
176
  self.tags = config.tags()
162
177
 
178
+ def watch_output(self, watched: "WatchedOutput"):
179
+ """Monitor task outputs
180
+
181
+ :param watched: A description of the watched output
182
+ """
183
+ self.scheduler.xp.watch_output(watched)
184
+
185
+ def task_output_update(self, subpath: Path):
186
+ """Notification of an updated task output"""
187
+ if watcher := self.watched_outputs.get(subpath, None):
188
+ watcher.update()
189
+
190
+ def done_handler(self):
191
+ """The task has been completed"""
192
+ for watcher in self.watched_outputs.values():
193
+ watcher.update()
194
+
163
195
  def __str__(self):
164
196
  return "Job[{}]".format(self.identifier)
165
197
 
@@ -170,10 +202,8 @@ class Job(Resource):
170
202
  @cached_property
171
203
  def python_path(self) -> Iterator[str]:
172
204
  """Returns an iterator over python path"""
173
- return itertools.chain(
174
- self.workspace.python_path
175
- )
176
-
205
+ return itertools.chain(self.workspace.python_path)
206
+
177
207
  @cached_property
178
208
  def environ(self):
179
209
  """Returns the job environment
@@ -227,7 +257,7 @@ class Job(Resource):
227
257
  return self.state == JobState.READY
228
258
 
229
259
  @property
230
- def jobpath(self):
260
+ def jobpath(self) -> Path:
231
261
  """Deprecated, use `path`"""
232
262
  return self.workspace.jobspath / self.relpath
233
263
 
@@ -235,6 +265,14 @@ class Job(Resource):
235
265
  def path(self) -> Path:
236
266
  return self.workspace.jobspath / self.relpath
237
267
 
268
+ @property
269
+ def experimaestro_path(self) -> Path:
270
+ return (self.path / ".experimaestro").resolve()
271
+
272
+ @cached_property
273
+ def task_outputs_path(self) -> Path:
274
+ return self.experimaestro_path / "task-outputs.jsonl"
275
+
238
276
  @property
239
277
  def relpath(self):
240
278
  identifier = self.config.__xpm__.identifier
@@ -444,7 +482,7 @@ class Scheduler:
444
482
  self.jobs: Dict[str, "Job"] = {}
445
483
 
446
484
  # List of jobs
447
- self.waitingjobs = set()
485
+ self.waitingjobs: Set[Job] = set()
448
486
 
449
487
  # Listeners
450
488
  self.listeners: Set[Listener] = set()
@@ -467,10 +505,12 @@ class Scheduler:
467
505
 
468
506
  def submit(self, job: Job) -> Optional[Job]:
469
507
  # Wait for the future containing the submitted job
508
+ logger.debug("Registering the job %s within the scheduler", job)
470
509
  otherFuture = asyncio.run_coroutine_threadsafe(
471
510
  self.aio_registerJob(job), self.loop
472
511
  )
473
512
  other = otherFuture.result()
513
+ logger.debug("Job already submitted" if other else "First submission")
474
514
  if other:
475
515
  return other
476
516
 
@@ -606,9 +646,13 @@ class Scheduler:
606
646
  if job.state != JobState.DONE:
607
647
  self.xp.failedJobs[job.identifier] = job
608
648
 
649
+ # Process all remaining tasks outputs
650
+ await asyncThreadcheck("End of job processing", job.done_handler)
651
+
609
652
  # Decrement the number of unfinished jobs and notify
610
653
  self.xp.unfinishedJobs -= 1
611
654
  async with self.xp.central.exitCondition:
655
+ logging.debug("Updated number of unfinished jobs")
612
656
  self.xp.central.exitCondition.notify_all()
613
657
 
614
658
  job.endtime = time.time()
@@ -696,6 +740,7 @@ class Scheduler:
696
740
  code = await process.aio_code()
697
741
  logger.debug("Got return code %s for %s", code, job)
698
742
 
743
+ # Check the file if there is no return code
699
744
  if code is None:
700
745
  # Case where we cannot retrieve the code right away
701
746
  if job.donepath.is_file():
@@ -861,6 +906,7 @@ class experiment:
861
906
  assert self.central is not None
862
907
  async with self.central.exitCondition:
863
908
  self.exitMode = True
909
+ logging.debug("Setting exit mode to true")
864
910
  self.central.exitCondition.notify_all()
865
911
 
866
912
  assert self.central is not None and self.central.loop is not None
@@ -871,10 +917,22 @@ class experiment:
871
917
 
872
918
  async def awaitcompletion():
873
919
  assert self.central is not None
920
+ logger.debug("Waiting to exit scheduler...")
874
921
  async with self.central.exitCondition:
875
922
  while True:
876
- if self.unfinishedJobs == 0 or self.exitMode:
923
+ if self.exitMode:
877
924
  break
925
+
926
+ # If we have still unfinished jobs or possible new tasks, wait
927
+ logger.debug(
928
+ "Checking exit condition: unfinished jobs=%d, task output queue size=%d",
929
+ self.unfinishedJobs,
930
+ self.taskOutputQueueSize,
931
+ )
932
+ if self.unfinishedJobs == 0 and self.taskOutputQueueSize == 0:
933
+ break
934
+
935
+ # Wait for more news...
878
936
  await self.central.exitCondition.wait()
879
937
 
880
938
  if self.failedJobs:
@@ -906,6 +964,8 @@ class experiment:
906
964
  return self.workspace.connector.createtoken(name, count)
907
965
 
908
966
  def __enter__(self):
967
+ from .dynamic_outputs import TaskOutputsWorker
968
+
909
969
  if self.workspace.run_mode != RunMode.DRY_RUN:
910
970
  logger.info("Locking experiment %s", self.xplockpath)
911
971
  self.xplock = self.workspace.connector.lock(self.xplockpath, 0).__enter__()
@@ -934,6 +994,7 @@ class experiment:
934
994
  global SIGNAL_HANDLER
935
995
  # Number of unfinished jobs
936
996
  self.unfinishedJobs = 0
997
+ self.taskOutputQueueSize = 0
937
998
 
938
999
  # List of failed jobs
939
1000
  self.failedJobs: Dict[str, Job] = {}
@@ -942,6 +1003,8 @@ class experiment:
942
1003
  self.exitMode = False
943
1004
 
944
1005
  self.central = SchedulerCentral.create(self.scheduler.name)
1006
+ self.taskOutputsWorker = TaskOutputsWorker(self)
1007
+ self.taskOutputsWorker.start()
945
1008
 
946
1009
  SIGNAL_HANDLER.add(self)
947
1010
 
@@ -950,6 +1013,7 @@ class experiment:
950
1013
  return self
951
1014
 
952
1015
  def __exit__(self, exc_type, exc_value, traceback):
1016
+ logger.debug("Exiting scheduler context")
953
1017
  # If no exception and normal run mode, remove old "jobs"
954
1018
  if self.workspace.run_mode == RunMode.NORMAL:
955
1019
  if exc_type is None and self.jobsbakpath.is_dir():
@@ -975,8 +1039,13 @@ class experiment:
975
1039
  service.stop()
976
1040
 
977
1041
  if self.central is not None:
1042
+ logger.info("Stopping scheduler event loop")
978
1043
  self.central.loop.stop()
979
1044
 
1045
+ if self.taskOutputsWorker is not None:
1046
+ logger.info("Stopping tasks outputs worker")
1047
+ self.taskOutputsWorker.queue.put(None)
1048
+
980
1049
  self.central = None
981
1050
  self.workspace.__exit__(exc_type, exc_value, traceback)
982
1051
  if self.xplock:
@@ -985,8 +1054,27 @@ class experiment:
985
1054
  # Put back old experiment as current one
986
1055
  experiment.CURRENT = self.old_experiment
987
1056
  if self.server:
1057
+ logger.info("Stopping web server")
988
1058
  self.server.stop()
989
1059
 
1060
+ async def update_task_output_count(self, delta: int):
1061
+ """Change in the number of task outputs to process"""
1062
+ async with self.central.exitCondition:
1063
+ self.taskOutputQueueSize += delta
1064
+ logging.debug(
1065
+ "Updating queue size with %d => %d", delta, self.taskOutputQueueSize
1066
+ )
1067
+ if self.taskOutputQueueSize == 0:
1068
+ self.central.exitCondition.notify_all()
1069
+
1070
+ def watch_output(self, watched: "WatchedOutput"):
1071
+ """Watch an output
1072
+
1073
+ :param watched: The watched output specification
1074
+ """
1075
+
1076
+ self.taskOutputsWorker.watch_output(watched)
1077
+
990
1078
  def add_service(self, service: ServiceClass) -> ServiceClass:
991
1079
  """Adds a service (e.g. tensorboard viewer) to the experiment
992
1080
 
@@ -0,0 +1,184 @@
1
+ """Handles dynamic task outputs"""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import queue
7
+ import threading
8
+ from collections import defaultdict
9
+ from functools import cached_property
10
+ from pathlib import Path
11
+ from typing import Callable, TYPE_CHECKING
12
+
13
+ from watchdog.events import FileSystemEventHandler
14
+
15
+ from experimaestro.ipc import ipcom
16
+ from experimaestro.utils import logger
17
+
18
+ from .base import Job, experiment
19
+
20
+ if TYPE_CHECKING:
21
+ from experimaestro.core.objects import WatchedOutput
22
+
23
+
24
+ class TaskOutputCallbackHandler:
25
+ def __init__(self, converter: Callable):
26
+ pass
27
+
28
+
29
+ class TaskOutputs(FileSystemEventHandler):
30
+ """Represent and monitors dynamic outputs generated by one task"""
31
+
32
+ #: Global dictionary for handles
33
+ HANDLERS: dict[Path, "TaskOutputs"] = {}
34
+
35
+ #: Global lock to access current HANDLERS
36
+ LOCK = threading.Lock()
37
+
38
+ def create(job: Job):
39
+ with TaskOutputs.LOCK:
40
+ if instance := TaskOutputs.get(job.task_outputs_path, None):
41
+ return instance
42
+
43
+ instance = TaskOutputs(job.task_outputs_path)
44
+ TaskOutputs[job.task_outputs_path] = instance
45
+ return instance
46
+
47
+ def __init__(self, path: Path):
48
+ """Monitors an event path"""
49
+ logger.debug("Watching dynamic task outputs in %s", path)
50
+ self.path = path
51
+ self.handle = None
52
+ self.count = 0
53
+ self.lock = threading.Lock()
54
+ self.listeners: dict[str, dict[Callable, set[Callable]]] = defaultdict(
55
+ lambda: defaultdict(set)
56
+ )
57
+
58
+ #: The events registered so far
59
+ self.events = []
60
+
61
+ def __enter__(self):
62
+ """Starts monitoring task outputs"""
63
+ self.job.task_outputs_path.parent.mkdir(parents=True, exist_ok=True)
64
+ with self.lock:
65
+ if self.handle is None:
66
+ assert self.count == 0
67
+ self.handle = ipcom().fswatch(self, self.path.parent, False)
68
+ self.count += 1
69
+ return self
70
+
71
+ def __exit__(self, *args):
72
+ """Stops monitoring task outputs"""
73
+ with self.lock:
74
+ self.count -= 1
75
+ if self.count == 0:
76
+ ipcom().fsunwatch(self.handle)
77
+ self.fh.close()
78
+
79
+ self.handle = None
80
+ self._fh = None
81
+
82
+ def watch_output(self, watched: "WatchedOutput"):
83
+ """Add a new listener"""
84
+ key = f"{watched.config.__identifier__}/{watched.method_name}"
85
+ with self.lock:
86
+ # Process events so far
87
+ listener = self.listeners[key].get(watched.method, None)
88
+ if listener is None:
89
+ listener = TaskOutputCallbackHandler(watched.method)
90
+
91
+ # Register
92
+ self.listeners[key][watched.method].add(watched.callback)
93
+
94
+ #
95
+ # --- Events
96
+ #
97
+
98
+ @cached_property
99
+ def fh(self):
100
+ if self._fh is None:
101
+ self._fh = self.path.open("rt")
102
+ return self._fh
103
+
104
+ def on_modified(self, event):
105
+ self.handle(Path(event.src_path))
106
+
107
+ def on_created(self, event):
108
+ self.handle(Path(event.src_path))
109
+
110
+ def handle(self, path: Path):
111
+ if path != self.path:
112
+ return
113
+
114
+ with self.lock:
115
+ logger.debug("[TASK OUTPUT] Handling task output for %s", self.path)
116
+
117
+ while json_line := self.fh.readline():
118
+ # Read the event
119
+ event = json.loads(json_line)
120
+ logger.debug("Event: %s", event)
121
+
122
+ # FIXME: move elsewhere
123
+ # # Process the event
124
+ # event = self.config_method(
125
+ # self.job.config.__xpm__.mark_output,
126
+ # *event["args"],
127
+ # **event["kwargs"],
128
+ # )
129
+
130
+ self.events.append(event)
131
+ # self.job.scheduler.xp.taskOutputsWorker.add(self, event)
132
+
133
+
134
+ class TaskOutputsWorker(threading.Thread):
135
+ """This worker process dynamic output queue for one experiment"""
136
+
137
+ def __init__(self, xp: experiment):
138
+ super().__init__(name="task outputs worker", daemon=True)
139
+ self.queue = queue.Queue()
140
+ self.xp = xp
141
+
142
+ def watch_output(self, watched: "WatchedOutput"):
143
+ """Watch an output
144
+
145
+ :param watched: The watched output specification
146
+ """
147
+ logger.debug("Registering task output listener %s", watched)
148
+
149
+ # path = watched.job.tasks_output_path
150
+ TaskOutputs.create(watched.job).watch_output(watched)
151
+
152
+ def add(self, watcher, event):
153
+ asyncio.run_coroutine_threadsafe(
154
+ self.xp.update_task_output_count(1),
155
+ self.xp.scheduler.loop,
156
+ ).result()
157
+ self.queue.put((watcher, event))
158
+
159
+ def run(self):
160
+ logging.debug("Starting output listener queue")
161
+ while True:
162
+ # Get the next element in the queue
163
+ element = self.queue.get()
164
+ if element is None:
165
+ # end of processing
166
+ break
167
+
168
+ # Call all the listeners
169
+ logging.debug("Got one event: %s", element)
170
+ watcher, event = element
171
+ for listener in watcher.listeners:
172
+ try:
173
+ logger.debug("Calling listener [%s] with %s", listener, event)
174
+ listener(event)
175
+ logger.debug(
176
+ "[done] Calling listener [%s] with %s", listener, event
177
+ )
178
+ except Exception:
179
+ logging.exception("Exception while calling the listener")
180
+ self.queue.task_done()
181
+
182
+ asyncio.run_coroutine_threadsafe(
183
+ self.xp.update_task_output_count(-1), self.xp.scheduler.loop
184
+ ).result()
@@ -51,6 +51,8 @@ class PythonScriptBuilder:
51
51
  self.lockfiles: List[Path] = []
52
52
  self.notificationURL: Optional[str] = None
53
53
  self.command: Optional[AbstractCommand] = None
54
+
55
+ # This is used to serialize the full process identifier on disk
54
56
  self.processtype = "local"
55
57
 
56
58
  def write(self, job: CommandLineJob):
@@ -63,7 +65,7 @@ class PythonScriptBuilder:
63
65
  job {CommandLineJob} -- [description]
64
66
 
65
67
  Returns:
66
- [type] -- [description]
68
+ str -- The script path on disk
67
69
  """
68
70
  assert isinstance(
69
71
  job, CommandLineJob