experimaestro 1.6.1__py3-none-any.whl → 1.7.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (76)
  1. experimaestro/__init__.py +3 -1
  2. experimaestro/annotations.py +13 -3
  3. experimaestro/cli/filter.py +3 -3
  4. experimaestro/cli/jobs.py +1 -1
  5. experimaestro/connectors/__init__.py +17 -8
  6. experimaestro/connectors/local.py +8 -3
  7. experimaestro/core/arguments.py +26 -3
  8. experimaestro/core/objects.py +90 -6
  9. experimaestro/core/objects.pyi +7 -1
  10. experimaestro/core/types.py +33 -2
  11. experimaestro/experiments/cli.py +18 -10
  12. experimaestro/generators.py +6 -1
  13. experimaestro/ipc.py +4 -1
  14. experimaestro/launcherfinder/registry.py +7 -4
  15. experimaestro/notifications.py +1 -1
  16. experimaestro/run.py +1 -1
  17. experimaestro/scheduler/base.py +98 -6
  18. experimaestro/scheduler/dynamic_outputs.py +184 -0
  19. experimaestro/scheduler/workspace.py +2 -1
  20. experimaestro/scriptbuilder.py +10 -1
  21. experimaestro/server/data/016b4a6cdced82ab3aa1.ttf +0 -0
  22. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  23. experimaestro/server/data/1815e00441357e01619e.ttf +0 -0
  24. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  25. experimaestro/server/data/2463b90d9a316e4e5294.woff2 +0 -0
  26. experimaestro/server/data/2582b0e4bcf85eceead0.ttf +0 -0
  27. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  28. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  29. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  30. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  31. experimaestro/server/data/50701fbb8177c2dde530.ttf +0 -0
  32. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  33. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  34. experimaestro/server/data/878f31251d960bd6266f.woff2 +0 -0
  35. experimaestro/server/data/89999bdf5d835c012025.woff2 +0 -0
  36. experimaestro/server/data/914997e1bdfc990d0897.ttf +0 -0
  37. experimaestro/server/data/b041b1fa4fe241b23445.woff2 +0 -0
  38. experimaestro/server/data/b6879d41b0852f01ed5b.woff2 +0 -0
  39. experimaestro/server/data/c210719e60948b211a12.woff2 +0 -0
  40. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  41. experimaestro/server/data/d75e3fd1eb12e9bd6655.ttf +0 -0
  42. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  43. experimaestro/server/data/favicon.ico +0 -0
  44. experimaestro/server/data/index.css +22963 -0
  45. experimaestro/server/data/index.css.map +1 -0
  46. experimaestro/server/data/index.html +27 -0
  47. experimaestro/server/data/index.js +101770 -0
  48. experimaestro/server/data/index.js.map +1 -0
  49. experimaestro/server/data/login.html +22 -0
  50. experimaestro/server/data/manifest.json +15 -0
  51. experimaestro/settings.py +2 -2
  52. experimaestro/taskglobals.py +7 -2
  53. experimaestro/tests/definitions_types.py +5 -3
  54. experimaestro/tests/launchers/bin/sbatch +18 -5
  55. experimaestro/tests/launchers/common.py +11 -3
  56. experimaestro/tests/restart.py +6 -3
  57. experimaestro/tests/tasks/all.py +16 -10
  58. experimaestro/tests/tasks/foreign.py +2 -4
  59. experimaestro/tests/test_forward.py +5 -5
  60. experimaestro/tests/test_identifier.py +61 -66
  61. experimaestro/tests/test_instance.py +3 -6
  62. experimaestro/tests/test_param.py +40 -22
  63. experimaestro/tests/test_tags.py +5 -11
  64. experimaestro/tests/test_tokens.py +3 -2
  65. experimaestro/tests/test_types.py +17 -14
  66. experimaestro/tests/test_validation.py +48 -91
  67. experimaestro/tokens.py +16 -5
  68. experimaestro/typingutils.py +7 -0
  69. experimaestro/utils/asyncio.py +6 -2
  70. experimaestro/utils/resources.py +7 -3
  71. {experimaestro-1.6.1.dist-info → experimaestro-1.7.0rc0.dist-info}/METADATA +3 -4
  72. experimaestro-1.7.0rc0.dist-info/RECORD +153 -0
  73. {experimaestro-1.6.1.dist-info → experimaestro-1.7.0rc0.dist-info}/WHEEL +1 -1
  74. experimaestro-1.6.1.dist-info/RECORD +0 -122
  75. {experimaestro-1.6.1.dist-info → experimaestro-1.7.0rc0.dist-info}/LICENSE +0 -0
  76. {experimaestro-1.6.1.dist-info → experimaestro-1.7.0rc0.dist-info}/entry_points.txt +0 -0
experimaestro/notifications.py CHANGED
@@ -78,7 +78,6 @@ class Reporter(threading.Thread):
78
78
 
79
79
  self.progress_threshold = 0.01
80
80
  self.cv = threading.Condition()
81
- self.start()
82
81
 
83
82
  def stop(self):
84
83
  self.stopping = True
@@ -222,6 +221,7 @@ class Reporter(threading.Thread):
222
221
  taskpath = TaskEnv.instance().taskpath
223
222
  assert taskpath is not None, "Task path is not defined"
224
223
  Reporter.INSTANCE = Reporter(taskpath)
224
+ Reporter.INSTANCE.start()
225
225
  return Reporter.INSTANCE
226
226
 
227
227
 
experimaestro/run.py CHANGED
@@ -140,10 +140,10 @@ class TaskRunner:
140
140
  run(workdir / "params.json")
141
141
 
142
142
  # ... remove the handlers
143
- logger.info("Task ended successfully")
144
143
  remove_signal_handlers(remove_cleanup=False)
145
144
 
146
145
  # Everything went OK
146
+ logger.info("Task ended successfully")
147
147
  sys.exit(0)
148
148
  except Exception:
149
149
  logger.exception("Got exception while running")
experimaestro/scheduler/base.py CHANGED
@@ -1,12 +1,22 @@
1
1
  from collections import ChainMap
2
2
  from functools import cached_property
3
+ import itertools
3
4
  import logging
4
5
  import os
5
6
  from pathlib import Path
6
7
  from shutil import rmtree
7
8
  import threading
8
9
  import time
9
- from typing import Any, List, Optional, Set, TypeVar, Union, TYPE_CHECKING
10
+ from typing import (
11
+ Any,
12
+ Iterator,
13
+ List,
14
+ Optional,
15
+ Set,
16
+ TypeVar,
17
+ Union,
18
+ TYPE_CHECKING,
19
+ )
10
20
  import enum
11
21
  import signal
12
22
  import asyncio
@@ -17,9 +27,10 @@ from experimaestro.scheduler.services import Service
17
27
  from experimaestro.settings import WorkspaceSettings, get_settings
18
28
 
19
29
 
20
- from experimaestro.core.objects import Config, ConfigWalkContext
30
+ from experimaestro.core.objects import Config, ConfigWalkContext, WatchedOutput
21
31
  from experimaestro.utils import logger
22
32
  from experimaestro.locking import Locks, LockError, Lock
33
+ from experimaestro.utils.asyncio import asyncThreadcheck
23
34
  from .workspace import RunMode, Workspace
24
35
  from .dependencies import Dependency, DependencyStatus, Resource
25
36
  import concurrent.futures
@@ -110,7 +121,7 @@ class JobDependency(Dependency):
110
121
 
111
122
 
112
123
  class Job(Resource):
113
- """A job is a resouce that is produced by the execution of some code"""
124
+ """A job is a resource that is produced by the execution of some code"""
114
125
 
115
126
  # Set by the scheduler
116
127
  _readyEvent: Optional[asyncio.Event]
@@ -148,6 +159,11 @@ class Job(Resource):
148
159
  # Dependencies
149
160
  self.dependencies: Set[Dependency] = set() # as target
150
161
 
162
+ # Watched outputs
163
+ self.watched_outputs = {}
164
+ for watched in config.__xpm__.watched_outputs:
165
+ self.watch_output(watched)
166
+
151
167
  # Process
152
168
  self._process = None
153
169
  self.unsatisfied = 0
@@ -159,6 +175,23 @@ class Job(Resource):
159
175
  self._progress: List[LevelInformation] = []
160
176
  self.tags = config.tags()
161
177
 
178
+ def watch_output(self, watched: "WatchedOutput"):
179
+ """Monitor task outputs
180
+
181
+ :param watched: A description of the watched output
182
+ """
183
+ self.scheduler.xp.watch_output(watched)
184
+
185
+ def task_output_update(self, subpath: Path):
186
+ """Notification of an updated task output"""
187
+ if watcher := self.watched_outputs.get(subpath, None):
188
+ watcher.update()
189
+
190
+ def done_handler(self):
191
+ """The task has been completed"""
192
+ for watcher in self.watched_outputs.values():
193
+ watcher.update()
194
+
162
195
  def __str__(self):
163
196
  return "Job[{}]".format(self.identifier)
164
197
 
@@ -166,6 +199,11 @@ class Job(Resource):
166
199
  assert self._future, "Cannot wait a not submitted job"
167
200
  return self._future.result()
168
201
 
202
+ @cached_property
203
+ def python_path(self) -> Iterator[str]:
204
+ """Returns an iterator over python path"""
205
+ return itertools.chain(self.workspace.python_path)
206
+
169
207
  @cached_property
170
208
  def environ(self):
171
209
  """Returns the job environment
@@ -219,7 +257,7 @@ class Job(Resource):
219
257
  return self.state == JobState.READY
220
258
 
221
259
  @property
222
- def jobpath(self):
260
+ def jobpath(self) -> Path:
223
261
  """Deprecated, use `path`"""
224
262
  return self.workspace.jobspath / self.relpath
225
263
 
@@ -227,6 +265,14 @@ class Job(Resource):
227
265
  def path(self) -> Path:
228
266
  return self.workspace.jobspath / self.relpath
229
267
 
268
+ @property
269
+ def experimaestro_path(self) -> Path:
270
+ return (self.path / ".experimaestro").resolve()
271
+
272
+ @cached_property
273
+ def task_outputs_path(self) -> Path:
274
+ return self.experimaestro_path / "task-outputs.jsonl"
275
+
230
276
  @property
231
277
  def relpath(self):
232
278
  identifier = self.config.__xpm__.identifier
@@ -436,7 +482,7 @@ class Scheduler:
436
482
  self.jobs: Dict[str, "Job"] = {}
437
483
 
438
484
  # List of jobs
439
- self.waitingjobs = set()
485
+ self.waitingjobs: Set[Job] = set()
440
486
 
441
487
  # Listeners
442
488
  self.listeners: Set[Listener] = set()
@@ -459,10 +505,12 @@ class Scheduler:
459
505
 
460
506
  def submit(self, job: Job) -> Optional[Job]:
461
507
  # Wait for the future containing the submitted job
508
+ logger.debug("Registering the job %s within the scheduler", job)
462
509
  otherFuture = asyncio.run_coroutine_threadsafe(
463
510
  self.aio_registerJob(job), self.loop
464
511
  )
465
512
  other = otherFuture.result()
513
+ logger.debug("Job already submitted" if other else "First submission")
466
514
  if other:
467
515
  return other
468
516
 
@@ -598,9 +646,13 @@ class Scheduler:
598
646
  if job.state != JobState.DONE:
599
647
  self.xp.failedJobs[job.identifier] = job
600
648
 
649
+ # Process all remaining tasks outputs
650
+ await asyncThreadcheck("End of job processing", job.done_handler)
651
+
601
652
  # Decrement the number of unfinished jobs and notify
602
653
  self.xp.unfinishedJobs -= 1
603
654
  async with self.xp.central.exitCondition:
655
+ logging.debug("Updated number of unfinished jobs")
604
656
  self.xp.central.exitCondition.notify_all()
605
657
 
606
658
  job.endtime = time.time()
@@ -853,6 +905,7 @@ class experiment:
853
905
  assert self.central is not None
854
906
  async with self.central.exitCondition:
855
907
  self.exitMode = True
908
+ logging.debug("Setting exit mode to true")
856
909
  self.central.exitCondition.notify_all()
857
910
 
858
911
  assert self.central is not None and self.central.loop is not None
@@ -865,8 +918,19 @@ class experiment:
865
918
  assert self.central is not None
866
919
  async with self.central.exitCondition:
867
920
  while True:
868
- if self.unfinishedJobs == 0 or self.exitMode:
921
+ if self.exitMode:
869
922
  break
923
+
924
+ # If we have still unfinished jobs or possible new tasks, wait
925
+ logger.debug(
926
+ "Checking exit condition: unfinished jobs=%d, task output queue size=%d",
927
+ self.unfinishedJobs,
928
+ self.taskOutputQueueSize,
929
+ )
930
+ if self.unfinishedJobs == 0 and self.taskOutputQueueSize == 0:
931
+ break
932
+
933
+ # Wait for more news...
870
934
  await self.central.exitCondition.wait()
871
935
 
872
936
  if self.failedJobs:
@@ -898,6 +962,8 @@ class experiment:
898
962
  return self.workspace.connector.createtoken(name, count)
899
963
 
900
964
  def __enter__(self):
965
+ from .dynamic_outputs import TaskOutputsWorker
966
+
901
967
  if self.workspace.run_mode != RunMode.DRY_RUN:
902
968
  logger.info("Locking experiment %s", self.xplockpath)
903
969
  self.xplock = self.workspace.connector.lock(self.xplockpath, 0).__enter__()
@@ -926,6 +992,7 @@ class experiment:
926
992
  global SIGNAL_HANDLER
927
993
  # Number of unfinished jobs
928
994
  self.unfinishedJobs = 0
995
+ self.taskOutputQueueSize = 0
929
996
 
930
997
  # List of failed jobs
931
998
  self.failedJobs: Dict[str, Job] = {}
@@ -934,6 +1001,8 @@ class experiment:
934
1001
  self.exitMode = False
935
1002
 
936
1003
  self.central = SchedulerCentral.create(self.scheduler.name)
1004
+ self.taskOutputsWorker = TaskOutputsWorker(self)
1005
+ self.taskOutputsWorker.start()
937
1006
 
938
1007
  SIGNAL_HANDLER.add(self)
939
1008
 
@@ -967,8 +1036,13 @@ class experiment:
967
1036
  service.stop()
968
1037
 
969
1038
  if self.central is not None:
1039
+ logger.info("Stopping scheduler event loop")
970
1040
  self.central.loop.stop()
971
1041
 
1042
+ if self.taskOutputsWorker is not None:
1043
+ logger.info("Stopping tasks outputs worker")
1044
+ self.taskOutputsWorker.queue.put(None)
1045
+
972
1046
  self.central = None
973
1047
  self.workspace.__exit__(exc_type, exc_value, traceback)
974
1048
  if self.xplock:
@@ -979,6 +1053,24 @@ class experiment:
979
1053
  if self.server:
980
1054
  self.server.stop()
981
1055
 
1056
+ async def update_task_output_count(self, delta: int):
1057
+ """Change in the number of task outputs to process"""
1058
+ async with self.central.exitCondition:
1059
+ self.taskOutputQueueSize += delta
1060
+ logging.debug(
1061
+ "Updating queue size with %d => %d", delta, self.taskOutputQueueSize
1062
+ )
1063
+ if self.taskOutputQueueSize == 0:
1064
+ self.central.exitCondition.notify_all()
1065
+
1066
+ def watch_output(self, watched: "WatchedOutput"):
1067
+ """Watch an output
1068
+
1069
+ :param watched: The watched output specification
1070
+ """
1071
+
1072
+ self.taskOutputsWorker.watch_output(watched)
1073
+
982
1074
  def add_service(self, service: ServiceClass) -> ServiceClass:
983
1075
  """Adds a service (e.g. tensorboard viewer) to the experiment
984
1076
 
experimaestro/scheduler/dynamic_outputs.py ADDED
@@ -0,0 +1,184 @@
1
+ """Handles dynamic task outputs"""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import queue
7
+ import threading
8
+ from collections import defaultdict
9
+ from functools import cached_property
10
+ from pathlib import Path
11
+ from typing import Callable, TYPE_CHECKING
12
+
13
+ from watchdog.events import FileSystemEventHandler
14
+
15
+ from experimaestro.ipc import ipcom
16
+ from experimaestro.utils import logger
17
+
18
+ from .base import Job, experiment
19
+
20
+ if TYPE_CHECKING:
21
+ from experimaestro.core.objects import WatchedOutput
22
+
23
+
24
+ class TaskOutputCallbackHandler:
25
+ def __init__(self, converter: Callable):
26
+ pass
27
+
28
+
29
+ class TaskOutputs(FileSystemEventHandler):
30
+ """Represent and monitors dynamic outputs generated by one task"""
31
+
32
+ #: Global dictionary for handles
33
+ HANDLERS: dict[Path, "TaskOutputs"] = {}
34
+
35
+ #: Global lock to access current HANDLERS
36
+ LOCK = threading.Lock()
37
+
38
+ def create(job: Job):
39
+ with TaskOutputs.LOCK:
40
+ if instance := TaskOutputs.get(job.task_outputs_path, None):
41
+ return instance
42
+
43
+ instance = TaskOutputs(job.task_outputs_path)
44
+ TaskOutputs[job.task_outputs_path] = instance
45
+ return instance
46
+
47
+ def __init__(self, path: Path):
48
+ """Monitors an event path"""
49
+ logger.debug("Watching dynamic task outputs in %s", path)
50
+ self.path = path
51
+ self.handle = None
52
+ self.count = 0
53
+ self.lock = threading.Lock()
54
+ self.listeners: dict[str, dict[Callable, set[Callable]]] = defaultdict(
55
+ lambda: defaultdict(set)
56
+ )
57
+
58
+ #: The events registered so far
59
+ self.events = []
60
+
61
+ def __enter__(self):
62
+ """Starts monitoring task outputs"""
63
+ self.job.task_outputs_path.parent.mkdir(parents=True, exist_ok=True)
64
+ with self.lock:
65
+ if self.handle is None:
66
+ assert self.count == 0
67
+ self.handle = ipcom().fswatch(self, self.path.parent, False)
68
+ self.count += 1
69
+ return self
70
+
71
+ def __exit__(self, *args):
72
+ """Stops monitoring task outputs"""
73
+ with self.lock:
74
+ self.count -= 1
75
+ if self.count == 0:
76
+ ipcom().fsunwatch(self.handle)
77
+ self.fh.close()
78
+
79
+ self.handle = None
80
+ self._fh = None
81
+
82
+ def watch_output(self, watched: "WatchedOutput"):
83
+ """Add a new listener"""
84
+ key = f"{watched.config.__identifier__}/{watched.method_name}"
85
+ with self.lock:
86
+ # Process events so far
87
+ listener = self.listeners[key].get(watched.method, None)
88
+ if listener is None:
89
+ listener = TaskOutputCallbackHandler(watched.method)
90
+
91
+ # Register
92
+ self.listeners[key][watched.method].add(watched.callback)
93
+
94
+ #
95
+ # --- Events
96
+ #
97
+
98
+ @cached_property
99
+ def fh(self):
100
+ if self._fh is None:
101
+ self._fh = self.path.open("rt")
102
+ return self._fh
103
+
104
+ def on_modified(self, event):
105
+ self.handle(Path(event.src_path))
106
+
107
+ def on_created(self, event):
108
+ self.handle(Path(event.src_path))
109
+
110
+ def handle(self, path: Path):
111
+ if path != self.path:
112
+ return
113
+
114
+ with self.lock:
115
+ logger.debug("[TASK OUTPUT] Handling task output for %s", self.path)
116
+
117
+ while json_line := self.fh.readline():
118
+ # Read the event
119
+ event = json.loads(json_line)
120
+ logger.debug("Event: %s", event)
121
+
122
+ # FIXME: move elsewhere
123
+ # # Process the event
124
+ # event = self.config_method(
125
+ # self.job.config.__xpm__.mark_output,
126
+ # *event["args"],
127
+ # **event["kwargs"],
128
+ # )
129
+
130
+ self.events.append(event)
131
+ # self.job.scheduler.xp.taskOutputsWorker.add(self, event)
132
+
133
+
134
+ class TaskOutputsWorker(threading.Thread):
135
+ """This worker process dynamic output queue for one experiment"""
136
+
137
+ def __init__(self, xp: experiment):
138
+ super().__init__(name="task outputs worker")
139
+ self.queue = queue.Queue()
140
+ self.xp = xp
141
+
142
+ def watch_output(self, watched: "WatchedOutput"):
143
+ """Watch an output
144
+
145
+ :param watched: The watched output specification
146
+ """
147
+ logger.debug("Registering task output listener %s", watched)
148
+
149
+ # path = watched.job.tasks_output_path
150
+ TaskOutputs.create(watched.job).watch_output(watched)
151
+
152
+ def add(self, watcher, event):
153
+ asyncio.run_coroutine_threadsafe(
154
+ self.xp.update_task_output_count(1),
155
+ self.xp.scheduler.loop,
156
+ ).result()
157
+ self.queue.put((watcher, event))
158
+
159
+ def run(self):
160
+ logging.debug("Starting output listener queue")
161
+ while True:
162
+ # Get the next element in the queue
163
+ element = self.queue.get()
164
+ if element is None:
165
+ # end of processing
166
+ break
167
+
168
+ # Call all the listeners
169
+ logging.debug("Got one event: %s", element)
170
+ watcher, event = element
171
+ for listener in watcher.listeners:
172
+ try:
173
+ logger.debug("Calling listener [%s] with %s", listener, event)
174
+ listener(event)
175
+ logger.debug(
176
+ "[done] Calling listener [%s] with %s", listener, event
177
+ )
178
+ except Exception:
179
+ logging.exception("Exception while calling the listener")
180
+ self.queue.task_done()
181
+
182
+ asyncio.run_coroutine_threadsafe(
183
+ self.xp.update_task_output_count(-1), self.xp.scheduler.loop
184
+ ).result()
experimaestro/scheduler/workspace.py CHANGED
@@ -2,7 +2,7 @@ from collections import ChainMap
2
2
  from enum import Enum
3
3
  from functools import cached_property
4
4
  from pathlib import Path
5
- from typing import Optional
5
+ from typing import Iterator, Optional
6
6
  from experimaestro.settings import WorkspaceSettings, Settings
7
7
 
8
8
 
@@ -46,6 +46,7 @@ class Workspace:
46
46
  path = path.absolute()
47
47
  self.path = path
48
48
  self.run_mode = run_mode
49
+ self.python_path = []
49
50
  from ..launchers import Launcher
50
51
 
51
52
  self.launcher = launcher or Launcher.get(path)
experimaestro/scriptbuilder.py CHANGED
@@ -94,6 +94,7 @@ class PythonScriptBuilder:
94
94
  out.write("# Experimaestro generated task\n\n")
95
95
  out.write(
96
96
  """import logging\n"""
97
+ """import sys\n"""
97
98
  """logging.basicConfig(level=logging.INFO, """
98
99
  """format='%(levelname)s:%(process)d:%(asctime)s [%(name)s] %(message)s', datefmt='%y-%m-%d %H:%M:%S')\n\n"""
99
100
  )
@@ -112,9 +113,17 @@ class PythonScriptBuilder:
112
113
  out.write(" ]\n")
113
114
 
114
115
  for name, value in job.environ.items():
115
- out.write(f""" os.environ["{name}"] = "{shquote(value)}"\n""")
116
+ if name == "PYTHONPATH":
117
+ # Handles properly python path
118
+ for path in value.split(":"):
119
+ out.write(f""" sys.path.insert(0, "{shquote(path)}")\n""")
120
+ else:
121
+ out.write(f""" os.environ["{name}"] = "{shquote(value)}"\n""")
116
122
  out.write("\n")
117
123
 
124
+ for path in job.python_path:
125
+ out.write(f""" sys.path.insert(0, "{shquote(str(path))}")\n""")
126
+
118
127
  out.write(
119
128
  f""" TaskRunner("{shquote(connector.resolve(scriptpath))}","""
120
129
  """ lockfiles).run()\n"""
Binary file