experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (116)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +130 -5
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +107 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +489 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +225 -30
  37. experimaestro/scheduler/interfaces.py +474 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/services.py +186 -12
  40. experimaestro/scheduler/state_db.py +388 -0
  41. experimaestro/scheduler/state_provider.py +2345 -0
  42. experimaestro/scheduler/state_sync.py +834 -0
  43. experimaestro/scheduler/workspace.py +52 -10
  44. experimaestro/scriptbuilder.py +7 -0
  45. experimaestro/server/__init__.py +147 -57
  46. experimaestro/server/data/index.css +0 -125
  47. experimaestro/server/data/index.css.map +1 -1
  48. experimaestro/server/data/index.js +194 -58
  49. experimaestro/server/data/index.js.map +1 -1
  50. experimaestro/settings.py +44 -5
  51. experimaestro/sphinx/__init__.py +3 -3
  52. experimaestro/taskglobals.py +20 -0
  53. experimaestro/tests/conftest.py +80 -0
  54. experimaestro/tests/core/test_generics.py +2 -2
  55. experimaestro/tests/identifier_stability.json +45 -0
  56. experimaestro/tests/launchers/bin/sacct +6 -2
  57. experimaestro/tests/launchers/bin/sbatch +4 -2
  58. experimaestro/tests/launchers/test_slurm.py +80 -0
  59. experimaestro/tests/tasks/test_dynamic.py +231 -0
  60. experimaestro/tests/test_cli_jobs.py +615 -0
  61. experimaestro/tests/test_deprecated.py +630 -0
  62. experimaestro/tests/test_environment.py +200 -0
  63. experimaestro/tests/test_file_progress_integration.py +1 -1
  64. experimaestro/tests/test_forward.py +3 -3
  65. experimaestro/tests/test_identifier.py +372 -41
  66. experimaestro/tests/test_identifier_stability.py +458 -0
  67. experimaestro/tests/test_instance.py +3 -3
  68. experimaestro/tests/test_multitoken.py +442 -0
  69. experimaestro/tests/test_mypy.py +433 -0
  70. experimaestro/tests/test_objects.py +312 -5
  71. experimaestro/tests/test_outputs.py +2 -2
  72. experimaestro/tests/test_param.py +8 -12
  73. experimaestro/tests/test_partial_paths.py +231 -0
  74. experimaestro/tests/test_progress.py +0 -48
  75. experimaestro/tests/test_resumable_task.py +480 -0
  76. experimaestro/tests/test_serializers.py +141 -1
  77. experimaestro/tests/test_state_db.py +434 -0
  78. experimaestro/tests/test_subparameters.py +160 -0
  79. experimaestro/tests/test_tags.py +136 -0
  80. experimaestro/tests/test_tasks.py +107 -121
  81. experimaestro/tests/test_token_locking.py +252 -0
  82. experimaestro/tests/test_tokens.py +17 -13
  83. experimaestro/tests/test_types.py +123 -1
  84. experimaestro/tests/test_workspace_triggers.py +158 -0
  85. experimaestro/tests/token_reschedule.py +4 -2
  86. experimaestro/tests/utils.py +2 -2
  87. experimaestro/tokens.py +154 -57
  88. experimaestro/tools/diff.py +1 -1
  89. experimaestro/tui/__init__.py +8 -0
  90. experimaestro/tui/app.py +2303 -0
  91. experimaestro/tui/app.tcss +353 -0
  92. experimaestro/tui/log_viewer.py +228 -0
  93. experimaestro/utils/__init__.py +23 -0
  94. experimaestro/utils/environment.py +148 -0
  95. experimaestro/utils/git.py +129 -0
  96. experimaestro/utils/resources.py +1 -1
  97. experimaestro/version.py +34 -0
  98. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
  99. experimaestro-2.0.0b4.dist-info/RECORD +181 -0
  100. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
  101. experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
  102. experimaestro/compat.py +0 -6
  103. experimaestro/core/objects.pyi +0 -221
  104. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  105. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  106. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  107. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  108. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  109. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  110. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  111. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  112. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  113. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  114. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  115. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  116. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,7 @@
1
1
  # --- Task and types definitions
2
2
 
3
3
  import logging
4
- from experimaestro import Config, Param
4
+ from experimaestro import Config, Param, field
5
5
  from typing import Union
6
6
 
7
7
  import pytest
@@ -63,3 +63,125 @@ def test_types_union():
63
63
  A.C(x="hello")
64
64
  with pytest.raises(ValueError):
65
65
  A.C(x=[])
66
+
67
+
68
+ def test_override_warning_without_flag(caplog):
69
+ """Test that overriding a parameter without overrides=True produces a warning"""
70
+
71
+ class Parent(Config):
72
+ value: Param[int]
73
+
74
+ with caplog.at_level(logging.WARNING, logger="xpm"):
75
+ # Child overrides value without overrides=True
76
+ class Child(Parent):
77
+ value: Param[int]
78
+
79
+ # Force initialization to trigger the warning
80
+ Child.__getxpmtype__().arguments
81
+
82
+ assert "overrides parent parameter" in caplog.text
83
+ assert "Child" in caplog.text
84
+ assert "value" in caplog.text
85
+
86
+
87
+ def test_override_no_warning_with_flag(caplog):
88
+ """Test that overriding with overrides=True suppresses the warning"""
89
+
90
+ class Parent(Config):
91
+ value: Param[int]
92
+
93
+ with caplog.at_level(logging.WARNING, logger="xpm"):
94
+ # Child overrides value with overrides=True
95
+ class Child(Parent):
96
+ value: Param[int] = field(overrides=True)
97
+
98
+ # Force initialization
99
+ Child.__getxpmtype__().arguments
100
+
101
+ # No warning should be issued
102
+ assert "overrides parent parameter" not in caplog.text
103
+
104
+
105
+ def test_override_type_check_subtype_config():
106
+ """Test that overriding Config type with subtype is allowed"""
107
+
108
+ class BaseValue(Config):
109
+ x: Param[int]
110
+
111
+ class DerivedValue(BaseValue):
112
+ y: Param[int]
113
+
114
+ class Parent(Config):
115
+ value: Param[BaseValue]
116
+
117
+ # Should succeed - DerivedValue is subtype of BaseValue
118
+ class Child(Parent):
119
+ value: Param[DerivedValue] = field(overrides=True)
120
+
121
+ Child.__getxpmtype__().arguments
122
+
123
+
124
+ def test_override_type_check_incompatible_config():
125
+ """Test that overriding Config type with incompatible type raises error"""
126
+
127
+ class ValueA(Config):
128
+ x: Param[int]
129
+
130
+ class ValueB(Config):
131
+ y: Param[int]
132
+
133
+ class Parent(Config):
134
+ value: Param[ValueA]
135
+
136
+ # Should fail - ValueB is not a subtype of ValueA
137
+ with pytest.raises(TypeError, match="is not a subtype"):
138
+
139
+ class Child(Parent):
140
+ value: Param[ValueB] = field(overrides=True)
141
+
142
+ Child.__getxpmtype__().arguments
143
+
144
+
145
+ def test_override_type_check_primitive_incompatible():
146
+ """Test that overriding primitive type with incompatible type raises error"""
147
+
148
+ class Parent(Config):
149
+ value: Param[int]
150
+
151
+ # Should fail - str is not a subtype of int
152
+ with pytest.raises(TypeError, match="is not compatible"):
153
+
154
+ class Child(Parent):
155
+ value: Param[str] = field(overrides=True)
156
+
157
+ Child.__getxpmtype__().arguments
158
+
159
+
160
+ def test_override_type_check_same_type():
161
+ """Test that overriding with the same type is allowed"""
162
+
163
+ class Parent(Config):
164
+ value: Param[int]
165
+
166
+ # Should succeed - same type
167
+ class Child(Parent):
168
+ value: Param[int] = field(overrides=True)
169
+
170
+ Child.__getxpmtype__().arguments
171
+
172
+
173
+ def test_no_override_warning_for_new_param(caplog):
174
+ """Test that defining a new parameter doesn't produce a warning"""
175
+
176
+ class Parent(Config):
177
+ x: Param[int]
178
+
179
+ with caplog.at_level(logging.WARNING, logger="xpm"):
180
+ # Child defines a new parameter y, doesn't override x
181
+ class Child(Parent):
182
+ y: Param[int]
183
+
184
+ Child.__getxpmtype__().arguments
185
+
186
+ # No warning should be issued for new parameter
187
+ assert "overrides parent parameter" not in caplog.text
@@ -0,0 +1,158 @@
1
+ """Tests for workspace trigger matching (issue #119)"""
2
+
3
+ from pathlib import Path
4
+ from experimaestro.settings import WorkspaceSettings, find_workspace, Settings
5
+ from unittest.mock import patch
6
+
7
+
8
+ def test_workspace_trigger_exact_match():
9
+ """Test exact match trigger"""
10
+ workspaces = [
11
+ WorkspaceSettings(
12
+ id="neuralir",
13
+ path=Path("/tmp/test1"),
14
+ triggers=["my-awesome-experiment"],
15
+ ),
16
+ WorkspaceSettings(
17
+ id="default",
18
+ path=Path("/tmp/test2"),
19
+ ),
20
+ ]
21
+
22
+ settings = Settings(workspaces=workspaces)
23
+
24
+ with patch("experimaestro.settings.get_settings", return_value=settings):
25
+ ws = find_workspace(experiment_id="my-awesome-experiment")
26
+ assert ws.id == "neuralir"
27
+
28
+
29
+ def test_workspace_trigger_glob_match():
30
+ """Test glob pattern trigger"""
31
+ workspaces = [
32
+ WorkspaceSettings(
33
+ id="neuralir",
34
+ path=Path("/tmp/test1"),
35
+ triggers=["base_id-*"],
36
+ ),
37
+ WorkspaceSettings(
38
+ id="default",
39
+ path=Path("/tmp/test2"),
40
+ ),
41
+ ]
42
+
43
+ settings = Settings(workspaces=workspaces)
44
+
45
+ with patch("experimaestro.settings.get_settings", return_value=settings):
46
+ ws = find_workspace(experiment_id="base_id-123")
47
+ assert ws.id == "neuralir"
48
+
49
+ ws = find_workspace(experiment_id="base_id-test")
50
+ assert ws.id == "neuralir"
51
+
52
+
53
+ def test_workspace_trigger_multiple_patterns():
54
+ """Test multiple trigger patterns"""
55
+ workspaces = [
56
+ WorkspaceSettings(
57
+ id="neuralir",
58
+ path=Path("/tmp/test1"),
59
+ triggers=["base_id-*", "my-awesome-experiment", "test-*"],
60
+ ),
61
+ WorkspaceSettings(
62
+ id="default",
63
+ path=Path("/tmp/test2"),
64
+ ),
65
+ ]
66
+
67
+ settings = Settings(workspaces=workspaces)
68
+
69
+ with patch("experimaestro.settings.get_settings", return_value=settings):
70
+ ws = find_workspace(experiment_id="base_id-123")
71
+ assert ws.id == "neuralir"
72
+
73
+ ws = find_workspace(experiment_id="my-awesome-experiment")
74
+ assert ws.id == "neuralir"
75
+
76
+ ws = find_workspace(experiment_id="test-foo")
77
+ assert ws.id == "neuralir"
78
+
79
+
80
+ def test_workspace_trigger_no_match_uses_default():
81
+ """Test that default workspace (first in list) is used when no trigger matches"""
82
+ workspaces = [
83
+ WorkspaceSettings(
84
+ id="default",
85
+ path=Path("/tmp/test1"),
86
+ ),
87
+ WorkspaceSettings(
88
+ id="neuralir",
89
+ path=Path("/tmp/test2"),
90
+ triggers=["base_id-*"],
91
+ ),
92
+ ]
93
+
94
+ settings = Settings(workspaces=workspaces)
95
+
96
+ with patch("experimaestro.settings.get_settings", return_value=settings):
97
+ ws = find_workspace(experiment_id="other-experiment")
98
+ assert ws.id == "default" # First workspace is the default
99
+
100
+
101
+ def test_workspace_trigger_first_match_wins():
102
+ """Test that first matching workspace is selected"""
103
+ workspaces = [
104
+ WorkspaceSettings(
105
+ id="first",
106
+ path=Path("/tmp/test1"),
107
+ triggers=["test-*"],
108
+ ),
109
+ WorkspaceSettings(
110
+ id="second",
111
+ path=Path("/tmp/test2"),
112
+ triggers=["test-*"],
113
+ ),
114
+ ]
115
+
116
+ settings = Settings(workspaces=workspaces)
117
+
118
+ with patch("experimaestro.settings.get_settings", return_value=settings):
119
+ ws = find_workspace(experiment_id="test-experiment")
120
+ assert ws.id == "first"
121
+
122
+
123
+ def test_workspace_explicit_takes_precedence():
124
+ """Test that explicit workspace parameter overrides triggers"""
125
+ workspaces = [
126
+ WorkspaceSettings(
127
+ id="neuralir",
128
+ path=Path("/tmp/test1"),
129
+ triggers=["base_id-*"],
130
+ ),
131
+ WorkspaceSettings(
132
+ id="other",
133
+ path=Path("/tmp/test2"),
134
+ ),
135
+ ]
136
+
137
+ settings = Settings(workspaces=workspaces)
138
+
139
+ with patch("experimaestro.settings.get_settings", return_value=settings):
140
+ # Even though experiment_id matches neuralir trigger, explicit workspace wins
141
+ ws = find_workspace(workspace="other", experiment_id="base_id-123")
142
+ assert ws.id == "other"
143
+
144
+
145
+ def test_workspace_no_triggers_backward_compatible():
146
+ """Test that workspaces without triggers still work (backward compatibility)"""
147
+ workspaces = [
148
+ WorkspaceSettings(
149
+ id="default",
150
+ path=Path("/tmp/test1"),
151
+ ),
152
+ ]
153
+
154
+ settings = Settings(workspaces=workspaces)
155
+
156
+ with patch("experimaestro.settings.get_settings", return_value=settings):
157
+ ws = find_workspace(experiment_id="any-experiment")
158
+ assert ws.id == "default"
@@ -19,9 +19,9 @@ if __name__ == "__main__":
19
19
  handler = logging.StreamHandler()
20
20
  bf = logging.Formatter(
21
21
  f"[XP{x}] "
22
- "[%(levelname)s] %(asctime)s %(name)s "
22
+ "[%(levelname)s] %(asctime)s.%(msecs)03d %(name)s "
23
23
  "[%(process)d/%(threadName)s]: %(message)s",
24
- datefmt="%H:%M:%S.%f",
24
+ datefmt="%H:%M:%S",
25
25
  )
26
26
  handler.setFormatter(bf)
27
27
  root.handlers.clear()
@@ -46,4 +46,6 @@ if __name__ == "__main__":
46
46
  # Wait until the experiment
47
47
  task.__xpm__.task.job.wait()
48
48
  logging.info("Reschedule with token [%s]: finished", x)
49
+
50
+ # Write the timestamp from the task so the test can retrieve them easily
49
51
  Path(timepath).write_text(Path(task.stdout()).read_text())
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  import logging
6
6
  import signal
7
7
 
8
- from experimaestro import experiment, task
8
+ from experimaestro import experiment, Task
9
9
  from experimaestro.scheduler.workspace import RunMode
10
10
 
11
11
 
@@ -24,7 +24,7 @@ class TimeInterval:
24
24
  return str(self)
25
25
 
26
26
 
27
- def get_times(task: task) -> TimeInterval:
27
+ def get_times(task: Task) -> TimeInterval:
28
28
  logging.info("Reading times from %s", task.stdout())
29
29
  return TimeInterval(
30
30
  *(float(t) for t in task.stdout().read_text().strip().split("\n"))
experimaestro/tokens.py CHANGED
@@ -5,6 +5,8 @@ a computational resource (e.g. number of launched jobs, etc.)
5
5
  from dataclasses import dataclass
6
6
  import sys
7
7
  from pathlib import Path
8
+ import time
9
+ import weakref
8
10
 
9
11
  from omegaconf import DictConfig
10
12
  from experimaestro.core.objects import Config
@@ -19,7 +21,7 @@ from experimaestro.launcherfinder.registry import LauncherRegistry
19
21
 
20
22
  from .ipc import ipcom
21
23
  from .locking import Lock, LockError
22
- from .scheduler.dependencies import Dependency, DependencyStatus, Resource
24
+ from .scheduler.dependencies import DynamicDependency, Resource
23
25
  import logging
24
26
  import json
25
27
 
@@ -32,16 +34,6 @@ class Token(Resource):
32
34
 
33
35
  available: int
34
36
 
35
- def aio_notify(self):
36
- # Notifying
37
- def check(dependency: Dependency):
38
- if self.available > 0:
39
- dependency.check()
40
-
41
- with self.dependents as dependents:
42
- for _dependency in dependents:
43
- _dependency.loop.call_soon_threadsafe(check, _dependency)
44
-
45
37
 
46
38
  class CounterTokenLock(Lock):
47
39
  def __init__(self, dependency: "CounterTokenDependency"):
@@ -58,8 +50,8 @@ class CounterTokenLock(Lock):
58
50
  return "Lock(%s)" % self.dependency
59
51
 
60
52
 
61
- class CounterTokenDependency(Dependency):
62
- """A dependency onto a token"""
53
+ class CounterTokenDependency(DynamicDependency):
54
+ """A dependency onto a token (dynamic - availability can change)"""
63
55
 
64
56
  def __init__(self, token: "CounterToken", count: int):
65
57
  super().__init__(token)
@@ -71,13 +63,54 @@ class CounterTokenDependency(Dependency):
71
63
  """The (file) name for this dependency, when taken"""
72
64
  return f"{self.target.identifier}.token"
73
65
 
74
- def status(self) -> DependencyStatus:
75
- if self.count <= self.token.available:
76
- return DependencyStatus.OK
77
- return DependencyStatus.WAIT
66
+ async def aio_lock(self, timeout: float = 0) -> "Lock":
67
+ """Acquire lock on token with event-driven waiting
68
+
69
+ Args:
70
+ timeout: Timeout in seconds (0 = wait indefinitely)
71
+
72
+ Returns:
73
+ Lock object
74
+
75
+ Raises:
76
+ LockError: If lock cannot be acquired within timeout
77
+ """
78
+ from experimaestro.utils.asyncio import asyncThreadcheck
79
+ import time
80
+
81
+ start_time = time.time()
82
+
83
+ while True:
84
+ try:
85
+ lock = CounterTokenLock(self)
86
+ lock.acquire()
87
+ return lock
88
+ except LockError:
89
+ # Wait for token availability notification
90
+ def wait_for_available():
91
+ with self.token.available_condition:
92
+ # Calculate remaining timeout
93
+ if timeout == 0:
94
+ wait_timeout = None # Wait indefinitely
95
+ else:
96
+ elapsed = time.time() - start_time
97
+ if elapsed >= timeout:
98
+ return False # Timeout exceeded
99
+ wait_timeout = timeout - elapsed
100
+
101
+ # Wait for notification
102
+ return self.token.available_condition.wait(timeout=wait_timeout)
103
+
104
+ # Wait in a thread (since condition is threading-based)
105
+ result = await asyncThreadcheck(
106
+ "token availability", wait_for_available
107
+ )
108
+
109
+ # If wait returned False, we timed out
110
+ if result is False:
111
+ raise LockError("Timeout waiting for tokens")
78
112
 
79
- def lock(self) -> "Lock":
80
- return CounterTokenLock(self)
113
+ # Otherwise, loop back to try acquiring again
81
114
 
82
115
  @property
83
116
  def token(self):
@@ -85,17 +118,38 @@ class CounterTokenDependency(Dependency):
85
118
 
86
119
 
87
120
  class TokenFile:
88
- """Represents a token file"""
121
+ """Represents a token file
122
+
123
+ The token file (whose name refers to the corresponding job) is composed of
124
+ two lines:
125
+
126
+ 1. The number of tokens taken by the job
127
+ 2. The URI reference of the job directory
128
+ """
89
129
 
90
130
  def __init__(self, path: Path):
91
- try:
92
- self.path = path
93
- with path.open("rt") as fp:
94
- count, self.uri = [line.strip() for line in fp.readlines()]
95
- self.count = int(count)
96
- except Exception:
97
- logging.exception("Error while reading %s", self.path)
98
- raise
131
+ # Case where the file was deleted
132
+ self.count = 0
133
+ self.uri = None
134
+
135
+ retries = 0
136
+ while retries < 5:
137
+ retries += 1
138
+ try:
139
+ self.path = path
140
+ with path.open("rt") as fp:
141
+ count, self.uri = [line.strip() for line in fp.readlines()]
142
+ self.count = int(count)
143
+ except FileNotFoundError:
144
+ # Case where the file was deleted
145
+ self.count = 0
146
+ self.uri = None
147
+ except Exception:
148
+ logging.exception("Error while reading %s", self.path)
149
+ time.sleep(0.1)
150
+ continue
151
+
152
+ break
99
153
 
100
154
  @staticmethod
101
155
  def create(dependency: CounterTokenDependency):
@@ -119,6 +173,11 @@ class TokenFile:
119
173
 
120
174
  def watch(self):
121
175
  """Watch the matching process"""
176
+
177
+ # No need to watch if there was no token file...
178
+ if self.uri is None:
179
+ return
180
+
122
181
  logger.debug(
123
182
  "Watching process for %s (%s, taken %d)", self.path, self.uri, self.count
124
183
  )
@@ -130,9 +189,11 @@ class TokenFile:
130
189
  def run():
131
190
  logger.debug("Locking job lock path %s", lockpath)
132
191
  process = None
192
+ # Acquire the job lock - blocks if scheduler is still starting the job
193
+ # Once we get the lock, the job has either started or finished
133
194
  with fasteners.InterProcessLock(lockpath):
134
195
  if not pidpath.is_file():
135
- logger.debug("Job already finished (no PID file)")
196
+ logger.debug("Job already finished (no PID file %s)", pidpath)
136
197
  else:
137
198
  s = ""
138
199
  while s == "":
@@ -157,6 +218,29 @@ class TokenFile:
157
218
  threading.Thread(target=run).start()
158
219
 
159
220
 
221
+ class CounterTokenProxy(FileSystemEventHandler):
222
+ """Hold a weak reference to the counter token to handle gracefully deleted
223
+ counter tokens"""
224
+
225
+ def __init__(self, token: "CounterToken"):
226
+ self._token_ref = weakref.ref(token)
227
+
228
+ def on_modified(self, event):
229
+ token = self._token_ref()
230
+ if token is not None:
231
+ return token.on_modified(event)
232
+
233
+ def on_deleted(self, event):
234
+ token = self._token_ref()
235
+ if token is not None:
236
+ return token.on_deleted(event)
237
+
238
+ def on_created(self, event):
239
+ token = self._token_ref()
240
+ if token is not None:
241
+ return token.on_created(event)
242
+
243
+
160
244
  class CounterToken(Token, FileSystemEventHandler):
161
245
  """File-based counter token
162
246
 
@@ -213,6 +297,9 @@ class CounterToken(Token, FileSystemEventHandler):
213
297
  self.ipc_lock = fasteners.InterProcessLock(path / "token.lock")
214
298
  self.lock = threading.Lock()
215
299
 
300
+ # Condition variable for waiting on token availability
301
+ self.available_condition = threading.Condition(self.lock)
302
+
216
303
  self.name = name
217
304
 
218
305
  # Set the new number of tokens
@@ -227,8 +314,16 @@ class CounterToken(Token, FileSystemEventHandler):
227
314
 
228
315
  # Watched path
229
316
  self.watchedpath = str(path.absolute())
230
- self.watcher = ipcom().fswatch(self, self.path, recursive=True)
231
- logger.info("Watching %s", self.watchedpath)
317
+ self.proxy = CounterTokenProxy(self)
318
+ self.watcher = ipcom().fswatch(self.proxy, self.path, recursive=True)
319
+ logger.debug("Watching %s", self.watchedpath)
320
+
321
+ def __del__(self):
322
+ # Remove the watcher
323
+ if self.watcher is not None:
324
+ logging.debug("Removing watcher on %s", self.watchedpath)
325
+ ipcom().fsunwatch(self.watcher)
326
+ self.watcher = None
232
327
 
233
328
  def _update(self):
234
329
  """Update the state by reading all the information from disk
@@ -281,9 +376,8 @@ class CounterToken(Token, FileSystemEventHandler):
281
376
  self.available,
282
377
  )
283
378
 
284
- # Do not lock here (notify only)
285
- if self.available > 0:
286
- self.aio_notify()
379
+ # Notify waiting tasks that tokens are available
380
+ self.available_condition.notify_all()
287
381
 
288
382
  def on_created(self, event):
289
383
  logger.debug(
@@ -311,7 +405,7 @@ class CounterToken(Token, FileSystemEventHandler):
311
405
  def on_modified(self, event):
312
406
  try:
313
407
  logger.debug(
314
- "Watched path notification %s [watched %s]",
408
+ "on modified path: %s [watched %s]",
315
409
  event.src_path,
316
410
  self.watchedpath,
317
411
  )
@@ -321,26 +415,29 @@ class CounterToken(Token, FileSystemEventHandler):
321
415
 
322
416
  if event.src_path == str(self.infopath):
323
417
  logger.debug("Token information modified")
324
- timestamp = os.path.getmtime(self.infopath)
325
- if timestamp <= self.timestamp:
418
+ with self.lock:
419
+ timestamp = os.path.getmtime(self.infopath)
420
+ if timestamp <= self.timestamp:
421
+ logger.debug(
422
+ "Not reading token file [%f <= %f]",
423
+ timestamp,
424
+ self.timestamp,
425
+ )
426
+ return
427
+
428
+ total = int(self.infopath.read_text())
429
+ delta = total - self.total
430
+ self.total = total
431
+ self.available += delta
326
432
  logger.debug(
327
- "Not reading token file [%f <= %f]", timestamp, self.timestamp
433
+ "Token information modified: available %d, total %d",
434
+ self.available,
435
+ self.total,
328
436
  )
329
437
 
330
- total = int(self.infopath.read_text())
331
- delta = total - self.total
332
- self.total = total
333
- self.available += delta
334
- logger.debug(
335
- "Token information modified: available %d, total %d",
336
- self.available,
337
- self.total,
338
- )
339
-
340
- if delta > 0 and self.available > 0:
341
- with self.dependents as dependents:
342
- for dependency in dependents:
343
- dependency.check()
438
+ # Notify waiting tasks if tokens became available
439
+ if delta > 0:
440
+ self.available_condition.notify_all()
344
441
 
345
442
  # A modified dependency not in cache
346
443
  elif path.name.endswith(".token") and path.name not in self.cache:
@@ -371,7 +468,7 @@ class CounterToken(Token, FileSystemEventHandler):
371
468
  with self.lock, self.ipc_lock:
372
469
  self._update()
373
470
  if self.available < dependency.count:
374
- logger.warning(
471
+ logger.debug(
375
472
  "Not enough available (%d available, %d requested)",
376
473
  self.available,
377
474
  dependency.count,
@@ -406,9 +503,11 @@ class CounterToken(Token, FileSystemEventHandler):
406
503
  del self.cache[dependency.name]
407
504
  self.available += tf.count
408
505
  logging.debug("%s: available %d", self, self.available)
409
- tf.delete()
410
506
 
411
- self.aio_notify()
507
+ # Notify waiting tasks that tokens are available
508
+ self.available_condition.notify_all()
509
+
510
+ tf.delete()
412
511
 
413
512
 
414
513
  class ProcessCounterToken(Token):
@@ -456,8 +555,6 @@ class ProcessCounterToken(Token):
456
555
  self.available,
457
556
  )
458
557
 
459
- self.aio_notify()
460
-
461
558
 
462
559
  if sys.platform != "win32":
463
560
  os.register_at_fork(after_in_child=CounterToken.forkhandler)
@@ -3,7 +3,7 @@ from itertools import chain
3
3
  from typing import Dict, Any
4
4
  from pathlib import Path
5
5
  import json
6
- from src.experimaestro.core.objects.config_utils import getqualattr
6
+ from experimaestro.core.objects.config_utils import getqualattr
7
7
  from termcolor import colored
8
8
 
9
9
 
@@ -0,0 +1,8 @@
1
+ """Textual-based TUI for monitoring experiments"""
2
+
3
+ from .app import ExperimaestroUI
4
+
5
+ # Backward compatibility alias
6
+ ExperimentTUI = ExperimaestroUI
7
+
8
+ __all__ = ["ExperimaestroUI", "ExperimentTUI"]