parsl 2024.4.22__py3-none-any.whl → 2024.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. parsl/config.py +10 -1
  2. parsl/data_provider/zip.py +32 -0
  3. parsl/dataflow/dflow.py +34 -26
  4. parsl/executors/high_throughput/executor.py +7 -1
  5. parsl/executors/status_handling.py +0 -3
  6. parsl/executors/taskvine/executor.py +0 -31
  7. parsl/executors/workqueue/executor.py +0 -30
  8. parsl/jobs/job_status_poller.py +1 -3
  9. parsl/monitoring/monitoring.py +3 -0
  10. parsl/monitoring/radios.py +1 -1
  11. parsl/providers/kubernetes/kube.py +20 -1
  12. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  13. parsl/tests/conftest.py +12 -1
  14. parsl/tests/test_bash_apps/test_std_uri.py +128 -0
  15. parsl/tests/test_checkpointing/test_periodic.py +20 -33
  16. parsl/tests/test_htex/test_basic.py +2 -2
  17. parsl/tests/test_htex/test_missing_worker.py +0 -4
  18. parsl/tests/test_mpi_apps/test_resource_spec.py +2 -8
  19. parsl/tests/test_staging/test_zip_in.py +42 -0
  20. parsl/tests/test_staging/test_zip_to_zip.py +44 -0
  21. parsl/tests/unit/__init__.py +0 -0
  22. parsl/tests/unit/test_file.py +99 -0
  23. parsl/usage_tracking/api.py +66 -0
  24. parsl/usage_tracking/usage.py +39 -26
  25. parsl/version.py +1 -1
  26. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/METADATA +2 -2
  27. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/RECORD +34 -28
  28. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/exec_parsl_function.py +0 -0
  29. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/parsl_coprocess.py +0 -0
  30. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/process_worker_pool.py +0 -0
  31. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/LICENSE +0 -0
  32. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/WHEEL +0 -0
  33. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/entry_points.txt +0 -0
  34. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/top_level.txt +0 -0
parsl/tests/test_checkpointing/test_periodic.py CHANGED
@@ -1,16 +1,12 @@
-import argparse
-import time
-
 import pytest
 
 import parsl
 from parsl.app.app import python_app
-from parsl.tests.configs.local_threads_checkpoint_periodic import config
+from parsl.tests.configs.local_threads_checkpoint_periodic import fresh_config
 
 
 def local_setup():
-    global dfk
-    dfk = parsl.load(config)
+    parsl.load(fresh_config())
 
 
 def local_teardown():
@@ -27,40 +23,31 @@ def slow_double(x, sleep_dur=1):
 
 
 def tstamp_to_seconds(line):
-    print("Parsing line: ", line)
     f = line.partition(" ")[0]
     return float(f)
 
 
 @pytest.mark.local
-def test_periodic(n=4):
+def test_periodic():
     """Test checkpointing with task_periodic behavior
     """
-
-    d = {}
-
-    print("Launching : ", n)
-    for i in range(0, n):
-        d[i] = slow_double(i)
-    print("Done launching")
-
-    for i in range(0, n):
-        d[i].result()
-    print("Done sleeping")
-
-    time.sleep(16)
-    dfk.cleanup()
+    h, m, s = map(int, parsl.dfk().config.checkpoint_period.split(":"))
+    assert h == 0, "Verify test setup"
+    assert m == 0, "Verify test setup"
+    assert s > 0, "Verify test setup"
+    sleep_for = s + 1
+    with parsl.dfk():
+        futs = [slow_double(sleep_for) for _ in range(4)]
+        [f.result() for f in futs]
 
     # Here we will check if the loglines came back with 5 seconds deltas
-    print("Rundir: ", dfk.run_dir)
-
-    with open("{}/parsl.log".format(dfk.run_dir), 'r') as f:
+    with open("{}/parsl.log".format(parsl.dfk().run_dir)) as f:
         log_lines = f.readlines()
-        expected_msg = " Done checkpointing"
-        expected_msg2 = " No tasks checkpointed in this pass"
-
-        lines = [line for line in log_lines if expected_msg in line or expected_msg2 in line]
-        assert len(lines) >= 3, "Insufficient checkpoint lines in logfile"
-        deltas = [tstamp_to_seconds(line) for line in lines]
-        assert deltas[1] - deltas[0] < 5.5, "Delta between checkpoints exceeded period"
-        assert deltas[2] - deltas[1] < 5.5, "Delta between checkpoints exceeded period"
+    expected_msg = " Done checkpointing"
+    expected_msg2 = " No tasks checkpointed in this pass"
+
+    lines = [line for line in log_lines if expected_msg in line or expected_msg2 in line]
+    assert len(lines) >= 3, "Insufficient checkpoint lines in logfile"
+    deltas = [tstamp_to_seconds(line) for line in lines]
+    assert deltas[1] - deltas[0] < 5.5, "Delta between checkpoints exceeded period"
+    assert deltas[2] - deltas[1] < 5.5, "Delta between checkpoints exceeded period"
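
Note on the rewritten test above: instead of a hard-coded time.sleep(16), it derives task durations from the configured checkpoint_period, which is an "HH:MM:SS" string on the parsl Config. A minimal standalone sketch of that parsing (the period value here is hypothetical):

    # checkpoint_period is an "HH:MM:SS" string; the test splits it and
    # requires a seconds-only period, then sleeps slightly longer than it
    period = "00:00:05"
    h, m, s = map(int, period.split(":"))
    assert (h, m) == (0, 0) and s > 0
    sleep_for = s + 1  # sleep just past one checkpoint interval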
parsl/tests/test_htex/test_basic.py CHANGED
@@ -23,6 +23,6 @@ def dummy():
 
 
 @pytest.mark.local
-def test_that_it_fails():
+def test_app():
     x = dummy()
-    x.result()
+    assert x.result() is None
parsl/tests/test_htex/test_missing_worker.py CHANGED
@@ -37,7 +37,3 @@ def test_that_it_fails():
         raise Exception("The app somehow ran without a valid worker")
 
     assert parsl.dfk().config.executors[0]._executor_bad_state.is_set()
-
-    # htex needs shutting down explicitly because dfk.cleanup() will not
-    # do that, as it is in bad state
-    parsl.dfk().config.executors[0].shutdown()
parsl/tests/test_mpi_apps/test_resource_spec.py CHANGED
@@ -25,17 +25,11 @@ from parsl.executors.high_throughput.mpi_prefix_composer import (
 EXECUTOR_LABEL = "MPI_TEST"
 
 
-def local_setup():
+def local_config():
     config = fresh_config()
     config.executors[0].label = EXECUTOR_LABEL
     config.executors[0].max_workers_per_node = 1
-    parsl.load(config)
-
-
-def local_teardown():
-    logging.warning("Exiting")
-    parsl.dfk().cleanup()
-    parsl.clear()
+    return config
 
 
 @python_app
parsl/tests/test_staging/test_zip_in.py ADDED
@@ -0,0 +1,42 @@
+import parsl
+import pytest
+import random
+import zipfile
+
+from parsl.data_provider.files import File
+from parsl.data_provider.zip import ZipAuthorityError, ZipFileStaging
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+from parsl.tests.configs.htex_local import fresh_config as local_config
+
+
+@parsl.python_app
+def count_lines(file):
+    with open(file, "r") as f:
+        return len(f.readlines())
+
+
+@pytest.mark.local
+def test_zip_in(tmpd_cwd):
+    # basic test of zip file stage-in
+    zip_path = tmpd_cwd / "container.zip"
+    file_base = "data.txt"
+    zip_file = File(f"zip:{zip_path / file_base}")
+
+    # create a zip file containing one file with some abitrary number of lines
+    n_lines = random.randint(0, 1000)
+
+    with zipfile.ZipFile(zip_path, mode='w') as z:
+        with z.open(file_base, mode='w') as f:
+            for _ in range(n_lines):
+                f.write(b'someline\n')
+
+    app_future = count_lines(zip_file)
+
+    assert app_future.result() == n_lines
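
The new test exercises the zip: URL scheme introduced in parsl/data_provider/zip.py: the member path inside the archive is appended directly after the archive's own filesystem path. A minimal sketch with hypothetical paths, assuming the URL parsing behavior shown in the File tests later in this diff:

    from parsl.data_provider.files import File

    # "zip:" + <path to archive> + "/" + <member path inside the archive>
    f = File("zip:/tmp/outputs.zip/results/data.txt")
    assert f.scheme == "zip"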
parsl/tests/test_staging/test_zip_to_zip.py ADDED
@@ -0,0 +1,44 @@
+import parsl
+import pytest
+import random
+import zipfile
+
+from parsl.data_provider.files import File
+from parsl.data_provider.zip import ZipAuthorityError, ZipFileStaging
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+from parsl.tests.configs.htex_local import fresh_config as local_config
+
+
+@parsl.python_app
+def generate_lines(n: int, *, outputs):
+    with open(outputs[0], "w") as f:
+        for x in range(n):
+            # write numbered lines
+            f.write(str(x) + "\n")
+
+
+@parsl.python_app
+def count_lines(file):
+    with open(file, "r") as f:
+        return len(f.readlines())
+
+
+@pytest.mark.local
+def test_zip_pipeline(tmpd_cwd):
+    # basic test of zip file stage-in
+    zip_path = tmpd_cwd / "container.zip"
+    file_base = "data.txt"
+    zip_file = File(f"zip:{zip_path / file_base}")
+
+    n_lines = random.randint(0, 1000)
+    generate_fut = generate_lines(n_lines, outputs=[zip_file])
+    n_lines_out = count_lines(generate_fut.outputs[0]).result()
+
+    assert n_lines == n_lines_out
parsl/tests/unit/__init__.py ADDED (empty file)
parsl/tests/unit/test_file.py ADDED
@@ -0,0 +1,99 @@
+import os
+from unittest import mock
+
+import pytest
+
+from parsl import File
+
+_MOCK_BASE = "parsl.data_provider.files."
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("http", "https", "ftp", "ftps", "asdfasdf"))
+def test_file_init_scheme(scheme):
+    basename = "some_base_name"
+    path = f"/some/path/1/2/3/{basename}"
+    fqdn = "some.fqdn.example.com"
+    exp_url = f"{scheme}://{fqdn}{path}"
+    f = File(exp_url)
+    assert f.url == exp_url, "Expected given url to be stored"
+    assert f.scheme == scheme
+    assert f.netloc == fqdn
+    assert f.path == path
+    assert f.filename == basename
+    assert f.local_path is None, "Expect only set by API consumer, not constructor"
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("url", ("some weird :// url", "", "a"))
+def test_file_init_file_url_fallback(url):
+    exp_url = "some weird :// url"
+    f = File(exp_url)
+    assert f.url == exp_url
+    assert not f.netloc, "invalid host, should be no netloc"
+    assert f.path == exp_url, "Should fail to fully parse, so path is whole url"
+    assert f.filename == exp_url.rsplit("/", 1)[-1]
+
+    assert f.scheme == "file"
+
+
+@pytest.mark.local
+def test_file_proxies_for_filepath(randomstring):
+    # verify (current) expected internal hookup
+    exp_filepath = randomstring()
+    with mock.patch(
+        f"{_MOCK_BASE}File.filepath", new_callable=mock.PropertyMock
+    ) as mock_fpath:
+        mock_fpath.return_value = exp_filepath
+        f = File("")
+        assert str(f) == exp_filepath
+        assert os.fspath(f) == exp_filepath
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("file://", ""))
+def test_file_filepath_local_path_is_priority(scheme, randomstring):
+    exp_path = "/some/local/path"
+    url = f"{scheme}{exp_path}"
+    f = File(url)
+
+    f.local_path = randomstring()
+    assert f.filepath == f.local_path
+
+    f.local_path = None
+    assert f.filepath == exp_path
+
+
+@pytest.mark.local
+def test_file_filepath_requires_local_accessible_path():
+    with pytest.raises(ValueError) as pyt_exc:
+        _ = File("http://").filepath
+
+    assert "No local_path" in str(pyt_exc.value), "Expected reason in exception"
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("https", "ftps", "", "file", "asdfasdf"))
+def test_file_repr(scheme):
+    netloc = "some.netloc"
+    filename = "some_file_name"
+    path = f"/some/path/{filename}"
+    if scheme:
+        url = f"{scheme}://{netloc}{path}"
+    else:
+        scheme = "file"
+        url = path
+
+    f = File(url)
+    r = repr(f)
+    assert r.startswith("<")
+    assert r.endswith(">")
+    assert f"<{type(f).__name__} " in r
+    assert f" at 0x{id(f):x}" in r
+    assert f" url={url}" in r
+    assert f" scheme={scheme}" in r
+    assert f" path={path}" in r
+    assert f" filename={filename}" in r
+
+    if scheme != "file":
+        assert f" netloc={netloc}" in r
parsl/usage_tracking/api.py ADDED
@@ -0,0 +1,66 @@
+import inspect
+
+from parsl.utils import RepresentationMixin
+
+from abc import abstractmethod
+from functools import singledispatch
+from typing import Any, List, Sequence
+
+
+# Traverse the configuration hierarchy, returning a JSON component
+# for each one. Configuration components which implement
+# RepresentationMixin will be in the right form for inspecting
+# object attributes. Configuration components which are lists or tuples
+# are traversed in sequence. Other types default to reporting no
+# usage information.
+
+@singledispatch
+def get_parsl_usage(obj) -> List[Any]:
+    return []
+
+
+@get_parsl_usage.register
+def get_parsl_usage_representation_mixin(obj: RepresentationMixin) -> List[Any]:
+    t = type(obj)
+    qualified_name = t.__module__ + "." + t.__name__
+
+    # me can contain anything that can be rendered as JSON
+    me: List[Any] = []
+
+    if isinstance(obj, UsageInformation):
+        # report rich usage information for this component
+        attrs = {'c': qualified_name}
+        attrs.update(obj.get_usage_information())
+        me = [attrs]
+    else:
+        # report the class name of this component
+        me = [qualified_name]
+
+    # unwrap typeguard-style unwrapping
+    init: Any = type(obj).__init__
+    if hasattr(init, '__wrapped__'):
+        init = init.__wrapped__
+
+    argspec = inspect.getfullargspec(init)
+
+    for arg in argspec.args[1:]:  # skip first arg, self
+        arg_value = getattr(obj, arg)
+        d = get_parsl_usage(arg_value)
+        me += d
+
+    return me
+
+
+@get_parsl_usage.register(list)
+@get_parsl_usage.register(tuple)
+def get_parsl_usage_sequence(obj: Sequence) -> List[Any]:
+    result = []
+    for v in obj:
+        result += get_parsl_usage(v)
+    return result
+
+
+class UsageInformation:
+    @abstractmethod
+    def get_usage_information(self) -> dict:
+        pass
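
To show how this new module is meant to be used: a component opts in to rich usage reporting by implementing UsageInformation alongside RepresentationMixin; anything else registered through RepresentationMixin is reported by class name only. A sketch under those assumptions (MyExecutor is hypothetical, not part of parsl):

    from parsl.usage_tracking.api import UsageInformation, get_parsl_usage
    from parsl.utils import RepresentationMixin

    class MyExecutor(RepresentationMixin, UsageInformation):
        def __init__(self, label="demo"):
            self.label = label  # attribute names must match __init__ args for traversal

        def get_usage_information(self) -> dict:
            return {"label": self.label}

    # dispatches on RepresentationMixin, then merges the rich attrs under 'c'
    print(get_parsl_usage(MyExecutor()))
    # expected: [{'c': '__main__.MyExecutor', 'label': 'demo'}]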
parsl/usage_tracking/usage.py CHANGED
@@ -7,6 +7,7 @@ import socket
 import sys
 import platform
 
+from parsl.usage_tracking.api import get_parsl_usage
 from parsl.utils import setproctitle
 from parsl.multiprocessing import ForkProcess
 from parsl.dataflow.states import States
@@ -17,6 +18,13 @@ logger = logging.getLogger(__name__)
 from typing import Callable
 from typing_extensions import ParamSpec
 
+# protocol version byte: when (for example) compression parameters are changed
+# that cannot be inferred from the compressed message itself, this version
+# ID needs to imply those parameters.
+
+# Earlier protocol versions: b'{' - the original pure-JSON protocol pre-March 2024
+PROTOCOL_VERSION = b'1'
+
 P = ParamSpec("P")
 
 
@@ -32,7 +40,7 @@ def async_process(fn: Callable[P, None]) -> Callable[P, None]:
 
 
 @async_process
-def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: str) -> None:
+def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: bytes) -> None:
     """Send UDP messages to usage tracker asynchronously
 
     This multiprocessing based messenger was written to overcome the limitations
@@ -46,16 +54,11 @@ def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: s
     setproctitle("parsl: Usage tracking")
 
     try:
-        encoded_message = bytes(message, "utf-8")
-
         UDP_IP = socket.gethostbyname(domain_name)
 
-        if UDP_PORT is None:
-            raise Exception("UDP_PORT is None")
-
         sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)  # UDP
         sock.settimeout(sock_timeout)
-        sock.sendto(encoded_message, (UDP_IP, UDP_PORT))
+        sock.sendto(message, (UDP_IP, UDP_PORT))
         sock.close()
 
     except socket.timeout:
@@ -102,7 +105,7 @@ class UsageTracker:
         self.procs = []
         self.dfk = dfk
         self.config = self.dfk.config
-        self.uuid = str(uuid.uuid4())
+        self.correlator_uuid = str(uuid.uuid4())
         self.parsl_version = PARSL_VERSION
         self.python_version = "{}.{}.{}".format(sys.version_info.major,
                                                 sys.version_info.minor,
@@ -130,22 +133,23 @@ class UsageTracker:
 
         return track
 
-    def construct_start_message(self) -> str:
+    def construct_start_message(self) -> bytes:
         """Collect preliminary run info at the start of the DFK.
 
         Returns :
              - Message dict dumped as json string, ready for UDP
         """
-        message = {'uuid': self.uuid,
+        message = {'correlator': self.correlator_uuid,
                    'parsl_v': self.parsl_version,
                    'python_v': self.python_version,
-                   'os': platform.system(),
-                   'os_v': platform.release(),
-                   'start': time.time()}
+                   'platform.system': platform.system(),
+                   'start': int(time.time()),
+                   'components': get_parsl_usage(self.dfk._config)}
+        logger.debug(f"Usage tracking start message: {message}")
 
-        return json.dumps(message)
+        return self.encode_message(message)
 
-    def construct_end_message(self) -> str:
+    def construct_end_message(self) -> bytes:
         """Collect the final run information at the time of DFK cleanup.
 
         Returns:
@@ -153,20 +157,26 @@ class UsageTracker:
         """
         app_count = self.dfk.task_count
 
-        site_count = len(self.dfk.config.executors)
-
         app_fails = self.dfk.task_state_counts[States.failed] + self.dfk.task_state_counts[States.dep_fail]
 
-        message = {'uuid': self.uuid,
-                   'end': time.time(),
-                   't_apps': app_count,
-                   'sites': site_count,
-                   'failed': app_fails
-                   }
+        # the DFK is tangled into this code as a god-object, so it is
+        # handled separately from the usual traversal code, but presenting
+        # the same protocol-level report.
+        dfk_component = {'c': type(self.dfk).__module__ + "." + type(self.dfk).__name__,
+                         'app_count': app_count,
+                         'app_fails': app_fails}
+
+        message = {'correlator': self.correlator_uuid,
+                   'end': int(time.time()),
+                   'components': [dfk_component] + get_parsl_usage(self.dfk._config)}
+        logger.debug(f"Usage tracking end message (unencoded): {message}")
 
-        return json.dumps(message)
+        return self.encode_message(message)
 
-    def send_UDP_message(self, message: str) -> None:
+    def encode_message(self, obj):
+        return PROTOCOL_VERSION + json.dumps(obj).encode()
+
+    def send_UDP_message(self, message: bytes) -> None:
         """Send UDP message."""
         if self.tracking_enabled:
             try:
@@ -191,7 +201,10 @@ class UsageTracker:
        or won't respond to SIGTERM.
        """
        for proc in self.procs:
+           logger.debug("Joining usage tracking process %s", proc)
            proc.join(timeout=timeout)
            if proc.is_alive():
-               logger.info("Usage tracking process did not end itself; sending SIGKILL")
+               logger.warning("Usage tracking process did not end itself; sending SIGKILL")
                proc.kill()
+
+           proc.close()
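
Taken together, the usage.py changes replace the plain JSON string on the wire with a one-byte-versioned binary frame. A sketch of the framing and a mirror-image decoder (the decoder side is an assumption; only the encoder appears in this diff):

    import json

    PROTOCOL_VERSION = b'1'

    def encode_message(obj) -> bytes:
        # one version byte, then the UTF-8 JSON payload
        return PROTOCOL_VERSION + json.dumps(obj).encode()

    def decode_message(data: bytes):
        # a receiver can dispatch on the first byte; b'{' would indicate the
        # older unversioned pure-JSON protocol, per the comment in the diff above
        assert data[0:1] == PROTOCOL_VERSION
        return json.loads(data[1:])

    msg = {'correlator': 'abc123', 'end': 1714000000}
    assert decode_message(encode_message(msg)) == msg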
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
 Year.Month.Day[alpha/beta/..]
 Alphas will be numbered like this -> 2024.12.10a0
 """
-VERSION = '2024.04.22'
+VERSION = '2024.05.06'
{parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/METADATA CHANGED
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: parsl
-Version: 2024.4.22
+Version: 2024.5.6
 Summary: Simple data dependent workflows in Python
 Home-page: https://github.com/Parsl/parsl
-Download-URL: https://github.com/Parsl/parsl/archive/2024.04.22.tar.gz
+Download-URL: https://github.com/Parsl/parsl/archive/2024.05.06.tar.gz
 Author: The Parsl Team
 Author-email: parsl@googlegroups.com
 License: Apache 2.0