parsl 2024.4.22__py3-none-any.whl → 2024.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. parsl/config.py +10 -1
  2. parsl/data_provider/zip.py +32 -0
  3. parsl/dataflow/dflow.py +34 -26
  4. parsl/executors/high_throughput/executor.py +7 -1
  5. parsl/executors/status_handling.py +0 -3
  6. parsl/executors/taskvine/executor.py +0 -31
  7. parsl/executors/workqueue/executor.py +0 -30
  8. parsl/jobs/job_status_poller.py +1 -3
  9. parsl/monitoring/monitoring.py +3 -0
  10. parsl/monitoring/radios.py +1 -1
  11. parsl/providers/kubernetes/kube.py +20 -1
  12. parsl/tests/configs/local_threads_checkpoint_periodic.py +8 -10
  13. parsl/tests/conftest.py +12 -1
  14. parsl/tests/test_bash_apps/test_std_uri.py +128 -0
  15. parsl/tests/test_checkpointing/test_periodic.py +20 -33
  16. parsl/tests/test_htex/test_basic.py +2 -2
  17. parsl/tests/test_htex/test_missing_worker.py +0 -4
  18. parsl/tests/test_mpi_apps/test_resource_spec.py +2 -8
  19. parsl/tests/test_staging/test_zip_in.py +42 -0
  20. parsl/tests/test_staging/test_zip_to_zip.py +44 -0
  21. parsl/tests/unit/__init__.py +0 -0
  22. parsl/tests/unit/test_file.py +99 -0
  23. parsl/usage_tracking/api.py +66 -0
  24. parsl/usage_tracking/usage.py +39 -26
  25. parsl/version.py +1 -1
  26. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/METADATA +2 -2
  27. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/RECORD +34 -28
  28. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/exec_parsl_function.py +0 -0
  29. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/parsl_coprocess.py +0 -0
  30. {parsl-2024.4.22.data → parsl-2024.5.6.data}/scripts/process_worker_pool.py +0 -0
  31. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/LICENSE +0 -0
  32. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/WHEEL +0 -0
  33. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/entry_points.txt +0 -0
  34. {parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/top_level.txt +0 -0
parsl/tests/test_checkpointing/test_periodic.py CHANGED
@@ -1,16 +1,12 @@
-import argparse
-import time
-
 import pytest
 
 import parsl
 from parsl.app.app import python_app
-from parsl.tests.configs.local_threads_checkpoint_periodic import config
+from parsl.tests.configs.local_threads_checkpoint_periodic import fresh_config
 
 
 def local_setup():
-    global dfk
-    dfk = parsl.load(config)
+    parsl.load(fresh_config())
 
 
 def local_teardown():
@@ -27,40 +23,31 @@ def slow_double(x, sleep_dur=1):
 
 
 def tstamp_to_seconds(line):
-    print("Parsing line: ", line)
     f = line.partition(" ")[0]
     return float(f)
 
 
 @pytest.mark.local
-def test_periodic(n=4):
+def test_periodic():
     """Test checkpointing with task_periodic behavior
     """
-
-    d = {}
-
-    print("Launching : ", n)
-    for i in range(0, n):
-        d[i] = slow_double(i)
-    print("Done launching")
-
-    for i in range(0, n):
-        d[i].result()
-    print("Done sleeping")
-
-    time.sleep(16)
-    dfk.cleanup()
+    h, m, s = map(int, parsl.dfk().config.checkpoint_period.split(":"))
+    assert h == 0, "Verify test setup"
+    assert m == 0, "Verify test setup"
+    assert s > 0, "Verify test setup"
+    sleep_for = s + 1
+    with parsl.dfk():
+        futs = [slow_double(sleep_for) for _ in range(4)]
+        [f.result() for f in futs]
 
     # Here we will check if the loglines came back with 5 seconds deltas
-    print("Rundir: ", dfk.run_dir)
-
-    with open("{}/parsl.log".format(dfk.run_dir), 'r') as f:
+    with open("{}/parsl.log".format(parsl.dfk().run_dir)) as f:
         log_lines = f.readlines()
-        expected_msg = " Done checkpointing"
-        expected_msg2 = " No tasks checkpointed in this pass"
-
-        lines = [line for line in log_lines if expected_msg in line or expected_msg2 in line]
-        assert len(lines) >= 3, "Insufficient checkpoint lines in logfile"
-        deltas = [tstamp_to_seconds(line) for line in lines]
-        assert deltas[1] - deltas[0] < 5.5, "Delta between checkpoints exceeded period"
-        assert deltas[2] - deltas[1] < 5.5, "Delta between checkpoints exceeded period"
+    expected_msg = " Done checkpointing"
+    expected_msg2 = " No tasks checkpointed in this pass"
+
+    lines = [line for line in log_lines if expected_msg in line or expected_msg2 in line]
+    assert len(lines) >= 3, "Insufficient checkpoint lines in logfile"
+    deltas = [tstamp_to_seconds(line) for line in lines]
+    assert deltas[1] - deltas[0] < 5.5, "Delta between checkpoints exceeded period"
+    assert deltas[2] - deltas[1] < 5.5, "Delta between checkpoints exceeded period"
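
Note on the rewritten test above: instead of a hard-coded time.sleep(16), it derives task durations from the configured checkpoint_period, which is an "HH:MM:SS" string on the parsl Config. A minimal standalone sketch of that parsing (the period value here is hypothetical):

    # checkpoint_period is an "HH:MM:SS" string; the test splits it and
    # requires a seconds-only period, then sleeps slightly longer than it
    period = "00:00:05"
    h, m, s = map(int, period.split(":"))
    assert (h, m) == (0, 0) and s > 0
    sleep_for = s + 1  # sleep just past one checkpoint interval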
parsl/tests/test_htex/test_basic.py CHANGED
@@ -23,6 +23,6 @@ def dummy():
 
 
 @pytest.mark.local
-def test_that_it_fails():
+def test_app():
     x = dummy()
-    x.result()
+    assert x.result() is None
parsl/tests/test_htex/test_missing_worker.py CHANGED
@@ -37,7 +37,3 @@ def test_that_it_fails():
         raise Exception("The app somehow ran without a valid worker")
 
     assert parsl.dfk().config.executors[0]._executor_bad_state.is_set()
-
-    # htex needs shutting down explicitly because dfk.cleanup() will not
-    # do that, as it is in bad state
-    parsl.dfk().config.executors[0].shutdown()
parsl/tests/test_mpi_apps/test_resource_spec.py CHANGED
@@ -25,17 +25,11 @@ from parsl.executors.high_throughput.mpi_prefix_composer import (
 EXECUTOR_LABEL = "MPI_TEST"
 
 
-def local_setup():
+def local_config():
     config = fresh_config()
     config.executors[0].label = EXECUTOR_LABEL
     config.executors[0].max_workers_per_node = 1
-    parsl.load(config)
-
-
-def local_teardown():
-    logging.warning("Exiting")
-    parsl.dfk().cleanup()
-    parsl.clear()
+    return config
 
 
 @python_app
parsl/tests/test_staging/test_zip_in.py ADDED
@@ -0,0 +1,42 @@
+import parsl
+import pytest
+import random
+import zipfile
+
+from parsl.data_provider.files import File
+from parsl.data_provider.zip import ZipAuthorityError, ZipFileStaging
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+from parsl.tests.configs.htex_local import fresh_config as local_config
+
+
+@parsl.python_app
+def count_lines(file):
+    with open(file, "r") as f:
+        return len(f.readlines())
+
+
+@pytest.mark.local
+def test_zip_in(tmpd_cwd):
+    # basic test of zip file stage-in
+    zip_path = tmpd_cwd / "container.zip"
+    file_base = "data.txt"
+    zip_file = File(f"zip:{zip_path / file_base}")
+
+    # create a zip file containing one file with some abitrary number of lines
+    n_lines = random.randint(0, 1000)
+
+    with zipfile.ZipFile(zip_path, mode='w') as z:
+        with z.open(file_base, mode='w') as f:
+            for _ in range(n_lines):
+                f.write(b'someline\n')
+
+    app_future = count_lines(zip_file)
+
+    assert app_future.result() == n_lines
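
The new test exercises the zip: URL scheme introduced in parsl/data_provider/zip.py: the member path inside the archive is appended directly after the archive's own filesystem path. A minimal sketch with hypothetical paths, assuming the URL parsing behavior shown in the File tests later in this diff:

    from parsl.data_provider.files import File

    # "zip:" + <path to archive> + "/" + <member path inside the archive>
    f = File("zip:/tmp/outputs.zip/results/data.txt")
    assert f.scheme == "zip"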
parsl/tests/test_staging/test_zip_to_zip.py ADDED
@@ -0,0 +1,44 @@
+import parsl
+import pytest
+import random
+import zipfile
+
+from parsl.data_provider.files import File
+from parsl.data_provider.zip import ZipAuthorityError, ZipFileStaging
+
+from parsl.providers import LocalProvider
+from parsl.channels import LocalChannel
+from parsl.launchers import SimpleLauncher
+
+from parsl.config import Config
+from parsl.executors import HighThroughputExecutor
+
+from parsl.tests.configs.htex_local import fresh_config as local_config
+
+
+@parsl.python_app
+def generate_lines(n: int, *, outputs):
+    with open(outputs[0], "w") as f:
+        for x in range(n):
+            # write numbered lines
+            f.write(str(x) + "\n")
+
+
+@parsl.python_app
+def count_lines(file):
+    with open(file, "r") as f:
+        return len(f.readlines())
+
+
+@pytest.mark.local
+def test_zip_pipeline(tmpd_cwd):
+    # basic test of zip file stage-in
+    zip_path = tmpd_cwd / "container.zip"
+    file_base = "data.txt"
+    zip_file = File(f"zip:{zip_path / file_base}")
+
+    n_lines = random.randint(0, 1000)
+    generate_fut = generate_lines(n_lines, outputs=[zip_file])
+    n_lines_out = count_lines(generate_fut.outputs[0]).result()
+
+    assert n_lines == n_lines_out
parsl/tests/unit/__init__.py ADDED (empty file)
parsl/tests/unit/test_file.py ADDED
@@ -0,0 +1,99 @@
+import os
+from unittest import mock
+
+import pytest
+
+from parsl import File
+
+_MOCK_BASE = "parsl.data_provider.files."
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("http", "https", "ftp", "ftps", "asdfasdf"))
+def test_file_init_scheme(scheme):
+    basename = "some_base_name"
+    path = f"/some/path/1/2/3/{basename}"
+    fqdn = "some.fqdn.example.com"
+    exp_url = f"{scheme}://{fqdn}{path}"
+    f = File(exp_url)
+    assert f.url == exp_url, "Expected given url to be stored"
+    assert f.scheme == scheme
+    assert f.netloc == fqdn
+    assert f.path == path
+    assert f.filename == basename
+    assert f.local_path is None, "Expect only set by API consumer, not constructor"
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("url", ("some weird :// url", "", "a"))
+def test_file_init_file_url_fallback(url):
+    exp_url = "some weird :// url"
+    f = File(exp_url)
+    assert f.url == exp_url
+    assert not f.netloc, "invalid host, should be no netloc"
+    assert f.path == exp_url, "Should fail to fully parse, so path is whole url"
+    assert f.filename == exp_url.rsplit("/", 1)[-1]
+
+    assert f.scheme == "file"
+
+
+@pytest.mark.local
+def test_file_proxies_for_filepath(randomstring):
+    # verify (current) expected internal hookup
+    exp_filepath = randomstring()
+    with mock.patch(
+        f"{_MOCK_BASE}File.filepath", new_callable=mock.PropertyMock
+    ) as mock_fpath:
+        mock_fpath.return_value = exp_filepath
+        f = File("")
+        assert str(f) == exp_filepath
+        assert os.fspath(f) == exp_filepath
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("file://", ""))
+def test_file_filepath_local_path_is_priority(scheme, randomstring):
+    exp_path = "/some/local/path"
+    url = f"{scheme}{exp_path}"
+    f = File(url)
+
+    f.local_path = randomstring()
+    assert f.filepath == f.local_path
+
+    f.local_path = None
+    assert f.filepath == exp_path
+
+
+@pytest.mark.local
+def test_file_filepath_requires_local_accessible_path():
+    with pytest.raises(ValueError) as pyt_exc:
+        _ = File("http://").filepath
+
+    assert "No local_path" in str(pyt_exc.value), "Expected reason in exception"
+
+
+@pytest.mark.local
+@pytest.mark.parametrize("scheme", ("https", "ftps", "", "file", "asdfasdf"))
+def test_file_repr(scheme):
+    netloc = "some.netloc"
+    filename = "some_file_name"
+    path = f"/some/path/{filename}"
+    if scheme:
+        url = f"{scheme}://{netloc}{path}"
+    else:
+        scheme = "file"
+        url = path
+
+    f = File(url)
+    r = repr(f)
+    assert r.startswith("<")
+    assert r.endswith(">")
+    assert f"<{type(f).__name__} " in r
+    assert f" at 0x{id(f):x}" in r
+    assert f" url={url}" in r
+    assert f" scheme={scheme}" in r
+    assert f" path={path}" in r
+    assert f" filename={filename}" in r
+
+    if scheme != "file":
+        assert f" netloc={netloc}" in r
parsl/usage_tracking/api.py ADDED
@@ -0,0 +1,66 @@
+import inspect
+
+from parsl.utils import RepresentationMixin
+
+from abc import abstractmethod
+from functools import singledispatch
+from typing import Any, List, Sequence
+
+
+# Traverse the configuration hierarchy, returning a JSON component
+# for each one. Configuration components which implement
+# RepresentationMixin will be in the right form for inspecting
+# object attributes. Configuration components which are lists or tuples
+# are traversed in sequence. Other types default to reporting no
+# usage information.
+
+@singledispatch
+def get_parsl_usage(obj) -> List[Any]:
+    return []
+
+
+@get_parsl_usage.register
+def get_parsl_usage_representation_mixin(obj: RepresentationMixin) -> List[Any]:
+    t = type(obj)
+    qualified_name = t.__module__ + "." + t.__name__
+
+    # me can contain anything that can be rendered as JSON
+    me: List[Any] = []
+
+    if isinstance(obj, UsageInformation):
+        # report rich usage information for this component
+        attrs = {'c': qualified_name}
+        attrs.update(obj.get_usage_information())
+        me = [attrs]
+    else:
+        # report the class name of this component
+        me = [qualified_name]
+
+    # unwrap typeguard-style unwrapping
+    init: Any = type(obj).__init__
+    if hasattr(init, '__wrapped__'):
+        init = init.__wrapped__
+
+    argspec = inspect.getfullargspec(init)
+
+    for arg in argspec.args[1:]:  # skip first arg, self
+        arg_value = getattr(obj, arg)
+        d = get_parsl_usage(arg_value)
+        me += d
+
+    return me
+
+
+@get_parsl_usage.register(list)
+@get_parsl_usage.register(tuple)
+def get_parsl_usage_sequence(obj: Sequence) -> List[Any]:
+    result = []
+    for v in obj:
+        result += get_parsl_usage(v)
+    return result
+
+
+class UsageInformation:
+    @abstractmethod
+    def get_usage_information(self) -> dict:
+        pass
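
To show how this new module is meant to be used: a component opts in to rich usage reporting by implementing UsageInformation alongside RepresentationMixin; anything else registered through RepresentationMixin is reported by class name only. A sketch under those assumptions (MyExecutor is hypothetical, not part of parsl):

    from parsl.usage_tracking.api import UsageInformation, get_parsl_usage
    from parsl.utils import RepresentationMixin

    class MyExecutor(RepresentationMixin, UsageInformation):
        def __init__(self, label="demo"):
            self.label = label  # attribute names must match __init__ args for traversal

        def get_usage_information(self) -> dict:
            return {"label": self.label}

    # dispatches on RepresentationMixin, then merges the rich attrs under 'c'
    print(get_parsl_usage(MyExecutor()))
    # expected: [{'c': '__main__.MyExecutor', 'label': 'demo'}]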
parsl/usage_tracking/usage.py CHANGED
@@ -7,6 +7,7 @@ import socket
 import sys
 import platform
 
+from parsl.usage_tracking.api import get_parsl_usage
 from parsl.utils import setproctitle
 from parsl.multiprocessing import ForkProcess
 from parsl.dataflow.states import States
@@ -17,6 +18,13 @@ logger = logging.getLogger(__name__)
 from typing import Callable
 from typing_extensions import ParamSpec
 
+# protocol version byte: when (for example) compression parameters are changed
+# that cannot be inferred from the compressed message itself, this version
+# ID needs to imply those parameters.
+
+# Earlier protocol versions: b'{' - the original pure-JSON protocol pre-March 2024
+PROTOCOL_VERSION = b'1'
+
 P = ParamSpec("P")
 
 
@@ -32,7 +40,7 @@ def async_process(fn: Callable[P, None]) -> Callable[P, None]:
 
 
 @async_process
-def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: str) -> None:
+def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: bytes) -> None:
     """Send UDP messages to usage tracker asynchronously
 
     This multiprocessing based messenger was written to overcome the limitations
@@ -46,16 +54,11 @@ def udp_messenger(domain_name: str, UDP_PORT: int, sock_timeout: int, message: s
     setproctitle("parsl: Usage tracking")
 
     try:
-        encoded_message = bytes(message, "utf-8")
-
         UDP_IP = socket.gethostbyname(domain_name)
 
-        if UDP_PORT is None:
-            raise Exception("UDP_PORT is None")
-
         sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)  # UDP
         sock.settimeout(sock_timeout)
-        sock.sendto(encoded_message, (UDP_IP, UDP_PORT))
+        sock.sendto(message, (UDP_IP, UDP_PORT))
         sock.close()
 
     except socket.timeout:
@@ -102,7 +105,7 @@ class UsageTracker:
         self.procs = []
         self.dfk = dfk
         self.config = self.dfk.config
-        self.uuid = str(uuid.uuid4())
+        self.correlator_uuid = str(uuid.uuid4())
         self.parsl_version = PARSL_VERSION
         self.python_version = "{}.{}.{}".format(sys.version_info.major,
                                                 sys.version_info.minor,
@@ -130,22 +133,23 @@ class UsageTracker:
 
         return track
 
-    def construct_start_message(self) -> str:
+    def construct_start_message(self) -> bytes:
         """Collect preliminary run info at the start of the DFK.
 
         Returns :
              - Message dict dumped as json string, ready for UDP
         """
-        message = {'uuid': self.uuid,
+        message = {'correlator': self.correlator_uuid,
                    'parsl_v': self.parsl_version,
                    'python_v': self.python_version,
-                   'os': platform.system(),
-                   'os_v': platform.release(),
-                   'start': time.time()}
+                   'platform.system': platform.system(),
+                   'start': int(time.time()),
+                   'components': get_parsl_usage(self.dfk._config)}
+        logger.debug(f"Usage tracking start message: {message}")
 
-        return json.dumps(message)
+        return self.encode_message(message)
 
-    def construct_end_message(self) -> str:
+    def construct_end_message(self) -> bytes:
         """Collect the final run information at the time of DFK cleanup.
 
         Returns:
@@ -153,20 +157,26 @@ class UsageTracker:
         """
         app_count = self.dfk.task_count
 
-        site_count = len(self.dfk.config.executors)
-
         app_fails = self.dfk.task_state_counts[States.failed] + self.dfk.task_state_counts[States.dep_fail]
 
-        message = {'uuid': self.uuid,
-                   'end': time.time(),
-                   't_apps': app_count,
-                   'sites': site_count,
-                   'failed': app_fails
-                   }
+        # the DFK is tangled into this code as a god-object, so it is
+        # handled separately from the usual traversal code, but presenting
+        # the same protocol-level report.
+        dfk_component = {'c': type(self.dfk).__module__ + "." + type(self.dfk).__name__,
+                         'app_count': app_count,
+                         'app_fails': app_fails}
+
+        message = {'correlator': self.correlator_uuid,
+                   'end': int(time.time()),
+                   'components': [dfk_component] + get_parsl_usage(self.dfk._config)}
+        logger.debug(f"Usage tracking end message (unencoded): {message}")
 
-        return json.dumps(message)
+        return self.encode_message(message)
 
-    def send_UDP_message(self, message: str) -> None:
+    def encode_message(self, obj):
+        return PROTOCOL_VERSION + json.dumps(obj).encode()
+
+    def send_UDP_message(self, message: bytes) -> None:
         """Send UDP message."""
         if self.tracking_enabled:
             try:
@@ -191,7 +201,10 @@ class UsageTracker:
        or won't respond to SIGTERM.
        """
        for proc in self.procs:
+           logger.debug("Joining usage tracking process %s", proc)
            proc.join(timeout=timeout)
            if proc.is_alive():
-               logger.info("Usage tracking process did not end itself; sending SIGKILL")
+               logger.warning("Usage tracking process did not end itself; sending SIGKILL")
                proc.kill()
+
+           proc.close()
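
Taken together, the usage.py changes replace the plain JSON string on the wire with a one-byte-versioned binary frame. A sketch of the framing and a mirror-image decoder (the decoder side is an assumption; only the encoder appears in this diff):

    import json

    PROTOCOL_VERSION = b'1'

    def encode_message(obj) -> bytes:
        # one version byte, then the UTF-8 JSON payload
        return PROTOCOL_VERSION + json.dumps(obj).encode()

    def decode_message(data: bytes):
        # a receiver can dispatch on the first byte; b'{' would indicate the
        # older unversioned pure-JSON protocol, per the comment in the diff above
        assert data[0:1] == PROTOCOL_VERSION
        return json.loads(data[1:])

    msg = {'correlator': 'abc123', 'end': 1714000000}
    assert decode_message(encode_message(msg)) == msg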
parsl/version.py CHANGED
@@ -3,4 +3,4 @@
 Year.Month.Day[alpha/beta/..]
 Alphas will be numbered like this -> 2024.12.10a0
 """
-VERSION = '2024.04.22'
+VERSION = '2024.05.06'
{parsl-2024.4.22.dist-info → parsl-2024.5.6.dist-info}/METADATA CHANGED
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: parsl
-Version: 2024.4.22
+Version: 2024.5.6
 Summary: Simple data dependent workflows in Python
 Home-page: https://github.com/Parsl/parsl
-Download-URL: https://github.com/Parsl/parsl/archive/2024.04.22.tar.gz
+Download-URL: https://github.com/Parsl/parsl/archive/2024.05.06.tar.gz
 Author: The Parsl Team
 Author-email: parsl@googlegroups.com
 License: Apache 2.0