parsl 2025.1.13__py3-none-any.whl → 2025.1.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. parsl/configs/gc_multisite.py +27 -0
  2. parsl/configs/gc_tutorial.py +18 -0
  3. parsl/dataflow/dflow.py +23 -103
  4. parsl/dataflow/errors.py +60 -18
  5. parsl/dataflow/memoization.py +76 -2
  6. parsl/dataflow/taskrecord.py +1 -3
  7. parsl/executors/__init__.py +3 -1
  8. parsl/executors/globus_compute.py +125 -0
  9. parsl/executors/high_throughput/errors.py +1 -1
  10. parsl/executors/high_throughput/executor.py +16 -15
  11. parsl/executors/high_throughput/interchange.py +74 -96
  12. parsl/executors/high_throughput/zmq_pipes.py +0 -1
  13. parsl/tests/configs/globus_compute.py +20 -0
  14. parsl/tests/conftest.py +4 -0
  15. parsl/tests/test_checkpointing/test_python_checkpoint_1.py +0 -3
  16. parsl/tests/test_error_handling/test_resource_spec.py +3 -0
  17. parsl/tests/test_htex/test_interchange_exit_bad_registration.py +120 -0
  18. parsl/tests/test_htex/test_resource_spec_validation.py +0 -7
  19. parsl/tests/test_python_apps/test_dep_standard_futures.py +3 -0
  20. parsl/tests/test_python_apps/test_fail.py +23 -8
  21. parsl/tests/test_python_apps/test_join.py +6 -0
  22. parsl/tests/test_python_apps/test_memoize_1.py +0 -1
  23. parsl/tests/unit/test_globus_compute_executor.py +104 -0
  24. parsl/usage_tracking/usage.py +13 -8
  25. parsl/version.py +1 -1
  26. {parsl-2025.1.13.data → parsl-2025.1.27.data}/scripts/interchange.py +74 -96
  27. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/METADATA +5 -2
  28. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/RECORD +35 -30
  29. parsl/tests/test_checkpointing/test_python_checkpoint_3.py +0 -42
  30. {parsl-2025.1.13.data → parsl-2025.1.27.data}/scripts/exec_parsl_function.py +0 -0
  31. {parsl-2025.1.13.data → parsl-2025.1.27.data}/scripts/parsl_coprocess.py +0 -0
  32. {parsl-2025.1.13.data → parsl-2025.1.27.data}/scripts/process_worker_pool.py +0 -0
  33. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/LICENSE +0 -0
  34. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/WHEEL +0 -0
  35. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/entry_points.txt +0 -0
  36. {parsl-2025.1.13.dist-info → parsl-2025.1.27.dist-info}/top_level.txt +0 -0
parsl/configs/gc_multisite.py ADDED
@@ -0,0 +1,27 @@
+ from globus_compute_sdk import Executor
+
+ from parsl.config import Config
+ from parsl.executors import GlobusComputeExecutor
+ from parsl.usage_tracking.levels import LEVEL_1
+
+ # Please start your own endpoint on perlmutter following instructions below to use this config:
+ # https://globus-compute.readthedocs.io/en/stable/endpoints/endpoint_examples.html#perlmutter-nersc
+ perlmutter_endpoint = 'YOUR_PERLMUTTER_ENDPOINT_UUID'
+
+ # Please start your own endpoint on expanse following instructions below to use this config:
+ # https://globus-compute.readthedocs.io/en/stable/endpoints/endpoint_examples.html#expanse-sdsc
+ expanse_endpoint = 'YOUR_EXPANSE_ENDPOINT_UUID'
+
+ config = Config(
+     executors=[
+         GlobusComputeExecutor(
+             executor=Executor(endpoint_id=perlmutter_endpoint),
+             label="Perlmutter",
+         ),
+         GlobusComputeExecutor(
+             executor=Executor(endpoint_id=expanse_endpoint),
+             label="Expanse",
+         ),
+     ],
+     usage_tracking=LEVEL_1,
+ )
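As context for this new config: once the placeholder endpoint UUIDs above are replaced with real endpoint IDs (and globus-compute-sdk is installed), an app can be pinned to one of the labelled executors with the standard Parsl APIs. A minimal sketch, not part of the packaged diff:

import parsl
from parsl import python_app

from parsl.configs.gc_multisite import config

# Restrict this app to the executor labelled "Perlmutter"; omit `executors`
# to let the DataFlowKernel choose any configured executor.
@python_app(executors=["Perlmutter"])
def hostname():
    import platform
    return platform.node()

parsl.load(config)
print(hostname().result())
parsl.dfk().cleanup()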
parsl/configs/gc_tutorial.py ADDED
@@ -0,0 +1,18 @@
+ from globus_compute_sdk import Executor
+
+ from parsl.config import Config
+ from parsl.executors import GlobusComputeExecutor
+ from parsl.usage_tracking.levels import LEVEL_1
+
+ # Public tutorial endpoint
+ tutorial_endpoint = '4b116d3c-1703-4f8f-9f6f-39921e5864df'
+
+ config = Config(
+     executors=[
+         GlobusComputeExecutor(
+             executor=Executor(endpoint_id=tutorial_endpoint),
+             label="Tutorial_Endpoint_py3.11",
+         )
+     ],
+     usage_tracking=LEVEL_1,
+ )
parsl/dataflow/dflow.py CHANGED
@@ -28,7 +28,7 @@ from parsl.config import Config
  from parsl.data_provider.data_manager import DataManager
  from parsl.data_provider.files import File
  from parsl.dataflow.dependency_resolvers import SHALLOW_DEPENDENCY_RESOLVER
- from parsl.dataflow.errors import BadCheckpoint, DependencyError, JoinError
+ from parsl.dataflow.errors import DependencyError, JoinError
  from parsl.dataflow.futures import AppFuture
  from parsl.dataflow.memoization import Memoizer
  from parsl.dataflow.rundirs import make_rundir
@@ -161,13 +161,13 @@ class DataFlowKernel:
                                        workflow_info))

          if config.checkpoint_files is not None:
-             checkpoints = self.load_checkpoints(config.checkpoint_files)
+             checkpoint_files = config.checkpoint_files
          elif config.checkpoint_files is None and config.checkpoint_mode is not None:
-             checkpoints = self.load_checkpoints(get_all_checkpoints(self.run_dir))
+             checkpoint_files = get_all_checkpoints(self.run_dir)
          else:
-             checkpoints = {}
+             checkpoint_files = []

-         self.memoizer = Memoizer(self, memoize=config.app_cache, checkpoint=checkpoints)
+         self.memoizer = Memoizer(self, memoize=config.app_cache, checkpoint_files=checkpoint_files)
          self.checkpointed_tasks = 0
          self._checkpoint_timer = None
          self.checkpoint_mode = config.checkpoint_mode
@@ -484,24 +484,18 @@ class DataFlowKernel:

          # now we know each joinable Future is done
          # so now look for any exceptions
-         exceptions_tids: List[Tuple[BaseException, Optional[str]]]
+         exceptions_tids: List[Tuple[BaseException, str]]
          exceptions_tids = []
          if isinstance(joinable, Future):
              je = joinable.exception()
              if je is not None:
-                 if hasattr(joinable, 'task_record'):
-                     tid = joinable.task_record['id']
-                 else:
-                     tid = None
+                 tid = self.render_future_description(joinable)
                  exceptions_tids = [(je, tid)]
          elif isinstance(joinable, list):
              for future in joinable:
                  je = future.exception()
                  if je is not None:
-                     if hasattr(joinable, 'task_record'):
-                         tid = joinable.task_record['id']
-                     else:
-                         tid = None
+                     tid = self.render_future_description(future)
                      exceptions_tids.append((je, tid))
          else:
              raise TypeError(f"Unknown joinable type {type(joinable)}")
@@ -918,13 +912,7 @@ class DataFlowKernel:
          dep_failures = []

          def append_failure(e: Exception, dep: Future) -> None:
-             # If this Future is associated with a task inside this DFK,
-             # then refer to the task ID.
-             # Otherwise make a repr of the Future object.
-             if hasattr(dep, 'task_record') and dep.task_record['dfk'] == self:
-                 tid = "task " + repr(dep.task_record['id'])
-             else:
-                 tid = repr(dep)
+             tid = self.render_future_description(dep)
              dep_failures.extend([(e, tid)])

          # Replace item in args
@@ -1076,10 +1064,7 @@ class DataFlowKernel:

          depend_descs = []
          for d in depends:
-             if isinstance(d, AppFuture) or isinstance(d, DataFuture):
-                 depend_descs.append("task {}".format(d.tid))
-             else:
-                 depend_descs.append(repr(d))
+             depend_descs.append(self.render_future_description(d))

          if depend_descs != []:
              waiting_message = "waiting on {}".format(", ".join(depend_descs))
@@ -1215,10 +1200,8 @@ class DataFlowKernel:
              self._checkpoint_timer.close()

          # Send final stats
-         logger.info("Sending end message for usage tracking")
          self.usage_tracker.send_end_message()
          self.usage_tracker.close()
-         logger.info("Closed usage tracking")

          logger.info("Closing job status poller")
          self.job_status_poller.close()
@@ -1280,7 +1263,7 @@ class DataFlowKernel:
          Returns:
              Checkpoint dir if checkpoints were written successfully.
              By default the checkpoints are written to the RUNDIR of the current
-             run under RUNDIR/checkpoints/{tasks.pkl, dfk.pkl}
+             run under RUNDIR/checkpoints/tasks.pkl
          """
          with self.checkpoint_lock:
              if tasks:
@@ -1290,18 +1273,11 @@ class DataFlowKernel:
                  self.checkpointable_tasks = []

              checkpoint_dir = '{0}/checkpoint'.format(self.run_dir)
-             checkpoint_dfk = checkpoint_dir + '/dfk.pkl'
              checkpoint_tasks = checkpoint_dir + '/tasks.pkl'

              if not os.path.exists(checkpoint_dir):
                  os.makedirs(checkpoint_dir, exist_ok=True)

-             with open(checkpoint_dfk, 'wb') as f:
-                 state = {'rundir': self.run_dir,
-                          'task_count': self.task_count
-                          }
-                 pickle.dump(state, f)
-
              count = 0

              with open(checkpoint_tasks, 'ab') as f:
@@ -1334,74 +1310,6 @@ class DataFlowKernel:

              return checkpoint_dir

-     def _load_checkpoints(self, checkpointDirs: Sequence[str]) -> Dict[str, Future[Any]]:
-         """Load a checkpoint file into a lookup table.
-
-         The data being loaded from the pickle file mostly contains input
-         attributes of the task: func, args, kwargs, env...
-         To simplify the check of whether the exact task has been completed
-         in the checkpoint, we hash these input params and use it as the key
-         for the memoized lookup table.
-
-         Args:
-             - checkpointDirs (list) : List of filepaths to checkpoints
-                Eg. ['runinfo/001', 'runinfo/002']
-
-         Returns:
-             - memoized_lookup_table (dict)
-         """
-         memo_lookup_table = {}
-
-         for checkpoint_dir in checkpointDirs:
-             logger.info("Loading checkpoints from {}".format(checkpoint_dir))
-             checkpoint_file = os.path.join(checkpoint_dir, 'tasks.pkl')
-             try:
-                 with open(checkpoint_file, 'rb') as f:
-                     while True:
-                         try:
-                             data = pickle.load(f)
-                             # Copy and hash only the input attributes
-                             memo_fu: Future = Future()
-                             assert data['exception'] is None
-                             memo_fu.set_result(data['result'])
-                             memo_lookup_table[data['hash']] = memo_fu
-
-                         except EOFError:
-                             # Done with the checkpoint file
-                             break
-             except FileNotFoundError:
-                 reason = "Checkpoint file was not found: {}".format(
-                     checkpoint_file)
-                 logger.error(reason)
-                 raise BadCheckpoint(reason)
-             except Exception:
-                 reason = "Failed to load checkpoint: {}".format(
-                     checkpoint_file)
-                 logger.error(reason)
-                 raise BadCheckpoint(reason)
-
-             logger.info("Completed loading checkpoint: {0} with {1} tasks".format(checkpoint_file,
-                         len(memo_lookup_table.keys())))
-         return memo_lookup_table
-
-     @typeguard.typechecked
-     def load_checkpoints(self, checkpointDirs: Optional[Sequence[str]]) -> Dict[str, Future]:
-         """Load checkpoints from the checkpoint files into a dictionary.
-
-         The results are used to pre-populate the memoizer's lookup_table
-
-         Kwargs:
-             - checkpointDirs (list) : List of run folder to use as checkpoints
-                Eg. ['runinfo/001', 'runinfo/002']
-
-         Returns:
-             - dict containing, hashed -> future mappings
-         """
-         if checkpointDirs:
-             return self._load_checkpoints(checkpointDirs)
-         else:
-             return {}
-
      @staticmethod
      def _log_std_streams(task_record: TaskRecord) -> None:
          tid = task_record['id']
@@ -1438,6 +1346,18 @@ class DataFlowKernel:
                                '' if label is None else '_{}'.format(label),
                                kw))

+     def render_future_description(self, dep: Future) -> str:
+         """Renders a description of the future in the context of the
+         current DFK.
+         """
+         if isinstance(dep, AppFuture) and dep.task_record['dfk'] == self:
+             tid = "task " + repr(dep.task_record['id'])
+         elif isinstance(dep, DataFuture):
+             tid = "DataFuture from task " + repr(dep.tid)
+         else:
+             tid = repr(dep)
+         return tid
+

  class DataFlowKernelLoader:
      """Manage which DataFlowKernel is active.
parsl/dataflow/errors.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Optional, Sequence, Tuple
+ from typing import List, Sequence, Tuple

  from parsl.errors import ParslError

@@ -29,35 +29,77 @@ class BadCheckpoint(DataFlowException):
          return self.reason


- class DependencyError(DataFlowException):
-     """Error raised if an app cannot run because there was an error
-     in a dependency.
+ class PropagatedException(DataFlowException):
+     """Error raised if an app fails because there was an error
+     in a related task. This is intended to be subclassed for
+     dependency and join_app errors.

      Args:
-        - dependent_exceptions_tids: List of exceptions and identifiers for
-          dependencies which failed. The identifier might be a task ID or
-          the repr of a non-DFK Future.
+        - dependent_exceptions_tids: List of exceptions and brief descriptions
+          for dependencies which failed. The description might be a task ID or
+          the repr of a non-AppFuture.
       - task_id: Task ID of the task that failed because of the dependency error
      """

-     def __init__(self, dependent_exceptions_tids: Sequence[Tuple[Exception, str]], task_id: int) -> None:
+     def __init__(self,
+                  dependent_exceptions_tids: Sequence[Tuple[BaseException, str]],
+                  task_id: int,
+                  *,
+                  failure_description: str) -> None:
          self.dependent_exceptions_tids = dependent_exceptions_tids
          self.task_id = task_id
+         self._failure_description = failure_description
+
+         (cause, cause_sequence) = self._find_any_root_cause()
+         self.__cause__ = cause
+         self._cause_sequence = cause_sequence

      def __str__(self) -> str:
-         deps = ", ".join(tid for _exc, tid in self.dependent_exceptions_tids)
-         return f"Dependency failure for task {self.task_id} with failed dependencies from {deps}"
+         sequence_text = " <- ".join(self._cause_sequence)
+         return f"{self._failure_description} for task {self.task_id}. " \
+                f"The representative cause is via {sequence_text}"
+
+     def _find_any_root_cause(self) -> Tuple[BaseException, List[str]]:
+         """Looks recursively through self.dependent_exceptions_tids to find
+         an exception that caused this propagated error, that is not itself
+         a propagated error.
+         """
+         e: BaseException = self
+         dep_ids = []
+         while isinstance(e, PropagatedException) and len(e.dependent_exceptions_tids) >= 1:
+             id_txt = e.dependent_exceptions_tids[0][1]
+             assert isinstance(id_txt, str)
+             # if there are several causes for this exception, label that
+             # there are more so that we know that the representative fail
+             # sequence is not the full story.
+             if len(e.dependent_exceptions_tids) > 1:
+                 id_txt += " (+ others)"
+             dep_ids.append(id_txt)
+             e = e.dependent_exceptions_tids[0][0]
+         return e, dep_ids
+
+
+ class DependencyError(PropagatedException):
+     """Error raised if an app cannot run because there was an error
+     in a dependency. There can be several exceptions (one from each
+     dependency) and DependencyError collects them all together.

+     Args:
+        - dependent_exceptions_tids: List of exceptions and brief descriptions
+          for dependencies which failed. The description might be a task ID or
+          the repr of a non-AppFuture.
+        - task_id: Task ID of the task that failed because of the dependency error
+     """
+     def __init__(self, dependent_exceptions_tids: Sequence[Tuple[BaseException, str]], task_id: int) -> None:
+         super().__init__(dependent_exceptions_tids, task_id,
+                          failure_description="Dependency failure")

- class JoinError(DataFlowException):
+
+ class JoinError(PropagatedException):
      """Error raised if apps joining into a join_app raise exceptions.
      There can be several exceptions (one from each joining app),
      and JoinError collects them all together.
      """
-     def __init__(self, dependent_exceptions_tids: Sequence[Tuple[BaseException, Optional[str]]], task_id: int) -> None:
-         self.dependent_exceptions_tids = dependent_exceptions_tids
-         self.task_id = task_id
-
-     def __str__(self) -> str:
-         dep_tids = [tid for (exception, tid) in self.dependent_exceptions_tids]
-         return "Join failure for task {} with failed join dependencies from tasks {}".format(self.task_id, dep_tids)
+     def __init__(self, dependent_exceptions_tids: Sequence[Tuple[BaseException, str]], task_id: int) -> None:
+         super().__init__(dependent_exceptions_tids, task_id,
+                          failure_description="Join failure")
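The new PropagatedException base class changes how dependency and join failures render: __cause__ now points at a representative root-cause exception, and __str__ prints the chain of task descriptions leading to it. A small sketch, constructing the exceptions directly (ordinarily the DataFlowKernel raises them), illustrates the behaviour of the code above:

from parsl.dataflow.errors import DependencyError

root = ValueError("input file missing")
# task 2 failed because its dependency "task 1" raised root;
# task 3 then failed because its dependency "task 2" failed.
first = DependencyError([(root, "task 1")], 2)
second = DependencyError([(first, "task 2")], 3)

print(second)
# Dependency failure for task 3. The representative cause is via task 2 <- task 1
print(repr(second.__cause__))
# ValueError('input file missing')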
parsl/dataflow/memoization.py CHANGED
@@ -2,10 +2,14 @@ from __future__ import annotations

  import hashlib
  import logging
+ import os
  import pickle
  from functools import lru_cache, singledispatch
- from typing import TYPE_CHECKING, Any, Dict, List, Optional
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence

+ import typeguard
+
+ from parsl.dataflow.errors import BadCheckpoint
  from parsl.dataflow.taskrecord import TaskRecord

  if TYPE_CHECKING:
@@ -146,7 +150,7 @@ class Memoizer:

      """

-     def __init__(self, dfk: DataFlowKernel, memoize: bool = True, checkpoint: Dict[str, Future[Any]] = {}):
+     def __init__(self, dfk: DataFlowKernel, *, memoize: bool = True, checkpoint_files: Sequence[str]):
          """Initialize the memoizer.

          Args:
@@ -159,6 +163,8 @@ class Memoizer:
          self.dfk = dfk
          self.memoize = memoize

+         checkpoint = self.load_checkpoints(checkpoint_files)
+
          if self.memoize:
              logger.info("App caching initialized")
              self.memo_lookup_table = checkpoint
@@ -274,3 +280,71 @@ class Memoizer:
          else:
              logger.debug(f"Storing app cache entry {task['hashsum']} with result from task {task_id}")
          self.memo_lookup_table[task['hashsum']] = r
+
+     def _load_checkpoints(self, checkpointDirs: Sequence[str]) -> Dict[str, Future[Any]]:
+         """Load a checkpoint file into a lookup table.
+
+         The data being loaded from the pickle file mostly contains input
+         attributes of the task: func, args, kwargs, env...
+         To simplify the check of whether the exact task has been completed
+         in the checkpoint, we hash these input params and use it as the key
+         for the memoized lookup table.
+
+         Args:
+             - checkpointDirs (list) : List of filepaths to checkpoints
+                Eg. ['runinfo/001', 'runinfo/002']
+
+         Returns:
+             - memoized_lookup_table (dict)
+         """
+         memo_lookup_table = {}
+
+         for checkpoint_dir in checkpointDirs:
+             logger.info("Loading checkpoints from {}".format(checkpoint_dir))
+             checkpoint_file = os.path.join(checkpoint_dir, 'tasks.pkl')
+             try:
+                 with open(checkpoint_file, 'rb') as f:
+                     while True:
+                         try:
+                             data = pickle.load(f)
+                             # Copy and hash only the input attributes
+                             memo_fu: Future = Future()
+                             assert data['exception'] is None
+                             memo_fu.set_result(data['result'])
+                             memo_lookup_table[data['hash']] = memo_fu
+
+                         except EOFError:
+                             # Done with the checkpoint file
+                             break
+             except FileNotFoundError:
+                 reason = "Checkpoint file was not found: {}".format(
+                     checkpoint_file)
+                 logger.error(reason)
+                 raise BadCheckpoint(reason)
+             except Exception:
+                 reason = "Failed to load checkpoint: {}".format(
+                     checkpoint_file)
+                 logger.error(reason)
+                 raise BadCheckpoint(reason)
+
+             logger.info("Completed loading checkpoint: {0} with {1} tasks".format(checkpoint_file,
+                         len(memo_lookup_table.keys())))
+         return memo_lookup_table
+
+     @typeguard.typechecked
+     def load_checkpoints(self, checkpointDirs: Optional[Sequence[str]]) -> Dict[str, Future]:
+         """Load checkpoints from the checkpoint files into a dictionary.
+
+         The results are used to pre-populate the memoizer's lookup_table
+
+         Kwargs:
+             - checkpointDirs (list) : List of run folder to use as checkpoints
+                Eg. ['runinfo/001', 'runinfo/002']
+
+         Returns:
+             - dict containing, hashed -> future mappings
+         """
+         if checkpointDirs:
+             return self._load_checkpoints(checkpointDirs)
+         else:
+             return {}
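With checkpoint loading now owned by the Memoizer, the user-facing knobs are unchanged: Config.checkpoint_files and Config.checkpoint_mode still decide which tasks.pkl files get folded into the memo table. A minimal sketch of that configuration path (standard Parsl API, shown for orientation rather than taken from this diff):

import parsl
from parsl import python_app
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
from parsl.utils import get_all_checkpoints

config = Config(
    executors=[ThreadPoolExecutor(label="local")],
    checkpoint_files=get_all_checkpoints(),  # checkpoint dirs from previous runs under runinfo/
    checkpoint_mode="task_exit",             # append to tasks.pkl as each task finishes
)

# Apps must opt in to caching for their results to be checkpointed and reused.
@python_app(cache=True)
def slow_double(x):
    return 2 * x

parsl.load(config)
print(slow_double(21).result())
parsl.dfk().cleanup()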
parsl/dataflow/taskrecord.py CHANGED
@@ -43,12 +43,11 @@ class TaskRecord(TypedDict, total=False):
         executed on.
      """

-     retries_left: int
      fail_count: int
      fail_cost: float
      fail_history: List[str]

-     checkpoint: bool  # this change is also in #1516
+     checkpoint: bool
      """Should this task be checkpointed?
      """

@@ -68,7 +67,6 @@ class TaskRecord(TypedDict, total=False):

      # these three could be more strongly typed perhaps but I'm not thinking about that now
      func: Callable
-     fn_hash: str
      args: Sequence[Any]
      # in some places we uses a Tuple[Any, ...] and in some places a List[Any].
      # This is an attempt to correctly type both of those.
parsl/executors/__init__.py CHANGED
@@ -1,4 +1,5 @@
  from parsl.executors.flux.executor import FluxExecutor
+ from parsl.executors.globus_compute import GlobusComputeExecutor
  from parsl.executors.high_throughput.executor import HighThroughputExecutor
  from parsl.executors.high_throughput.mpi_executor import MPIExecutor
  from parsl.executors.threads import ThreadPoolExecutor
@@ -8,4 +9,5 @@ __all__ = ['ThreadPoolExecutor',
             'HighThroughputExecutor',
             'MPIExecutor',
             'WorkQueueExecutor',
-            'FluxExecutor']
+            'FluxExecutor',
+            'GlobusComputeExecutor']
parsl/executors/globus_compute.py ADDED
@@ -0,0 +1,125 @@
+ from __future__ import annotations
+
+ import copy
+ from concurrent.futures import Future
+ from typing import Any, Callable, Dict
+
+ import typeguard
+
+ from parsl.errors import OptionalModuleMissing
+ from parsl.executors.base import ParslExecutor
+ from parsl.utils import RepresentationMixin
+
+ try:
+     from globus_compute_sdk import Executor
+     _globus_compute_enabled = True
+ except ImportError:
+     _globus_compute_enabled = False
+
+
+ class GlobusComputeExecutor(ParslExecutor, RepresentationMixin):
+     """ GlobusComputeExecutor enables remote execution on Globus Compute endpoints
+
+     GlobusComputeExecutor is a thin wrapper over globus_compute_sdk.Executor
+     Refer to `globus-compute user documentation <https://globus-compute.readthedocs.io/en/latest/executor.html>`_
+     and `reference documentation <https://globus-compute.readthedocs.io/en/latest/reference/executor.html>`_
+     for more details.
+
+     .. note::
+        As a remote execution system, Globus Compute relies on serialization to ship
+        tasks and results between the Parsl client side and the remote Globus Compute
+        Endpoint side. Serialization is unreliable across python versions, and
+        wrappers used by Parsl assume identical Parsl versions across on both sides.
+        We recommend using matching Python, Parsl and Globus Compute version on both
+        the client side and the endpoint side for stable behavior.
+
+     """
+
+     @typeguard.typechecked
+     def __init__(
+         self,
+         executor: Executor,
+         label: str = 'GlobusComputeExecutor',
+     ):
+         """
+         Parameters
+         ----------
+
+         executor: globus_compute_sdk.Executor
+             Pass a globus_compute_sdk Executor that will be used to execute
+             tasks on a globus_compute endpoint. Refer to `globus-compute docs
+             <https://globus-compute.readthedocs.io/en/latest/reference/executor.html#globus-compute-executor>`_
+
+         label:
+             a label to name the executor
+         """
+         if not _globus_compute_enabled:
+             raise OptionalModuleMissing(
+                 ['globus-compute-sdk'],
+                 "GlobusComputeExecutor requires globus-compute-sdk installed"
+             )
+
+         super().__init__()
+         self.executor: Executor = executor
+         self.resource_specification = self.executor.resource_specification
+         self.user_endpoint_config = self.executor.user_endpoint_config
+         self.label = label
+
+     def start(self) -> None:
+         """ Start the Globus Compute Executor """
+         pass
+
+     def submit(self, func: Callable, resource_specification: Dict[str, Any], *args: Any, **kwargs: Any) -> Future:
+         """ Submit func to globus-compute
+
+
+         Parameters
+         ----------
+
+         func: Callable
+             Python function to execute remotely
+
+         resource_specification: Dict[str, Any]
+             Resource specification can be used specify MPI resources required by MPI applications on
+             Endpoints configured to use globus compute's MPIEngine. GCE also accepts *user_endpoint_config*
+             to configure endpoints when the endpoint is a `Multi-User Endpoint
+             <https://globus-compute.readthedocs.io/en/latest/endpoints/endpoints.html#templating-endpoint-configuration>`_
+
+         args:
+             Args to pass to the function
+
+         kwargs:
+             kwargs to pass to the function
+
+         Returns
+         -------
+
+         Future
+         """
+         res_spec = copy.deepcopy(resource_specification or self.resource_specification)
+         # Pop user_endpoint_config since it is illegal in resource_spec for globus_compute
+         if res_spec:
+             user_endpoint_config = res_spec.pop('user_endpoint_config', self.user_endpoint_config)
+         else:
+             user_endpoint_config = self.user_endpoint_config
+
+         try:
+             self.executor.resource_specification = res_spec
+             self.executor.user_endpoint_config = user_endpoint_config
+             return self.executor.submit(func, *args, **kwargs)
+         finally:
+             # Reset executor state to defaults set at configuration time
+             self.executor.resource_specification = self.resource_specification
+             self.executor.user_endpoint_config = self.user_endpoint_config
+
+     def shutdown(self):
+         """Clean-up the resources associated with the Executor.
+
+         GCE.shutdown will cancel all futures that have not yet registered with
+         Globus Compute and will not wait for the launched futures to complete.
+         This method explicitly shutsdown the result_watcher thread to avoid
+         it waiting for outstanding futures at thread exit.
+         """
+         self.executor.shutdown(wait=False, cancel_futures=True)
+         result_watcher = self.executor._get_result_watcher()
+         result_watcher.shutdown(wait=False, cancel_futures=True)
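The submit docstring above is the interesting part of the new executor: per-task resource_specification entries are forwarded to Globus Compute, and any user_endpoint_config key is popped out and applied separately. From the app side this is reached through Parsl's usual parsl_resource_specification kwarg. A hedged sketch (the endpoint UUID and the keys inside user_endpoint_config are placeholders that depend on how the target endpoint is templated):

import parsl
from parsl import python_app
from globus_compute_sdk import Executor
from parsl.config import Config
from parsl.executors import GlobusComputeExecutor

config = Config(
    executors=[
        GlobusComputeExecutor(
            executor=Executor(endpoint_id="YOUR_ENDPOINT_UUID"),
            label="globus_compute",
        )
    ]
)
parsl.load(config)

@python_app
def greet(name):
    return f"hello {name}"

# The special parsl_resource_specification kwarg is routed to the executor's
# submit() as resource_specification; the "account" entry below is purely
# illustrative and must match the endpoint's own configuration template.
future = greet(
    "world",
    parsl_resource_specification={"user_endpoint_config": {"account": "my-allocation"}},
)
print(future.result())
parsl.dfk().cleanup()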
parsl/executors/high_throughput/errors.py CHANGED
@@ -27,7 +27,7 @@ class VersionMismatch(Exception):
      def __str__(self) -> str:
          return (
              f"Manager version info {self.manager_version} does not match interchange"
-             f" version info {self.interchange_version}, causing a critical failure"
+             f" version info {self.interchange_version}"
          )
