fractal-server 2.14.4a0__py3-none-any.whl → 2.14.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (110)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +2 -2
  3. fractal_server/app/models/security.py +8 -8
  4. fractal_server/app/models/user_settings.py +8 -10
  5. fractal_server/app/models/v2/accounting.py +2 -3
  6. fractal_server/app/models/v2/dataset.py +1 -2
  7. fractal_server/app/models/v2/history.py +3 -4
  8. fractal_server/app/models/v2/job.py +10 -11
  9. fractal_server/app/models/v2/project.py +1 -2
  10. fractal_server/app/models/v2/task.py +13 -14
  11. fractal_server/app/models/v2/task_group.py +15 -16
  12. fractal_server/app/models/v2/workflow.py +1 -2
  13. fractal_server/app/models/v2/workflowtask.py +6 -7
  14. fractal_server/app/routes/admin/v2/accounting.py +3 -4
  15. fractal_server/app/routes/admin/v2/job.py +13 -14
  16. fractal_server/app/routes/admin/v2/project.py +2 -4
  17. fractal_server/app/routes/admin/v2/task.py +11 -13
  18. fractal_server/app/routes/admin/v2/task_group.py +15 -17
  19. fractal_server/app/routes/admin/v2/task_group_lifecycle.py +5 -8
  20. fractal_server/app/routes/api/v2/__init__.py +2 -0
  21. fractal_server/app/routes/api/v2/_aux_functions.py +7 -9
  22. fractal_server/app/routes/api/v2/_aux_functions_history.py +1 -1
  23. fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +1 -3
  24. fractal_server/app/routes/api/v2/_aux_functions_tasks.py +5 -6
  25. fractal_server/app/routes/api/v2/dataset.py +6 -8
  26. fractal_server/app/routes/api/v2/history.py +5 -8
  27. fractal_server/app/routes/api/v2/images.py +2 -3
  28. fractal_server/app/routes/api/v2/job.py +5 -6
  29. fractal_server/app/routes/api/v2/pre_submission_checks.py +1 -3
  30. fractal_server/app/routes/api/v2/project.py +2 -4
  31. fractal_server/app/routes/api/v2/status_legacy.py +2 -4
  32. fractal_server/app/routes/api/v2/submit.py +3 -4
  33. fractal_server/app/routes/api/v2/task.py +6 -7
  34. fractal_server/app/routes/api/v2/task_collection.py +11 -13
  35. fractal_server/app/routes/api/v2/task_collection_custom.py +4 -4
  36. fractal_server/app/routes/api/v2/task_group.py +6 -8
  37. fractal_server/app/routes/api/v2/task_group_lifecycle.py +6 -9
  38. fractal_server/app/routes/api/v2/task_version_update.py +270 -0
  39. fractal_server/app/routes/api/v2/workflow.py +5 -6
  40. fractal_server/app/routes/api/v2/workflow_import.py +3 -5
  41. fractal_server/app/routes/api/v2/workflowtask.py +2 -114
  42. fractal_server/app/routes/auth/current_user.py +2 -2
  43. fractal_server/app/routes/pagination.py +2 -3
  44. fractal_server/app/runner/exceptions.py +16 -22
  45. fractal_server/app/runner/executors/base_runner.py +19 -7
  46. fractal_server/app/runner/executors/call_command_wrapper.py +52 -0
  47. fractal_server/app/runner/executors/local/get_local_config.py +2 -3
  48. fractal_server/app/runner/executors/local/runner.py +52 -13
  49. fractal_server/app/runner/executors/slurm_common/_batching.py +2 -3
  50. fractal_server/app/runner/executors/slurm_common/_slurm_config.py +27 -29
  51. fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +95 -63
  52. fractal_server/app/runner/executors/slurm_common/get_slurm_config.py +2 -3
  53. fractal_server/app/runner/executors/slurm_common/remote.py +47 -92
  54. fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py +22 -22
  55. fractal_server/app/runner/executors/slurm_ssh/run_subprocess.py +2 -3
  56. fractal_server/app/runner/executors/slurm_ssh/runner.py +4 -6
  57. fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py +2 -6
  58. fractal_server/app/runner/executors/slurm_sudo/runner.py +9 -18
  59. fractal_server/app/runner/set_start_and_last_task_index.py +2 -5
  60. fractal_server/app/runner/shutdown.py +5 -11
  61. fractal_server/app/runner/task_files.py +3 -13
  62. fractal_server/app/runner/v2/_local.py +3 -4
  63. fractal_server/app/runner/v2/_slurm_ssh.py +5 -7
  64. fractal_server/app/runner/v2/_slurm_sudo.py +8 -10
  65. fractal_server/app/runner/v2/runner.py +4 -5
  66. fractal_server/app/runner/v2/runner_functions.py +20 -35
  67. fractal_server/app/runner/v2/submit_workflow.py +7 -10
  68. fractal_server/app/runner/v2/task_interface.py +2 -3
  69. fractal_server/app/runner/versions.py +3 -13
  70. fractal_server/app/schemas/user.py +2 -4
  71. fractal_server/app/schemas/user_group.py +1 -2
  72. fractal_server/app/schemas/user_settings.py +19 -21
  73. fractal_server/app/schemas/v2/dataset.py +2 -3
  74. fractal_server/app/schemas/v2/dumps.py +13 -15
  75. fractal_server/app/schemas/v2/history.py +6 -7
  76. fractal_server/app/schemas/v2/job.py +17 -18
  77. fractal_server/app/schemas/v2/manifest.py +12 -13
  78. fractal_server/app/schemas/v2/status_legacy.py +2 -2
  79. fractal_server/app/schemas/v2/task.py +29 -30
  80. fractal_server/app/schemas/v2/task_collection.py +8 -9
  81. fractal_server/app/schemas/v2/task_group.py +22 -23
  82. fractal_server/app/schemas/v2/workflow.py +1 -2
  83. fractal_server/app/schemas/v2/workflowtask.py +27 -29
  84. fractal_server/app/security/__init__.py +10 -12
  85. fractal_server/config.py +32 -42
  86. fractal_server/images/models.py +2 -4
  87. fractal_server/images/tools.py +4 -7
  88. fractal_server/logger.py +3 -5
  89. fractal_server/ssh/_fabric.py +41 -13
  90. fractal_server/string_tools.py +2 -2
  91. fractal_server/syringe.py +1 -1
  92. fractal_server/tasks/v2/local/collect.py +2 -3
  93. fractal_server/tasks/v2/local/deactivate.py +1 -1
  94. fractal_server/tasks/v2/local/reactivate.py +1 -1
  95. fractal_server/tasks/v2/ssh/collect.py +256 -245
  96. fractal_server/tasks/v2/ssh/deactivate.py +210 -187
  97. fractal_server/tasks/v2/ssh/reactivate.py +154 -146
  98. fractal_server/tasks/v2/utils_background.py +2 -3
  99. fractal_server/types/__init__.py +1 -2
  100. fractal_server/types/validators/_filter_validators.py +1 -2
  101. fractal_server/utils.py +4 -5
  102. fractal_server/zip_tools.py +1 -1
  103. {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/METADATA +2 -9
  104. {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/RECORD +107 -108
  105. fractal_server/app/history/__init__.py +0 -0
  106. fractal_server/app/runner/executors/slurm_common/utils_executors.py +0 -58
  107. fractal_server/app/runner/v2/runner_functions_low_level.py +0 -122
  108. {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/LICENSE +0 -0
  109. {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/WHEEL +0 -0
  110. {fractal_server-2.14.4a0.dist-info → fractal_server-2.14.6.dist-info}/entry_points.txt +0 -0

fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py
@@ -5,9 +5,9 @@ import time
 from pathlib import Path
 from typing import Any
 from typing import Literal
-from typing import Optional

-import cloudpickle
+from pydantic import BaseModel
+from pydantic import ConfigDict

 from ..slurm_common._slurm_config import SlurmConfig
 from ..slurm_common.slurm_job_task_models import SlurmJob
@@ -36,6 +36,17 @@ SHUTDOWN_EXCEPTION = JobExecutionError(SHUTDOWN_ERROR_MESSAGE)
 logger = set_logger(__name__)


+class RemoteInputData(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    python_version: tuple[int, int, int]
+    fractal_server_version: str
+    full_command: str
+
+    metadiff_file_remote: str
+    log_file_remote: str
+
+
 def create_accounting_record_slurm(
     *,
     user_id: int,
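
The new `RemoteInputData` model replaces the pickled `(versions, func, args, kwargs)` payload with plain JSON. A minimal sketch of building and serializing such a payload (every value below is illustrative, not taken from a real job):

# Minimal sketch of the new JSON input payload; all values are illustrative.
import json
import sys

from pydantic import BaseModel
from pydantic import ConfigDict


class RemoteInputData(BaseModel):
    model_config = ConfigDict(extra="forbid")

    python_version: tuple[int, int, int]
    fractal_server_version: str
    full_command: str
    metadiff_file_remote: str
    log_file_remote: str


data = RemoteInputData(
    python_version=sys.version_info[:3],
    fractal_server_version="2.14.6",
    full_command=(
        "/some/venv/bin/python -m some_task "
        "--args-json /job/0-args.json --out-json /job/0-metadiff.json"
    ),
    metadiff_file_remote="/job/0-metadiff.json",
    log_file_remote="/job/0-log.txt",
)
print(json.dumps(data.model_dump(), indent=2))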
@@ -69,9 +80,9 @@ class BaseSlurmRunner(BaseRunner):
         root_dir_remote: Path,
         slurm_runner_type: Literal["ssh", "sudo"],
         python_worker_interpreter: str,
-        common_script_lines: Optional[list[str]] = None,
-        user_cache_dir: Optional[str] = None,
-        poll_interval: Optional[int] = None,
+        common_script_lines: list[str] | None = None,
+        user_cache_dir: str | None = None,
+        poll_interval: int | None = None,
     ):
         self.slurm_runner_type = slurm_runner_type
         self.root_dir_local = root_dir_local
@@ -121,7 +132,6 @@ class BaseSlurmRunner(BaseRunner):
         raise NotImplementedError("Implement in child class.")

     def _get_finished_jobs(self, job_ids: list[str]) -> set[str]:
-
         # If there is no Slurm job to check, return right away
         if not job_ids:
             return set()
@@ -168,59 +178,73 @@

     def _submit_single_sbatch(
         self,
-        func,
+        *,
+        base_command: str,
         slurm_job: SlurmJob,
         slurm_config: SlurmConfig,
     ) -> str:
         logger.debug("[_submit_single_sbatch] START")
-        # Prepare input pickle(s)
-        versions = dict(
-            python=sys.version_info[:3],
-            cloudpickle=cloudpickle.__version__,
-            fractal_server=__VERSION__,
-        )
+
         for task in slurm_job.tasks:
-            # Write input pickle
-            _args = []
-            _kwargs = dict(
-                parameters=task.parameters,
-                remote_files=task.task_files.remote_files_dict,
+            # Write input file
+            if self.slurm_runner_type == "ssh":
+                args_file_remote = task.task_files.args_file_remote
+            else:
+                args_file_remote = task.task_files.args_file_local
+            metadiff_file_remote = task.task_files.metadiff_file_remote
+            full_command = (
+                f"{base_command} "
+                f"--args-json {args_file_remote} "
+                f"--out-json {metadiff_file_remote}"
             )
-            funcser = cloudpickle.dumps((versions, func, _args, _kwargs))
-            with open(task.input_pickle_file_local, "wb") as f:
-                f.write(funcser)
+
+            input_data = RemoteInputData(
+                full_command=full_command,
+                python_version=sys.version_info[:3],
+                fractal_server_version=__VERSION__,
+                metadiff_file_remote=task.task_files.metadiff_file_remote,
+                log_file_remote=task.task_files.log_file_remote,
+            )
+
+            with open(task.input_file_local, "w") as f:
+                json.dump(input_data.model_dump(), f, indent=2)
+
+            with open(task.task_files.args_file_local, "w") as f:
+                json.dump(task.parameters, f, indent=2)
+
             logger.debug(
-                "[_submit_single_sbatch] Written "
-                f"{task.input_pickle_file_local=}"
+                "[_submit_single_sbatch] Written " f"{task.input_file_local=}"
             )

             if self.slurm_runner_type == "ssh":
-                # Send input pickle (only relevant for SSH)
+                # Send input file (only relevant for SSH)
+                self.fractal_ssh.send_file(
+                    local=task.input_file_local,
+                    remote=task.input_file_remote,
+                )
                 self.fractal_ssh.send_file(
-                    local=task.input_pickle_file_local,
-                    remote=task.input_pickle_file_remote,
+                    local=task.task_files.args_file_local,
+                    remote=task.task_files.args_file_remote,
                 )
                 logger.debug(
                     "[_submit_single_sbatch] Transferred "
-                    f"{task.input_pickle_file_local=}"
+                    f"{task.input_file_local=}"
                 )

         # Prepare commands to be included in SLURM submission script
         cmdlines = []
         for task in slurm_job.tasks:
             if self.slurm_runner_type == "ssh":
-                input_pickle_file = task.input_pickle_file_remote
+                input_file = task.input_file_remote
             else:
-                input_pickle_file = task.input_pickle_file_local
-            output_pickle_file = task.output_pickle_file_remote
+                input_file = task.input_file_local
+            output_file = task.output_file_remote
             cmdlines.append(
-                (
-                    f"{self.python_worker_interpreter}"
-                    " -m fractal_server.app.runner."
-                    "executors.slurm_common.remote "
-                    f"--input-file {input_pickle_file} "
-                    f"--output-file {output_pickle_file}"
-                )
+                f"{self.python_worker_interpreter}"
+                " -m fractal_server.app.runner."
+                "executors.slurm_common.remote "
+                f"--input-file {input_file} "
+                f"--output-file {output_file}"
             )

         # Set ntasks
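
The rewritten `_submit_single_sbatch` no longer serializes a callable; it only composes strings. A standalone sketch of the two command strings involved (all paths and module names below are hypothetical):

# Standalone sketch of the two command strings; all paths are hypothetical.
base_command = "/some/venv/bin/python -m some_task"
args_file_remote = "/remote/job/0-args.json"
metadiff_file_remote = "/remote/job/0-metadiff.json"

# Command the task itself will run (stored inside the input JSON file):
full_command = (
    f"{base_command} "
    f"--args-json {args_file_remote} "
    f"--out-json {metadiff_file_remote}"
)

# Command placed in the sbatch script (runs the JSON-based worker module):
python_worker_interpreter = "/some/venv/bin/python"
input_file = "/remote/job/0-input.json"
output_file = "/remote/job/0-output.json"
cmdline = (
    f"{python_worker_interpreter}"
    " -m fractal_server.app.runner.executors.slurm_common.remote"
    f" --input-file {input_file}"
    f" --output-file {output_file}"
)
print(full_command)
print(cmdline)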
@@ -363,12 +387,12 @@
         was_job_scancelled: bool = False,
     ) -> tuple[Any, Exception]:
         try:
-            with open(task.output_pickle_file_local, "rb") as f:
-                outdata = f.read()
-            success, output = cloudpickle.loads(outdata)
+            with open(task.output_file_local) as f:
+                output = json.load(f)
+            success = output[0]
             if success:
                 # Task succeeded
-                result = output
+                result = output[1]
                 return (result, None)
             else:
                 # Task failed in a controlled way, and produced an `output`
@@ -376,21 +400,18 @@
                 # `exc_type_name` and `traceback_string` and with optional
                 # keys `workflow_task_order`, `workflow_task_id` and
                 # `task_name`.
-                exc_type_name = output.get("exc_type_name")
+                exc_proxy = output[1]
+                exc_type_name = exc_proxy.get("exc_type_name")
                 logger.debug(
-                    f"Output pickle contains a '{exc_type_name}' exception."
+                    f"Output file contains a '{exc_type_name}' exception."
+                )
+                traceback_string = output[1].get("traceback_string")
+                exception = TaskExecutionError(
+                    traceback_string,
+                    workflow_task_id=task.workflow_task_id,
+                    workflow_task_order=task.workflow_task_order,
+                    task_name=task.task_name,
                 )
-                traceback_string = output.get("traceback_string")
-                kwargs = {
-                    key: output[key]
-                    for key in [
-                        "workflow_task_order",
-                        "workflow_task_id",
-                        "task_name",
-                    ]
-                    if key in output.keys()
-                }
-                exception = TaskExecutionError(traceback_string, **kwargs)
                 return (None, exception)

         except Exception as e:
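
For reference, the output file read above is a plain JSON two-element array, `[success, payload]`, instead of a pickled tuple. A sketch of handling both shapes (payload contents illustrative):

# Sketch of the two output-file shapes and how the reader unpacks them;
# payload contents are illustrative.
import json

success_doc = '[true, {"key": "value"}]'
failure_doc = (
    '[false, {"exc_type_name": "ValueError",'
    ' "traceback_string": "Traceback (most recent call last): ..."}]'
)

for doc in (success_doc, failure_doc):
    output = json.loads(doc)
    success = output[0]
    if success:
        result = output[1]  # metadiff content, or None if no file was produced
        print("result:", result)
    else:
        exc_proxy = output[1]  # serializable stand-in for the remote exception
        print("failed with:", exc_proxy["exc_type_name"])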
@@ -405,8 +426,8 @@
             exception = SHUTDOWN_EXCEPTION
             return (None, exception)
         finally:
-            Path(task.input_pickle_file_local).unlink(missing_ok=True)
-            Path(task.output_pickle_file_local).unlink(missing_ok=True)
+            Path(task.input_file_local).unlink(missing_ok=True)
+            Path(task.output_file_local).unlink(missing_ok=True)

     def is_shutdown(self) -> bool:
         return self.shutdown_file.exists()
@@ -451,7 +472,10 @@

     def submit(
         self,
-        func: callable,
+        base_command: str,
+        workflow_task_order: int,
+        workflow_task_id: int,
+        task_name: str,
         parameters: dict[str, Any],
         history_unit_id: int,
         task_files: TaskFiles,
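
This signature change is the core of the cloudpickle removal: instead of a pickled callable, the runner now receives a command string plus the explicit task identity it previously had to recover from exception attributes. A minimal before/after sketch (both function bodies are illustrative stubs, not the real implementation):

# Before/after sketch of the signature change; bodies are illustrative stubs.
from typing import Any


def submit_old(func: callable, parameters: dict[str, Any]) -> None:
    # 2.14.4a0: the callable (and its context) had to be cloudpickled.
    func(parameters)


def submit_new(
    base_command: str,
    workflow_task_order: int,
    workflow_task_id: int,
    task_name: str,
    parameters: dict[str, Any],
) -> None:
    # 2.14.6: a shell command plus explicit task identity; nothing is pickled,
    # and errors can be tagged with the task metadata on the server side.
    print(f"{task_name=} {workflow_task_id=} {workflow_task_order=}")
    print(f"Would run: {base_command}")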
@@ -507,13 +531,16 @@
                     workdir_remote=workdir_remote,
                     workdir_local=workdir_local,
                     task_files=task_files,
+                    workflow_task_order=workflow_task_order,
+                    workflow_task_id=workflow_task_id,
+                    task_name=task_name,
                 )
             ],
         )

         config.parallel_tasks_per_job = 1
         self._submit_single_sbatch(
-            func,
+            base_command=base_command,
             slurm_job=slurm_job,
             slurm_config=config,
         )
@@ -586,7 +613,10 @@

     def multisubmit(
         self,
-        func: callable,
+        base_command: str,
+        workflow_task_order: int,
+        workflow_task_id: int,
+        task_name: str,
         list_parameters: list[dict],
         history_unit_ids: list[int],
         list_task_files: list[TaskFiles],
@@ -602,7 +632,6 @@

         logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
         try:
-
             if self.is_shutdown():
                 if task_type == "parallel":
                     with next(get_sync_db()) as db:
@@ -672,6 +701,9 @@
                             parameters=parameters,
                             zarr_url=parameters["zarr_url"],
                             task_files=list_task_files[index],
+                            workflow_task_order=workflow_task_order,
+                            workflow_task_id=workflow_task_id,
+                            task_name=task_name,
                         ),
                     )
                 jobs_to_submit.append(
@@ -687,7 +719,7 @@
             logger.debug("[multisubmit] Transfer files and submit jobs.")
             for slurm_job in jobs_to_submit:
                 self._submit_single_sbatch(
-                    func,
+                    base_command=base_command,
                     slurm_job=slurm_job,
                     slurm_config=config,
                 )
@@ -850,8 +882,8 @@
         """
         Check that a list of `SlurmJob`s have homogeneous working folders.
         """
-        set_workdir_local = set(_job.workdir_local for _job in slurm_jobs)
-        set_workdir_remote = set(_job.workdir_remote for _job in slurm_jobs)
+        set_workdir_local = {_job.workdir_local for _job in slurm_jobs}
+        set_workdir_remote = {_job.workdir_remote for _job in slurm_jobs}
         if len(set_workdir_local) > 1:
             raise ValueError(f"Non-unique values in {set_workdir_local=}.")
         if len(set_workdir_remote) > 1:

fractal_server/app/runner/executors/slurm_common/get_slurm_config.py
@@ -1,6 +1,5 @@
 from pathlib import Path
 from typing import Literal
-from typing import Optional

 from ._batching import heuristics
 from ._slurm_config import _parse_mem_value
@@ -14,7 +13,7 @@ from fractal_server.app.models.v2 import WorkflowTaskV2
 def get_slurm_config_internal(
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Optional[Path] = None,
+    config_path: Path | None = None,
 ) -> SlurmConfig:
     """
     Prepare a `SlurmConfig` configuration object
@@ -168,7 +167,7 @@
 def get_slurm_config(
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Optional[Path] = None,
+    config_path: Path | None = None,
     tot_tasks: int = 1,
 ) -> SlurmConfig:
     config = get_slurm_config_internal(
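
These hunks (and the many `Optional` removals across this release) are mechanical typing modernizations: `Optional[X]` is equivalent to the PEP 604 spelling `X | None`, which requires Python >= 3.10. A quick demonstration:

# The two spellings are interchangeable; PEP 604 unions need Python >= 3.10.
from pathlib import Path
from typing import Optional

old_style: Optional[Path] = None
new_style: Path | None = None
assert Optional[Path] == (Path | None)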

fractal_server/app/runner/executors/slurm_common/remote.py
@@ -1,27 +1,10 @@
-# This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
-# Original Copyright
-# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
-# License: MIT
-#
-# Modified by:
-# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
-# Tommaso Comparin <tommaso.comparin@exact-lab.it>
-#
-# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
-# University of Zurich
-"""
-This module provides a simple self-standing script that executes arbitrary
-python code received via pickled files on a cluster node.
-"""
 import argparse
+import json
 import logging
 import os
 import sys
-from typing import Literal
-from typing import Union
-
-import cloudpickle

+from ..call_command_wrapper import call_command_wrapper
 from fractal_server import __VERSION__


@@ -33,59 +16,6 @@ class FractalVersionMismatch(RuntimeError):
     pass


-def _check_versions_mismatch(
-    server_versions: dict[
-        Literal["python", "fractal_server", "cloudpickle"],
-        Union[str, tuple[int]],
-    ]
-):
-    """
-    Compare the server {python,cloudpickle,fractal_server} versions with the
-    ones available to the current worker
-
-    Arguments:
-        server_versions:
-            The version used in the fractal-server instance that created the
-            cloudpickle file
-
-    Raises:
-        FractalVersionMismatch: If the cloudpickle or fractal_server versions
-            do not match with the ones on the server
-    """
-
-    server_python_version = list(server_versions["python"])
-    worker_python_version = list(sys.version_info[:3])
-    if worker_python_version != server_python_version:
-        if worker_python_version[:2] != server_python_version[:2]:
-            # FIXME: Turn this into an error, in some version post 2.14.
-            logging.error(
-                f"{server_python_version=} but {worker_python_version=}. "
-                "This configuration will be deprecated in a future version, "
-                "please contact the admin of this Fractal instance."
-            )
-        else:
-            # Major.minor versions match, patch versions differ
-            logging.warning(
-                f"{server_python_version=} but {worker_python_version=}."
-            )
-
-    server_cloudpickle_version = server_versions["cloudpickle"]
-    worker_cloudpickle_version = cloudpickle.__version__
-    if worker_cloudpickle_version != server_cloudpickle_version:
-        raise FractalVersionMismatch(
-            f"{server_cloudpickle_version=} but "
-            f"{worker_cloudpickle_version=}"
-        )
-
-    server_fractal_server_version = server_versions["fractal_server"]
-    worker_fractal_server_version = __VERSION__
-    if worker_fractal_server_version != server_fractal_server_version:
-        raise FractalVersionMismatch(
-            f"{server_fractal_server_version=} but "
-            f"{worker_fractal_server_version=}"
-        )
-
-
 def worker(
     *,
     in_fname: str,
@@ -95,8 +25,8 @@
     Execute a job, possibly on a remote node.

     Arguments:
-        in_fname: Absolute path to the input pickle file (must be readable).
-        out_fname: Absolute path of the output pickle file (must be writeable).
+        in_fname: Absolute path to the input file (must be readable).
+        out_fname: Absolute path of the output file (must be writeable).
     """

     # Create output folder, if missing
@@ -107,19 +37,49 @@

     # Execute the job and capture exceptions
     try:
-        with open(in_fname, "rb") as f:
-            indata = f.read()
-        server_versions, fun, args, kwargs = cloudpickle.loads(indata)
-        _check_versions_mismatch(server_versions)
+        with open(in_fname) as f:
+            input_data = json.load(f)
+
+        server_python_version = input_data["python_version"]
+        server_fractal_server_version = input_data["fractal_server_version"]
+
+        # Fractal-server version must be identical
+        worker_fractal_server_version = __VERSION__
+        if worker_fractal_server_version != server_fractal_server_version:
+            raise FractalVersionMismatch(
+                f"{server_fractal_server_version=} but "
+                f"{worker_fractal_server_version=}"
+            )
+
+        # Python version mismatch only raises a warning
+        worker_python_version = tuple(sys.version_info[:3])
+        if worker_python_version != server_python_version:
+            if worker_python_version[:2] != server_python_version[:2]:
+                logging.warning(
+                    f"{server_python_version=} but {worker_python_version=}."
+                )
+
+        # Extract some useful paths
+        metadiff_file_remote = input_data["metadiff_file_remote"]
+        log_path = input_data["log_file_remote"]
+
+        # Execute command
+        full_command = input_data["full_command"]
+        call_command_wrapper(cmd=full_command, log_path=log_path)
+
+        try:
+            with open(metadiff_file_remote) as f:
+                out_meta = json.load(f)
+            result = (True, out_meta)
+        except FileNotFoundError:
+            # Command completed, but it produced no metadiff file
+            result = (True, None)

-        result = (True, fun(*args, **kwargs))
-        out = cloudpickle.dumps(result)
     except Exception as e:
         # Exception objects are not serialisable. Here we save the relevant
         # exception contents in a serializable dictionary. Note that whenever
         # the task failed "properly", the exception is a `TaskExecutionError`
         # and it has additional attributes.
-
         import traceback

         exc_type, exc_value, traceback_obj = sys.exc_info()
@@ -131,33 +91,28 @@
         )
         traceback_string = "".join(traceback_list)
         exc_proxy = dict(
-            exc_type_name=exc_type.__name__,
+            exc_type_name=type(e).__name__,
             traceback_string=traceback_string,
-            workflow_task_order=getattr(e, "workflow_task_order", None),
-            workflow_task_id=getattr(e, "workflow_task_id", None),
-            task_name=getattr(e, "task_name", None),
         )
         result = (False, exc_proxy)
-        out = cloudpickle.dumps(result)

-    # Write the output pickle file
-    with open(out_fname, "wb") as f:
-        f.write(out)
+    # Write output file
+    with open(out_fname, "w") as f:
+        json.dump(result, f, indent=2)


 if __name__ == "__main__":
-
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--input-file",
         type=str,
-        help="Path of input pickle file",
+        help="Path of input JSON file",
         required=True,
     )
     parser.add_argument(
         "--output-file",
         type=str,
-        help="Path of output pickle file",
+        help="Path of output JSON file",
         required=True,
     )
     parsed_args = parser.parse_args()
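
End to end, the module keeps the same CLI but exchanges JSON files instead of pickles. A hedged sketch of how a compute node invokes it (interpreter and file paths are hypothetical):

# Hypothetical invocation of the worker module on a compute node.
import shlex
import subprocess

cmd = (
    "/some/venv/bin/python"
    " -m fractal_server.app.runner.executors.slurm_common.remote"
    " --input-file /job/0-input.json"
    " --output-file /job/0-output.json"
)
subprocess.run(shlex.split(cmd), check=True)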

fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py
@@ -1,6 +1,5 @@
 from pathlib import Path
 from typing import Any
-from typing import Optional

 from pydantic import BaseModel
 from pydantic import ConfigDict
@@ -15,56 +14,57 @@ class SlurmTask(BaseModel):
     workdir_local: Path
     workdir_remote: Path
     parameters: dict[str, Any]
-    zarr_url: Optional[str] = None
+    zarr_url: str | None = None
     task_files: TaskFiles
     index: int

+    workflow_task_order: int
+    workflow_task_id: int
+    task_name: str
+
     @property
-    def input_pickle_file_local_path(self) -> Path:
+    def input_file_local_path(self) -> Path:
         return (
-            self.workdir_local / f"{self.prefix}-{self.component}-input.pickle"
+            self.workdir_local / f"{self.prefix}-{self.component}-input.json"
         )

     @property
-    def input_pickle_file_remote_path(self) -> Path:
+    def input_file_remote_path(self) -> Path:
         return (
-            self.workdir_remote
-            / f"{self.prefix}-{self.component}-input.pickle"
+            self.workdir_remote / f"{self.prefix}-{self.component}-input.json"
         )

     @property
-    def output_pickle_file_local_path(self) -> Path:
+    def output_file_local_path(self) -> Path:
         return (
-            self.workdir_local
-            / f"{self.prefix}-{self.component}-output.pickle"
+            self.workdir_local / f"{self.prefix}-{self.component}-output.json"
         )

     @property
-    def output_pickle_file_remote_path(self) -> Path:
+    def output_file_remote_path(self) -> Path:
         return (
-            self.workdir_remote
-            / f"{self.prefix}-{self.component}-output.pickle"
+            self.workdir_remote / f"{self.prefix}-{self.component}-output.json"
         )

     @property
-    def input_pickle_file_local(self) -> str:
-        return self.input_pickle_file_local_path.as_posix()
+    def input_file_local(self) -> str:
+        return self.input_file_local_path.as_posix()

     @property
-    def input_pickle_file_remote(self) -> str:
-        return self.input_pickle_file_remote_path.as_posix()
+    def input_file_remote(self) -> str:
+        return self.input_file_remote_path.as_posix()

     @property
-    def output_pickle_file_local(self) -> str:
-        return self.output_pickle_file_local_path.as_posix()
+    def output_file_local(self) -> str:
+        return self.output_file_local_path.as_posix()

     @property
-    def output_pickle_file_remote(self) -> str:
-        return self.output_pickle_file_remote_path.as_posix()
+    def output_file_remote(self) -> str:
+        return self.output_file_remote_path.as_posix()


 class SlurmJob(BaseModel):
-    slurm_job_id: Optional[str] = None
+    slurm_job_id: str | None = None
     prefix: str
     workdir_local: Path
     workdir_remote: Path
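
A standalone sketch of the renamed path helpers; `MiniSlurmTask` is a cut-down stand-in for the real model (which also carries `TaskFiles`, `parameters`, and the other fields above), and all constructor values are illustrative:

# Cut-down stand-in for SlurmTask, reproducing only the path helpers.
from pathlib import Path
from pydantic import BaseModel


class MiniSlurmTask(BaseModel):
    prefix: str
    component: str
    workdir_local: Path
    workdir_remote: Path

    @property
    def input_file_local(self) -> str:
        return (
            self.workdir_local / f"{self.prefix}-{self.component}-input.json"
        ).as_posix()

    @property
    def output_file_remote(self) -> str:
        return (
            self.workdir_remote / f"{self.prefix}-{self.component}-output.json"
        ).as_posix()


task = MiniSlurmTask(
    prefix="0-batch-000000",
    component="000000",
    workdir_local=Path("/local/job"),
    workdir_remote=Path("/remote/job"),
)
print(task.input_file_local)    # /local/job/0-batch-000000-000000-input.json
print(task.output_file_remote)  # /remote/job/0-batch-000000-000000-output.json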

fractal_server/app/runner/executors/slurm_ssh/run_subprocess.py
@@ -1,6 +1,5 @@
 import shlex
 import subprocess  # nosec
-from typing import Optional

 from fractal_server.logger import get_logger
 from fractal_server.string_tools import validate_cmd
@@ -8,8 +7,8 @@ from fractal_server.string_tools import validate_cmd

 def run_subprocess(
     cmd: str,
-    allow_char: Optional[str] = None,
-    logger_name: Optional[str] = None,
+    allow_char: str | None = None,
+    logger_name: str | None = None,
 ) -> subprocess.CompletedProcess:
     validate_cmd(cmd, allow_char=allow_char)
     logger = get_logger(logger_name)

fractal_server/app/runner/executors/slurm_ssh/runner.py
@@ -1,6 +1,5 @@
 import time
 from pathlib import Path
-from typing import Optional

 from ..slurm_common.base_slurm_runner import BaseSlurmRunner
 from ..slurm_common.slurm_job_task_models import SlurmJob
@@ -27,9 +26,9 @@ class SlurmSSHRunner(BaseSlurmRunner):
         # Common
         root_dir_local: Path,
         root_dir_remote: Path,
-        common_script_lines: Optional[list[str]] = None,
-        user_cache_dir: Optional[str] = None,
-        poll_interval: Optional[int] = None,
+        common_script_lines: list[str] | None = None,
+        user_cache_dir: str | None = None,
+        poll_interval: int | None = None,
         # Specific
         fractal_ssh: FractalSSH,
     ) -> None:
@@ -99,9 +98,8 @@
             for task in _slurm_job.tasks:
                 _single_job_filelist.extend(
                     [
-                        task.output_pickle_file_remote_path.name,
+                        task.output_file_remote_path.name,
                         task.task_files.log_file_remote_path.name,
-                        task.task_files.args_file_remote_path.name,
                         task.task_files.metadiff_file_remote_path.name,
                     ]
                 )
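
Net effect on the SSH fetch list: the JSON output file replaces the output pickle, and the args file is no longer copied back (the server already wrote it locally). A sketch with hypothetical file names:

# Per-job fetch list after this change; file names are hypothetical.
prefix = "0-batch-000000-000000"
_single_job_filelist = [
    f"{prefix}-output.json",    # was f"{prefix}-output.pickle"
    f"{prefix}-log.txt",
    f"{prefix}-metadiff.json",  # args file no longer fetched back
]
print(_single_job_filelist)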

fractal_server/app/runner/executors/slurm_sudo/_subprocess_run_as_user.py
@@ -17,7 +17,6 @@ another user. Note that this requires appropriate sudo permissions.
 """
 import shlex
 import subprocess  # nosec
-from typing import Optional

 from fractal_server.logger import set_logger
 from fractal_server.string_tools import validate_cmd
@@ -28,8 +27,7 @@ logger = set_logger(__name__)
 def _run_command_as_user(
     *,
     cmd: str,
-    user: Optional[str] = None,
-    encoding: Optional[str] = "utf-8",
+    user: str | None = None,
     check: bool = False,
 ) -> subprocess.CompletedProcess:
     """
@@ -38,8 +36,6 @@ def _run_command_as_user(
     Arguments:
         cmd: Command to be run
         user: User to be impersonated
-        encoding: Argument for `subprocess.run`. Note that this must be `None`
-            to have stdout/stderr as bytes.
         check: If `True`, check that `returncode=0` and fail otherwise.

     Raises:
@@ -57,7 +53,7 @@ def _run_command_as_user(
     res = subprocess.run(  # nosec
         shlex.split(new_cmd),
         capture_output=True,
-        encoding=encoding,
+        encoding="utf-8",
     )
     logger.debug(f"[_run_command_as_user] {res.returncode=}")
     logger.debug(f"[_run_command_as_user] {res.stdout=}")
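
The `encoding` parameter is dropped because stdout/stderr are now always decoded as UTF-8 text. A self-contained sketch of the impersonation pattern; the exact `new_cmd` construction is outside this hunk, so the `sudo` invocation below is an assumption based on the module docstring:

# Sketch of running a command as another user; the sudo flags are assumed.
import shlex
import subprocess


def run_as_user(
    cmd: str, user: str | None = None
) -> subprocess.CompletedProcess:
    if user is not None:
        new_cmd = f"sudo --non-interactive -u {user} {cmd}"  # assumption
    else:
        new_cmd = cmd
    return subprocess.run(  # nosec
        shlex.split(new_cmd),
        capture_output=True,
        encoding="utf-8",  # stdout/stderr decoded as text, as hard-coded above
    )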