hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -461
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -490
- hpcflow/archive/archive.py +0 -307
- hpcflow/archive/cloud/cloud.py +0 -45
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -427
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -233
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2595
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -322
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -166
- hpcflow/variables.py +0 -543
- hpcflow-0.1.15.dist-info/METADATA +0 -168
- hpcflow-0.1.15.dist-info/RECORD +0 -45
- hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.15.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
|
@@ -0,0 +1,1402 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A collection of submissions to a scheduler, generated from a workflow.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
import shutil
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import socket
|
|
10
|
+
from textwrap import indent
|
|
11
|
+
from typing import Any, Literal, overload, TYPE_CHECKING
|
|
12
|
+
from typing_extensions import override
|
|
13
|
+
import warnings
|
|
14
|
+
from contextlib import contextmanager
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
from hpcflow.sdk.typing import hydrate
|
|
21
|
+
from hpcflow.sdk.core.errors import (
|
|
22
|
+
JobscriptSubmissionFailure,
|
|
23
|
+
MissingEnvironmentError,
|
|
24
|
+
MissingEnvironmentExecutableError,
|
|
25
|
+
MissingEnvironmentExecutableInstanceError,
|
|
26
|
+
MultipleEnvironmentsError,
|
|
27
|
+
SubmissionFailure,
|
|
28
|
+
OutputFileParserNoOutputError,
|
|
29
|
+
)
|
|
30
|
+
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
|
31
|
+
from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
|
|
32
|
+
from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
|
|
33
|
+
from hpcflow.sdk.submission.enums import SubmissionStatus
|
|
34
|
+
from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
|
|
35
|
+
from hpcflow.sdk.log import TimeIt
|
|
36
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
40
|
+
from datetime import datetime
|
|
41
|
+
from typing import ClassVar, Literal
|
|
42
|
+
from rich.status import Status
|
|
43
|
+
from numpy.typing import NDArray
|
|
44
|
+
from .jobscript import Jobscript
|
|
45
|
+
from .enums import JobscriptElementState
|
|
46
|
+
from .schedulers import Scheduler
|
|
47
|
+
from .shells import Shell
|
|
48
|
+
from .types import SubmissionPart
|
|
49
|
+
from ..core.element import ElementActionRun
|
|
50
|
+
from ..core.environment import Environment
|
|
51
|
+
from ..core.object_list import EnvironmentsList
|
|
52
|
+
from ..core.workflow import Workflow
|
|
53
|
+
from ..core.cache import ObjectCache
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# jobscript attributes that are set persistently just after the jobscript has been
|
|
57
|
+
# submitted to the scheduler:
|
|
58
|
+
JOBSCRIPT_SUBMIT_TIME_KEYS = (
|
|
59
|
+
"submit_cmdline",
|
|
60
|
+
"scheduler_job_ID",
|
|
61
|
+
"process_ID",
|
|
62
|
+
"submit_time",
|
|
63
|
+
)
|
|
64
|
+
# submission attributes that are set persistently just after all of a submission's
|
|
65
|
+
# jobscripts have been submitted:
|
|
66
|
+
SUBMISSION_SUBMIT_TIME_KEYS = {
|
|
67
|
+
"submission_parts": dict,
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@hydrate
|
|
72
|
+
class Submission(JSONLike):
|
|
73
|
+
"""
|
|
74
|
+
A collection of jobscripts to be submitted to a scheduler.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
index: int
|
|
79
|
+
The index of this submission.
|
|
80
|
+
jobscripts: list[~hpcflow.app.Jobscript]
|
|
81
|
+
The jobscripts in the submission.
|
|
82
|
+
workflow: ~hpcflow.app.Workflow
|
|
83
|
+
The workflow this is part of.
|
|
84
|
+
submission_parts: dict
|
|
85
|
+
Description of submission parts.
|
|
86
|
+
JS_parallelism: bool
|
|
87
|
+
Whether to exploit jobscript parallelism.
|
|
88
|
+
environments: ~hpcflow.app.EnvironmentsList
|
|
89
|
+
The execution environments to use.
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
_child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
|
|
93
|
+
ChildObjectSpec(
|
|
94
|
+
name="jobscripts",
|
|
95
|
+
class_name="Jobscript",
|
|
96
|
+
is_multiple=True,
|
|
97
|
+
parent_ref="_submission",
|
|
98
|
+
),
|
|
99
|
+
ChildObjectSpec(
|
|
100
|
+
name="environments",
|
|
101
|
+
class_name="EnvironmentsList",
|
|
102
|
+
),
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
TMP_DIR_NAME = "tmp"
|
|
106
|
+
LOG_DIR_NAME = "app_logs"
|
|
107
|
+
APP_STD_DIR_NAME = "app_std"
|
|
108
|
+
JS_DIR_NAME = "jobscripts"
|
|
109
|
+
JS_STD_DIR_NAME = "js_std"
|
|
110
|
+
JS_RUN_IDS_DIR_NAME = "js_run_ids"
|
|
111
|
+
JS_FUNCS_DIR_NAME = "js_funcs"
|
|
112
|
+
JS_WIN_PIDS_DIR_NAME = "js_pids"
|
|
113
|
+
JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
|
|
114
|
+
SCRIPTS_DIR_NAME = "scripts"
|
|
115
|
+
COMMANDS_DIR_NAME = "commands"
|
|
116
|
+
WORKFLOW_APP_ALIAS = "wkflow_app"
|
|
117
|
+
|
|
118
|
+
    def __init__(
        self,
        index: int,
        jobscripts: list[Jobscript],
        workflow: Workflow | None = None,
        at_submit_metadata: dict[str, Any] | None = None,
        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
        environments: EnvironmentsList | None = None,
    ):
        """Initialise the submission from its jobscripts and submit-time state.

        See the class docstring for parameter descriptions.
        """
        self._index = index
        self._jobscripts = jobscripts
        # NOTE: `or` (rather than an `is None` test) means a falsy value (e.g. an
        # empty dict) is also replaced by the freshly constructed defaults:
        self._at_submit_metadata = at_submit_metadata or {
            k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
        }
        self._JS_parallelism = JS_parallelism
        self._environments = environments  # assigned by _set_environments

        self._submission_parts_lst: list[SubmissionPart] | None = (
            None  # assigned on first access
        )

        # updated in _submission_EARs_cache context manager:
        self._use_EARs_cache = False
        self._EARs_cache: dict[int, ElementActionRun] = {}

        if workflow:
            #: The workflow this is part of.
            self.workflow = workflow

        # link child jobscripts back to this submission (via `parent_ref`):
        self._set_parent_refs()
|
|
148
|
+
|
|
149
|
+
def _ensure_JS_parallelism_set(self):
|
|
150
|
+
"""Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
|
|
151
|
+
or `'scheduled'`.
|
|
152
|
+
|
|
153
|
+
Notes
|
|
154
|
+
-----
|
|
155
|
+
This method is called after the Submission object is first created in
|
|
156
|
+
`Workflow._add_submission`.
|
|
157
|
+
|
|
158
|
+
"""
|
|
159
|
+
# if JS_parallelism explicitly requested but store doesn't support, raise:
|
|
160
|
+
supports_JS_para = self.workflow._store._features.jobscript_parallelism
|
|
161
|
+
if self.JS_parallelism:
|
|
162
|
+
# could be: True | "direct" | "scheduled"
|
|
163
|
+
if not supports_JS_para:
|
|
164
|
+
# if status:
|
|
165
|
+
# status.stop()
|
|
166
|
+
raise ValueError(
|
|
167
|
+
f"Store type {self.workflow._store!r} does not support jobscript "
|
|
168
|
+
f"parallelism."
|
|
169
|
+
)
|
|
170
|
+
elif self.JS_parallelism is None:
|
|
171
|
+
# by default only use JS parallelism for scheduled jobscripts:
|
|
172
|
+
self._JS_parallelism = "scheduled" if supports_JS_para else False
|
|
173
|
+
|
|
174
|
+
    @TimeIt.decorator
    def _set_environments(self) -> None:
        """Resolve and validate the execution environments required by this submission.

        Collects, per jobscript, the environment specs and executable labels required
        by each run, checks that matching environments/executables (and executable
        instances compatible with each jobscript's resources) exist in the app data,
        and stores the result in `self._environments`.

        Raises
        ------
        MultipleEnvironmentsError, MissingEnvironmentError,
        MissingEnvironmentExecutableError, MissingEnvironmentExecutableInstanceError
        """
        filterable = self._app.ElementResources.get_env_instance_filterable_attributes()

        # map required environments and executable labels to job script indices:
        req_envs: dict[tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]] = (
            defaultdict(lambda: defaultdict(set))
        )
        with self.workflow.cached_merged_parameters():
            # using the cache (for `run.env_spec_hashable` -> `run.resources`) should
            # significantly speed up this loop, unless a large resources sequence is used:
            for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
                for run in all_EARs_i:
                    env_spec_h = run.env_spec_hashable
                    for exec_label_j in run.action.get_required_executables():
                        req_envs[env_spec_h][exec_label_j].add(js_idx)
                    # add any environment for which an executable was not required:
                    if env_spec_h not in req_envs:
                        # bare defaultdict lookup deliberately inserts an empty entry:
                        req_envs[env_spec_h]

        # check these envs/execs exist in app data:
        envs: list[Environment] = []
        for env_spec_h, exec_js in req_envs.items():
            env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
            try:
                env_i = self._app.envs.get(**env_spec)
            except ObjectListMultipleMatchError:
                raise MultipleEnvironmentsError(env_spec)
            except ValueError:
                raise MissingEnvironmentError(env_spec) from None
            else:
                if env_i not in envs:
                    envs.append(env_i)

            for exec_i_lab, js_idx_set in exec_js.items():
                try:
                    exec_i = env_i.executables.get(exec_i_lab)
                except ValueError:
                    raise MissingEnvironmentExecutableError(
                        env_spec, exec_i_lab
                    ) from None

                # check matching executable instances exist:
                for js_idx_j in js_idx_set:
                    js_res = self.jobscripts[js_idx_j].resources
                    filter_exec = {j: getattr(js_res, j) for j in filterable}
                    if not exec_i.filter_instances(**filter_exec):
                        raise MissingEnvironmentExecutableInstanceError(
                            env_spec, exec_i_lab, js_idx_j, filter_exec
                        )

        # save env definitions to the environments attribute:
        self._environments = self._app.EnvironmentsList(envs)
|
|
227
|
+
|
|
228
|
+
@override
|
|
229
|
+
def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
|
|
230
|
+
dct = super()._postprocess_to_dict(d)
|
|
231
|
+
del dct["_workflow"]
|
|
232
|
+
del dct["_index"]
|
|
233
|
+
del dct["_submission_parts_lst"]
|
|
234
|
+
del dct["_use_EARs_cache"]
|
|
235
|
+
del dct["_EARs_cache"]
|
|
236
|
+
return {k.lstrip("_"): v for k, v in dct.items()}
|
|
237
|
+
|
|
238
|
+
    @property
    def index(self) -> int:
        """
        The index of this submission.
        """
        return self._index

    @property
    def environments(self) -> EnvironmentsList:
        """
        The execution environments to use.
        """
        # set by `_set_environments` (or passed to the constructor); must be non-None
        # by the time this is accessed:
        assert self._environments
        return self._environments

    @property
    def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
        """Submit-time metadata, as retrieved via the workflow's persistent store."""
        return self.workflow._store.get_submission_at_submit_metadata(
            sub_idx=self.index, metadata_attr=self._at_submit_metadata
        )

    @property
    def _submission_parts(self) -> dict[str, list[int]]:
        # mapping of submit-time string to the jobscript indices submitted at that
        # time; an empty dict if nothing has been submitted yet:
        return self.at_submit_metadata["submission_parts"] or {}
|
|
262
|
+
|
|
263
|
+
@property
|
|
264
|
+
def submission_parts(self) -> list[SubmissionPart]:
|
|
265
|
+
if self._submission_parts_lst is None:
|
|
266
|
+
self._submission_parts_lst = [
|
|
267
|
+
{
|
|
268
|
+
"submit_time": parse_timestamp(dt, self.workflow.ts_fmt),
|
|
269
|
+
"jobscripts": js_idx,
|
|
270
|
+
}
|
|
271
|
+
for dt, js_idx in self._submission_parts.items()
|
|
272
|
+
]
|
|
273
|
+
return self._submission_parts_lst
|
|
274
|
+
|
|
275
|
+
    @property
    @TimeIt.decorator
    def use_EARs_cache(self) -> bool:
        """Whether to pre-cache all EARs associated with the submission."""
        return self._use_EARs_cache

    @use_EARs_cache.setter
    @TimeIt.decorator
    def use_EARs_cache(self, value: bool):
        """Toggle the EAR caching facility."""
        if self._use_EARs_cache == value:
            return  # no change; nothing to do
        self._use_EARs_cache = value
        if value:
            # bulk-fetch all EARs in one workflow access and key them by ID:
            all_EAR_IDs = list(self.all_EAR_IDs)
            self._EARs_cache = {
                ear_ID: ear
                for ear_ID, ear in zip(
                    all_EAR_IDs, self.workflow.get_EARs_from_IDs(all_EAR_IDs)
                )
            }
        else:
            self._EARs_cache = {}  # reset the cache
|
|
298
|
+
|
|
299
|
+
@TimeIt.decorator
|
|
300
|
+
def get_start_time(self, submit_time: str) -> datetime | None:
|
|
301
|
+
"""Get the start time of a given submission part."""
|
|
302
|
+
times = (
|
|
303
|
+
self.jobscripts[i].start_time for i in self._submission_parts[submit_time]
|
|
304
|
+
)
|
|
305
|
+
return min((t for t in times if t is not None), default=None)
|
|
306
|
+
|
|
307
|
+
@TimeIt.decorator
|
|
308
|
+
def get_end_time(self, submit_time: str) -> datetime | None:
|
|
309
|
+
"""Get the end time of a given submission part."""
|
|
310
|
+
times = (self.jobscripts[i].end_time for i in self._submission_parts[submit_time])
|
|
311
|
+
return max((t for t in times if t is not None), default=None)
|
|
312
|
+
|
|
313
|
+
@property
|
|
314
|
+
@TimeIt.decorator
|
|
315
|
+
def start_time(self) -> datetime | None:
|
|
316
|
+
"""Get the first non-None start time over all submission parts."""
|
|
317
|
+
with self.using_EARs_cache():
|
|
318
|
+
times = (
|
|
319
|
+
self.get_start_time(submit_time) for submit_time in self._submission_parts
|
|
320
|
+
)
|
|
321
|
+
return min((t for t in times if t is not None), default=None)
|
|
322
|
+
|
|
323
|
+
@property
|
|
324
|
+
@TimeIt.decorator
|
|
325
|
+
def end_time(self) -> datetime | None:
|
|
326
|
+
"""Get the final non-None end time over all submission parts."""
|
|
327
|
+
with self.using_EARs_cache():
|
|
328
|
+
times = (
|
|
329
|
+
self.get_end_time(submit_time) for submit_time in self._submission_parts
|
|
330
|
+
)
|
|
331
|
+
return max((t for t in times if t is not None), default=None)
|
|
332
|
+
|
|
333
|
+
    @contextmanager
    def using_EARs_cache(self):
        """
        A context manager to load and cache all EARs associated with this submission (and
        its jobscripts).
        """
        if self.use_EARs_cache:
            # already caching (e.g. nested use); the outer context manages lifetime:
            yield
        else:
            self.use_EARs_cache = True  # populates the cache (see setter)
            try:
                yield
            finally:
                self.use_EARs_cache = False  # clears the cache
|
|
347
|
+
|
|
348
|
+
    @property
    def jobscripts(self) -> list[Jobscript]:
        """
        The jobscripts in this submission.
        """
        return self._jobscripts

    @property
    def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
        """
        Whether to exploit jobscript parallelism.
        """
        return self._JS_parallelism

    @property
    def workflow(self) -> Workflow:
        """
        The workflow this is part of.
        """
        return self._workflow

    @workflow.setter
    def workflow(self, wk: Workflow):
        # note: also assigned in `__init__` when a workflow is supplied
        self._workflow = wk

    @property
    def jobscript_indices(self) -> tuple[int, ...]:
        """All associated jobscript indices."""
        return tuple(js.index for js in self.jobscripts)
|
|
377
|
+
|
|
378
|
+
@property
|
|
379
|
+
def submitted_jobscripts(self) -> tuple[int, ...]:
|
|
380
|
+
"""Jobscript indices that have been successfully submitted."""
|
|
381
|
+
return tuple(j for sp in self.submission_parts for j in sp["jobscripts"])
|
|
382
|
+
|
|
383
|
+
@property
|
|
384
|
+
def outstanding_jobscripts(self) -> tuple[int, ...]:
|
|
385
|
+
"""Jobscript indices that have not yet been successfully submitted."""
|
|
386
|
+
return tuple(set(self.jobscript_indices).difference(self.submitted_jobscripts))
|
|
387
|
+
|
|
388
|
+
@property
|
|
389
|
+
def status(self) -> SubmissionStatus:
|
|
390
|
+
"""
|
|
391
|
+
The status of this submission.
|
|
392
|
+
"""
|
|
393
|
+
if not self.submission_parts:
|
|
394
|
+
return SubmissionStatus.PENDING
|
|
395
|
+
elif set(self.submitted_jobscripts) == set(self.jobscript_indices):
|
|
396
|
+
return SubmissionStatus.SUBMITTED
|
|
397
|
+
else:
|
|
398
|
+
return SubmissionStatus.PARTIALLY_SUBMITTED
|
|
399
|
+
|
|
400
|
+
@property
|
|
401
|
+
def needs_submit(self) -> bool:
|
|
402
|
+
"""
|
|
403
|
+
Whether this submission needs a submit to be done.
|
|
404
|
+
"""
|
|
405
|
+
return self.status in (
|
|
406
|
+
SubmissionStatus.PENDING,
|
|
407
|
+
SubmissionStatus.PARTIALLY_SUBMITTED,
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
@property
|
|
411
|
+
def needs_app_log_dir(self) -> bool:
|
|
412
|
+
"""
|
|
413
|
+
Whether this submision requires an app log directory.
|
|
414
|
+
"""
|
|
415
|
+
for js in self.jobscripts:
|
|
416
|
+
if js.resources.write_app_logs:
|
|
417
|
+
return True
|
|
418
|
+
return False
|
|
419
|
+
|
|
420
|
+
@property
|
|
421
|
+
def needs_win_pids_dir(self) -> bool:
|
|
422
|
+
"""
|
|
423
|
+
Whether this submision requires a directory for process ID files (Windows only).
|
|
424
|
+
"""
|
|
425
|
+
for js in self.jobscripts:
|
|
426
|
+
if js.os_name == "nt":
|
|
427
|
+
return True
|
|
428
|
+
return False
|
|
429
|
+
|
|
430
|
+
@property
|
|
431
|
+
def needs_script_indices_dir(self) -> bool:
|
|
432
|
+
"""
|
|
433
|
+
Whether this submision requires a directory for combined-script script ID files.
|
|
434
|
+
"""
|
|
435
|
+
for js in self.jobscripts:
|
|
436
|
+
if js.resources.combine_scripts:
|
|
437
|
+
return True
|
|
438
|
+
return False
|
|
439
|
+
|
|
440
|
+
    @classmethod
    def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
        """
        The directory path to files associated with the specified submission.

        Submission directories are named by submission index under `submissions_path`.
        """
        return submissions_path / str(sub_idx)

    @classmethod
    def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
        """
        The path to the temporary files directory, for the specified submission.
        """
        return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME

    @classmethod
    def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
        """
        The path to the app log directory for this submission, for the specified
        submission.
        """
        return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME

    @staticmethod
    def get_app_log_file_name(run_ID: int | str) -> str:
        """
        The app log file name.

        One log file per run, named after the run ID.
        """
        # TODO: consider combine_app_logs argument
        return f"r_{run_ID}.log"
|
|
469
|
+
|
|
470
|
+
@classmethod
def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int) -> Path:
    """
    The file path to the app log for the given run, for the specified submission.

    Notes
    -----
    Delegates to ``get_app_log_path`` and ``get_app_log_file_name`` so the log
    directory name and log file-name convention are each defined in exactly one
    place (previously the directory join was duplicated here). A ``-> Path``
    return annotation is also added, matching the sibling path helpers.
    """
    log_dir = cls.get_app_log_path(submissions_path, sub_idx)
    return log_dir / cls.get_app_log_file_name(run_ID)
|
|
480
|
+
|
|
481
|
+
@classmethod
def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the app standard output and error stream files directory, for
    the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.APP_STD_DIR_NAME
|
|
488
|
+
|
|
489
|
+
@classmethod
def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the jobscript files directory, for the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_DIR_NAME
|
|
495
|
+
|
|
496
|
+
@classmethod
def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the jobscript standard output and error files directory, for
    the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_STD_DIR_NAME
|
|
503
|
+
|
|
504
|
+
@classmethod
def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing jobscript run IDs, for the specified
    submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_RUN_IDS_DIR_NAME
|
|
511
|
+
|
|
512
|
+
@classmethod
def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing the shell functions that are invoked
    within jobscripts and command files, for the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_FUNCS_DIR_NAME
|
|
519
|
+
|
|
520
|
+
@classmethod
def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing process ID files (Windows only), for
    the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_WIN_PIDS_DIR_NAME
|
|
527
|
+
|
|
528
|
+
@classmethod
def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing script indices for combined-script
    jobscripts only, for the specified submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.JS_SCRIPT_INDICES_DIR_NAME
|
|
535
|
+
|
|
536
|
+
@classmethod
def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing action scripts, for the specified
    submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.SCRIPTS_DIR_NAME
|
|
542
|
+
|
|
543
|
+
@classmethod
def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
    """
    The path to the directory containing command files, for the specified
    submission.
    """
    sub_dir = cls.get_path(submissions_path, sub_idx)
    return sub_dir / cls.COMMANDS_DIR_NAME
|
|
549
|
+
|
|
550
|
+
@property
def path(self) -> Path:
    """
    The path to this submission's directory (the root directory containing all
    files associated with this submission).

    Notes
    -----
    The previous docstring incorrectly described this as the action-scripts
    directory; that is ``scripts_path``.
    """
    return self.get_path(self.workflow.submissions_path, self.index)
|
|
556
|
+
|
|
557
|
+
@property
def tmp_path(self) -> Path:
    """
    The path to the temporary files directory for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_tmp_path(subs_path, self.index)
|
|
563
|
+
|
|
564
|
+
@property
def app_log_path(self) -> Path:
    """
    The path to the app log directory for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_app_log_path(subs_path, self.index)
|
|
570
|
+
|
|
571
|
+
@property
def app_std_path(self) -> Path:
    """
    The path to the app standard output and error stream files directory, for
    this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_app_std_path(subs_path, self.index)
|
|
578
|
+
|
|
579
|
+
@property
def js_path(self) -> Path:
    """
    The path to the jobscript files directory, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_path(subs_path, self.index)
|
|
585
|
+
|
|
586
|
+
@property
def js_std_path(self) -> Path:
    """
    The path to the jobscript standard output and error files directory, for
    this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_std_path(subs_path, self.index)
|
|
593
|
+
|
|
594
|
+
@property
def js_run_ids_path(self) -> Path:
    """
    The path to the directory containing jobscript run IDs, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_run_ids_path(subs_path, self.index)
|
|
600
|
+
|
|
601
|
+
@property
def js_funcs_path(self) -> Path:
    """
    The path to the directory containing the shell functions that are invoked
    within jobscripts and command files, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_funcs_path(subs_path, self.index)
|
|
608
|
+
|
|
609
|
+
@property
def js_win_pids_path(self) -> Path:
    """
    The path to the directory containing process ID files (Windows only), for
    this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_win_pids_path(subs_path, self.index)
|
|
616
|
+
|
|
617
|
+
@property
def js_script_indices_path(self) -> Path:
    """
    The path to the directory containing script indices for combined-script
    jobscripts only, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_js_script_indices_path(subs_path, self.index)
|
|
624
|
+
|
|
625
|
+
@property
def scripts_path(self) -> Path:
    """
    The path to the directory containing action scripts, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_scripts_path(subs_path, self.index)
|
|
631
|
+
|
|
632
|
+
@property
def commands_path(self) -> Path:
    """
    The path to the directory containing command files, for this submission.
    """
    subs_path = self.workflow.submissions_path
    return self.get_commands_path(subs_path, self.index)
|
|
638
|
+
|
|
639
|
+
@property
@TimeIt.decorator
def all_EAR_IDs(self) -> Iterable[int]:
    """
    The IDs of all EARs in this submission (lazily generated).
    """
    return (
        int(ear_id)
        for jobscript in self.jobscripts
        for ear_id in jobscript.all_EAR_IDs
    )
|
|
646
|
+
|
|
647
|
+
@property
@TimeIt.decorator
def all_EARs(self) -> list[ElementActionRun]:
    """
    All EARs in this submission.
    """
    # prefer the cache when enabled; otherwise fetch from the workflow store:
    if not self.use_EARs_cache:
        return self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)
    return list(self._EARs_cache.values())
|
|
657
|
+
|
|
658
|
+
@property
@TimeIt.decorator
def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
    """The EAR IDs of each jobscript, in jobscript order."""
    return [jobscript.all_EAR_IDs for jobscript in self.jobscripts]
|
|
662
|
+
|
|
663
|
+
@property
@TimeIt.decorator
def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
    """The EAR objects of each jobscript, in jobscript order."""
    # resolve all EARs once, then group them by each jobscript's ID array:
    by_id = {run.id_: run for run in self.all_EARs}
    return [
        [by_id[ear_id] for ear_id in js_ear_ids]
        for js_ear_ids in self.all_EARs_IDs_by_jobscript
    ]
|
|
670
|
+
|
|
671
|
+
@property
@TimeIt.decorator
def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
    """
    All EARs in this submission, grouped by task index and then element index.
    """
    grouped: dict[int, dict[int, list[ElementActionRun]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for run in self.all_EARs:
        grouped[run.task.index][run.element.index].append(run)
    return grouped
|
|
683
|
+
|
|
684
|
+
@property
def is_scheduled(self) -> tuple[bool, ...]:
    """Whether each jobscript of this submission uses a scheduler."""
    return tuple(jobscript.is_scheduled for jobscript in self.jobscripts)
|
|
688
|
+
|
|
689
|
+
@overload
def get_active_jobscripts(
    self, as_json: Literal[False] = False
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]: ...

@overload
def get_active_jobscripts(
    self, as_json: Literal[True]
) -> Mapping[int, Mapping[int, Mapping[int, str]]]: ...

@TimeIt.decorator
def get_active_jobscripts(
    self,
    as_json: Literal[True] | Literal[False] = False,  # TODO: why can't we use bool?
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
    """Get jobscripts that are active on this machine, and their active states."""
    # result shape: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
    # TODO: query the scheduler once for all jobscripts?
    active: dict[int, Mapping[int, Mapping[int, JobscriptElementState | str]]] = {}
    with self.using_EARs_cache():
        for js in self.jobscripts:
            # only include jobscripts that report at least one active state:
            states = js.get_active_states(as_json=as_json)
            if states:
                active[js.index] = states
    return active
|
|
713
|
+
|
|
714
|
+
@TimeIt.decorator
def _write_scripts(
    self, cache: ObjectCache, status: Status | None = None
) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
    """Write to disk all action scripts associated with this submission.

    Parameters
    ----------
    cache
        Object cache providing, at minimum, the `runs` lookup (run ID -> run).
    status
        Optional status reporter, updated with progress messages.

    Returns
    -------
    run_cmd_file_names
        Maps run ID to the root run ID whose commands file it shares, or
        ``None`` when the run has no commands file of its own.
    run_indices
        ``(num_runs, 9)`` integer array of per-run indices; ``-1`` marks unset
        entries. The last column flags whether the run's action requires a
        directory.
    run_inp_files
        Maps a run index to the input-file paths to copy into its run
        directory.
    """
    # TODO: rename this method

    # TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
    # `commands` in the action?
    # TODO: scripts must have the same exe and the same environment as well?
    # TODO: env_spec should be included in jobscript hash if combine_scripts=True ?

    # schema name -> action index -> set of hashable env specs seen for that action:
    actions_by_schema: dict[str, dict[int, set]] = defaultdict(
        lambda: defaultdict(set)
    )
    combined_env_specs = {}

    # task insert IDs and action indices for each combined_scripts jobscript:
    combined_actions = {}

    # commands-file hash -> run IDs sharing that commands file:
    cmd_hashes = defaultdict(set)
    num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
    run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
    run_inp_files = defaultdict(
        list
    )  # keys are `run_idx`, values are Paths to copy to run dir
    run_cmd_file_names: dict[int, int | None] = {}  # None if no commands to write
    run_idx = 0

    if status:
        status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")

    all_runs = cache.runs
    assert all_runs is not None
    runs_ids_by_js = self.all_EARs_IDs_by_jobscript

    with self.workflow.cached_merged_parameters():
        for js in self.jobscripts:
            js_idx = js.index
            # first run of this jobscript; used below for the shared env spec:
            js_run_0 = all_runs[runs_ids_by_js[js.index][0]]

            if js.resources.combine_scripts:
                # this will be one or more snippet scripts that needs to be combined into
                # one script for the whole jobscript

                # need to write one script + one commands file for the whole jobscript

                # env_spec will be the same for all runs of this jobscript:
                combined_env_specs[js_idx] = js_run_0.env_spec
                combined_actions[js_idx] = [
                    [j[0:2] for j in i.task_actions] for i in js.blocks
                ]

            for idx, run_id in enumerate(js.all_EAR_IDs):
                run = all_runs[run_id]

                run_indices[run_idx] = [
                    run.task.insert_ID,
                    run.element.id_,
                    run.element_iteration.id_,
                    run.id_,
                    run.element.index,
                    run.element_iteration.index,
                    run.element_action.action_idx,
                    run.index,
                    int(run.action.requires_dir),
                ]
                run_idx += 1

                if status and run_idx % 10 == 0:
                    status.update(
                        f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
                    )

                if js.resources.combine_scripts:
                    if idx == 0:
                        # the commands file for a combined jobscript won't have
                        # any parameter data in the command line, so should raise
                        # if something is found to be unset:
                        run.try_write_commands(
                            environments=self.environments,
                            jobscript=js,
                            raise_on_unset=True,
                        )
                    run_cmd_file_names[run.id_] = None

                else:
                    if run.is_snippet_script:
                        actions_by_schema[run.action.task_schema.name][
                            run.element_action.action_idx
                        ].add(run.env_spec_hashable)

                    if run.action.commands:
                        hash_i = run.get_commands_file_hash()
                        # TODO: could further reduce number of files in the case the data
                        # indices hash is the same: if commands objects are the same and
                        # environment objects are the same, then the files will be the
                        # same, even if runs come from different task schemas/actions...
                        if hash_i not in cmd_hashes:
                            try:
                                run.try_write_commands(
                                    environments=self.environments,
                                    jobscript=js,
                                )
                            except OutputFileParserNoOutputError:
                                # no commands to write, might be used just for saving
                                # files
                                run_cmd_file_names[run.id_] = None
                        cmd_hashes[hash_i].add(run.id_)
                    else:
                        run_cmd_file_names[run.id_] = None

                if run.action.requires_dir:
                    # TODO: what is type of `path`?
                    # NOTE(review): `run_idx` was incremented above, so input
                    # files are keyed under the *next* run's positional index —
                    # confirm this is intended by the consumer of the returned
                    # `run_inp_files`.
                    for name, path in run.get("input_files", {}).items():
                        if path:
                            run_inp_files[run_idx].append(path)

    # assign each run in a commands-file group the group's root (lowest) run ID:
    for run_ids in cmd_hashes.values():
        run_ids_srt = sorted(run_ids)
        root_id = run_ids_srt[0]  # used for command file name for this group
        # TODO: could store multiple IDs to reduce number of files created
        for run_id_i in run_ids_srt:
            if run_id_i not in run_cmd_file_names:
                run_cmd_file_names[run_id_i] = root_id

    if status:
        status.update("Adding new submission: writing scripts...")

    # script determinant hash -> path of the script already written for it:
    seen: dict[int, Path] = {}
    combined_script_data: dict[int, dict[int, list[tuple[str, Path, bool]]]] = (
        defaultdict(lambda: defaultdict(list))
    )
    for task in self.workflow.tasks:
        for schema in task.template.schemas:
            if schema.name in actions_by_schema:
                for idx, action in enumerate(schema.actions):

                    if not action.script:
                        continue

                    for env_spec_h in actions_by_schema[schema.name][idx]:

                        env_spec = action.env_spec_from_hashable(env_spec_h)
                        name, snip_path, specs = action.get_script_artifact_name(
                            env_spec=env_spec,
                            act_idx=idx,
                            ret_specifiers=True,
                        )
                        script_hash = action.get_script_determinant_hash(specs)
                        script_path = self.scripts_path / name
                        prev_path = seen.get(script_hash)
                        if script_path == prev_path:
                            continue

                        elif prev_path:
                            # try to make a symbolic link to the file previously
                            # created:
                            try:
                                script_path.symlink_to(prev_path.name)
                            except OSError:
                                # windows requires admin permission, copy instead:
                                shutil.copy(prev_path, script_path)
                        else:
                            # write script to disk:
                            source_str = action.compose_source(snip_path)
                            if source_str:
                                with script_path.open("wt", newline="\n") as fp:
                                    fp.write(source_str)
                                seen[script_hash] = script_path

    # combined script stuff
    for js_idx, act_IDs in combined_actions.items():
        for block_idx, act_IDs_i in enumerate(act_IDs):
            for task_iID, act_idx in act_IDs_i:
                task = self.workflow.tasks.get(insert_ID=task_iID)
                schema = task.template.schemas[0]  # TODO: multiple schemas
                action = schema.actions[act_idx]
                func_name, snip_path = action.get_script_artifact_name(
                    env_spec=combined_env_specs[js_idx],
                    act_idx=act_idx,
                    ret_specifiers=False,
                    include_suffix=False,
                    specs_suffix_delim="_",  # can't use "." in function name
                )
                combined_script_data[js_idx][block_idx].append(
                    (func_name, snip_path, action.requires_dir)
                )

    for js_idx, action_scripts in combined_script_data.items():
        js = self.jobscripts[js_idx]

        script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
            [i for _, i in sorted(action_scripts.items())]
        )
        js.write_script_indices_file(script_indices, num_elems, num_acts)

        script_path = self.scripts_path / f"js_{js_idx}.py"  # TODO: refactor name
        with script_path.open("wt", newline="\n") as fp:
            fp.write(script_str)

    return run_cmd_file_names, run_indices, run_inp_files
|
|
916
|
+
|
|
917
|
+
@TimeIt.decorator
def _calculate_run_dir_indices(
    self,
    run_indices: np.ndarray,
    cache: ObjectCache,
) -> tuple[np.ndarray, np.ndarray]:
    """Compute packed directory-index records for runs that require a directory.

    Parameters
    ----------
    run_indices
        ``(num_runs, 9)`` array as produced by ``_write_scripts``; the final
        column flags runs whose action requires a directory.
    cache
        Object cache providing `elements` and `iterations` lookups.

    Returns
    -------
    run_dir_arr
        Structured array (``RUN_DIR_ARR_DTYPE``) of
        ``(task, element, iteration, action, run, element-depth,
        iteration-depth)`` per directory-requiring run. Sentinel values
        (``uint8``/``uint32`` max) mean "omit this level from the path".
    run_ids
        Run IDs corresponding to each row of ``run_dir_arr``.
    """

    assert cache.elements is not None
    assert cache.iterations is not None
    # get the multiplicities of all tasks, elements, iterations, and runs:
    wk_num_tasks = self.workflow.num_tasks
    task_num_elems = {}
    elem_num_iters = {}
    iter_num_acts = {}
    iter_acts_num_runs = {}
    for task in self.workflow.tasks:
        elem_IDs = task.element_IDs
        task_num_elems[task.insert_ID] = len(elem_IDs)
        for elem_ID in elem_IDs:
            iter_IDs = cache.elements[elem_ID].iteration_IDs
            elem_num_iters[elem_ID] = len(iter_IDs)
            for iter_ID in iter_IDs:
                run_IDs = cache.iterations[iter_ID].EAR_IDs
                if run_IDs:  # the schema might have no actions
                    iter_num_acts[iter_ID] = len(run_IDs)
                    for act_idx, act_run_IDs in run_IDs.items():
                        iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
                else:
                    iter_num_acts[iter_ID] = 0

    # sentinel values meaning "this path component is omitted":
    max_u8 = np.iinfo(np.uint8).max
    max_u32 = np.iinfo(np.uint32).max
    MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
    MAX_ITERS_PER_DIR = 1000
    requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
    run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
    run_ids = np.empty(requires_dir_idx.size, dtype=int)

    # memoised nesting depths, keyed by task insert ID / element ID:
    elem_depths: dict[int, int] = {}
    iter_depths: dict[int, int] = {}
    for idx in range(requires_dir_idx.size):
        row = run_indices[requires_dir_idx[idx]]
        t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
        run_ids[idx] = r_id

        num_elems_i = task_num_elems[t_iID]
        num_iters_i = elem_num_iters[e_id]
        # NOTE(review): `num_acts_i` is not used below — confirm whether the
        # lookup is retained deliberately (e.g. as a KeyError sanity check).
        num_acts_i = iter_num_acts[i_id]  # see TODO below
        num_runs_i = iter_acts_num_runs[(i_id, a_idx)]

        # element level: omit when singular; split into sub-dirs when large:
        e_depth = 1
        if num_elems_i == 1:
            e_idx = max_u32
        elif num_elems_i > MAX_ELEMS_PER_DIR:
            if (e_depth := elem_depths.get(t_iID, -1)) == -1:
                e_depth = int(
                    np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
                )
                elem_depths[t_iID] = e_depth

        # TODO: i_idx should be either MAX or the iteration ID, which will index into
        # a separate array to get the formatted loop indices e.g.
        # ("outer_loop_0_inner_loop_9")
        i_depth = 1
        if num_iters_i == 1:
            i_idx = max_u32
        elif num_iters_i > MAX_ITERS_PER_DIR:
            if (i_depth := iter_depths.get(e_id, -1)) == -1:
                i_depth = int(
                    np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
                )
                iter_depths[e_id] = i_depth

        a_idx = max_u8  # TODO: for now, always exclude action index dir

        if num_runs_i == 1:
            r_idx = max_u8

        if wk_num_tasks == 1:
            t_iID = max_u8

        run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)

    return run_dir_arr, run_ids
|
|
1001
|
+
|
|
1002
|
+
@TimeIt.decorator
def _write_execute_dirs(
    self,
    run_indices: NDArray,
    run_inp_files: dict[int, list[Path]],
    cache: ObjectCache,
    status: Status | None = None,
):
    """Create execution directories for this submission's runs and copy in any
    input files.

    Parameters
    ----------
    run_indices
        Per-run index array from ``_write_scripts``.
    run_inp_files
        Maps a run index to input-file paths to copy into that run's directory.
    cache
        Object cache passed through to ``_calculate_run_dir_indices``.
    status
        Optional status reporter, updated with progress messages.
    """

    if status:
        status.update("Adding new submission: resolving execution directories...")

    # NOTE: despite the name, `run_idx` here is the array of run *IDs*
    # returned by `_calculate_run_dir_indices`.
    run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)

    # set run dirs in persistent array:
    if run_idx.size:
        self.workflow._store.set_run_dirs(run_dir_arr, run_idx)

    # retrieve run directories as paths. array is not yet commited, so pass in
    # directly:
    run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)

    if status:
        status.update("Adding new submission: making execution directories...")

    # make directories
    for idx, run_dir in enumerate(run_dirs):
        assert run_dir
        run_dir.mkdir(parents=True, exist_ok=True)
        # NOTE(review): `run_inp_files` is keyed by positional run index in
        # `_write_scripts`, but looked up here with a run ID — confirm these
        # key spaces are intended to coincide.
        inp_files_i = run_inp_files.get(run_idx[idx])
        if inp_files_i:
            # copy (TODO: optionally symlink) any input files:
            for path_i in inp_files_i:
                shutil.copy(path_i, run_dir)
|
|
1036
|
+
|
|
1037
|
+
@staticmethod
def get_unique_schedulers_of_jobscripts(
    jobscripts: Iterable[Jobscript],
) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
    """Get unique schedulers and which of the passed jobscripts they correspond to.

    Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.

    Parameters
    ----------
    jobscripts: list[~hpcflow.app.Jobscript]

    Returns
    -------
    scheduler_mapping
        Mapping where keys are a sequence of jobscript index descriptors and
        the values are the scheduler to use for that jobscript.
        A jobscript index descriptor is a pair of the submission index and the main
        jobscript index.
    """
    js_idx: list[list[tuple[int, int]]] = []
    schedulers: list[Scheduler] = []

    # list of tuples of scheduler properties we consider to determine "uniqueness",
    # with the first string being the scheduler type (class name):
    seen_schedulers: dict[tuple, int] = {}

    for js in jobscripts:
        if (
            sched_idx := seen_schedulers.get(key := js.scheduler.unique_properties)
        ) is None:
            # BUGFIX: a newly-seen scheduler's group index is the *current*
            # number of seen schedulers (0-based). The previous
            # `len(seen_schedulers) - 1` assigned the first scheduler index -1,
            # so with two or more unique schedulers, jobscripts were appended
            # to the wrong groups (`js_idx[-1]` then pointed at the most
            # recently added group).
            seen_schedulers[key] = sched_idx = len(seen_schedulers)
            schedulers.append(js.scheduler)
            js_idx.append([])
        js_idx[sched_idx].append((js.submission.index, js.index))

    return zip(map(tuple, js_idx), schedulers)
|
|
1074
|
+
|
|
1075
|
+
@property
@TimeIt.decorator
def _unique_schedulers(
    self,
) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
    """Unique schedulers of this submission's jobscripts, paired with their
    jobscript index descriptors."""
    jobscripts = self.jobscripts
    return self.get_unique_schedulers_of_jobscripts(jobscripts)
|
|
1081
|
+
|
|
1082
|
+
@TimeIt.decorator
def get_unique_schedulers(self) -> Mapping[tuple[tuple[int, int], ...], Scheduler]:
    """Get unique schedulers and which of this submission's jobscripts they
    correspond to.

    Returns
    -------
    scheduler_mapping
        Mapping where keys are a sequence of jobscript index descriptors and
        the values are the scheduler to use for that jobscript.
        A jobscript index descriptor is a pair of the submission index and the main
        jobscript index.

    Notes
    -----
    The returned mapping is effectively an association list: callers never use
    the descriptor tuples for lookup.
    """
    return {descriptors: sched for descriptors, sched in self._unique_schedulers}
|
|
1097
|
+
|
|
1098
|
+
@TimeIt.decorator
def get_unique_shells(self) -> Iterable[tuple[tuple[int, ...], Shell]]:
    """Get unique shells and which jobscripts they correspond to."""
    js_idx: list[list[int]] = []
    shells: list[Shell] = []

    for js in self.jobscripts:
        # `list.index` uses the same equality test as `in`, so behavior
        # matches the membership-check formulation:
        try:
            shell_idx = shells.index(js.shell)
        except ValueError:
            # first time this shell has been seen; start a new group:
            shell_idx = len(shells)
            shells.append(js.shell)
            js_idx.append([])
        js_idx[shell_idx].append(js.index)

    return zip(map(tuple, js_idx), shells)
|
|
1112
|
+
|
|
1113
|
+
def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
|
|
1114
|
+
"""Update persistent store and in-memory record of at-submit metadata.
|
|
1115
|
+
|
|
1116
|
+
Notes
|
|
1117
|
+
-----
|
|
1118
|
+
Currently there is only one type of at-submit metadata, which is the
|
|
1119
|
+
submission-parts: a mapping between a string submit-time, and the list of
|
|
1120
|
+
jobscript indices that were submitted at that submit-time. This method updates
|
|
1121
|
+
the recorded submission parts to include those passed here.
|
|
1122
|
+
|
|
1123
|
+
"""
|
|
1124
|
+
|
|
1125
|
+
self.workflow._store.update_at_submit_metadata(
|
|
1126
|
+
sub_idx=self.index,
|
|
1127
|
+
submission_parts=submission_parts,
|
|
1128
|
+
)
|
|
1129
|
+
|
|
1130
|
+
self._at_submit_metadata["submission_parts"].update(submission_parts)
|
|
1131
|
+
|
|
1132
|
+
# cache is now invalid:
|
|
1133
|
+
self._submission_parts_lst = None
|
|
1134
|
+
|
|
1135
|
+
def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
|
|
1136
|
+
self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
|
|
1137
|
+
|
|
1138
|
+
def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
    """Get the name of the jobscript functions file for the specified shell."""
    return "js_funcs_{}{}".format(shell_idx, shell.JS_EXT)
|
|
1141
|
+
|
|
1142
|
+
def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
    """Get the path of the jobscript functions file for the specified shell."""
    file_name = self.get_jobscript_functions_name(shell, shell_idx)
    return self.js_funcs_path / file_name
|
|
1145
|
+
|
|
1146
|
+
def _compose_functions_file(self, shell: Shell) -> str:
|
|
1147
|
+
"""Prepare the contents of the jobscript functions file for the specified
|
|
1148
|
+
shell.
|
|
1149
|
+
|
|
1150
|
+
Notes
|
|
1151
|
+
-----
|
|
1152
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
|
1153
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
|
1154
|
+
and command files that share the specified shell.
|
|
1155
|
+
|
|
1156
|
+
"""
|
|
1157
|
+
|
|
1158
|
+
cfg_invocation = self._app.config._file.get_invocation(
|
|
1159
|
+
self._app.config._config_key
|
|
1160
|
+
)
|
|
1161
|
+
env_setup = cfg_invocation["environment_setup"]
|
|
1162
|
+
if env_setup:
|
|
1163
|
+
env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
|
|
1164
|
+
env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
|
|
1165
|
+
else:
|
|
1166
|
+
env_setup = shell.JS_ENV_SETUP_INDENT
|
|
1167
|
+
app_invoc = list(self._app.run_time_info.invocation_command)
|
|
1168
|
+
|
|
1169
|
+
app_caps = self._app.package_name.upper()
|
|
1170
|
+
func_file_args = shell.process_JS_header_args( # TODO: rename?
|
|
1171
|
+
{
|
|
1172
|
+
"workflow_app_alias": self.WORKFLOW_APP_ALIAS,
|
|
1173
|
+
"env_setup": env_setup,
|
|
1174
|
+
"app_invoc": app_invoc,
|
|
1175
|
+
"app_caps": app_caps,
|
|
1176
|
+
"config_dir": str(self._app.config.config_directory),
|
|
1177
|
+
"config_invoc_key": self._app.config.config_key,
|
|
1178
|
+
}
|
|
1179
|
+
)
|
|
1180
|
+
out = shell.JS_FUNCS.format(**func_file_args)
|
|
1181
|
+
return out
|
|
1182
|
+
|
|
1183
|
+
def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
|
|
1184
|
+
"""Write the jobscript functions file for the specified shell.
|
|
1185
|
+
|
|
1186
|
+
Notes
|
|
1187
|
+
-----
|
|
1188
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
|
1189
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
|
1190
|
+
and command files that share the specified shell.
|
|
1191
|
+
|
|
1192
|
+
"""
|
|
1193
|
+
js_funcs_str = self._compose_functions_file(shell)
|
|
1194
|
+
path = self.get_jobscript_functions_path(shell, shell_idx)
|
|
1195
|
+
with path.open("wt", newline="\n") as fp:
|
|
1196
|
+
fp.write(js_funcs_str)
|
|
1197
|
+
|
|
1198
|
+
@TimeIt.decorator
|
|
1199
|
+
def submit(
|
|
1200
|
+
self,
|
|
1201
|
+
status: Status | None,
|
|
1202
|
+
ignore_errors: bool = False,
|
|
1203
|
+
print_stdout: bool = False,
|
|
1204
|
+
add_to_known: bool = True,
|
|
1205
|
+
) -> list[int]:
|
|
1206
|
+
"""Generate and submit the jobscripts of this submission."""
|
|
1207
|
+
|
|
1208
|
+
# TODO: support passing list of jobscript indices to submit; this will allow us
|
|
1209
|
+
# to test a submision with multiple "submission parts". would also need to check
|
|
1210
|
+
# dependencies if this customised list is passed
|
|
1211
|
+
|
|
1212
|
+
outstanding = self.outstanding_jobscripts
|
|
1213
|
+
|
|
1214
|
+
# get scheduler, shell and OS version information (also an opportunity to fail
|
|
1215
|
+
# before trying to submit jobscripts):
|
|
1216
|
+
js_vers_info: dict[int, dict[str, str | list[str]]] = {}
|
|
1217
|
+
for js_indices, sched in self._unique_schedulers:
|
|
1218
|
+
try:
|
|
1219
|
+
vers_info = sched.get_version_info()
|
|
1220
|
+
except Exception:
|
|
1221
|
+
if not ignore_errors:
|
|
1222
|
+
raise
|
|
1223
|
+
vers_info = {}
|
|
1224
|
+
for _, js_idx in js_indices:
|
|
1225
|
+
if js_idx in outstanding:
|
|
1226
|
+
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
|
1227
|
+
|
|
1228
|
+
js_shell_indices = {}
|
|
1229
|
+
for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
|
|
1230
|
+
try:
|
|
1231
|
+
vers_info = shell.get_version_info()
|
|
1232
|
+
except Exception:
|
|
1233
|
+
if not ignore_errors:
|
|
1234
|
+
raise
|
|
1235
|
+
vers_info = {}
|
|
1236
|
+
for js_idx in js_indices_2:
|
|
1237
|
+
if js_idx in outstanding:
|
|
1238
|
+
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
|
1239
|
+
js_shell_indices[js_idx] = shell_idx
|
|
1240
|
+
|
|
1241
|
+
# write a file containing useful shell functions:
|
|
1242
|
+
self._write_functions_file(shell, shell_idx)
|
|
1243
|
+
|
|
1244
|
+
hostname = socket.gethostname()
|
|
1245
|
+
machine = self._app.config.get("machine")
|
|
1246
|
+
for js_idx, vers_info_i in js_vers_info.items():
|
|
1247
|
+
js = self.jobscripts[js_idx]
|
|
1248
|
+
js._set_version_info(vers_info_i)
|
|
1249
|
+
js._set_submit_hostname(hostname)
|
|
1250
|
+
js._set_submit_machine(machine)
|
|
1251
|
+
js._set_shell_idx(js_shell_indices[js_idx])
|
|
1252
|
+
|
|
1253
|
+
self.workflow._store._pending.commit_all()
|
|
1254
|
+
|
|
1255
|
+
# map jobscript `index` to (scheduler job ID or process ID, is_array):
|
|
1256
|
+
scheduler_refs: dict[int, tuple[str, bool]] = {}
|
|
1257
|
+
submitted_js_idx: list[int] = []
|
|
1258
|
+
errs: list[JobscriptSubmissionFailure] = []
|
|
1259
|
+
for js in self.jobscripts:
|
|
1260
|
+
# check not previously submitted:
|
|
1261
|
+
if js.index not in outstanding:
|
|
1262
|
+
continue
|
|
1263
|
+
|
|
1264
|
+
# check all dependencies were submitted now or previously:
|
|
1265
|
+
if not all(
|
|
1266
|
+
js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
|
|
1267
|
+
for js_idx, _ in js.dependencies
|
|
1268
|
+
):
|
|
1269
|
+
warnings.warn(
|
|
1270
|
+
f"Cannot submit jobscript index {js.index} since not all of its "
|
|
1271
|
+
f"dependencies have been submitted: {js.dependencies!r}"
|
|
1272
|
+
)
|
|
1273
|
+
continue
|
|
1274
|
+
|
|
1275
|
+
try:
|
|
1276
|
+
if status:
|
|
1277
|
+
status.update(
|
|
1278
|
+
f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
|
|
1279
|
+
)
|
|
1280
|
+
js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
|
|
1281
|
+
scheduler_refs[js.index] = (js_ref_i, js.is_array)
|
|
1282
|
+
submitted_js_idx.append(js.index)
|
|
1283
|
+
|
|
1284
|
+
except JobscriptSubmissionFailure as err:
|
|
1285
|
+
errs.append(err)
|
|
1286
|
+
continue
|
|
1287
|
+
|
|
1288
|
+
# TODO: some way to handle KeyboardInterrupt during submission?
|
|
1289
|
+
# - stop, and cancel already submitted?
|
|
1290
|
+
|
|
1291
|
+
if submitted_js_idx:
|
|
1292
|
+
dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
|
|
1293
|
+
self._append_submission_part(
|
|
1294
|
+
submit_time=dt_str,
|
|
1295
|
+
submitted_js_idx=submitted_js_idx,
|
|
1296
|
+
)
|
|
1297
|
+
# ensure `_submission_parts` is committed
|
|
1298
|
+
self.workflow._store._pending.commit_all()
|
|
1299
|
+
|
|
1300
|
+
# add a record of the submission part to the known-submissions file
|
|
1301
|
+
if add_to_known:
|
|
1302
|
+
self._app._add_to_known_submissions(
|
|
1303
|
+
wk_path=self.workflow.path,
|
|
1304
|
+
wk_id=self.workflow.id_,
|
|
1305
|
+
sub_idx=self.index,
|
|
1306
|
+
sub_time=dt_str,
|
|
1307
|
+
)
|
|
1308
|
+
|
|
1309
|
+
if errs and not ignore_errors:
|
|
1310
|
+
if status:
|
|
1311
|
+
status.stop()
|
|
1312
|
+
raise SubmissionFailure(self.index, submitted_js_idx, errs)
|
|
1313
|
+
|
|
1314
|
+
len_js = len(submitted_js_idx)
|
|
1315
|
+
print(f"Submitted {len_js} jobscript{'s' if len_js > 1 else ''}.")
|
|
1316
|
+
|
|
1317
|
+
return submitted_js_idx
|
|
1318
|
+
|
|
1319
|
+
@TimeIt.decorator
|
|
1320
|
+
def cancel(self) -> None:
|
|
1321
|
+
"""
|
|
1322
|
+
Cancel the active jobs for this submission's jobscripts.
|
|
1323
|
+
"""
|
|
1324
|
+
if not (act_js := self.get_active_jobscripts()):
|
|
1325
|
+
print("No active jobscripts to cancel.")
|
|
1326
|
+
return
|
|
1327
|
+
for js_indices, sched in self._unique_schedulers:
|
|
1328
|
+
# filter by active jobscripts:
|
|
1329
|
+
if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
|
|
1330
|
+
print(
|
|
1331
|
+
f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
|
|
1332
|
+
f"submission {self.index} of workflow {self.workflow.name!r}."
|
|
1333
|
+
)
|
|
1334
|
+
jobscripts = [self.jobscripts[i] for i in js_idx]
|
|
1335
|
+
sched_refs = [js.scheduler_js_ref for js in jobscripts]
|
|
1336
|
+
sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
|
|
1337
|
+
else:
|
|
1338
|
+
print("No active jobscripts to cancel.")
|
|
1339
|
+
|
|
1340
|
+
@TimeIt.decorator
|
|
1341
|
+
def get_scheduler_job_IDs(self) -> tuple[str, ...]:
|
|
1342
|
+
"""Return jobscript scheduler job IDs."""
|
|
1343
|
+
return tuple(
|
|
1344
|
+
js_i.scheduler_job_ID
|
|
1345
|
+
for js_i in self.jobscripts
|
|
1346
|
+
if js_i.scheduler_job_ID is not None
|
|
1347
|
+
)
|
|
1348
|
+
|
|
1349
|
+
@TimeIt.decorator
|
|
1350
|
+
def get_process_IDs(self) -> tuple[int, ...]:
|
|
1351
|
+
"""Return jobscript process IDs."""
|
|
1352
|
+
return tuple(
|
|
1353
|
+
js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
|
|
1354
|
+
)
|
|
1355
|
+
|
|
1356
|
+
@TimeIt.decorator
|
|
1357
|
+
def list_jobscripts(
|
|
1358
|
+
self,
|
|
1359
|
+
max_js: int | None = None,
|
|
1360
|
+
jobscripts: list[int] | None = None,
|
|
1361
|
+
width: int | None = None,
|
|
1362
|
+
) -> None:
|
|
1363
|
+
"""Print a table listing jobscripts and associated information.
|
|
1364
|
+
|
|
1365
|
+
Parameters
|
|
1366
|
+
----------
|
|
1367
|
+
max_js
|
|
1368
|
+
Maximum jobscript index to display. This cannot be specified with `jobscripts`.
|
|
1369
|
+
jobscripts
|
|
1370
|
+
A list of jobscripts to display. This cannot be specified with `max_js`.
|
|
1371
|
+
width
|
|
1372
|
+
Width in characters of the printed table.
|
|
1373
|
+
|
|
1374
|
+
"""
|
|
1375
|
+
self.workflow.list_jobscripts(
|
|
1376
|
+
sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
|
|
1377
|
+
)
|
|
1378
|
+
|
|
1379
|
+
@TimeIt.decorator
|
|
1380
|
+
def list_task_jobscripts(
|
|
1381
|
+
self,
|
|
1382
|
+
task_names: list[str] | None = None,
|
|
1383
|
+
max_js: int | None = None,
|
|
1384
|
+
width: int | None = None,
|
|
1385
|
+
) -> None:
|
|
1386
|
+
"""Print a table listing the jobscripts associated with the specified (or all)
|
|
1387
|
+
tasks for the specified submission.
|
|
1388
|
+
|
|
1389
|
+
Parameters
|
|
1390
|
+
----------
|
|
1391
|
+
task_names
|
|
1392
|
+
List of sub-strings to match to task names. Only matching task names will be
|
|
1393
|
+
included.
|
|
1394
|
+
max_js
|
|
1395
|
+
Maximum jobscript index to display.
|
|
1396
|
+
width
|
|
1397
|
+
Width in characters of the printed table.
|
|
1398
|
+
|
|
1399
|
+
"""
|
|
1400
|
+
self.workflow.list_task_jobscripts(
|
|
1401
|
+
sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
|
|
1402
|
+
)
|