hpcflow 0.1.9__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -462
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.9.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -458
- hpcflow/archive/archive.py +0 -308
- hpcflow/archive/cloud/cloud.py +0 -47
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -432
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -232
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2549
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -323
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -167
- hpcflow/variables.py +0 -544
- hpcflow-0.1.9.dist-info/METADATA +0 -168
- hpcflow-0.1.9.dist-info/RECORD +0 -45
- hpcflow-0.1.9.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.9.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
hpcflow/sdk/demo/cli.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI components for demonstration code.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from random import randint
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
import click
|
|
10
|
+
|
|
11
|
+
from hpcflow.sdk.core.workflow import Workflow
|
|
12
|
+
from hpcflow.sdk.core.utils import get_process_stamp
|
|
13
|
+
from hpcflow.sdk.cli_common import (
|
|
14
|
+
format_option,
|
|
15
|
+
path_option,
|
|
16
|
+
name_option,
|
|
17
|
+
name_timestamp_option,
|
|
18
|
+
name_dir_option,
|
|
19
|
+
overwrite_option,
|
|
20
|
+
store_option,
|
|
21
|
+
ts_fmt_option,
|
|
22
|
+
ts_name_fmt_option,
|
|
23
|
+
variables_option,
|
|
24
|
+
js_parallelism_option,
|
|
25
|
+
wait_option,
|
|
26
|
+
add_to_known_opt,
|
|
27
|
+
print_idx_opt,
|
|
28
|
+
tasks_opt,
|
|
29
|
+
cancel_opt,
|
|
30
|
+
submit_status_opt,
|
|
31
|
+
make_status_opt,
|
|
32
|
+
add_sub_opt,
|
|
33
|
+
)
|
|
34
|
+
from hpcflow.sdk.submission.submission import Submission
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from collections.abc import Iterable
|
|
38
|
+
from typing import Literal
|
|
39
|
+
from ..app import BaseApp
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_demo_software_CLI(app: BaseApp):
    """Build the click group that stands in for an example piece of software."""

    def _first_line_as_int(file_path) -> int:
        # The first line of each input file is parsed as an integer ID.
        with Path(file_path).open("r") as stream:
            return int(stream.readline().strip())

    @click.group()
    def demo_software():
        # Container group only; the behaviour lives in the sub-command below.
        pass

    @demo_software.command("doSomething")
    @click.option("--infile1", "-i1", type=click.Path(exists=True), required=True)
    @click.option("--infile2", "-i2", type=click.Path(exists=True), required=True)
    @click.option("--value", "-v")
    @click.option("--out", "-o")
    def demo_do_something(
        infile1: Path, infile2: Path, value: str | None = None, out: str | None = None
    ):
        # Deliberately no docstring: click would surface it as the command help.
        click.echo("trying to do something")

        # Both inputs are read before the output file is touched.
        file_id_1 = _first_line_as_int(infile1)
        file_id_2 = _first_line_as_int(infile2)

        destination = Path("outfile.txt" if out is None else out)
        # Append mode: repeated invocations accumulate in the same output file.
        with destination.open("a") as stream:
            stream.write(f"{randint(0, int(1e6))}\n")
            stream.write(
                f"{get_process_stamp()} Generated by `doSomething --infile1 {infile1} "
                f"--infile2 {infile2}`.\n"
            )
            if value:
                stream.write(f"{get_process_stamp()} Value: {value}\n")
            for source, file_id in ((infile1, file_id_1), (infile2, file_id_2)):
                stream.write(
                    f"{get_process_stamp()} Original file ID: {source}: {file_id}\n"
                )

    return demo_software
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_demo_workflow_CLI(app: BaseApp):
    """Build the click group giving access to the app's builtin demo workflows."""

    def list_callback(ctx: click.Context, param, value: bool):
        # Eager ``--list`` handler: print the demo workflow names, then exit.
        if value and not ctx.resilient_parsing:
            # TODO: format with Rich with a one-line description
            names = app.list_demo_workflows()
            click.echo("\n".join(names))
            ctx.exit()

    @click.group()
    @click.option(
        "-l",
        "--list",
        is_flag=True,
        is_eager=True,
        expose_value=False,
        callback=list_callback,
        help="Print available builtin demo workflows.",
    )
    def demo_workflow():
        """Interact with builtin demo workflows."""
        pass

    @demo_workflow.command("make")
    @click.argument("workflow_name")
    @format_option
    @path_option
    @name_option
    @name_timestamp_option
    @name_dir_option
    @overwrite_option
    @store_option
    @ts_fmt_option
    @ts_name_fmt_option
    @variables_option
    @make_status_opt
    @add_sub_opt
    def make_demo_workflow(
        workflow_name: str,
        format: Literal["json", "yaml"] | None,
        path: Path | None,
        name: str | None,
        name_add_timestamp: bool | None,
        name_use_dir: bool | None,
        overwrite: bool,
        store: str,
        ts_fmt: str | None = None,
        ts_name_fmt: str | None = None,
        variables: Iterable[tuple[str, str]] = (),
        status: bool = True,
        add_submission: bool = False,
    ):
        # No docstring here: click would expose it as the command's help text.
        created = app.make_demo_workflow(
            workflow_name=workflow_name,
            template_format=format,
            path=path,
            name=name,
            name_add_timestamp=name_add_timestamp,
            name_use_dir=name_use_dir,
            overwrite=overwrite,
            store=store,
            ts_fmt=ts_fmt,
            ts_name_fmt=ts_name_fmt,
            variables=dict(variables),
            status=status,
            add_submission=add_submission,
        )
        # The app call yields a Submission when one was requested, else a Workflow;
        # in both cases the workflow's path is what gets printed.
        if add_submission:
            assert isinstance(created, Submission)
            workflow_path = created.workflow.path
        else:
            assert isinstance(created, Workflow)
            workflow_path = created.path
        click.echo(workflow_path)

    @demo_workflow.command("go")
    @click.argument("workflow_name")
    @format_option
    @path_option
    @name_option
    @name_timestamp_option
    @name_dir_option
    @overwrite_option
    @store_option
    @ts_fmt_option
    @ts_name_fmt_option
    @variables_option
    @js_parallelism_option
    @wait_option
    @add_to_known_opt
    @print_idx_opt
    @tasks_opt
    @cancel_opt
    @submit_status_opt
    def make_and_submit_demo_workflow(
        workflow_name: str,
        format: Literal["json", "yaml"] | None,
        path: Path | None,
        name: str | None,
        name_add_timestamp: bool | None,
        name_use_dir: bool | None,
        overwrite: bool,
        store: str,
        ts_fmt: str | None = None,
        ts_name_fmt: str | None = None,
        variables: Iterable[tuple[str, str]] = (),
        js_parallelism: bool | None = None,
        wait: bool = False,
        add_to_known: bool = True,
        print_idx: bool = False,
        tasks: list[int] | None = None,
        cancel: bool = False,
        status: bool = True,
    ):
        # No docstring here: click would expose it as the command's help text.
        submitted = app.make_and_submit_demo_workflow(
            workflow_name=workflow_name,
            template_format=format,
            path=path,
            name=name,
            name_add_timestamp=name_add_timestamp,
            name_use_dir=name_use_dir,
            overwrite=overwrite,
            store=store,
            ts_fmt=ts_fmt,
            ts_name_fmt=ts_name_fmt,
            variables=dict(variables),
            JS_parallelism=js_parallelism,
            wait=wait,
            add_to_known=add_to_known,
            return_idx=print_idx,
            tasks=tasks,
            cancel=cancel,
            status=status,
        )
        if not print_idx:
            return
        # With ``return_idx`` set, the result is a tuple; its second item is echoed.
        assert isinstance(submitted, tuple)
        click.echo(submitted[1])

    @demo_workflow.command("copy")
    @click.argument("workflow_name")
    @click.argument("destination")
    @click.option("--doc/--no-doc", default=True)
    def copy_demo_workflow(workflow_name: str, destination: str, doc: bool):
        # Thin pass-through to the app.
        app.copy_demo_workflow(name=workflow_name, dst=destination, doc=doc)

    @demo_workflow.command("show")
    @click.argument("workflow_name")
    @click.option("--syntax/--no-syntax", default=True)
    @click.option("--doc/--no-doc", default=True)
    def show_demo_workflow(workflow_name: str, syntax: bool, doc: bool):
        # Thin pass-through to the app.
        app.show_demo_workflow(workflow_name, syntax=syntax, doc=doc)

    return demo_workflow
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common Click command line options related to the helper.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from hpcflow.sdk.helper.helper import (
|
|
11
|
+
DEFAULT_TIMEOUT,
|
|
12
|
+
DEFAULT_TIMEOUT_CHECK,
|
|
13
|
+
DEFAULT_WATCH_INTERVAL,
|
|
14
|
+
get_helper_log_path,
|
|
15
|
+
get_watcher_file_path,
|
|
16
|
+
get_helper_watch_list,
|
|
17
|
+
start_helper,
|
|
18
|
+
stop_helper,
|
|
19
|
+
restart_helper,
|
|
20
|
+
clear_helper,
|
|
21
|
+
run_helper,
|
|
22
|
+
get_helper_PID,
|
|
23
|
+
get_helper_uptime,
|
|
24
|
+
)
|
|
25
|
+
from hpcflow.sdk.cli_common import _add_doc_from_help
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from ..app import BaseApp
|
|
29
|
+
|
|
30
|
+
#: Helper option: ``--timeout``
timeout_option = click.option(
    "--timeout",
    help="Helper timeout in seconds.",
    type=click.FLOAT,
    default=DEFAULT_TIMEOUT,
    show_default=True,
)
#: Helper option: ``--timeout-check-interval``
timeout_check_interval_option = click.option(
    "--timeout-check-interval",
    help="Interval between testing if the timeout has been exceeded in seconds.",
    type=click.FLOAT,
    default=DEFAULT_TIMEOUT_CHECK,
    show_default=True,
)
#: Helper option: ``--watch-interval``
watch_interval_option = click.option(
    "--watch-interval",
    help=(
        "Polling interval for watching workflows "
        "(and the workflow watch list) in seconds."
    ),
    type=click.FLOAT,
    default=DEFAULT_WATCH_INTERVAL,
    show_default=True,
)
# Presumably copies each option's help text onto the decorator as documentation
# (judging by the helper's name) -- confirm against ``cli_common``.
_add_doc_from_help(timeout_option, timeout_check_interval_option, watch_interval_option)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_helper_CLI(app: BaseApp):
    """Build the click group that provides the helper's server-like functionality."""

    # NOTE: the command docstrings below double as click help text, so they are
    # part of the user-visible CLI output.

    @click.group()
    def helper():
        pass

    @helper.command()
    @timeout_option
    @timeout_check_interval_option
    @watch_interval_option
    def start(timeout: float, timeout_check_interval: float, watch_interval: float):
        """Start the helper process."""
        start_helper(app, timeout, timeout_check_interval, watch_interval)

    @helper.command()
    def stop():
        """Stop the helper process, if it is running."""
        stop_helper(app)

    @helper.command()
    @timeout_option
    @timeout_check_interval_option
    @watch_interval_option
    def run(timeout: float, timeout_check_interval: float, watch_interval: float):
        """Run the helper functionality."""
        run_helper(app, timeout, timeout_check_interval, watch_interval)

    @helper.command()
    @timeout_option
    @timeout_check_interval_option
    @watch_interval_option
    def restart(timeout: float, timeout_check_interval: float, watch_interval: float):
        """Restart (or start) the helper process."""
        restart_helper(app, timeout, timeout_check_interval, watch_interval)

    @helper.command()
    @click.option("-f", "--file", is_flag=True)
    def pid(file: bool):
        """Get the process ID of the running helper, if running."""
        found = get_helper_PID(app)
        if not found:
            # Nothing is printed when no PID information is available.
            return
        helper_pid, helper_pid_file = found
        if file:
            click.echo(f"{helper_pid} ({helper_pid_file!s})")
        else:
            click.echo(helper_pid)

    @helper.command()
    def clear() -> None:
        """Remove the PID file (and kill the helper process if it exists). This should not
        normally be needed."""
        clear_helper(app)

    @helper.command()
    def uptime() -> None:
        """Get the uptime of the helper process, if it is running."""
        elapsed = get_helper_uptime(app)
        if elapsed:
            click.echo(elapsed)

    @helper.command()
    def log_path() -> None:
        """Get the path to the helper log file (may not exist)."""
        click.echo(get_helper_log_path(app))

    @helper.command()
    def watch_list_path() -> None:
        """Get the path to the workflow watch list file (may not exist)."""
        click.echo(get_watcher_file_path(app))

    @helper.command()
    def watch_list() -> None:
        """Get the list of workflows currently being watched."""
        # A falsy result (e.g. ``None``) is treated as an empty watch list.
        watched = get_helper_watch_list(app) or ()
        for entry in watched:
            click.echo(str(entry["path"]))

    return helper
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Implementation of a helper process used to monitor jobs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
import logging
|
|
8
|
+
from logging.handlers import RotatingFileHandler
|
|
9
|
+
import os
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
import signal
|
|
12
|
+
import subprocess
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
from typing import Any, TYPE_CHECKING
|
|
16
|
+
import psutil
|
|
17
|
+
|
|
18
|
+
from hpcflow.sdk.helper.watcher import MonitorController
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from collections.abc import Callable
|
|
22
|
+
from ..app import BaseApp
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
#: Default helper timeout, in seconds (one hour).
DEFAULT_TIMEOUT = 60 * 60
#: Default interval between timeout checks, in seconds.
DEFAULT_TIMEOUT_CHECK = 60
#: Default polling interval for watching workflows, in seconds.
DEFAULT_WATCH_INTERVAL = 10
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def kill_proc_tree(
    pid: int,
    sig=signal.SIGTERM,
    include_parent: bool = True,
    timeout: float | None = None,
    on_terminate: Callable[[psutil.Process], object] | None = None,
) -> tuple[list[psutil.Process], list[psutil.Process]]:
    """Kill a process tree (including grandchildren) with signal
    `sig` and return a (gone, still_alive) tuple.

    Parameters
    ----------
    pid:
        ID of the process at the root of the tree. Must not be the ID of the
        calling process.
    sig:
        Signal sent to every process in the tree.
    include_parent:
        If true, the root process is signalled as well as its descendants.
    timeout:
        Passed to `psutil.wait_procs` as its `timeout` argument.
    on_terminate:
        If specified, a callback function which is called as soon as a child
        terminates.

    Returns
    -------
    list[Process]:
        The process and subprocesses that have died.
    list[Process]:
        The process and subprocesses that are still alive.

    Raises
    ------
    AssertionError
        If `pid` is the ID of the calling process.
    """
    if pid == os.getpid():
        # Raised explicitly rather than via ``assert`` so the guard still holds
        # when Python runs with ``-O`` (which strips assert statements). The
        # exception type is unchanged for backward compatibility.
        raise AssertionError("won't kill myself")
    parent = psutil.Process(pid)
    targets = parent.children(recursive=True)
    if include_parent:
        targets.append(parent)
    for proc in targets:
        try:
            proc.send_signal(sig)
        except psutil.NoSuchProcess:
            # The process exited between enumeration and signalling.
            pass
    return psutil.wait_procs(targets, timeout=timeout, callback=on_terminate)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_PID_file_path(app: BaseApp) -> Path:
    """Return the location of the file that holds the helper's process ID.

    The file is ``pid.txt`` inside the app's user data directory; it is only
    expected to exist while the helper is running.
    """
    data_dir = app.user_data_dir
    return data_dir / "pid.txt"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_watcher_file_path(app: BaseApp) -> Path:
    """Return the location of the watcher file listing the workflows to watch.

    The file is ``watch_workflows.txt`` inside the app's user data directory.
    """
    data_dir = app.user_data_dir
    return data_dir / "watch_workflows.txt"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_helper_log_path(app: BaseApp) -> Path:
    """Return the location of the helper's log file.

    The file is ``helper.log`` inside the app's user data directory.
    """
    data_dir = app.user_data_dir
    return data_dir / "helper.log"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_helper_watch_list(app: BaseApp):
    """Return the workflows the helper process is currently watching.

    The watch file is parsed with
    ``MonitorController.parse_watch_workflows_file``; ``None`` is returned when
    the watch file does not exist.
    """
    watch_file = get_watcher_file_path(app)
    if not watch_file.exists():
        return None
    # The helper logger is only obtained once the file is known to exist.
    helper_logger = get_helper_logger(app)
    return MonitorController.parse_watch_workflows_file(watch_file, helper_logger)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def start_helper(
    app: BaseApp,
    timeout: timedelta | float = DEFAULT_TIMEOUT,
    timeout_check_interval: timedelta | float = DEFAULT_TIMEOUT_CHECK,
    watch_interval: timedelta | float = DEFAULT_WATCH_INTERVAL,
    logger: logging.Logger | None = None,
):
    """
    Start the helper process.

    If the PID file already exists, the recorded process ID is printed and
    nothing is launched. Otherwise the app's own invocation command is spawned
    as a detached child running ``helper run`` (stdin/stdout/stderr all
    discarded) and the child's process ID is written to the PID file.

    Parameters
    ----------
    app
        The application whose helper should be started.
    timeout, timeout_check_interval, watch_interval
        Durations forwarded to the child on its command line; ``timedelta``
        values are converted to seconds first.
    logger
        Logger to use; when omitted, the helper logger for `app` is used.

    Notes
    -----
    If the PID file cannot be written, the freshly spawned child is killed and
    this process exits with status 1.
    """

    def _as_seconds(value):
        # The child process takes plain numbers of seconds on its command line.
        return value.total_seconds() if isinstance(value, timedelta) else value

    PID_file = get_PID_file_path(app)
    if PID_file.is_file():
        with PID_file.open("rt") as fp:
            helper_pid = int(fp.read().strip())
        print(f"Helper already running, with process ID: {helper_pid}")
        return

    logger = logger or get_helper_logger(app)
    # Logged before conversion so the message shows the values as passed in.
    logger.info(
        f"Starting helper with timeout={timeout!r}, timeout_check_interval="
        f"{timeout_check_interval!r} and watch_interval={watch_interval!r}."
    )
    kwargs: dict[str, Any] = {}
    if os.name == "nt":
        # Avoid popping up a console window for the background process.
        kwargs["creationflags"] = getattr(subprocess, "CREATE_NO_WINDOW", 0)

    args = [
        *app.run_time_info.invocation_command,
        "--config-dir",
        str(app.config.config_directory),
        "helper",
        "run",
        "--timeout",
        str(_as_seconds(timeout)),
        "--timeout-check-interval",
        str(_as_seconds(timeout_check_interval)),
        "--watch-interval",
        str(_as_seconds(watch_interval)),
    ]

    proc = subprocess.Popen(
        args=args,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        **kwargs,
    )

    logger.info(f"Writing process ID {proc.pid} to file.")
    try:
        with PID_file.open("wt") as fp:
            fp.write(f"{proc.pid}\n")
    except OSError as err:
        # Any failure to record the PID (missing directory, permissions, ...)
        # would leave an untracked helper running, so kill it in every case,
        # not only for FileNotFoundError.
        logger.error(
            f"Could not write to the PID file {PID_file!r}; killing helper process. "
            f"Exception was: {err!r}"
        )
        proc.kill()
        sys.exit(1)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def restart_helper(
    app: BaseApp,
    timeout: timedelta | float = DEFAULT_TIMEOUT,
    timeout_check_interval: timedelta | float = DEFAULT_TIMEOUT_CHECK,
    watch_interval: timedelta | float = DEFAULT_WATCH_INTERVAL,
):
    """
    Restart the helper process.

    Stops the helper via `stop_helper`, then calls `start_helper` with the
    given durations, reusing the logger that `stop_helper` returned.
    """
    shared_logger = stop_helper(app, return_logger=True)
    start_helper(
        app,
        timeout=timeout,
        timeout_check_interval=timeout_check_interval,
        watch_interval=watch_interval,
        logger=shared_logger,
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_helper_PID(app: BaseApp):
    """
    Get the process ID of the helper process.

    Returns
    -------
    A ``(pid, pid_file_path)`` tuple read from the PID file, or ``None``
    (after printing a notice) when the PID file does not exist.
    """
    PID_file = get_PID_file_path(app)
    if PID_file.is_file():
        with PID_file.open("rt") as fp:
            recorded = fp.read()
        return int(recorded.strip()), PID_file

    print("Helper not running!")
    return None
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def stop_helper(app: BaseApp, return_logger: bool = False):
    """
    Stop the helper process.

    When a PID file is present, the recorded process (and its process tree) is
    killed, then the PID file and the watcher file are removed.

    Parameters
    ----------
    app
        The application whose helper should be stopped.
    return_logger
        If true, return the helper logger so the caller can reuse it.

    Returns
    -------
    The helper logger when `return_logger` is true, otherwise ``None``.
    """
    logger = get_helper_logger(app)
    if pid_info := get_helper_PID(app):
        logger.info("Stopping helper.")
        pid, pid_file = pid_info
        kill_proc_tree(pid=pid)
        # The helper removes its own PID file when it times out, so the file
        # may already be gone by the time we get here.
        pid_file.unlink(missing_ok=True)

        workflow_dirs_file_path = get_watcher_file_path(app)
        logger.info(f"Deleting watcher file: {str(workflow_dirs_file_path)}")
        # The watcher file may be absent (other code checks for its existence
        # before reading it), so a missing file must not abort the stop.
        workflow_dirs_file_path.unlink(missing_ok=True)

    return logger if return_logger else None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def clear_helper(app: BaseApp):
    """
    Stop the helper or remove any stale information relating to it.

    Tries a normal `stop_helper`; if psutil reports that the recorded process
    no longer exists, the leftover PID file (if any) is deleted instead.
    """
    try:
        stop_helper(app)
    except psutil.NoSuchProcess:
        pid_info = get_helper_PID(app)
        if not pid_info:
            return
        _, stale_pid_file = pid_info
        print(f"Removing file {stale_pid_file!r}")
        stale_pid_file.unlink()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_helper_uptime(app: BaseApp) -> None | timedelta:
    """
    Get the amount of time that the helper has been running.

    Returns
    -------
    The difference between the current time and the creation time that psutil
    reports for the recorded helper process, or ``None`` when no PID
    information is available.
    """
    pid_info = get_helper_PID(app)
    if pid_info:
        helper_proc = psutil.Process(pid_info[0])
        started_at = datetime.fromtimestamp(helper_proc.create_time())
        return datetime.now() - started_at
    return None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_helper_logger(app: BaseApp) -> logging.Logger:
    """
    Get the logger for helper-related messages.

    The logger is the module-level logger (named after this module), set to
    INFO level, with a rotating file handler writing to the helper log file
    (5 MiB per file, 3 backups).

    This function is called many times per process, and ``logging.getLogger``
    always returns the same logger object, so the file handler is attached
    only if an equivalent one is not already present. Otherwise every call
    would add another handler, duplicating each log line and leaking an open
    file handle.
    """
    log_path = get_helper_log_path(app)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # FileHandler stores its target as an absolute path in `baseFilename`.
    target = os.path.abspath(os.fspath(log_path))
    already_attached = any(
        isinstance(handler, RotatingFileHandler) and handler.baseFilename == target
        for handler in logger.handlers
    )
    if not already_attached:
        f_handler = RotatingFileHandler(log_path, maxBytes=(5 * 2**20), backupCount=3)
        f_format = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        f_handler.setFormatter(f_format)
        logger.addHandler(f_handler)

    return logger
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def helper_timeout(
    app: BaseApp,
    timeout: timedelta,
    controller: MonitorController,
    logger: logging.Logger,
):
    """Shut the helper down because it has run for longer than its timeout.

    In order: removes the PID file (if PID information is available), stops
    the controller and waits for it, deletes the controller's watcher file,
    then exits the process with status 0.
    """
    logger.info(f"Helper exiting due to timeout ({timeout!r}).")

    pid_info = get_helper_PID(app)
    if pid_info:
        _, pid_file = pid_info
        logger.info(f"Deleting PID file: {pid_file!r}.")
        pid_file.unlink()

    logger.info("Stopping all watchers.")
    controller.stop()
    controller.join()

    watcher_file = controller.workflow_dirs_file_path
    logger.info(f"Deleting watcher file: {str(watcher_file)}")
    watcher_file.unlink()

    sys.exit(0)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def run_helper(
    app: BaseApp,
    timeout: timedelta | float = DEFAULT_TIMEOUT,
    timeout_check_interval: timedelta | float = DEFAULT_TIMEOUT_CHECK,
    watch_interval: timedelta | float = DEFAULT_WATCH_INTERVAL,
):
    """
    Run the helper core.

    Creates a `MonitorController` for the watcher file and then loops, sleeping
    `timeout_check_interval` between checks. Once the elapsed time reaches
    `timeout` minus one check interval, `helper_timeout` is called, which
    exits the process. A keyboard interrupt stops the controller instead.

    Parameters
    ----------
    app
        The application this helper belongs to.
    timeout
        Maximum run time, as a ``timedelta`` or a number of seconds.
    timeout_check_interval
        Time between timeout checks, as a ``timedelta`` or a number of seconds.
    watch_interval
        Passed unchanged to `MonitorController`.
    """
    # TODO: when writing to watch_workflows from a workflow, copy, modify and then rename
    # this will be atomic - so there will be only one event fired.
    # Also return a local run ID (the position in the file) to be used in jobscript naming

    # TODO: we will want to set the timeout to be slightly more than the largest allowable
    # walltime in the case of scheduler submissions.

    max_runtime = timeout if isinstance(timeout, timedelta) else timedelta(seconds=timeout)

    # Keep the check interval both as a timedelta (for arithmetic) and as
    # seconds (for sleeping).
    if isinstance(timeout_check_interval, timedelta):
        check_delta = timeout_check_interval
        check_seconds = check_delta.total_seconds()
    else:
        check_seconds = timeout_check_interval
        check_delta = timedelta(seconds=check_seconds)

    start_time = datetime.now()
    logger = get_helper_logger(app)
    controller = MonitorController(get_watcher_file_path(app), watch_interval, logger)

    # Trigger one check interval early so the timeout is not overshot by a sleep.
    timeout_limit = max_runtime - check_delta
    try:
        while True:
            elapsed = datetime.now() - start_time
            if elapsed >= timeout_limit:
                helper_timeout(app, max_runtime, controller, logger)
            time.sleep(check_seconds)

    except KeyboardInterrupt:
        controller.stop()

    controller.join()  # wait for it to stop!
|