hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -461
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -490
- hpcflow/archive/archive.py +0 -307
- hpcflow/archive/cloud/cloud.py +0 -45
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -427
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -233
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2595
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -322
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -166
- hpcflow/variables.py +0 -543
- hpcflow-0.1.15.dist-info/METADATA +0 -168
- hpcflow-0.1.15.dist-info/RECORD +0 -45
- hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.15.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
hpcflow/models.py
DELETED
@@ -1,2595 +0,0 @@
|
|
|
1
|
-
"""`hpcflow.models.py`"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
import re
|
|
5
|
-
import os
|
|
6
|
-
import enum
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from math import ceil, floor
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from pprint import pprint
|
|
11
|
-
from subprocess import run, PIPE
|
|
12
|
-
from time import sleep
|
|
13
|
-
|
|
14
|
-
from sqlalchemy import (Column, Integer, DateTime, JSON, ForeignKey, Boolean,
|
|
15
|
-
Enum, String, select, Float)
|
|
16
|
-
from sqlalchemy.orm import relationship, deferred, Session, reconstructor
|
|
17
|
-
from sqlalchemy.exc import IntegrityError, OperationalError
|
|
18
|
-
|
|
19
|
-
from hpcflow.config import Config as CONFIG
|
|
20
|
-
from hpcflow._version import __version__
|
|
21
|
-
from hpcflow.archive.archive import Archive, TaskArchiveStatus
|
|
22
|
-
from hpcflow.base_db import Base
|
|
23
|
-
from hpcflow.archive.cloud.cloud import CloudProvider
|
|
24
|
-
from hpcflow.nesting import NestingType
|
|
25
|
-
from hpcflow.scheduler import SunGridEngine
|
|
26
|
-
from hpcflow.utils import coerce_same_length, zeropad, format_time_delta, get_random_hex, datetime_to_dict, timedelta_to_dict
|
|
27
|
-
from hpcflow.validation import validate_task_multiplicity
|
|
28
|
-
from hpcflow.variables import (
|
|
29
|
-
select_cmd_group_var_names, select_cmd_group_var_definitions,
|
|
30
|
-
extract_variable_names, resolve_variable_values, UnresolvedVariableError
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
SCHEDULER_MAP = {
|
|
34
|
-
'sge': SunGridEngine,
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class IterationStatus(enum.Enum):
|
|
39
|
-
|
|
40
|
-
pending = 'pending'
|
|
41
|
-
active = 'active'
|
|
42
|
-
complete = 'complete'
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class Workflow(Base):
|
|
46
|
-
"""Class to represent a Workflow."""
|
|
47
|
-
|
|
48
|
-
__tablename__ = 'workflow'
|
|
49
|
-
|
|
50
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
51
|
-
create_time = Column(DateTime)
|
|
52
|
-
pre_commands = Column(JSON)
|
|
53
|
-
_directory = Column('directory', String(255))
|
|
54
|
-
root_archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
|
|
55
|
-
root_archive_excludes = Column(JSON, nullable=True)
|
|
56
|
-
root_archive_directory = Column(String(255), nullable=True)
|
|
57
|
-
_profile_files = Column('profile_files', JSON, nullable=True)
|
|
58
|
-
loop = Column(JSON)
|
|
59
|
-
parallel_modes = Column(JSON, nullable=True)
|
|
60
|
-
|
|
61
|
-
command_groups = relationship(
|
|
62
|
-
'CommandGroup',
|
|
63
|
-
back_populates='workflow',
|
|
64
|
-
order_by='CommandGroup.exec_order',
|
|
65
|
-
)
|
|
66
|
-
submissions = relationship('Submission', back_populates='workflow')
|
|
67
|
-
variable_definitions = relationship('VarDefinition', back_populates='workflow')
|
|
68
|
-
root_archive = relationship('Archive', back_populates='workflow', uselist=False)
|
|
69
|
-
iterations = relationship(
|
|
70
|
-
'Iteration', back_populates='workflow', order_by='Iteration.order_id')
|
|
71
|
-
|
|
72
|
-
def __init__(self, directory, command_groups, var_definitions=None,
|
|
73
|
-
pre_commands=None, archives=None, root_archive_idx=None,
|
|
74
|
-
root_archive_excludes=None, profile_files=None, loop=None,
|
|
75
|
-
parallel_modes=None):
|
|
76
|
-
"""Method to initialise a new Workflow.
|
|
77
|
-
|
|
78
|
-
Parameters
|
|
79
|
-
----------
|
|
80
|
-
directory : str or Path
|
|
81
|
-
Directory in which the Workflow resides.
|
|
82
|
-
command_groups : list of dict
|
|
83
|
-
List of dictionaries that each represent a command group.
|
|
84
|
-
var_definitions : dict, optional
|
|
85
|
-
Dictionary whose keys are variable names and values are
|
|
86
|
-
dictionaries that define variable definitions. By default, set to
|
|
87
|
-
`None`, in which case it is assumed there are no variable
|
|
88
|
-
references in any of the command groups.
|
|
89
|
-
pre_commands : list of str
|
|
90
|
-
List of commands to execute on creation of the Workflow.
|
|
91
|
-
archives : list of dict
|
|
92
|
-
List of dicts representing archive locations. Each dict in
|
|
93
|
-
`command_groups` may contain keys `archive_idx` (which is an
|
|
94
|
-
index into `archives`) and `archive_excludes` (which is a list
|
|
95
|
-
of glob patterns to ignore when archiving). Each item in `archives`
|
|
96
|
-
contains the following keys:
|
|
97
|
-
name : str
|
|
98
|
-
host : str
|
|
99
|
-
path : str
|
|
100
|
-
root_archive_idx : int
|
|
101
|
-
Index into `archives` that sets the root archive for the workflow.
|
|
102
|
-
root_archive_excludes : list of str
|
|
103
|
-
File patterns to exclude from the root archive.
|
|
104
|
-
profile_files : list of Path, optional
|
|
105
|
-
If specified, the list of absolute file paths to the profile files used to
|
|
106
|
-
generate this workflow.
|
|
107
|
-
loop : dict, optional
|
|
108
|
-
If specified, keys are:
|
|
109
|
-
max_iterations : int
|
|
110
|
-
Maximum number of loop iterations to submit.
|
|
111
|
-
groups : list of int, optional
|
|
112
|
-
Which command groups to include in iterations beyond the first. If not
|
|
113
|
-
specified, all command groups are included in the loop.
|
|
114
|
-
parallel_modes : dict, optional
|
|
115
|
-
If specified, (case-insensitive) keys are one or more of: 'MPI', 'OpenMP'.
|
|
116
|
-
Each is a dict with allowed keys:
|
|
117
|
-
env : list of str
|
|
118
|
-
Environment set up required for a given parallel mode.
|
|
119
|
-
command : str
|
|
120
|
-
Command to prepend to any command group commands that use this
|
|
121
|
-
parallel mode.
|
|
122
|
-
|
|
123
|
-
"""
|
|
124
|
-
|
|
125
|
-
if loop is None:
|
|
126
|
-
loop = {
|
|
127
|
-
'max_iterations': 1,
|
|
128
|
-
'groups': [],
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
# Command group directories must be stored internally as variables:
|
|
132
|
-
for idx, i in enumerate(command_groups):
|
|
133
|
-
|
|
134
|
-
dir_var_value = '.'
|
|
135
|
-
|
|
136
|
-
if 'directory' in i:
|
|
137
|
-
|
|
138
|
-
var_names = extract_variable_names(
|
|
139
|
-
i['directory'], CONFIG.get('variable_delimiters'))
|
|
140
|
-
if len(var_names) > 1:
|
|
141
|
-
raise NotImplementedError()
|
|
142
|
-
elif not var_names:
|
|
143
|
-
# Value is set but is not a variable
|
|
144
|
-
dir_var_value = i['directory'] or dir_var_value
|
|
145
|
-
else:
|
|
146
|
-
# Value is already a variable; no action.
|
|
147
|
-
continue
|
|
148
|
-
|
|
149
|
-
dir_var_defn_name = CONFIG.get('default_cmd_group_dir_var_name')
|
|
150
|
-
|
|
151
|
-
command_groups[idx]['directory'] = '{1:}{0:}{2:}'.format(
|
|
152
|
-
dir_var_defn_name,
|
|
153
|
-
*CONFIG.get('variable_delimiters')
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
# Add new variable definition:
|
|
157
|
-
var_definitions.update({
|
|
158
|
-
dir_var_defn_name: {
|
|
159
|
-
'value': dir_var_value,
|
|
160
|
-
}
|
|
161
|
-
})
|
|
162
|
-
|
|
163
|
-
self._directory = str(directory)
|
|
164
|
-
self.profile_files = [i.relative_to(self.directory) for i in profile_files]
|
|
165
|
-
self.create_time = datetime.now()
|
|
166
|
-
self.pre_commands = pre_commands
|
|
167
|
-
self.variable_definitions = [
|
|
168
|
-
VarDefinition(name=k, **v) for k, v in var_definitions.items()
|
|
169
|
-
]
|
|
170
|
-
|
|
171
|
-
# Generate Archive objects:
|
|
172
|
-
archive_objs = []
|
|
173
|
-
archive_dir_names = []
|
|
174
|
-
if archives:
|
|
175
|
-
for i in archives:
|
|
176
|
-
arch_i = Archive(**i)
|
|
177
|
-
archive_objs.append(arch_i)
|
|
178
|
-
archive_dir_names.append(arch_i.get_archive_dir(self))
|
|
179
|
-
|
|
180
|
-
if root_archive_idx is not None:
|
|
181
|
-
self.root_archive = archive_objs[root_archive_idx]
|
|
182
|
-
self.root_archive_excludes = root_archive_excludes
|
|
183
|
-
self.root_archive_directory = archive_dir_names[root_archive_idx]
|
|
184
|
-
|
|
185
|
-
cmd_groups = []
|
|
186
|
-
for i in command_groups:
|
|
187
|
-
|
|
188
|
-
dir_var_name = extract_variable_names(
|
|
189
|
-
i['directory'], CONFIG.get('variable_delimiters'))[0]
|
|
190
|
-
|
|
191
|
-
dir_var_defn = [i for i in self.variable_definitions
|
|
192
|
-
if i.name == dir_var_name][0]
|
|
193
|
-
|
|
194
|
-
i.pop('directory')
|
|
195
|
-
i.update({
|
|
196
|
-
'directory_var': dir_var_defn,
|
|
197
|
-
})
|
|
198
|
-
arch_idx = i.pop('archive_idx', None)
|
|
199
|
-
if arch_idx is not None:
|
|
200
|
-
i.update({
|
|
201
|
-
'archive': archive_objs[arch_idx],
|
|
202
|
-
'archive_directory': archive_dir_names[arch_idx],
|
|
203
|
-
})
|
|
204
|
-
cmd_groups.append(CommandGroup(**i))
|
|
205
|
-
|
|
206
|
-
self.command_groups = cmd_groups
|
|
207
|
-
self.parallel_modes = parallel_modes
|
|
208
|
-
|
|
209
|
-
self.loop = loop
|
|
210
|
-
for i in range(self.loop['max_iterations']):
|
|
211
|
-
self.iterations.append(Iteration(i))
|
|
212
|
-
|
|
213
|
-
self.validate(archive_objs)
|
|
214
|
-
self._execute_pre_commands()
|
|
215
|
-
self.do_root_archive()
|
|
216
|
-
|
|
217
|
-
def __repr__(self):
|
|
218
|
-
out = ('{}('
|
|
219
|
-
'id={}, '
|
|
220
|
-
'directory={}, '
|
|
221
|
-
'pre_commands={}, '
|
|
222
|
-
'root_archive_id={}, '
|
|
223
|
-
'loop={}'
|
|
224
|
-
')').format(
|
|
225
|
-
self.__class__.__name__,
|
|
226
|
-
self.id_,
|
|
227
|
-
self.directory,
|
|
228
|
-
self.pre_commands,
|
|
229
|
-
self.root_archive_id,
|
|
230
|
-
self.loop,
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
return out
|
|
234
|
-
|
|
235
|
-
def get_variable_definition_by_name(self, variable_name):
|
|
236
|
-
"""Get the VarDefintion object using the variable name."""
|
|
237
|
-
|
|
238
|
-
for i in self.variable_definitions:
|
|
239
|
-
if i.name == variable_name:
|
|
240
|
-
return i
|
|
241
|
-
|
|
242
|
-
msg = ('Cannot find variable definition with '
|
|
243
|
-
'name "{}"'.format(variable_name))
|
|
244
|
-
raise ValueError(msg)
|
|
245
|
-
|
|
246
|
-
@property
|
|
247
|
-
def first_iteration(self):
|
|
248
|
-
return self.iterations[0]
|
|
249
|
-
|
|
250
|
-
@property
|
|
251
|
-
def profile_files(self):
|
|
252
|
-
if self._profile_files:
|
|
253
|
-
return [Path(i) for i in self._profile_files]
|
|
254
|
-
else:
|
|
255
|
-
return []
|
|
256
|
-
|
|
257
|
-
@profile_files.setter
|
|
258
|
-
def profile_files(self, profile_files):
|
|
259
|
-
if profile_files:
|
|
260
|
-
self._profile_files = [str(i) for i in profile_files]
|
|
261
|
-
|
|
262
|
-
@property
|
|
263
|
-
def has_alternate_scratch(self):
|
|
264
|
-
return bool(self.all_alternate_scratch)
|
|
265
|
-
|
|
266
|
-
@property
|
|
267
|
-
def all_alternate_scratch(self):
|
|
268
|
-
out = list(set([i.alternate_scratch for i in self.command_groups
|
|
269
|
-
if i.alternate_scratch]))
|
|
270
|
-
return out
|
|
271
|
-
|
|
272
|
-
@property
|
|
273
|
-
def directory(self):
|
|
274
|
-
return Path(self._directory)
|
|
275
|
-
|
|
276
|
-
def validate(self, archive_objs):
|
|
277
|
-
cmd_group_list = []
|
|
278
|
-
for i in self.command_groups:
|
|
279
|
-
cmd_group_list.append({
|
|
280
|
-
'is_job_array': i.is_job_array,
|
|
281
|
-
'exec_order': i.exec_order,
|
|
282
|
-
'nesting': i.nesting,
|
|
283
|
-
})
|
|
284
|
-
|
|
285
|
-
err = '[Workflow instantiation error]'
|
|
286
|
-
cmd_group_list = validate_task_multiplicity(cmd_group_list, err)
|
|
287
|
-
|
|
288
|
-
for i_idx, i in enumerate(cmd_group_list):
|
|
289
|
-
cmd_group = self.command_groups[i_idx]
|
|
290
|
-
cmd_group.is_job_array = i['is_job_array']
|
|
291
|
-
cmd_group.exec_order = i['exec_order']
|
|
292
|
-
cmd_group.nesting = i['nesting']
|
|
293
|
-
|
|
294
|
-
# If using an Archive with a cloud provider, check access:
|
|
295
|
-
for i in archive_objs:
|
|
296
|
-
if i.cloud_provider != CloudProvider.null:
|
|
297
|
-
msg = f'Checking access to cloud storage ({i.name})...'
|
|
298
|
-
print(msg, end='', flush=True)
|
|
299
|
-
i.cloud_provider.check_access()
|
|
300
|
-
|
|
301
|
-
def add_submission(self, project, task_range=None):
|
|
302
|
-
"""Add a new submission to this Workflow.
|
|
303
|
-
|
|
304
|
-
Parameters
|
|
305
|
-
----------
|
|
306
|
-
project : Project
|
|
307
|
-
task_ranges : list, optional
|
|
308
|
-
If specified, must be a list of length equal to the number of
|
|
309
|
-
channels in the Workflow. Each list element specifies which tasks
|
|
310
|
-
to submit from each Workflow channel. Each element may be either a
|
|
311
|
-
list, a string "all", or `None`. If an element is a string "all",
|
|
312
|
-
all tasks within the specified channel will be submitted. If an
|
|
313
|
-
element is `None`, no tasks within the specified channel will be
|
|
314
|
-
submitted. If an element is a list, it must have either two or
|
|
315
|
-
three elements; if it has two elements, these signify the first and
|
|
316
|
-
last tasks, inclusively, to submit from that channel. By default,
|
|
317
|
-
the task step size is one, but this can be chosen as a third list
|
|
318
|
-
entry. By default, set to `None`, in which case all tasks from all
|
|
319
|
-
channels are included.
|
|
320
|
-
|
|
321
|
-
Notes
|
|
322
|
-
-----
|
|
323
|
-
We are temporarily restricting the number of channels to 1, since
|
|
324
|
-
supporting multiple channels requires some more technical work. This
|
|
325
|
-
restriction is enforced in the `validation.validate_task_multiplicity`
|
|
326
|
-
function.
|
|
327
|
-
|
|
328
|
-
Examples
|
|
329
|
-
--------
|
|
330
|
-
Submit all tasks from all channels:
|
|
331
|
-
>>> workflow.add_submission()
|
|
332
|
-
|
|
333
|
-
Submit tasks 1, 2, 3, 4 and 5 from the first and only channel:
|
|
334
|
-
>>> workflow.add_submission([[1, 5]])
|
|
335
|
-
|
|
336
|
-
Submit tasks 1 and 3 from the first channel, and tasks 2, 3 and 4 from
|
|
337
|
-
the second channel:
|
|
338
|
-
>>> workflow.add_submission([[1, 4, 2], [2, 4]])
|
|
339
|
-
|
|
340
|
-
Submit all tasks from the first channel, and tasks 2 and 7 from the
|
|
341
|
-
second channel:
|
|
342
|
-
>>> workflow.add_submission(['all', (2, 7, 5)])
|
|
343
|
-
|
|
344
|
-
Submit all tasks from the first channel and no tasks from the second
|
|
345
|
-
channel:
|
|
346
|
-
>>> workflow.add_submission(['all', None])
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
What to do:
|
|
350
|
-
-----------
|
|
351
|
-
|
|
352
|
-
0. Firstly, resolve variable values for the first command group.
|
|
353
|
-
1. Need to identify which command groups must have their
|
|
354
|
-
var_multiplicity resolved at submit time, and raise if it cannot
|
|
355
|
-
be done. For `is_job_array=False` command groups, var_multiplicity
|
|
356
|
-
does not need to be known at submit-time, since the number of
|
|
357
|
-
output tasks will be known (either one [for `nesting=hold`], or
|
|
358
|
-
equal to number of input tasks [for `nesting=None`]).
|
|
359
|
-
2. To do this, organise command groups into scheduler groups,
|
|
360
|
-
which are delineated by command groups with `nesting=hold`.
|
|
361
|
-
3. For each scheduler group, go through the command groups in order
|
|
362
|
-
and resolve the `var_multiplicity` if it is required. This is not
|
|
363
|
-
the same as actually resolving the variable values. And we don't
|
|
364
|
-
need to do that at submit-time, except for the very first command
|
|
365
|
-
group! (Or rather, since submit-time and run-time coincide for
|
|
366
|
-
the first command group, we have the *opportunity* to resolve
|
|
367
|
-
variable values for the first command group; in general, variable
|
|
368
|
-
values in a given command group may depend on the commands run in
|
|
369
|
-
a previous command group, so this cannot be done.)
|
|
370
|
-
|
|
371
|
-
"""
|
|
372
|
-
|
|
373
|
-
# print('Workflow.add_submission: task_range: {}'.format(task_range), flush=True)
|
|
374
|
-
|
|
375
|
-
submission = Submission(self, task_range) # Generate CGSs and Tasks
|
|
376
|
-
submission.write_submit_dirs(project.hf_dir)
|
|
377
|
-
js_paths = submission.write_jobscripts(project.hf_dir)
|
|
378
|
-
submission.submit_jobscripts(js_paths)
|
|
379
|
-
|
|
380
|
-
return submission
|
|
381
|
-
|
|
382
|
-
def get_num_channels(self, exec_order=0):
|
|
383
|
-
"""Get the number of command groups with a given execution order.
|
|
384
|
-
|
|
385
|
-
Parameters
|
|
386
|
-
----------
|
|
387
|
-
exec_order : int, optional
|
|
388
|
-
The execution order at which to count command groups.
|
|
389
|
-
|
|
390
|
-
Returns
|
|
391
|
-
-------
|
|
392
|
-
num_channels : int
|
|
393
|
-
The number of command groups at the given execution order.
|
|
394
|
-
|
|
395
|
-
"""
|
|
396
|
-
|
|
397
|
-
num_channels = 0
|
|
398
|
-
for i in self.command_groups:
|
|
399
|
-
if i.exec_order == exec_order:
|
|
400
|
-
num_channels += 1
|
|
401
|
-
|
|
402
|
-
return num_channels
|
|
403
|
-
|
|
404
|
-
def _validate_task_ranges(self, task_ranges):
|
|
405
|
-
"""Validate task ranges.
|
|
406
|
-
|
|
407
|
-
Parameters
|
|
408
|
-
----------
|
|
409
|
-
task_ranges : list
|
|
410
|
-
|
|
411
|
-
Returns
|
|
412
|
-
-------
|
|
413
|
-
task_ranges_valid : list
|
|
414
|
-
|
|
415
|
-
"""
|
|
416
|
-
|
|
417
|
-
# Check length equal to num_channels:
|
|
418
|
-
if len(task_ranges) != self.get_num_channels():
|
|
419
|
-
msg = ('The number of task ranges specified must be equal to the '
|
|
420
|
-
'number of channels in the workflow, which is {}, but {} '
|
|
421
|
-
'task ranges were specified.')
|
|
422
|
-
raise ValueError(msg.format(self.get_num_channels(),
|
|
423
|
-
len(task_ranges)))
|
|
424
|
-
|
|
425
|
-
task_range_msg = (
|
|
426
|
-
'Each task range must be specified as either a list with two or '
|
|
427
|
-
'three elements, representing the first and last task and '
|
|
428
|
-
'(optionally) the step size, `None`, or the string "all".'
|
|
429
|
-
)
|
|
430
|
-
|
|
431
|
-
task_ranges_valid = []
|
|
432
|
-
for i in task_ranges:
|
|
433
|
-
|
|
434
|
-
# Validate:
|
|
435
|
-
if isinstance(i, list):
|
|
436
|
-
if len(i) not in [2, 3]:
|
|
437
|
-
raise ValueError(task_range_msg)
|
|
438
|
-
elif i not in ['all', None]:
|
|
439
|
-
raise ValueError(task_range_msg)
|
|
440
|
-
|
|
441
|
-
task_range_i = i
|
|
442
|
-
if i == 'all':
|
|
443
|
-
# Replace "all" with [n, m, s]
|
|
444
|
-
task_range_i = [1, -1, 1]
|
|
445
|
-
|
|
446
|
-
elif isinstance(i, list) and len(i) == 2:
|
|
447
|
-
# Add step size of 1:
|
|
448
|
-
task_range_i += [1]
|
|
449
|
-
|
|
450
|
-
if task_range_i[1] != -1:
|
|
451
|
-
# For known number of tasks, check m >= n >= 1:
|
|
452
|
-
if task_range_i[0] < 1:
|
|
453
|
-
msg = 'Starting task, `n`, must be >= 1.'
|
|
454
|
-
raise ValueError(msg)
|
|
455
|
-
if task_range_i[1] < task_range_i[0]:
|
|
456
|
-
msg = 'Ending task, `m`, must be >= starting task, `n`.'
|
|
457
|
-
raise ValueError(msg)
|
|
458
|
-
|
|
459
|
-
task_ranges_valid.append(task_range_i)
|
|
460
|
-
|
|
461
|
-
return task_ranges_valid
|
|
462
|
-
|
|
463
|
-
def _execute_pre_commands(self):
|
|
464
|
-
|
|
465
|
-
for i in self.pre_commands:
|
|
466
|
-
|
|
467
|
-
proc = run(i, shell=True, stdout=PIPE, stderr=PIPE)
|
|
468
|
-
pre_cmd_out = proc.stdout.decode()
|
|
469
|
-
pre_cmd_err = proc.stderr.decode()
|
|
470
|
-
|
|
471
|
-
def do_root_archive(self):
|
|
472
|
-
"""Copy the workflow directory to the root archive location."""
|
|
473
|
-
|
|
474
|
-
if self.root_archive:
|
|
475
|
-
self.root_archive.execute(self.root_archive_excludes,
|
|
476
|
-
self.root_archive_directory)
|
|
477
|
-
|
|
478
|
-
def get_stats(self, jsonable=True, datetime_dicts=False):
|
|
479
|
-
"""Get task statistics for this workflow."""
|
|
480
|
-
out = {
|
|
481
|
-
'workflow_id': self.id_,
|
|
482
|
-
'submissions': [i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
|
|
483
|
-
for i in self.submissions]
|
|
484
|
-
}
|
|
485
|
-
return out
|
|
486
|
-
|
|
487
|
-
def kill_active(self):
|
|
488
|
-
"""Kill any active scheduled jobs associated with the workflow."""
|
|
489
|
-
|
|
490
|
-
kill_scheduler_ids = []
|
|
491
|
-
for sub in self.submissions:
|
|
492
|
-
for cg_sub in sub.command_group_submissions:
|
|
493
|
-
for iteration in self.iterations:
|
|
494
|
-
cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
|
|
495
|
-
if cg_sub_iter:
|
|
496
|
-
if cg_sub_iter.scheduler_job_id is not None:
|
|
497
|
-
kill_scheduler_ids.append(cg_sub_iter.scheduler_job_id)
|
|
498
|
-
if cg_sub_iter.scheduler_stats_job_id is not None:
|
|
499
|
-
kill_scheduler_ids.append(cg_sub_iter.scheduler_stats_job_id)
|
|
500
|
-
|
|
501
|
-
print('Need to kill: {}'.format(kill_scheduler_ids))
|
|
502
|
-
del_cmd = ['qdel'] + [str(i) for i in kill_scheduler_ids]
|
|
503
|
-
proc = run(del_cmd, stdout=PIPE, stderr=PIPE)
|
|
504
|
-
qdel_out = proc.stdout.decode()
|
|
505
|
-
qdel_err = proc.stderr.decode()
|
|
506
|
-
print(qdel_out)
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
class CommandGroup(Base):
|
|
510
|
-
"""Class to represent a command group, which is roughly translated into a
|
|
511
|
-
job script."""
|
|
512
|
-
|
|
513
|
-
__tablename__ = 'command_group'
|
|
514
|
-
|
|
515
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
516
|
-
workflow_id = Column(Integer, ForeignKey('workflow.id'))
|
|
517
|
-
directory_variable_id = Column(Integer, ForeignKey('var_definition.id'))
|
|
518
|
-
archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
|
|
519
|
-
|
|
520
|
-
name = Column(String(255), nullable=True)
|
|
521
|
-
stats_name = Column(String(255), nullable=True)
|
|
522
|
-
commands = Column(JSON)
|
|
523
|
-
is_job_array = Column(Boolean)
|
|
524
|
-
exec_order = Column(Integer)
|
|
525
|
-
nesting = Column(Enum(NestingType), nullable=True)
|
|
526
|
-
environment = Column(JSON, nullable=True)
|
|
527
|
-
_scheduler = Column('scheduler', JSON)
|
|
528
|
-
profile_name = Column(String(255), nullable=True)
|
|
529
|
-
profile_order = Column(Integer, nullable=True)
|
|
530
|
-
archive_excludes = Column(JSON, nullable=True)
|
|
531
|
-
archive_directory = Column(String(255), nullable=True)
|
|
532
|
-
_alternate_scratch = Column('alternate_scratch', String(255), nullable=True)
|
|
533
|
-
stats = Column(Boolean)
|
|
534
|
-
|
|
535
|
-
archive = relationship('Archive', back_populates='command_groups')
|
|
536
|
-
workflow = relationship('Workflow', back_populates='command_groups')
|
|
537
|
-
command_group_submissions = relationship('CommandGroupSubmission',
|
|
538
|
-
back_populates='command_group')
|
|
539
|
-
|
|
540
|
-
directory_variable = relationship('VarDefinition')
|
|
541
|
-
|
|
542
|
-
_scheduler_obj = None
|
|
543
|
-
|
|
544
|
-
def __repr__(self):
|
|
545
|
-
out = (
|
|
546
|
-
'{}('
|
|
547
|
-
'commands={!r}, '
|
|
548
|
-
'is_job_array={!r}, '
|
|
549
|
-
'nesting={!r}'
|
|
550
|
-
')'
|
|
551
|
-
).format(
|
|
552
|
-
self.__class__.__name__,
|
|
553
|
-
self.commands,
|
|
554
|
-
self.is_job_array,
|
|
555
|
-
self.nesting,
|
|
556
|
-
)
|
|
557
|
-
return out
|
|
558
|
-
|
|
559
|
-
def __init__(self, commands, directory_var, is_job_array=True,
|
|
560
|
-
exec_order=None, nesting=None, environment=None, scheduler=None,
|
|
561
|
-
profile_name=None, profile_order=None, archive=None,
|
|
562
|
-
archive_excludes=None, archive_directory=None, alternate_scratch=None,
|
|
563
|
-
stats=None, name=None, stats_name=None):
|
|
564
|
-
"""Method to initialise a new CommandGroup.
|
|
565
|
-
|
|
566
|
-
Parameters
|
|
567
|
-
----------
|
|
568
|
-
commands : list of dict
|
|
569
|
-
List of dicts containing commands to execute.
|
|
570
|
-
directory_var : VarDefinition
|
|
571
|
-
The working directory for this command group. TODO...
|
|
572
|
-
is_job_array : bool, optional
|
|
573
|
-
If True, the command group is executed as a job array. True by
|
|
574
|
-
default.
|
|
575
|
-
exec_order : int, optional
|
|
576
|
-
Execution order of this command relative to other command groups in
|
|
577
|
-
the Workflow. By default, `None`.
|
|
578
|
-
nesting : str, optional
|
|
579
|
-
Either "nest" or "hold". This determines how the task multiplicity
|
|
580
|
-
of this command group joins together with the task multiplicity of
|
|
581
|
-
the previous command group (i.e. the command group with the lower
|
|
582
|
-
execution order as determined by `exec_order`). If "nest", each
|
|
583
|
-
task from the previous command group, once completed, will fork
|
|
584
|
-
into multiple tasks in the current command group. If "hold", all
|
|
585
|
-
tasks in the current command group will only begin once all tasks
|
|
586
|
-
in the previous command group have completed. If `None`, the number
|
|
587
|
-
of tasks in the previous and current command groups must match,
|
|
588
|
-
since a given task in the current command group will only begin
|
|
589
|
-
once its corresponding task in the previous command group has
|
|
590
|
-
completed. By default, set to `None`.
|
|
591
|
-
environment : list of str, optional
|
|
592
|
-
List of commands to be run to set up the environment for the command group. By
|
|
593
|
-
default set to `None`.
|
|
594
|
-
scheduler : dict, optional
|
|
595
|
-
Scheduler type and options to be passed directly to the scheduler. By default,
|
|
596
|
-
`None`, in which case the DirectExecution scheduler is used and no additional
|
|
597
|
-
options are passed.
|
|
598
|
-
profile_name : str, optional
|
|
599
|
-
If the command group was generated as part of a job profile file,
|
|
600
|
-
the profile name should be passed here.
|
|
601
|
-
profile_order : int, optional
|
|
602
|
-
If the command group was generated as part of a job profile file,
|
|
603
|
-
the profile order should be passed here.
|
|
604
|
-
archive : Archive, optional
|
|
605
|
-
The Archive object associated with this command group.
|
|
606
|
-
archive_excludes : list of str
|
|
607
|
-
List of glob patterns representing files that should be excluding
|
|
608
|
-
when archiving this command group.
|
|
609
|
-
archive_directory : str or Path, optional
|
|
610
|
-
Name of the directory in which the archive for this command group will reside.
|
|
611
|
-
alternate_scratch : str, optional
|
|
612
|
-
Location of alternate scratch in which to run commands.
|
|
613
|
-
|
|
614
|
-
TODO: document how `nesting` interacts with `is_job_array`.
|
|
615
|
-
|
|
616
|
-
"""
|
|
617
|
-
|
|
618
|
-
self.commands = commands
|
|
619
|
-
self.is_job_array = is_job_array
|
|
620
|
-
self.exec_order = exec_order
|
|
621
|
-
self.nesting = nesting
|
|
622
|
-
self.environment = environment
|
|
623
|
-
self.scheduler = scheduler
|
|
624
|
-
self.directory_variable = directory_var
|
|
625
|
-
self.profile_name = profile_name
|
|
626
|
-
self.profile_order = profile_order
|
|
627
|
-
self.stats = stats
|
|
628
|
-
self.name = name
|
|
629
|
-
self.stats_name = stats_name
|
|
630
|
-
|
|
631
|
-
self.archive = archive
|
|
632
|
-
self.archive_excludes = archive_excludes
|
|
633
|
-
self.archive_directory = archive_directory
|
|
634
|
-
|
|
635
|
-
self._alternate_scratch = alternate_scratch
|
|
636
|
-
|
|
637
|
-
self.validate()
|
|
638
|
-
|
|
639
|
-
@reconstructor
|
|
640
|
-
def init_on_load(self):
|
|
641
|
-
self.scheduler = self._scheduler
|
|
642
|
-
|
|
643
|
-
def validate(self):
|
|
644
|
-
|
|
645
|
-
# Check at least one command:
|
|
646
|
-
if not self.commands:
|
|
647
|
-
msg = 'At least one command must be specified.'
|
|
648
|
-
raise ValueError(msg)
|
|
649
|
-
|
|
650
|
-
self.nesting = NestingType[self.nesting] if self.nesting else None
|
|
651
|
-
|
|
652
|
-
# Check alternate scratch exists
|
|
653
|
-
if self.alternate_scratch:
|
|
654
|
-
if not self.alternate_scratch.is_dir():
|
|
655
|
-
msg = 'Alternate scratch "{}" is not an existing directory.'
|
|
656
|
-
raise ValueError(msg.format(self.alternate_scratch))
|
|
657
|
-
|
|
658
|
-
@staticmethod
|
|
659
|
-
def get_command_lines(commands):
|
|
660
|
-
"""Get all lines in the commands list."""
|
|
661
|
-
out = []
|
|
662
|
-
for i in commands:
|
|
663
|
-
if 'line' in i:
|
|
664
|
-
out.append(i['line'])
|
|
665
|
-
elif 'subshell' in i:
|
|
666
|
-
out.extend(CommandGroup.get_command_lines(i['subshell']))
|
|
667
|
-
return out
|
|
668
|
-
|
|
669
|
-
@property
|
|
670
|
-
def scheduler(self):
|
|
671
|
-
return self._scheduler_obj
|
|
672
|
-
|
|
673
|
-
@scheduler.setter
|
|
674
|
-
def scheduler(self, scheduler):
|
|
675
|
-
|
|
676
|
-
if 'name' not in scheduler:
|
|
677
|
-
msg = 'Scheduler must have a name that is one of: {}'
|
|
678
|
-
raise ValueError(msg.format(list(SCHEDULER_MAP.keys())))
|
|
679
|
-
|
|
680
|
-
sch_name = scheduler['name']
|
|
681
|
-
if sch_name not in SCHEDULER_MAP.keys():
|
|
682
|
-
msg = 'Scheduler "{}" is not known.'.format(scheduler)
|
|
683
|
-
raise ValueError(msg)
|
|
684
|
-
|
|
685
|
-
sch_class = SCHEDULER_MAP[sch_name]
|
|
686
|
-
self._scheduler_obj = sch_class(
|
|
687
|
-
options=scheduler['options'],
|
|
688
|
-
output_dir=scheduler['output_dir'],
|
|
689
|
-
error_dir=scheduler['error_dir'],
|
|
690
|
-
)
|
|
691
|
-
self._scheduler = scheduler
|
|
692
|
-
|
|
693
|
-
@property
|
|
694
|
-
def alternate_scratch(self):
|
|
695
|
-
if self._alternate_scratch:
|
|
696
|
-
return Path(self._alternate_scratch)
|
|
697
|
-
else:
|
|
698
|
-
return None
|
|
699
|
-
|
|
700
|
-
@property
|
|
701
|
-
def variable_names(self):
|
|
702
|
-
"""Get those variable names associated with this command group."""
|
|
703
|
-
|
|
704
|
-
var_names = select_cmd_group_var_names(
|
|
705
|
-
self.get_command_lines(self.commands),
|
|
706
|
-
self.directory_variable.value
|
|
707
|
-
)
|
|
708
|
-
return var_names
|
|
709
|
-
|
|
710
|
-
@property
|
|
711
|
-
def variable_definitions(self):
|
|
712
|
-
"""Get those variable definitions associated with this command group,
|
|
713
|
-
excluding those that appear embedded within other variables."""
|
|
714
|
-
|
|
715
|
-
var_names = self.variable_names
|
|
716
|
-
var_defns = []
|
|
717
|
-
for i in self.workflow.variable_definitions:
|
|
718
|
-
if i.name in var_names:
|
|
719
|
-
var_defns.append(i)
|
|
720
|
-
|
|
721
|
-
return var_defns
|
|
722
|
-
|
|
723
|
-
@property
|
|
724
|
-
def variable_definitions_recursive(self):
|
|
725
|
-
"""Get those variable definitions associated with this command group,
|
|
726
|
-
including those that appear embedded within other variables."""
|
|
727
|
-
|
|
728
|
-
var_defns_dict = {
|
|
729
|
-
i.name: {
|
|
730
|
-
'data': i.data,
|
|
731
|
-
'file_regex': i.file_regex,
|
|
732
|
-
'file_contents': i.file_contents,
|
|
733
|
-
'value': i.value,
|
|
734
|
-
}
|
|
735
|
-
for i in self.workflow.variable_definitions
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
cmd_group_var_defns = select_cmd_group_var_definitions(
|
|
739
|
-
var_defns_dict,
|
|
740
|
-
self.get_command_lines(self.commands),
|
|
741
|
-
self.directory_variable.value,
|
|
742
|
-
)
|
|
743
|
-
|
|
744
|
-
var_defns = [
|
|
745
|
-
i for i in self.workflow.variable_definitions
|
|
746
|
-
if i.name in cmd_group_var_defns
|
|
747
|
-
]
|
|
748
|
-
|
|
749
|
-
return var_defns
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
class VarDefinition(Base):
|
|
753
|
-
"""Class to represent a variable definition."""
|
|
754
|
-
|
|
755
|
-
__tablename__ = 'var_definition'
|
|
756
|
-
|
|
757
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
758
|
-
workflow_id = Column('workflow_id', Integer, ForeignKey('workflow.id'))
|
|
759
|
-
|
|
760
|
-
name = Column(String(255))
|
|
761
|
-
data = Column(JSON, nullable=True)
|
|
762
|
-
file_regex = Column(JSON, nullable=True)
|
|
763
|
-
file_contents = Column(JSON, nullable=True)
|
|
764
|
-
value = Column(String(255), nullable=True)
|
|
765
|
-
|
|
766
|
-
workflow = relationship('Workflow', back_populates='variable_definitions')
|
|
767
|
-
variable_values = relationship(
|
|
768
|
-
'VarValue',
|
|
769
|
-
back_populates='variable_definition',
|
|
770
|
-
order_by='VarValue.order_id',
|
|
771
|
-
)
|
|
772
|
-
|
|
773
|
-
def __repr__(self):
|
|
774
|
-
out = ('{}('
|
|
775
|
-
'name={!r}, '
|
|
776
|
-
'data={!r}, '
|
|
777
|
-
'file_regex={!r}, '
|
|
778
|
-
'value={!r}'
|
|
779
|
-
')').format(
|
|
780
|
-
self.__class__.__name__,
|
|
781
|
-
self.name,
|
|
782
|
-
self.data,
|
|
783
|
-
self.file_regex,
|
|
784
|
-
self.value,
|
|
785
|
-
)
|
|
786
|
-
return out
|
|
787
|
-
|
|
788
|
-
def __init__(self, name, data=None, file_regex=None, value=None, file_contents=None):
|
|
789
|
-
|
|
790
|
-
self.name = name
|
|
791
|
-
self.data = data
|
|
792
|
-
self.file_regex = file_regex
|
|
793
|
-
self.file_contents = file_contents
|
|
794
|
-
self.value = value
|
|
795
|
-
|
|
796
|
-
def is_base_variable(self):
|
|
797
|
-
"""Check if the variable depends on any other variables."""
|
|
798
|
-
|
|
799
|
-
if extract_variable_names(self.value,
|
|
800
|
-
CONFIG.get('variable_delimiters')):
|
|
801
|
-
return False
|
|
802
|
-
else:
|
|
803
|
-
return True
|
|
804
|
-
|
|
805
|
-
def get_dependent_variable_names(self):
|
|
806
|
-
"""Get the names of variables on which this variable depends."""
|
|
807
|
-
return extract_variable_names(self.value,
|
|
808
|
-
CONFIG.get('variable_delimiters'))
|
|
809
|
-
|
|
810
|
-
def get_multiplicity(self, submission):
|
|
811
|
-
"""Get the value multiplicity of this variable for a given
|
|
812
|
-
submission.
|
|
813
|
-
|
|
814
|
-
TODO: this should first try to get multiplicity from values (as a
|
|
815
|
-
function of cmd group directory?)
|
|
816
|
-
|
|
817
|
-
"""
|
|
818
|
-
|
|
819
|
-
# First check if the variable is resolved.
|
|
820
|
-
|
|
821
|
-
var_values = {}
|
|
822
|
-
for i in self.variable_values:
|
|
823
|
-
if i.submission == submission:
|
|
824
|
-
if i.directory_value.value not in var_values:
|
|
825
|
-
var_values.update({i.directory_value.value: []})
|
|
826
|
-
var_values[i.directory_value.value].append(i)
|
|
827
|
-
|
|
828
|
-
var_lengths = {}
|
|
829
|
-
for directory_path, var_vals in var_values.items():
|
|
830
|
-
|
|
831
|
-
if var_vals:
|
|
832
|
-
var_length = len(var_vals)
|
|
833
|
-
|
|
834
|
-
else:
|
|
835
|
-
var_length = None
|
|
836
|
-
|
|
837
|
-
if self.data:
|
|
838
|
-
var_length = len(self.data)
|
|
839
|
-
|
|
840
|
-
elif self.file_regex:
|
|
841
|
-
|
|
842
|
-
if 'subset' in self.file_regex:
|
|
843
|
-
var_length = len(self.file_regex['subset'])
|
|
844
|
-
|
|
845
|
-
elif 'expected_multiplicity' in self.file_regex:
|
|
846
|
-
var_length = self.file_regex['expected_multiplicity']
|
|
847
|
-
|
|
848
|
-
elif self.file_contents:
|
|
849
|
-
|
|
850
|
-
if 'expected_multiplicity' in self.file_contents:
|
|
851
|
-
var_length = self.file_contents['expected_multiplicity']
|
|
852
|
-
|
|
853
|
-
elif self.is_base_variable():
|
|
854
|
-
var_length = 1
|
|
855
|
-
|
|
856
|
-
else:
|
|
857
|
-
raise ValueError('bad 3!')
|
|
858
|
-
|
|
859
|
-
var_lengths.update({directory_path: var_length})
|
|
860
|
-
|
|
861
|
-
return var_lengths
|
|
862
|
-
|
|
863
|
-
def get_values(self, directory):
|
|
864
|
-
"""Get the values of this variable.
|
|
865
|
-
|
|
866
|
-
TODO: refactor repeated code blocks.
|
|
867
|
-
|
|
868
|
-
Parameters
|
|
869
|
-
----------
|
|
870
|
-
directory : Path
|
|
871
|
-
Directory within which to resolve variable.
|
|
872
|
-
|
|
873
|
-
Raises
|
|
874
|
-
------
|
|
875
|
-
UnresolvedVariableError
|
|
876
|
-
If the variable...
|
|
877
|
-
|
|
878
|
-
"""
|
|
879
|
-
|
|
880
|
-
vals = []
|
|
881
|
-
|
|
882
|
-
if self.file_regex:
|
|
883
|
-
|
|
884
|
-
if self.file_regex.get('is_dir'):
|
|
885
|
-
|
|
886
|
-
for root, _, _ in os.walk(directory):
|
|
887
|
-
root_rel = Path(root).relative_to(directory).as_posix()
|
|
888
|
-
|
|
889
|
-
match = re.search(self.file_regex['pattern'], root_rel)
|
|
890
|
-
if match:
|
|
891
|
-
match_groups = match.groups()
|
|
892
|
-
if match_groups:
|
|
893
|
-
match = match_groups[self.file_regex['group']]
|
|
894
|
-
val_fmt = self.value.format(match)
|
|
895
|
-
vals.append(val_fmt)
|
|
896
|
-
|
|
897
|
-
else:
|
|
898
|
-
# Search files in the given directory
|
|
899
|
-
for i in directory.iterdir():
|
|
900
|
-
match = re.search(self.file_regex['pattern'], i.name)
|
|
901
|
-
if match:
|
|
902
|
-
match_groups = match.groups()
|
|
903
|
-
if match_groups:
|
|
904
|
-
match = match_groups[self.file_regex['group']]
|
|
905
|
-
val_fmt = self.value.format(match)
|
|
906
|
-
vals.append(val_fmt)
|
|
907
|
-
|
|
908
|
-
elif self.file_contents:
|
|
909
|
-
|
|
910
|
-
path = Path(directory).joinpath(self.file_contents['path'])
|
|
911
|
-
with path.open('r') as handle:
|
|
912
|
-
for i in handle.readlines():
|
|
913
|
-
vals.append(i.strip())
|
|
914
|
-
|
|
915
|
-
elif self.data:
|
|
916
|
-
for i in self.data:
|
|
917
|
-
vals.append(self.value.format(i))
|
|
918
|
-
|
|
919
|
-
else:
|
|
920
|
-
vals.append(self.value)
|
|
921
|
-
|
|
922
|
-
if not vals:
|
|
923
|
-
msg = ('Cannot resolve variable value with name: {}')
|
|
924
|
-
raise UnresolvedVariableError(msg.format(self.name))
|
|
925
|
-
|
|
926
|
-
vals = sorted(vals)
|
|
927
|
-
|
|
928
|
-
return vals
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
class Submission(Base):
|
|
932
|
-
"""Class to represent the submission of (part of) a workflow."""
|
|
933
|
-
|
|
934
|
-
__tablename__ = 'submission'
|
|
935
|
-
|
|
936
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
937
|
-
order_id = Column(Integer)
|
|
938
|
-
workflow_id = Column(Integer, ForeignKey('workflow.id'))
|
|
939
|
-
submit_time = Column(DateTime)
|
|
940
|
-
alt_scratch_dir_name = Column(String(255), nullable=True)
|
|
941
|
-
|
|
942
|
-
workflow = relationship('Workflow', back_populates='submissions')
|
|
943
|
-
command_group_submissions = relationship(
|
|
944
|
-
'CommandGroupSubmission',
|
|
945
|
-
back_populates='submission',
|
|
946
|
-
order_by='CommandGroupSubmission.command_group_exec_order',
|
|
947
|
-
)
|
|
948
|
-
|
|
949
|
-
variable_values = relationship('VarValue', back_populates='submission')
|
|
950
|
-
|
|
951
|
-
def __init__(self, workflow, task_range):
|
|
952
|
-
|
|
953
|
-
self.submit_time = datetime.now()
|
|
954
|
-
self.order_id = len(workflow.submissions)
|
|
955
|
-
self.workflow = workflow
|
|
956
|
-
|
|
957
|
-
# print('Submission.__init__: task_range: {}'.format(task_range), flush=True)
|
|
958
|
-
|
|
959
|
-
self.resolve_variable_values(self.workflow.directory, self.first_iteration)
|
|
960
|
-
|
|
961
|
-
cg_subs = []
|
|
962
|
-
for i in self.workflow.command_groups:
|
|
963
|
-
task_range = [1, -1, 1] # TEMP
|
|
964
|
-
cg_sub = CommandGroupSubmission(i, self, task_range)
|
|
965
|
-
cg_subs.append(cg_sub)
|
|
966
|
-
|
|
967
|
-
session = Session.object_session(self)
|
|
968
|
-
session.commit()
|
|
969
|
-
|
|
970
|
-
# `SchedulerGroup`s must be generated after `CommandGroupSubmission`s and
|
|
971
|
-
# `resolve_variable_values`:
|
|
972
|
-
self._scheduler_groups = self.get_scheduler_groups()
|
|
973
|
-
|
|
974
|
-
if self.workflow.has_alternate_scratch:
|
|
975
|
-
self._make_alternate_scratch_dirs()
|
|
976
|
-
|
|
977
|
-
# `Task`s must be generated after `SchedulerGroup`s:
|
|
978
|
-
cg_sub_iters = []
|
|
979
|
-
for cg_sub in self.command_group_submissions:
|
|
980
|
-
|
|
981
|
-
for iteration in self.workflow.iterations:
|
|
982
|
-
|
|
983
|
-
if iteration.order_id > 0 and self.workflow.loop.get('groups'):
|
|
984
|
-
# For > first iteration, not all command groups need be run:
|
|
985
|
-
if cg_sub.command_group_exec_order not in self.workflow.loop['groups']:
|
|
986
|
-
continue
|
|
987
|
-
|
|
988
|
-
cg_sub_iter = CommandGroupSubmissionIteration(iteration, cg_sub)
|
|
989
|
-
cg_sub_iters.append(cg_sub_iter)
|
|
990
|
-
|
|
991
|
-
session.commit()
|
|
992
|
-
# `cg_sub_iter.num_outputs` requires all cg_sub_iters to be generated:
|
|
993
|
-
for cg_sub_iter in cg_sub_iters:
|
|
994
|
-
for task_num in range(cg_sub_iter.num_outputs):
|
|
995
|
-
Task(cg_sub_iter, task_num)
|
|
996
|
-
|
|
997
|
-
self.first_iteration.status = IterationStatus('active')
|
|
998
|
-
|
|
999
|
-
@reconstructor
|
|
1000
|
-
def init_on_load(self):
|
|
1001
|
-
self._scheduler_groups = self.get_scheduler_groups()
|
|
1002
|
-
|
|
1003
|
-
def _make_alternate_scratch_dirs(self):
|
|
1004
|
-
"""Create a new directory on each alternate scratch for this submission."""
|
|
1005
|
-
|
|
1006
|
-
alt_scratches = self.workflow.all_alternate_scratch
|
|
1007
|
-
|
|
1008
|
-
# Find a suitable alternate scratch directory name for this submission:
|
|
1009
|
-
count = 0
|
|
1010
|
-
MAX_COUNT = 10
|
|
1011
|
-
hex_length = 10
|
|
1012
|
-
alt_dirname = get_random_hex(hex_length)
|
|
1013
|
-
while True:
|
|
1014
|
-
if all([not i.joinpath(alt_dirname).exists() for i in alt_scratches]):
|
|
1015
|
-
break
|
|
1016
|
-
alt_dirname = get_random_hex(hex_length)
|
|
1017
|
-
count += 1
|
|
1018
|
-
if count > MAX_COUNT:
|
|
1019
|
-
msg = ('Could not find a suitable alternate scratch directory name '
|
|
1020
|
-
'in {} iterations.')
|
|
1021
|
-
raise RuntimeError(msg.format(MAX_COUNT))
|
|
1022
|
-
|
|
1023
|
-
# Make alternate scratch "root" directories:
|
|
1024
|
-
for alt_scratch in alt_scratches:
|
|
1025
|
-
alt_scratch_root = alt_scratch.joinpath(alt_dirname)
|
|
1026
|
-
alt_scratch_root.mkdir(parents=False, exist_ok=False)
|
|
1027
|
-
|
|
1028
|
-
self.alt_scratch_dir_name = alt_dirname
|
|
1029
|
-
|
|
1030
|
-
-    def get_working_directories(self, iteration):
-        dirs = []
-        for cg_sub in self.command_group_submissions:
-            cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-            for i in cg_sub_iter.get_directories():
-                if i not in dirs:
-                    dirs.append(i)
-        return dirs
-
-    @property
-    def first_iteration(self):
-        return self.workflow.first_iteration
-
-    @property
-    def scheduler_groups(self):
-        return self._scheduler_groups
-
-    def get_scheduler_groups(self):
-        """Get scheduler groups for this workflow submission."""
-        return SchedulerGroup.get_scheduler_groups(self)
-
-    def get_scheduler_group_index(self, command_group_submission):
-        """Get the position of a command group submission within the submission's
-        scheduler groups.
-
-        Parameters
-        ----------
-        command_group_submission : CommandGroupSubmission
-
-        Returns
-        -------
-        tuple (int, int)
-            First integer identifies which scheduler group. Second integer identifies
-            the relative position of the command group within the scheduler group.
-
-        """
-
-        if command_group_submission not in self.command_group_submissions:
-            msg = 'Command group submission {} is not part of the submission.'
-            raise ValueError(msg.format(command_group_submission))
-
-        for i in self.scheduler_groups:
-            if i.has(command_group_submission):
-                return (i.order_id, i.index(command_group_submission))
-
-        msg = 'Command group submission {} is not part of the scheduler group.'
-        raise ValueError(msg.format(command_group_submission))
-
-    def get_scheduler_group(self, command_group_submission):
-
-        sch_group_idx, _ = self.get_scheduler_group_index(command_group_submission)
-        return self.scheduler_groups[sch_group_idx]
-
-    def is_variable_resolved(self, variable_definition, iteration, directory_var_val=None):
-        """Returns True if the passed variable_definition has been resolved
-        for this Submission and iteration."""
-        # Check the variable definition is part of the workflow:
-        if variable_definition not in self.workflow.variable_definitions:
-            msg = ('Passed variable_definition object is not in the '
-                   ' workflow of this submission.')
-            raise ValueError(msg)
-
-        for i in self.variable_values:
-            if i.variable_definition == variable_definition:
-                if i.iteration == iteration:
-                    if directory_var_val:
-                        if i.directory_value == directory_var_val:
-                            return True
-                    else:
-                        return True
-
-        return False
-
-    def resolve_variable_values(self, root_directory, iteration):
-        """Attempt to resolve as many variable values in the Workflow as
-        possible."""
-
-        session = Session.object_session(self)
-
-        # Loop through CommandGroupSubmissions in order:
-        for i in self.workflow.command_groups:
-
-            dir_var = i.directory_variable
-
-            # VarValues representing the resolved command group working directories:
-            cg_dirs_var_vals = []
-            cg_dirs_var_vals_other_val = []
-            for j in dir_var.variable_values:
-                if j.iteration == iteration:
-                    cg_dirs_var_vals.append(j)
-                else:
-                    cg_dirs_var_vals_other_val.append(j.value)
-
-            if cg_dirs_var_vals:
-                pass
-                # print(('Submission.resolve_variable_values: found existing resolved '
-                #        'directory variables: {}').format(cg_dirs_var_vals), flush=True)
-
-            else:
-
-                # print(('Submission.resolve_variable_values: trying to resolve directory '
-                #        'variable values.'), flush=True)
-
-                # Directory variable has not yet been resolved; try:
-                try:
-                    dir_var_vals_dat = dir_var.get_values(root_directory)
-                    # print(('Submission.resolve_variable_values: found directories with '
-                    #        'values: {}.'.format(dir_var_vals_dat)), flush=True)
-
-                except UnresolvedVariableError:
-                    # Move on to next command group:
-                    continue
-
-                dir_var_vals_dat_new = [
-                    j for j in dir_var_vals_dat
-                    if (j not in cg_dirs_var_vals_other_val or j == '.')
-                ]
-
-                # print(('Submission.resolve_variable_values: new directories are: '
-                #        '{}.'.format(dir_var_vals_dat_new)), flush=True)
-
-                # Add VarVals:
-                for val_idx, val in enumerate(dir_var_vals_dat_new):
-                    cg_dirs_var_vals.append(
-                        VarValue(
-                            value=val,
-                            order_id=val_idx,
-                            var_definition=dir_var,
-                            submission=self,
-                            iteration=iteration,
-                        )
-                    )
-
-            var_defns_rec = i.variable_definitions_recursive
-
-            # print(('Submission.resolve_variable_values: cg_dirs_var_vals: '
-            #        '{}.'.format(cg_dirs_var_vals)), flush=True)
-
-            for j in cg_dirs_var_vals:
-
-                # print(('Submission.resolve_variable_values: dir var val: '
-                #        '{}.'.format(j)), flush=True)
-
-                var_vals_dat = resolve_variable_values(
-                    var_defns_rec,
-                    root_directory.joinpath(j.value)
-                )
-
-                # print(('Submission.resolve_variable_values: var_vals_dat: '
-                #        '{}.'.format(var_vals_dat)), flush=True)
-
-                for k, v in var_vals_dat.items():
-
-                    # print(('Submission.resolve_variable_values: var_vals_dat k: '
-                    #        '{}; v: {}.'.format(k, v)), flush=True)
-
-                    vals_dat = v['vals']
-                    var_defn = self.workflow.get_variable_definition_by_name(k)
-
-                    # print(('Submission.resolve_variable_values: vals_dat '
-                    #        '{}.'.format(vals_dat)), flush=True)
-                    # print(('Submission.resolve_variable_values: var_defn '
-                    #        '{}.'.format(var_defn)), flush=True)
-
-                    if not self.is_variable_resolved(var_defn, iteration, j):
-
-                        # print(('Submission.resolve_variable_values: {} not resolved...'.format(
-                        #     var_defn)), flush=True)
-
-                        for val_idx, val in enumerate(vals_dat):
-
-                            # print(('Submission.resolve_variable_values: val: {}...'.format(
-                            #     val)), flush=True)
-
-                            VarValue(
-                                value=val,
-                                order_id=val_idx,
-                                var_definition=var_defn,
-                                submission=self,
-                                iteration=iteration,
-                                directory_value=j
-                            )
-        session.commit()
-
-    def write_submit_dirs(self, hf_dir):
-        """Write the directory structure necessary for this submission."""
-
-        # Ensure scheduler output and error directories exist, if specified:
-        for cg_sub in self.command_group_submissions:
-            root_dir = self.workflow.directory
-            out_dir = root_dir.joinpath(cg_sub.command_group.scheduler.output_dir)
-            err_dir = root_dir.joinpath(cg_sub.command_group.scheduler.error_dir)
-            if not out_dir.is_dir():
-                out_dir.mkdir()
-            if not err_dir.is_dir():
-                err_dir.mkdir()
-
-        # Make the workflow directory if it does not exist:
-        wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-        if not wf_path.exists():
-            wf_path.mkdir()
-
-        # Make the submit directory:
-        submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-        submit_path.mkdir()
-
-        num_dir_vals_first_iter = {}  # keys are cg_sub_idx
-        for iteration in self.workflow.iterations:
-
-            # Make the iteration directory for each iteration:
-            iter_path = submit_path.joinpath('iter_{}'.format(iteration.order_id))
-            iter_path.mkdir()
-
-            for idx, i in enumerate(self.scheduler_groups):
-
-                max_num_tasks = i.get_max_num_tasks(self.first_iteration)
-                step_size = i.get_step_size(self.first_iteration)
-
-                # Make the scheduler group directory for each scheduler group:
-                sg_path = iter_path.joinpath('scheduler_group_{}'.format(idx))
-                sg_path.mkdir()
-
-                # Loop through cmd groups in this scheduler group:
-                for cg_sub_idx, cg_sub in enumerate(i.command_group_submissions):
-
-                    cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-                    if not cg_sub_iter:
-                        continue
-
-                    cg_sub_first_iter = cg_sub.get_command_group_submission_iteration(
-                        self.first_iteration
-                    )
-                    num_dir_vals = cg_sub_first_iter.num_directories
-                    all_dir_slots = [''] * max_num_tasks
-
-                    # Distribute dirs over num_dir_slots:
-                    for k in range(0, max_num_tasks, step_size[cg_sub_idx]):
-                        dir_idx = round((k / max_num_tasks) * num_dir_vals)
-                        all_dir_slots[k] = 'REPLACE_WITH_DIR_{}'.format(dir_idx)
-
-                    wk_dirs_path = iter_path.joinpath('working_dirs_{}{}'.format(
-                        cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')))
-
-                    # Make the working directory template files for each cmd group:
-                    with wk_dirs_path.open('w') as handle:
-                        for dir_path in all_dir_slots:
-                            handle.write('{}\n'.format(dir_path))
-
-                # Make the variable values directories for each scheduler group:
-                var_values_path = sg_path.joinpath('var_values')
-                var_values_path.mkdir()
-                for j in range(1, max_num_tasks + 1):
-                    j_fmt = zeropad(j, max_num_tasks)
-                    vv_j_path = var_values_path.joinpath(j_fmt)
-                    vv_j_path.mkdir()
-
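In `write_submit_dirs` above, each command group's working directories are spread over the scheduler group's task slots: every `step_size`-th slot receives a `REPLACE_WITH_DIR_<idx>` placeholder, with the index obtained by scaling the slot position onto the number of available directories. A small worked example of that mapping, with illustrative numbers only:

    max_num_tasks = 8      # task slots in the scheduler group (illustrative)
    num_dir_vals = 4       # resolved working directories for this command group
    step_size = 2          # scheduler array step for this command group

    all_dir_slots = [''] * max_num_tasks
    for k in range(0, max_num_tasks, step_size):
        dir_idx = round((k / max_num_tasks) * num_dir_vals)
        all_dir_slots[k] = 'REPLACE_WITH_DIR_{}'.format(dir_idx)

    print(all_dir_slots)
    # ['REPLACE_WITH_DIR_0', '', 'REPLACE_WITH_DIR_1', '', 'REPLACE_WITH_DIR_2', '', 'REPLACE_WITH_DIR_3', '']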
-    def write_jobscripts(self, hf_dir):
-
-        wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-        submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-        js_paths = []
-        js_stats_paths = []
-        for cg_sub in self.command_group_submissions:
-            js_paths_i = cg_sub.write_jobscript(dir_path=submit_path)
-            js_paths.append(js_paths_i['jobscript'])
-            js_stats_paths.append(js_paths_i['stats_jobscript'])
-
-        return js_paths, js_stats_paths
-
-    def submit_jobscripts(self, jobscript_paths):
-
-        loop_groups = self.workflow.loop['groups']
-        cmd_group_idx = range(len(self.workflow.command_groups))
-
-        if loop_groups:
-
-            pre_loop_idx = [i for i in cmd_group_idx if i < min(loop_groups)]
-            post_loop_idx = [i for i in cmd_group_idx if i > max(loop_groups)]
-
-            # List of tuples mapping jobscript path index (i.e. command group order id) to
-            # iteration index:
-            js_submissions = [(i, 0) for i in pre_loop_idx]
-
-            for iteration in self.workflow.iterations:
-                for i in loop_groups:
-                    js_submissions.append((i, iteration.order_id))
-
-            for i in post_loop_idx:
-                js_submissions.append((i, 0))
-
-        else:
-            js_submissions = [(i, 0) for i in cmd_group_idx]
-
-        sumbit_cmd = os.getenv('HPCFLOW_QSUB_CMD', 'qsub')
-        last_submit_id = None
-        js_paths, js_stat_paths = jobscript_paths
-
-        for cg_sub_idx, iter_idx in js_submissions:
-
-            iter_idx_var = 'ITER_IDX={}'.format(iter_idx)
-            cg_sub = self.command_group_submissions[cg_sub_idx]
-            iteration = self.workflow.iterations[iter_idx]
-            cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-            js_path_i, js_stat_path_i = js_paths[cg_sub_idx], js_stat_paths[cg_sub_idx]
-
-            qsub_cmd = [sumbit_cmd]
-
-            if last_submit_id:
-
-                # Add conditional submission:
-                if iteration.order_id > 0:
-                    hold_arg = '-hold_jid'
-                elif cg_sub.command_group.nesting == NestingType('hold'):
-                    hold_arg = '-hold_jid'
-                else:
-                    hold_arg = '-hold_jid_ad'
-
-                qsub_cmd += [hold_arg, last_submit_id]
-
-            qsub_cmd += ['-v', iter_idx_var]
-            qsub_cmd.append(str(js_path_i))
-
-            # Submit the jobscript:
-            job_id_str = self.submit_jobscript(qsub_cmd, js_path_i, iteration)
-            cg_sub_iter.scheduler_job_id = int(job_id_str)
-            last_submit_id = job_id_str
-
-            # Submit the stats jobscript:
-            if js_stat_path_i:
-                st_cmd = [sumbit_cmd, '-hold_jid_ad', last_submit_id, '-v', iter_idx_var]
-                st_cmd.append(str(js_stat_path_i))
-
-                job_id_str = self.submit_jobscript(st_cmd, js_stat_path_i, iteration)
-                cg_sub_iter.scheduler_stats_job_id = int(job_id_str)
-                last_submit_id = job_id_str
-
-    def submit_jobscript(self, cmd, js_path, iteration):
-
-        cwd = str(self.workflow.directory)
-        proc = run(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
-        qsub_out = proc.stdout.decode().strip()
-        qsub_err = proc.stderr.decode().strip()
-        if qsub_out:
-            print(qsub_out, flush=True)
-        if qsub_err:
-            print(qsub_err, flush=True)
-
-        # Extract newly submitted job ID:
-        pattern = r'[0-9]+'
-        job_id_search = re.search(pattern, qsub_out)
-        try:
-            job_id_str = job_id_search.group()
-        except AttributeError:
-            msg = ('Could not retrieve the job ID from the submitted jobscript '
-                   'found at {}. No more jobscripts will be submitted.')
-            raise ValueError(msg.format(js_path))
-
-        return job_id_str
-
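`submit_jobscript` above shells out to `qsub` and takes the first run of digits in its stdout as the scheduler job ID. A minimal sketch of just that parsing step in isolation; the sample output string is illustrative, not captured from a real cluster:

    import re

    def parse_job_id(qsub_out):
        # SGE prints something like 'Your job-array 12345.1-10:1 ("js_0.sh") has been submitted';
        # the first run of digits is taken as the job ID.
        match = re.search(r'[0-9]+', qsub_out)
        if match is None:
            raise ValueError('No job ID found in: {!r}'.format(qsub_out))
        return match.group()

    print(parse_job_id('Your job-array 12345.1-10:1 ("js_0.sh") has been submitted'))  # 12345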
-    def get_stats(self, jsonable=True, datetime_dicts=False):
-        """Get task statistics for this submission."""
-        out = {
-            'submission_id': self.id_,
-            'command_group_submissions': [
-                i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                for i in self.command_group_submissions]
-        }
-        return out
-
-
-class CommandGroupSubmission(Base):
-    """Class to represent the submission of a single command group."""
-
-    __tablename__ = 'command_group_submission'
-
-    id_ = Column('id', Integer, primary_key=True)
-    command_group_id = Column(Integer, ForeignKey('command_group.id'))
-    submission_id = Column(Integer, ForeignKey('submission.id'))
-    task_start = Column(Integer)
-    task_stop = Column(Integer)
-    task_step = Column(Integer)
-    commands_written = Column(Boolean)
-    # _task_multiplicity = Column('task_multiplicity', Integer, nullable=True)
-
-    command_group = relationship('CommandGroup',
-                                 back_populates='command_group_submissions')
-
-    submission = relationship('Submission', back_populates='command_group_submissions')
-
-    command_group_exec_order = deferred(
-        select([CommandGroup.exec_order]).where(
-            CommandGroup.id_ == command_group_id))
-
-    is_command_writing = relationship(
-        'IsCommandWriting',
-        uselist=False,
-        cascade='all, delete, delete-orphan'
-    )
-
-    command_group_submission_iterations = relationship(
-        'CommandGroupSubmissionIteration',
-        back_populates='command_group_submission',
-    )
-
-    def __repr__(self):
-        out = (
-            '{}('
-            'command_group={!r}, '
-            'submission_id={!r})').format(
-                self.__class__.__name__,
-                self.command_group,
-                self.submission_id,
-        )
-        return out
-
-    def __init__(self, command_group, submission, task_range):
-
-        # print('CommandGroupSubmission.__init__: task_range: {}'.format(task_range), flush=True)
-
-        self.command_group = command_group
-        self.submission = submission
-        self.task_range = task_range
-
-    @property
-    def task_range(self):
-        return (self.task_start, self.task_stop, self.task_step)
-
-    @task_range.setter
-    def task_range(self, task_range):
-        self.task_start = task_range[0]
-        self.task_stop = task_range[1]
-        if len(task_range) == 3:
-            self.task_step = task_range[2]
-        else:
-            self.task_step = 1
-
-    @property
-    def task_range_idx(self):
-        return list(range(*self.task_range))
-
-    @property
-    def variable_values(self):
-
-        var_values = []
-        for i in self.command_group.variable_definitions:
-            if i.variable_values:
-                var_values.append(i)
-
-        return var_values
-
-    @property
-    def num_submitted_tasks(self):
-        """Get the number of submitted tasks based on the task range.
-
-        Returns
-        -------
-        num : int
-            If the number of tasks is as yet undetermined, `None` is returned.
-
-        """
-
-        if self.task_stop == -1:
-            return None
-
-        num = ceil((self.task_stop - (self.task_start - 1)) / self.task_step)
-
-        return num
-
-    @property
-    def scheduler_group_index(self):
-        """Get the position of this command group submission within the submission's
-        scheduler groups.
-
-        Returns
-        -------
-        tuple (int, int)
-            First integer identifies which scheduler group. Second integer identifies
-            the relative position of the command group within the scheduler group.
-
-        """
-        return self.submission.get_scheduler_group_index(self)
-
-    @property
-    def scheduler_group(self):
-        """Get the scheduler group to which this command group belongs."""
-        return self.submission.get_scheduler_group(self)
-
-    def get_command_group_submission_iteration(self, iteration):
-
-        for i in self.command_group_submission_iterations:
-            if i.iteration == iteration:
-                return i
-
-    @property
-    def alternate_scratch_dir(self):
-        if self.command_group.alternate_scratch:
-            return self.command_group.alternate_scratch.joinpath(
-                self.submission.alt_scratch_dir_name)
-        else:
-            return None
-
-    def get_var_definition_by_name(self, var_name):
-        """"""
-
-        for i in self.command_group.var_definitions:
-            if i.name == var_name:
-                return i
-
-    def write_jobscript(self, dir_path):
-        """Write the jobscript."""
-
-        cg_sub_first_iter = self.get_command_group_submission_iteration(
-            self.submission.first_iteration)
-
-        js_path = self.command_group.scheduler.write_jobscript(
-            dir_path=dir_path,
-            workflow_directory=self.submission.workflow.directory,
-            command_group_order=self.command_group_exec_order,
-            max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                self.submission.first_iteration),
-            task_step_size=cg_sub_first_iter.step_size,
-            environment=self.command_group.environment,
-            archive=self.command_group.archive is not None,
-            alternate_scratch_dir=self.alternate_scratch_dir,
-            command_group_submission_id=self.id_,
-            name=self.command_group.name,
-        )
-
-        js_stats_path = None
-        if self.command_group.stats:
-            js_stats_path = self.command_group.scheduler.write_stats_jobscript(
-                dir_path=dir_path,
-                workflow_directory=self.submission.workflow.directory,
-                command_group_order=self.command_group_exec_order,
-                max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                    self.submission.first_iteration),
-                task_step_size=cg_sub_first_iter.step_size,
-                command_group_submission_id=self.id_,
-                name=self.command_group.stats_name,
-            )
-
-        out = {
-            'jobscript': js_path,
-            'stats_jobscript': js_stats_path,
-        }
-
-        return out
-
-    def write_runtime_files(self, project, task_idx, iter_idx):
-        iteration = self.get_iteration(iter_idx)
-        self.queue_write_command_file(project, task_idx, iteration)
-        self.write_variable_files(project, task_idx, iteration)
-
-    def queue_write_command_file(self, project, task_idx, iteration):
-        """Ensure the command file for this command group submission is written, ready
-        to be invoked by the jobscript, and also refresh the resolved variable values
-        so that when the variable files are written, they are up to date."""
-
-        session = Session.object_session(self)
-
-        sleep_time = 5
-        context = 'CommandGroupSubmission.write_cmd'
-        block_msg = ('{{}} {}: Writing command file blocked. Sleeping for {} '
-                     'seconds'.format(context, sleep_time))
-        unblock_msg = ('{{}} {}: Commands not written and writing available. Writing '
-                       'command file.'.format(context))
-        written_msg = '{{}} {}: Command files already written.'.format(context)
-        refresh_vals_msg = '{{}} {}: Refreshing resolved variable values.'.format(context)
-        write_dirs_msg = ('{{}} {}: Writing working directory files for '
-                          'iteration {}').format(context, iteration)
-        write_as_msg = ('{{}} {}: Writing alternate scratch exclusion list for '
-                        'task_idx {}.').format(context, task_idx)
-        make_alt_msg = ('{{}} {}: Making alternate scratch working '
-                        'directories.'.format(context))
-
-        blocked = True
-        while blocked:
-
-            try:
-                session.refresh(self)
-            except OperationalError:
-                # Database is likely locked.
-                print(block_msg.format(datetime.now()), flush=True)
-                sleep(sleep_time)
-                continue
-
-            if self.is_command_writing:
-                print(block_msg.format(datetime.now()), flush=True)
-                sleep(sleep_time)
-
-            else:
-                try:
-                    self.is_command_writing = IsCommandWriting()
-                    session.commit()
-                    blocked = False
-
-                except IntegrityError:
-                    # Another process has already set `is_command_writing`
-                    session.rollback()
-                    print(block_msg.format(datetime.now()), flush=True)
-                    sleep(sleep_time)
-
-                except OperationalError:
-                    # Database is likely locked.
-                    session.rollback()
-                    print(block_msg.format(datetime.now()), flush=True)
-                    sleep(sleep_time)
-
-        if not blocked:
-
-            if iteration.status == IterationStatus('pending'):
-                iteration.status = IterationStatus('active')
-
-            # This needs to happen once *per task* per CGS:
-            print(refresh_vals_msg.format(datetime.now()), flush=True)
-            self.submission.resolve_variable_values(project.dir_path, iteration)
-
-            # This needs to happen once *per task* per CGS (if it has AS):
-            if self.command_group.alternate_scratch:
-                print(write_as_msg.format(datetime.now()), flush=True)
-                task = self.get_task(task_idx, iteration)
-                self.write_alt_scratch_exclusion_list(project, task, iteration)
-
-            cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-            if not cg_sub_iter.working_dirs_written:
-
-                # These need to happen once *per iteration* per CGS:
-
-                print(write_dirs_msg.format(datetime.now()), flush=True)
-                cg_sub_iter.write_working_directories(project)
-
-                if self.command_group.alternate_scratch:
-                    print(make_alt_msg.format(datetime.now()), flush=True)
-                    self.make_alternate_scratch_dirs(project, iteration)
-
-                cg_sub_iter.working_dirs_written = True
-
-            if not self.commands_written:
-                # This needs to happen once per CGS:
-                print(unblock_msg.format(datetime.now()), flush=True)
-                self.write_command_file(project)
-                self.commands_written = True
-            else:
-                print(written_msg.format(datetime.now()), flush=True)
-
-            self.is_command_writing = None
-            session.commit()
-
-    def write_variable_files(self, project, task_idx, iteration):
-
-        task = self.get_task(task_idx, iteration)
-        var_vals_normed = task.get_variable_values_normed()
-
-        print('CGS.write_variable_files: task: {}'.format(task), flush=True)
-        print('CGS.write_variable_files: var_vals_normed: {}'.format(
-            var_vals_normed), flush=True)
-
-        max_num_tasks = self.scheduler_group.get_max_num_tasks(
-            self.submission.first_iteration)
-
-        var_values_task_dir = project.hf_dir.joinpath(
-            'workflow_{}'.format(self.submission.workflow.id_),
-            'submit_{}'.format(self.submission.order_id),
-            'iter_{}'.format(iteration.order_id),
-            'scheduler_group_{}'.format(self.scheduler_group_index[0]),
-            'var_values',
-            zeropad(task.scheduler_id, max_num_tasks),
-        )
-
-        for var_name, var_val_all in var_vals_normed.items():
-            var_fn = 'var_{}{}'.format(var_name, CONFIG.get('variable_file_ext'))
-            var_file_path = var_values_task_dir.joinpath(var_fn)
-            with var_file_path.open('w') as handle:
-                for i in var_val_all:
-                    handle.write('{}\n'.format(i))
-
-    @staticmethod
-    def get_formatted_commands(commands, num_cores, parallel_modes, indent=''):
-
-        # TODO: what about parallel mode env?
-        delims = CONFIG.get('variable_delimiters')
-        lns_cmd = []
-        for i in commands:
-            if 'line' in i:
-                cmd_ln = indent
-                para_mode = i.get('parallel_mode')
-                if para_mode:
-                    para_mode_config = parallel_modes.get(
-                        para_mode.lower())  # todo raise on miss
-                    para_command = para_mode_config.get('command')
-                    if para_command:
-                        cmd_ln += para_command.replace('<<num_cores>>', num_cores) + ' '
-                line = i['line']
-                for var_name in extract_variable_names(line, delims):
-                    line = line.replace(delims[0] + var_name + delims[1], f'${var_name}')
-                cmd_ln += line
-                lns_cmd.append(cmd_ln)
-            elif 'subshell' in i:
-                sub_cmds = CommandGroupSubmission.get_formatted_commands(
-                    i['subshell'],
-                    num_cores,
-                    parallel_modes,
-                    indent=(indent+'\t'),
-                )
-                lns_cmd.extend([f'{indent}('] + sub_cmds + [f'{indent})'])
-
-        return lns_cmd
-
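`get_formatted_commands` above rewrites delimited variable references into shell variables before the command file is written. A self-contained sketch of that substitution; the `<<`/`>>` delimiters and the regex-based `extract_variable_names` helper shown here are assumptions for illustration (hpcflow reads the real delimiters and helper from its config and utilities):

    import re

    DELIMS = ('<<', '>>')  # assumed variable delimiters

    def extract_variable_names(line, delims):
        # Find every name appearing between the opening and closing delimiters.
        pattern = re.escape(delims[0]) + r'(\w+)' + re.escape(delims[1])
        return re.findall(pattern, line)

    def format_command(line, delims=DELIMS):
        # Replace each '<<name>>' occurrence with the shell variable '$name'.
        for var_name in extract_variable_names(line, delims):
            line = line.replace(delims[0] + var_name + delims[1], f'${var_name}')
        return line

    print(format_command('process --in <<infile>> --out <<outfile>>'))
    # process --in $infile --out $outfile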
-    def write_command_file(self, project):
-
-        lns_cmd = self.get_formatted_commands(
-            self.command_group.commands,
-            num_cores=self.command_group.scheduler.NUM_CORES_VAR,
-            parallel_modes=self.command_group.workflow.parallel_modes,
-            indent=('\t' if self.command_group.variable_definitions else ''),
-        )
-
-        lns_while_start = ['while true', 'do']
-        lns_while_end = ['done \\']
-
-        dt_stamp = datetime.now().strftime(r'%Y.%m.%d at %H:%M:%S')
-        about_msg = ['# --- commands file generated by `hpcflow` (version: {}) '
-                     'on {} ---'.format(__version__, dt_stamp)]
-
-        max_num_tasks = self.scheduler_group.get_max_num_tasks(
-            self.submission.first_iteration)
-
-        lns_task_id_pad = [
-            'MAX_NUM_TASKS={}'.format(max_num_tasks),
-            'MAX_NUM_DIGITS="${#MAX_NUM_TASKS}"',
-            'ZEROPAD_TASK_ID=$(printf "%0${MAX_NUM_DIGITS}d" $SGE_TASK_ID)',
-        ]
-
-        lns_read = []
-        lns_fds = []
-
-        for idx, i in enumerate(self.command_group.variable_definitions):
-
-            fd_idx = idx + 3
-
-            var_fn = 'var_{}{}'.format(i.name, CONFIG.get('variable_file_ext'))
-            var_file_path = ('$ITER_DIR/scheduler_group_{}/var_values'
-                             '/$ZEROPAD_TASK_ID/{}').format(
-                                 self.scheduler_group_index[0], var_fn)
-
-            lns_read.append('\tread -u{} {} || break'.format(fd_idx, i.name))
-
-            if idx > 0:
-                lns_fds[-1] += ' \\'
-
-            lns_fds.append('\t{}< {}'.format(fd_idx, var_file_path))
-
-        lns_cmd_print = ['printf "Running command: \\"{}\\"\\n" >> $LOG_PATH 2>&1'.format(
-            i.strip('\t').replace('"', r'\\\\\"')) for i in lns_cmd]
-
-        if self.command_group.variable_definitions:
-            lns_cmd_print = ['\t{}'.format(i) for i in lns_cmd_print]
-            cmd_lns = (about_msg + [''] +
-                       lns_task_id_pad + [''] +
-                       lns_while_start + [''] +
-                       lns_read + [''] +
-                       lns_cmd_print + [''] +
-                       lns_cmd + [''] +
-                       lns_while_end +
-                       lns_fds + [''])
-        else:
-            cmd_lns = (about_msg + [''] +
-                       lns_cmd_print + [''] +
-                       lns_cmd + [''])
-
-        cmd_lns = '\n'.join(cmd_lns)
-
-        cmd_path = project.hf_dir.joinpath(
-            'workflow_{}'.format(self.submission.workflow.id_),
-            'submit_{}'.format(self.submission.order_id),
-            'cmd_{}{}'.format(self.command_group_exec_order, CONFIG.get('jobscript_ext')),
-        )
-        with cmd_path.open('w') as handle:
-            handle.write(cmd_lns)
-
-    def write_alt_scratch_exclusion_list(self, project, task, iteration):
-        """Write alternate scratch exclusion files (for e.g. rsync)"""
-
-        # List of Paths to exclude, relative to `self.submission.workflow.directory`:
-        excluded_paths = [
-            Path(CONFIG.get('hpcflow_directory'))] + self.submission.workflow.profile_files
-
-        out_dir = Path(self.command_group.scheduler.output_dir)
-        err_dir = Path(self.command_group.scheduler.error_dir)
-        if out_dir not in excluded_paths:
-            excluded_paths.append(out_dir)
-        if err_dir not in excluded_paths:
-            excluded_paths.append(err_dir)
-
-        working_dir_path = Path(task.get_working_directory_value())
-        alt_scratch_exclusions = []
-        for exc_path in excluded_paths:
-            try:
-                exc_path.relative_to(working_dir_path)
-            except ValueError:
-                continue
-            alt_scratch_exclusions.append(exc_path)
-
-        exc_list_path = project.hf_dir.joinpath(
-            'workflow_{}'.format(self.submission.workflow.id_),
-            'submit_{}'.format(self.submission.order_id),
-            'iter_{}'.format(iteration.order_id),
-            '{}_{}_{}{}'.format(
-                CONFIG.get('alt_scratch_exc_file'),
-                self.command_group_exec_order,
-                task.order_id,
-                CONFIG.get('alt_scratch_exc_file_ext'),
-            ),
-        )
-
-        working_dir_abs = self.submission.workflow.directory.joinpath(working_dir_path)
-        about = (
-            '# Alternate scratch exclusion list. Patterns are relative '
-            'to task #{} working directory:\n'
-            '# "{}"\n\n'
-        )
-        with exc_list_path.open('w') as handle:
-            handle.write(about.format(task.order_id, working_dir_abs))
-            for exc_path in alt_scratch_exclusions:
-                handle.write(str(exc_path) + '\n')
-
-    def make_alternate_scratch_dirs(self, project, iteration):
-        """Generate task working directories on the alternate scratch."""
-
-        # Get task working directories:
-        cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-        working_dirs = [task.get_working_directory() for task in cg_sub_iter.tasks]
-
-        alt_scratch_root = self.command_group.alternate_scratch.joinpath(
-            self.submission.alt_scratch_dir_name)
-
-        for working_dir in working_dirs:
-            if working_dir.value == '.':
-                # Already made "root" dir.
-                continue
-            alt_scratch_w_dir = alt_scratch_root.joinpath(working_dir.value)
-            alt_scratch_w_dir.mkdir(parents=True, exist_ok=True)
-
-    def get_iteration(self, iter_idx):
-        for i in self.submission.workflow.iterations:
-            if i.order_id == iter_idx:
-                return i
-
-    def get_task(self, task_idx, iteration):
-        cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-        for i in cg_sub_iter.tasks:
-            if i.order_id == task_idx and i.iteration == iteration:
-                return i
-
-    def set_task_start(self, task_idx, iter_idx):
-        context = 'CommandGroupSubmission.set_task_start'
-        msg = '{{}} {}: Task index {} started.'.format(context, task_idx)
-        start_time = datetime.now()
-        print(msg.format(start_time), flush=True)
-        iteration = self.get_iteration(iter_idx)
-        task = self.get_task(task_idx, iteration)
-        task.start_time = start_time
-        print('task: {}'.format(task))
-
-    def set_task_end(self, task_idx, iter_idx):
-        context = 'CommandGroupSubmission.set_task_end'
-        msg = '{{}} {}: Task index {} ended.'.format(context, task_idx)
-        end_time = datetime.now()
-        print(msg.format(end_time), flush=True)
-        iteration = self.get_iteration(iter_idx)
-        task = self.get_task(task_idx, iteration)
-        task.end_time = end_time
-        print('task: {}'.format(task))
-
-    def do_archive(self, task_idx, iter_idx):
-        """Archive the working directory associated with a given task in this command
-        group submission."""
-
-        # Adding a small delay increases the chance that `Task.is_archive_required` will
-        # be False (and so save some time overall), in the case where all tasks start at
-        # roughly the same time:
-        sleep(10)
-
-        iteration = self.get_iteration(iter_idx)
-        task = self.get_task(task_idx, iteration)
-        self.command_group.archive.execute_with_lock(task)
-
-    def get_stats(self, jsonable=True, datetime_dicts=False):
-        """Get task statistics for this command group submission."""
-        out = {
-            'command_group_submission_id': self.id_,
-            'command_group_id': self.command_group.id_,
-            'commands': self.command_group.commands,
-            'name': self.command_group.name,
-            'tasks': [task.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                      for cgsub_iter in self.command_group_submission_iterations
-                      for task in cgsub_iter.tasks
-                      if task.iteration.status != IterationStatus('pending')]
-        }
-        return out
-
-    def get_scheduler_stats(self, task_idx, iter_idx):
-
-        # Get scheduler job ID and scheduler task ID:
-        iteration = self.get_iteration(iter_idx)
-        cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-        scheduler_job_id = cg_sub_iter.scheduler_job_id
-        task = self.get_task(task_idx, iteration)
-        task_id = task.scheduler_id
-
-        info = self.command_group.scheduler.get_scheduler_stats(scheduler_job_id, task_id)
-
-        if 'MB' in info['maxvmem']:
-            maxvmem = float(info['maxvmem'].split('MB')[0])
-        elif 'GB' in info['maxvmem']:
-            maxvmem = float(info['maxvmem'].split('GB')[0])
-        hostname = info['hostname']
-        wallclock = int(info['ru_wallclock'].split('s')[0])
-
-        task.memory = maxvmem
-        task.hostname = hostname
-        task.wallclock = wallclock
-
-
-class VarValue(Base):
-    """Class to represent the evaluated value of a variable."""
-
-    __tablename__ = 'var_value'
-
-    id_ = Column('id', Integer, primary_key=True)
-    var_definition_id = Column(
-        Integer,
-        ForeignKey('var_definition.id'),
-    )
-    submission_id = Column(Integer, ForeignKey('submission.id'))
-    value = Column(String(255))
-    order_id = Column(Integer)
-    directory_value_id = Column('directory_value_id', Integer, ForeignKey('var_value.id'))
-    iteration_id = Column(Integer, ForeignKey('iteration.id'))
-
-    variable_definition = relationship('VarDefinition', back_populates='variable_values')
-    submission = relationship('Submission', back_populates='variable_values')
-    directory_value = relationship('VarValue', uselist=False, remote_side=id_)
-    iteration = relationship('Iteration', uselist=False)
-
-    def __init__(self, value, order_id, var_definition, submission, iteration,
-                 directory_value=None):
-
-        self.value = value
-        self.order_id = order_id
-        self.iteration = iteration
-        self.variable_definition = var_definition
-        self.submission = submission
-        self.directory_value = directory_value
-
-    def __repr__(self):
-        out = (
-            '{}('
-            'variable_name={}, '
-            'value={}, '
-            'order_id={}, '
-            'iteration={}, '
-            'directory={}'
-            ')').format(
-                self.__class__.__name__,
-                self.variable_definition.name,
-                self.value,
-                self.order_id,
-                self.iteration,
-                self.directory_value.value if self.directory_value else None,
-        )
-        return out
-
-
-class IsCommandWriting(Base):
-    """Class to represent active writing of a command file."""
-
-    __tablename__ = 'is_command_writing'
-
-    command_group_submission_id = Column(
-        Integer,
-        ForeignKey('command_group_submission.id'),
-        primary_key=True,
-        unique=True
-    )
-
-
-class Task(Base):
-    """Class to represent a single task."""
-
-    __tablename__ = 'task'
-
-    id_ = Column('id', Integer, primary_key=True)
-    order_id = Column(Integer, nullable=False)
-    start_time = Column(DateTime)
-    end_time = Column(DateTime)
-    memory = Column(Float)
-    hostname = Column(String(255))
-    wallclock = Column(Integer)
-    archive_status = Column(Enum(TaskArchiveStatus), nullable=True)
-    _archive_start_time = Column('archive_start_time', DateTime, nullable=True)
-    _archive_end_time = Column('archive_end_time', DateTime, nullable=True)
-    archived_task_id = Column(Integer, ForeignKey('task.id'), nullable=True)
-
-    command_group_submission_iteration_id = Column(
-        Integer, ForeignKey('command_group_submission_iteration.id'))
-
-    command_group_submission_iteration = relationship(
-        'CommandGroupSubmissionIteration', back_populates='tasks', uselist=False)
-
-    archived_task = relationship('Task', uselist=False, remote_side=id_)
-
-    def __init__(self, command_group_submission_iteration, order_id):
-        self.order_id = order_id
-        self.command_group_submission_iteration = command_group_submission_iteration
-        self.start_time = None
-        self.end_time = None
-
-        if self.command_group_submission_iteration.command_group_submission.command_group.archive:
-            self.archive_status = TaskArchiveStatus('pending')
-
-    @property
-    def iteration(self):
-        return self.command_group_submission_iteration.iteration
-
-    def __repr__(self):
-        out = (
-            '{}('
-            'order_id={}, '
-            'command_group_submission_iteration_id={}, '
-            'start_time={}, '
-            'end_time={}'
-            ')').format(
-                self.__class__.__name__,
-                self.order_id,
-                self.command_group_submission_iteration_id,
-                self.start_time,
-                self.end_time,
-        )
-        return out
-
-    @property
-    def duration(self):
-        if self.start_time and self.end_time:
-            return self.end_time - self.start_time
-        else:
-            return None
-
-    @property
-    def scheduler_id(self):
-        """Get the task ID, as understood by the scheduler."""
-        num_tasks = self.command_group_submission_iteration.num_outputs
-        step_size = self.command_group_submission_iteration.step_size
-        scheduler_range = range(1, 1 + (num_tasks * step_size), step_size)
-        scheduler_id = scheduler_range[self.order_id]
-
-        return scheduler_id
-
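The `scheduler_id` property above maps a zero-based task index onto the 1-based, strided task-array IDs used by the scheduler. A quick worked example of the same arithmetic, with illustrative numbers:

    num_tasks, step_size = 4, 3                        # e.g. 4 outputs, array step of 3
    scheduler_range = range(1, 1 + num_tasks * step_size, step_size)
    print(list(scheduler_range))                       # [1, 4, 7, 10]
    print(scheduler_range[2])                          # order_id 2 -> scheduler task ID 7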
@property
|
|
2089
|
-
def archive_start_time(self):
|
|
2090
|
-
if self.archived_task:
|
|
2091
|
-
# Archive for this task was handled by another task with the same working dir:
|
|
2092
|
-
return self.archived_task.archive_start_time
|
|
2093
|
-
else:
|
|
2094
|
-
return self._archive_start_time
|
|
2095
|
-
|
|
2096
|
-
@archive_start_time.setter
|
|
2097
|
-
def archive_start_time(self, start_time):
|
|
2098
|
-
self._archive_start_time = start_time
|
|
2099
|
-
|
|
2100
|
-
@property
|
|
2101
|
-
def archive_end_time(self):
|
|
2102
|
-
if self.archived_task:
|
|
2103
|
-
# Archive for this task was handled by another task with the same working dir:
|
|
2104
|
-
return self.archived_task.archive_end_time
|
|
2105
|
-
else:
|
|
2106
|
-
return self._archive_end_time
|
|
2107
|
-
|
|
2108
|
-
@archive_end_time.setter
|
|
2109
|
-
def archive_end_time(self, end_time):
|
|
2110
|
-
self._archive_end_time = end_time
|
|
2111
|
-
|
|
2112
|
-
@property
|
|
2113
|
-
def archive_duration(self):
|
|
2114
|
-
if self.archive_start_time and self.archive_end_time:
|
|
2115
|
-
return self.archive_end_time - self.archive_start_time
|
|
2116
|
-
else:
|
|
2117
|
-
return None
|
|
2118
|
-
|
|
2119
|
-
def get_working_directory(self):
|
|
2120
|
-
"""Get the "working directory" of this task."""
|
|
2121
|
-
dir_vals = self.command_group_submission_iteration.get_directories()
|
|
2122
|
-
dirs_per_task = len(dir_vals) / \
|
|
2123
|
-
self.command_group_submission_iteration.num_outputs
|
|
2124
|
-
dir_idx = floor(self.order_id * dirs_per_task)
|
|
2125
|
-
working_dir = dir_vals[dir_idx]
|
|
2126
|
-
|
|
2127
|
-
return working_dir
|
|
2128
|
-
|
|
2129
|
-
def get_working_directory_value(self):
|
|
2130
|
-
return self.get_working_directory().value
|
|
2131
|
-
|
|
2132
|
-
def get_stats(self, jsonable=True, datetime_dicts=False):
|
|
2133
|
-
"""Get statistics for this task."""
|
|
2134
|
-
out = {
|
|
2135
|
-
'task_id': self.id_,
|
|
2136
|
-
'order_id': self.order_id,
|
|
2137
|
-
'scheduler_id': self.scheduler_id,
|
|
2138
|
-
'start_time': self.start_time,
|
|
2139
|
-
'end_time': self.end_time,
|
|
2140
|
-
'duration': self.duration,
|
|
2141
|
-
'archive_start_time': self.archive_start_time,
|
|
2142
|
-
'archive_end_time': self.archive_end_time,
|
|
2143
|
-
'archive_duration': self.archive_duration,
|
|
2144
|
-
'archived_task_id': self.archived_task_id,
|
|
2145
|
-
'memory': self.memory,
|
|
2146
|
-
'hostname': self.hostname,
|
|
2147
|
-
'wallclock': self.wallclock,
|
|
2148
|
-
'working_directory': self.get_working_directory_value(),
|
|
2149
|
-
'archive_status': self.archive_status,
|
|
2150
|
-
'iteration': self.iteration.order_id,
|
|
2151
|
-
}
|
|
2152
|
-
|
|
2153
|
-
if datetime_dicts:
|
|
2154
|
-
if self.duration:
|
|
2155
|
-
out['duration'] = timedelta_to_dict(out['duration'])
|
|
2156
|
-
if self.archive_duration:
|
|
2157
|
-
out['archive_duration'] = timedelta_to_dict(out['archive_duration'])
|
|
2158
|
-
if self.start_time:
|
|
2159
|
-
out['start_time'] = datetime_to_dict(out['start_time'])
|
|
2160
|
-
if self.end_time:
|
|
2161
|
-
out['end_time'] = datetime_to_dict(out['end_time'])
|
|
2162
|
-
if self.archive_start_time:
|
|
2163
|
-
out['archive_start_time'] = datetime_to_dict(out['archive_start_time'])
|
|
2164
|
-
if self.archive_end_time:
|
|
2165
|
-
out['archive_end_time'] = datetime_to_dict(out['archive_end_time'])
|
|
2166
|
-
|
|
2167
|
-
if jsonable:
|
|
2168
|
-
|
|
2169
|
-
if not datetime_dicts:
|
|
2170
|
-
|
|
2171
|
-
if self.duration:
|
|
2172
|
-
out['duration'] = format_time_delta(out['duration'])
|
|
2173
|
-
if self.archive_duration:
|
|
2174
|
-
out['archive_duration'] = format_time_delta(out['archive_duration'])
|
|
2175
|
-
|
|
2176
|
-
dt_fmt = r'%Y.%m.%d %H:%M:%S'
|
|
2177
|
-
|
|
2178
|
-
if self.start_time:
|
|
2179
|
-
out['start_time'] = out['start_time'].strftime(dt_fmt)
|
|
2180
|
-
if self.end_time:
|
|
2181
|
-
out['end_time'] = out['end_time'].strftime(dt_fmt)
|
|
2182
|
-
if self.archive_start_time:
|
|
2183
|
-
out['archive_start_time'] = out['archive_start_time'].strftime(dt_fmt)
|
|
2184
|
-
if self.archive_end_time:
|
|
2185
|
-
out['archive_end_time'] = out['archive_end_time'].strftime(dt_fmt)
|
|
2186
|
-
|
|
2187
|
-
if self.archive_status:
|
|
2188
|
-
out['archive_status'] = self.archive_status.value
|
|
2189
|
-
|
|
2190
|
-
return out
|
|
2191
|
-
|
|
2192
|
-
def get_same_directory_tasks(self):
|
|
2193
|
-
"""Get a list of other Tasks within the same command group that share the same
|
|
2194
|
-
working directory and iteration."""
|
|
2195
|
-
same_dir_tasks = []
|
|
2196
|
-
for i in self.command_group_submission_iteration.tasks:
|
|
2197
|
-
if i is self:
|
|
2198
|
-
continue
|
|
2199
|
-
elif i.iteration == self.iteration:
|
|
2200
|
-
if i.get_working_directory() is self.get_working_directory():
|
|
2201
|
-
same_dir_tasks.append(i)
|
|
2202
|
-
|
|
2203
|
-
print('Task.get_same_directory_tasks: same_dir_tasks: {}'.format(same_dir_tasks),
|
|
2204
|
-
flush=True)
|
|
2205
|
-
|
|
2206
|
-
return same_dir_tasks
|
|
2207
|
-
|
|
2208
|
-
def is_archive_required(self):
|
|
2209
|
-
"""Check if archive of this task is required. It is not required if a different
|
|
2210
|
-
task in the same command group submission with the same working directory begun
|
|
2211
|
-
its own archive after the commands of this command completed."""
|
|
2212
|
-
|
|
2213
|
-
if not self.end_time:
|
|
2214
|
-
msg = ('`Task.is_archive_required` should not be called unit the task has '
|
|
2215
|
-
'completed; {} has not completed.'.format(self))
|
|
2216
|
-
raise RuntimeError(msg)
|
|
2217
|
-
|
|
2218
|
-
for i in self.get_same_directory_tasks():
|
|
2219
|
-
print('Checking if other task {} archived started after this task '
|
|
2220
|
-
'({}) finished.'.format(i, self), flush=True)
|
|
2221
|
-
if i.archive_start_time:
|
|
2222
|
-
if i.archive_start_time > self.end_time:
|
|
2223
|
-
self.archived_task = i
|
|
2224
|
-
return False
|
|
2225
|
-
|
|
2226
|
-
return True
|
|
2227
|
-
|
|
2228
|
-
def get_variable_values(self):
|
|
2229
|
-
"""Get the values of variables that are resolved in this task's working
|
|
2230
|
-
directory.
|
|
2231
|
-
|
|
2232
|
-
Returns
|
|
2233
|
-
-------
|
|
2234
|
-
var_vals : dict of (str: list of str)
|
|
2235
|
-
Keys are the variable definition name and values are list of variable
|
|
2236
|
-
values as strings.
|
|
2237
|
-
|
|
2238
|
-
"""
|
|
2239
|
-
|
|
2240
|
-
task_directory = self.get_working_directory()
|
|
2241
|
-
cg_sub = self.command_group_submission_iteration.command_group_submission
|
|
2242
|
-
sub_var_vals = cg_sub.submission.variable_values
|
|
2243
|
-
cmd_group_var_names = cg_sub.command_group.variable_names
|
|
2244
|
-
var_vals = {}
|
|
2245
|
-
|
|
2246
|
-
print('Task.get_variable_values: sub_var_vals:', flush=True)
|
|
2247
|
-
pprint(sub_var_vals)
|
|
2248
|
-
|
|
2249
|
-
print('Task.get_variable_values: cmd_group_var_names:', flush=True)
|
|
2250
|
-
pprint(cmd_group_var_names)
|
|
2251
|
-
|
|
2252
|
-
for i in sub_var_vals:
|
|
2253
|
-
if i.directory_value == task_directory:
|
|
2254
|
-
var_defn_name = i.variable_definition.name
|
|
2255
|
-
if var_defn_name in cmd_group_var_names:
|
|
2256
|
-
if var_defn_name in var_vals:
|
|
2257
|
-
var_vals[var_defn_name].append(i.value)
|
|
2258
|
-
else:
|
|
2259
|
-
var_vals.update({var_defn_name: [i.value]})
|
|
2260
|
-
|
|
2261
|
-
return var_vals
|
|
2262
|
-
|
|
2263
|
-
def get_variable_values_normed(self):
|
|
2264
|
-
"""Get the values of variables that are resolved in this task's working
|
|
2265
|
-
directory, where all variable values have the same, normalised multiplicity.
|
|
2266
|
-
|
|
2267
|
-
Returns
|
|
2268
|
-
-------
|
|
2269
|
-
var_vals_normed : dict of (str: list of str)
|
|
2270
|
-
Keys are the variable definition name and values are list of variable
|
|
2271
|
-
values as strings. The list of variable values is the same length for
|
|
2272
|
-
each variable definition name.
|
|
2273
|
-
|
|
2274
|
-
"""
|
|
2275
|
-
|
|
2276
|
-
var_vals = self.get_variable_values()
|
|
2277
|
-
if not var_vals:
|
|
2278
|
-
return {}
|
|
2279
|
-
|
|
2280
|
-
only_names, only_vals = zip(*var_vals.items())
|
|
2281
|
-
only_vals_uniform = coerce_same_length(list(only_vals))
|
|
2282
|
-
|
|
2283
|
-
cg_sub = self.command_group_submission_iteration.command_group_submission
|
|
2284
|
-
if cg_sub.command_group.is_job_array:
|
|
2285
|
-
val_idx = self.order_id % len(only_vals_uniform[0])
|
|
2286
|
-
only_vals_uniform = [[i[val_idx]] for i in only_vals_uniform]
|
|
2287
|
-
|
|
2288
|
-
var_vals_normed = dict(zip(only_names, only_vals_uniform))
|
|
2289
|
-
|
|
2290
|
-
return var_vals_normed
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
class Iteration(Base):
|
|
2294
|
-
"""Class to represent a workflow iteration."""
|
|
2295
|
-
|
|
2296
|
-
__tablename__ = 'iteration'
|
|
2297
|
-
|
|
2298
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
2299
|
-
workflow_id = Column(Integer, ForeignKey('workflow.id'))
|
|
2300
|
-
order_id = Column(Integer)
|
|
2301
|
-
status = Column(Enum(IterationStatus), default=IterationStatus('pending'))
|
|
2302
|
-
|
|
2303
|
-
workflow = relationship('Workflow', back_populates='iterations', uselist=False)
|
|
2304
|
-
command_group_submission_iterations = relationship(
|
|
2305
|
-
'CommandGroupSubmissionIteration',
|
|
2306
|
-
back_populates='iteration',
|
|
2307
|
-
)
|
|
2308
|
-
|
|
2309
|
-
def __init__(self, order_id):
|
|
2310
|
-
self.order_id = order_id
|
|
2311
|
-
|
|
2312
|
-
def __repr__(self):
|
|
2313
|
-
out = (
|
|
2314
|
-
'{}('
|
|
2315
|
-
'id={}, '
|
|
2316
|
-
'workflow_id={}, '
|
|
2317
|
-
'order_id={}'
|
|
2318
|
-
')'
|
|
2319
|
-
).format(
|
|
2320
|
-
self.__class__.__name__,
|
|
2321
|
-
self.id_,
|
|
2322
|
-
self.workflow_id,
|
|
2323
|
-
self.order_id,
|
|
2324
|
-
)
|
|
2325
|
-
return out
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
class CommandGroupSubmissionIteration(Base):
|
|
2329
|
-
|
|
2330
|
-
__tablename__ = 'command_group_submission_iteration'
|
|
2331
|
-
|
|
2332
|
-
id_ = Column('id', Integer, primary_key=True)
|
|
2333
|
-
working_dirs_written = Column(Boolean, default=False)
|
|
2334
|
-
iteration_id = Column(Integer, ForeignKey('iteration.id'))
|
|
2335
|
-
scheduler_job_id = Column(Integer, nullable=True)
|
|
2336
|
-
scheduler_stats_job_id = Column(Integer, nullable=True)
|
|
2337
|
-
command_group_submission_id = Column(
|
|
2338
|
-
Integer, ForeignKey('command_group_submission.id'))
|
|
2339
|
-
|
|
2340
|
-
iteration = relationship(
|
|
2341
|
-
'Iteration',
|
|
2342
|
-
back_populates='command_group_submission_iterations',
|
|
2343
|
-
uselist=False,
|
|
2344
|
-
)
|
|
2345
|
-
command_group_submission = relationship(
|
|
2346
|
-
'CommandGroupSubmission',
|
|
2347
|
-
back_populates='command_group_submission_iterations',
|
|
2348
|
-
)
|
|
2349
|
-
tasks = relationship('Task', back_populates='command_group_submission_iteration')
|
|
2350
|
-
|
|
2351
|
-
def __init__(self, iteration, command_group_submission):
|
|
2352
|
-
self.iteration = iteration
|
|
2353
|
-
self.command_group_submission = command_group_submission
|
|
2354
|
-
|
|
2355
|
-
def __repr__(self):
|
|
2356
|
-
out = (
|
|
2357
|
-
'{}('
|
|
2358
|
-
'iteration_id={}, '
|
|
2359
|
-
'command_group_submission_id={}, '
|
|
2360
|
-
'scheduler_job_id={}, '
|
|
2361
|
-
'scheduler_stats_job_id={}'
|
|
2362
|
-
')'
|
|
2363
|
-
).format(
|
|
2364
|
-
self.__class__.__name__,
|
|
2365
|
-
self.iteration_id,
|
|
2366
|
-
self.command_group_submission_id,
|
|
2367
|
-
self.scheduler_job_id,
|
|
2368
|
-
self.scheduler_stats_job_id,
|
|
2369
|
-
)
|
|
2370
|
-
return out
|
|
2371
|
-
|
|
2372
|
-
def get_directory_values(self):
|
|
2373
|
-
|
|
2374
|
-
dir_vals = [i.value for i in self.get_directories()]
|
|
2375
|
-
return dir_vals
|
|
2376
|
-
|
|
2377
|
-
def get_directories(self):
|
|
2378
|
-
"""Get the directory variable values associated with this command group
|
|
2379
|
-
submission and iteration."""
|
|
2380
|
-
|
|
2381
|
-
dir_vars_all = self.command_group_submission.command_group.directory_variable.variable_values
|
|
2382
|
-
# Get only those with correct submission and iteration
|
|
2383
|
-
|
|
2384
|
-
dirs = []
|
|
2385
|
-
for i in dir_vars_all:
|
|
2386
|
-
if i.iteration == self.iteration:
|
|
2387
|
-
if i.submission == self.command_group_submission.submission:
|
|
2388
|
-
dirs.append(i)
|
|
2389
|
-
|
|
2390
|
-
# dirs = [i for idx, i in enumerate(dirs) if idx in self.task_range_idx]
|
|
2391
|
-
|
|
2392
|
-
return dirs
|
|
2393
|
-
|
|
2394
|
-
@property
|
|
2395
|
-
def num_directories(self):
|
|
2396
|
-
return len(self.get_directories())
|
|
2397
|
-
|
|
2398
|
-
-    def get_task_multiplicity(self):
-        """Get the number of tasks associated with this command group submission."""
-
-        # TODO: move get_task_multiplicity to CommandGroupSubmissionIteration !
-
-        dirs = self.get_directory_values()
-
-        sub = self.command_group_submission.submission
-
-        var_lengths = {}
-        for directory in dirs:
-            var_lengths.update({directory: {}})
-            for i in self.command_group_submission.command_group.variable_definitions:
-                var_lengths_i = i.get_multiplicity(sub)  # as a func of dir
-                for var_dir, num in var_lengths_i.items():
-                    if var_dir == directory:
-                        var_lengths[directory].update({i.name: num})
-
-        var_lengths_combined = {}
-        for directory, var_nums in var_lengths.items():
-            if var_nums:
-                uniq_lens = set(var_nums.values())
-                num_uniq_lens = len(uniq_lens)
-                if num_uniq_lens == 1:
-                    combined_len = min(uniq_lens)
-                elif num_uniq_lens == 2:
-                    if min(uniq_lens) != 1:
-                        raise ValueError('bad 4!')
-                    combined_len = max(uniq_lens)
-                else:
-                    raise ValueError('bad 5!')
-            else:
-                combined_len = 1
-
-            var_lengths_combined.update({directory: combined_len})
-
-        return var_lengths_combined
-
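The removed `get_task_multiplicity` resolves, for each working directory, how many tasks the command group expands to: every variable defined for the group must supply either the same number of values or a single value, which is broadcast across the others. A minimal standalone sketch of that combination rule follows; the function and argument names are illustrative, not part of the hpcflow API.

```python
# Illustrative sketch only (not hpcflow API): the combination rule used by the
# removed get_task_multiplicity for a single working directory.
def combine_multiplicities(var_nums):
    """Map of variable name -> number of resolved values, for one directory."""
    if not var_nums:
        return 1
    uniq_lens = set(var_nums.values())
    if len(uniq_lens) == 1:
        return uniq_lens.pop()
    if len(uniq_lens) == 2 and min(uniq_lens) == 1:
        return max(uniq_lens)  # length-1 variables broadcast to the longer length
    raise ValueError('Incompatible variable multiplicities: {}'.format(var_nums))


print(combine_multiplicities({'infile': 3, 'label': 1}))  # -> 3
```

A mix such as `{'infile': 3, 'label': 2}` is rejected, which mirrors the `ValueError` branches above.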
-    @property
-    def num_outputs(self):
-        """Get the number of outputs for this command group submission."""
-        iteration = self.command_group_submission.submission.workflow.first_iteration
-        return self.command_group_submission.scheduler_group.get_num_outputs(iteration)[
-            self.command_group_submission.scheduler_group_index[1]]
-
-    @property
-    def step_size(self):
-        """Get the scheduler step size for this command group submission."""
-        iteration = self.command_group_submission.submission.workflow.first_iteration
-        return self.command_group_submission.scheduler_group.get_step_size(iteration)[
-            self.command_group_submission.scheduler_group_index[1]]
-
-    @property
-    def num_tasks(self):
-        return len(self.tasks)
-
-    def write_working_directories(self, project):
-        """Replace lines in the working_dirs files with actual directory paths."""
-
-        dir_vals = self.get_directories()
-
-        cg_sub = self.command_group_submission
-
-        wk_dirs_path = project.hf_dir.joinpath(
-            'workflow_{}'.format(cg_sub.submission.workflow.id_),
-            'submit_{}'.format(cg_sub.submission.order_id),
-            'iter_{}'.format(self.iteration.order_id),
-            'working_dirs_{}{}'.format(
-                cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')),
-        )
-
-        with wk_dirs_path.open() as handle:
-            file_lns = handle.readlines()
-
-        for idx, i in enumerate(file_lns):
-            new_val = i.strip()
-            if 'REPLACE_WITH_DIR_' in i:
-                dir_idx = int(i.split('REPLACE_WITH_DIR_')[1])
-                new_val = dir_vals[dir_idx].value
-            file_lns[idx] = new_val
-
-        with wk_dirs_path.open('w') as handle:
-            for i in file_lns:
-                handle.write(i + '\n')
-
-
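The removed `write_working_directories` rewrites the per-iteration `working_dirs_*` file, replacing each `REPLACE_WITH_DIR_<n>` placeholder with the n-th resolved directory value and leaving other lines in place. A file-free sketch of that substitution step, with an illustrative helper name that is not part of the hpcflow API:

```python
# Illustrative sketch only (not hpcflow API): the placeholder substitution that
# the removed write_working_directories applies to the lines of a
# working_dirs_* file.
def substitute_working_dirs(lines, dir_values):
    out = []
    for line in lines:
        new_val = line.strip()
        if 'REPLACE_WITH_DIR_' in line:
            # 'REPLACE_WITH_DIR_2' -> index 2 into the resolved directory values
            dir_idx = int(line.split('REPLACE_WITH_DIR_')[1])
            new_val = dir_values[dir_idx]
        out.append(new_val)
    return out


print(substitute_working_dirs(['REPLACE_WITH_DIR_0\n', 'fixed/path\n'], ['run_0']))
# -> ['run_0', 'fixed/path']
```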
-class SchedulerGroup(object):
-    """Class to represent a collection of consecutive command group submissions that have
-    the same scheduler task range."""
-
-    def __init__(self, order_id, command_groups_submissions):
-
-        self.order_id = order_id
-        self.command_group_submissions = command_groups_submissions
-
-    def __repr__(self):
-        out = ('{}('
-               'order_id={}, '
-               'command_group_submissions={}, '
-               ')').format(
-            self.__class__.__name__,
-            self.order_id,
-            self.command_group_submissions,
-        )
-        return out
-
-    def get_max_num_tasks(self, iteration):
-        return max(self.get_num_outputs(iteration))
-
-    def get_step_size(self, iteration):
-        return [int(self.get_max_num_tasks(iteration) / i)
-                for i in self.get_num_outputs(iteration)]
-
-    def get_num_outputs(self, iteration):
-
-        num_outs = 1
-        num_outs_prev = num_outs
-        num_outs_all = []
-
-        # Get num_outputs for all previous cg subs in this scheduler group
-        for idx, cg_sub in enumerate(self.command_group_submissions):
-
-            # print('SchedulerGroup.get_num_outputs: cg_sub idx: {}'.format(idx), flush=True)
-
-            # print('SchedulerGroup.get_num_outputs: cg_sub_iters: ')
-            # pprint(cg_sub.command_group_submission_iterations)
-
-            cg_sub_iter = None
-            for i in cg_sub.command_group_submission_iterations:
-                if i.iteration == iteration:
-                    cg_sub_iter = i
-                    break
-            if not cg_sub_iter:
-                raise ValueError('Could not find CommandGroupSubmissionIteration object.')
-
-            # Number of outputs depend on task multiplicity, `is_job_array` and `nesting`
-            is_job_array = cg_sub.command_group.is_job_array
-            nesting = cg_sub.command_group.nesting
-
-            # print('SchedulerGroup.get_num_outputs: is_job_array: {}'.format(
-            #     is_job_array), flush=True)
-            # print('SchedulerGroup.get_num_outputs: nesting: {}'.format(nesting), flush=True)
-
-            if nesting == NestingType('nest'):  # or first_cmd_group:
-                num_outs = num_outs_prev
-            elif nesting == NestingType('hold'):
-                num_outs = 1
-            elif nesting is None:
-                num_outs = 1
-
-            if is_job_array:
-                # if nesting in [NestingType('hold'), None]:
-                #     num_outs *= cg_sub.num_directories
-                # print('SchedulerGroup._get_num_outputs: cg_sub.num_directories: {}'.format(
-                #     cg_sub.num_directories), flush=True)
-
-                # cg_sub.task_multiplicity is a dict of directory keys
-                num_outs *= sum(cg_sub_iter.get_task_multiplicity().values())
-
-            # print('SchedulerGroup.get_num_outputs: cg_sub_iter.task_multiplicity: {}'.format(
-            #     cg_sub_iter.get_task_multiplicity()), flush=True)
-
-            # print('SchedulerGroup.get_num_outputs: num_outs: {}'.format(num_outs), flush=True)
-
-            num_outs_all.append(num_outs)
-            num_outs_prev = num_outs
-
-        # print('SchedulerGroup.get_num_outputs: num_outs_all: {}'.format(
-        #     num_outs_all), flush=True)
-
-        return num_outs_all
-
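The removed `get_num_outputs` walks the command group submissions in a scheduler group and counts outputs per group: `nest` carries the previous group's count forward, `hold` (or no nesting) resets it to one, and a job-array group multiplies the count by its summed task multiplicity; `get_step_size` then divides the group's maximum task count by each of these values. A simplified standalone sketch of that counting rule, where the tuple layout and names are illustrative rather than hpcflow API:

```python
# Illustrative sketch only (not hpcflow API): per-command-group output counting
# as in the removed SchedulerGroup.get_num_outputs. Each entry of
# `command_groups` is (nesting, is_job_array, task_multiplicity).
def count_outputs(command_groups):
    num_outs_prev = 1
    num_outs_all = []
    for nesting, is_job_array, task_multiplicity in command_groups:
        # 'nest' carries the previous count forward; 'hold' or no nesting resets to 1
        num_outs = num_outs_prev if nesting == 'nest' else 1
        if is_job_array:
            # a job-array command group produces one output per task
            num_outs *= task_multiplicity
        num_outs_all.append(num_outs)
        num_outs_prev = num_outs
    return num_outs_all


print(count_outputs([('hold', True, 4), ('nest', False, 1), ('nest', True, 2)]))
# -> [4, 4, 8]; the step sizes would then be [max // n for n] = [2, 2, 1]
```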
-    def has(self, command_group_submission):
-        return command_group_submission in self.command_group_submissions
-
-    def index(self, command_group_submission):
-        if not self.has(command_group_submission):
-            msg = '{} is not in the scheduler group.'
-            raise ValueError(msg.format(command_group_submission))
-        return self.command_group_submissions.index(command_group_submission)
-
-    @classmethod
-    def get_scheduler_groups(cls, submission):
-        """Split the command group submissions up into scheduler groups."""
-
-        cmd_groups_split = []
-        sch_group_idx = 0
-
-        for cg_sub in submission.command_group_submissions:
-
-            if cg_sub.command_group.nesting == NestingType('hold'):
-                sch_group_idx += 1
-            if len(cmd_groups_split) == sch_group_idx + 1:
-                cmd_groups_split[sch_group_idx].append(cg_sub)
-            else:
-                cmd_groups_split.append([cg_sub])
-
-        return [cls(idx, i) for idx, i in enumerate(cmd_groups_split)]
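The removed `get_scheduler_groups` partitions a submission's command group submissions into scheduler groups, starting a new group whenever a command group's nesting is `hold` and keeping consecutive non-`hold` groups together. A simplified sketch of that partitioning rule on plain nesting values; the names are illustrative, not hpcflow API, and the sketch does not reproduce the original's index bookkeeping exactly.

```python
# Illustrative sketch only (not hpcflow API): partition command groups into
# scheduler groups, opening a new group at every 'hold' nesting.
def split_scheduler_groups(nestings):
    groups = []
    for idx, nesting in enumerate(nestings):
        if nesting == 'hold' or not groups:
            groups.append([idx])
        else:
            groups[-1].append(idx)
    return groups


print(split_scheduler_groups(['nest', 'nest', 'hold', 'nest']))  # -> [[0, 1], [2, 3]]
```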