hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
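
The listing below can be reproduced locally by comparing the archive members of the two wheels. What follows is a minimal sketch of such a comparison, not the tool that generated this page; the `wheels/` directory and the choice of `hpcflow/_version.py` as the example member are illustrative only:

# First download the two wheels, e.g.:
#   pip download hpcflow==0.1.15 --no-deps -d wheels/
#   pip download hpcflow==0.2.0a271 --no-deps -d wheels/
import difflib
import zipfile

OLD = "wheels/hpcflow-0.1.15-py3-none-any.whl"
NEW = "wheels/hpcflow-0.2.0a271-py3-none-any.whl"

def members(path):
    # A wheel is a zip archive; collect its member file names.
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())

old_m, new_m = members(OLD), members(NEW)
print("added:", len(new_m - old_m), "removed:", len(old_m - new_m))

def read_lines(path, member):
    # Read one member as text lines, as difflib expects.
    with zipfile.ZipFile(path) as zf:
        return zf.read(member).decode("utf-8").splitlines(keepends=True)

# Unified diff of a file present in both wheels:
member = "hpcflow/_version.py"
print("".join(difflib.unified_diff(
    read_lines(OLD, member), read_lines(NEW, member),
    fromfile="0.1.15:" + member, tofile="0.2.0a271:" + member,
)))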
Files changed (275)
  1. hpcflow/__init__.py +2 -11
  2. hpcflow/__pyinstaller/__init__.py +5 -0
  3. hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
  4. hpcflow/_version.py +1 -1
  5. hpcflow/app.py +43 -0
  6. hpcflow/cli.py +2 -461
  7. hpcflow/data/demo_data_manifest/__init__.py +3 -0
  8. hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
  9. hpcflow/data/jinja_templates/test/test_template.txt +8 -0
  10. hpcflow/data/programs/hello_world/README.md +1 -0
  11. hpcflow/data/programs/hello_world/hello_world.c +87 -0
  12. hpcflow/data/programs/hello_world/linux/hello_world +0 -0
  13. hpcflow/data/programs/hello_world/macos/hello_world +0 -0
  14. hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
  15. hpcflow/data/scripts/__init__.py +1 -0
  16. hpcflow/data/scripts/bad_script.py +2 -0
  17. hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
  18. hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
  19. hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
  20. hpcflow/data/scripts/do_nothing.py +2 -0
  21. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  22. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  23. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  24. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  25. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  26. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  27. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  28. hpcflow/data/scripts/generate_t1_file_01.py +7 -0
  29. hpcflow/data/scripts/import_future_script.py +7 -0
  30. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  31. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  32. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  33. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  34. hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
  35. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  36. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  37. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  38. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  39. hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
  40. hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
  41. hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
  42. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  43. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  44. hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
  45. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
  46. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  47. hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
  48. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
  49. hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
  50. hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
  51. hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
  52. hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
  53. hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
  54. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  55. hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
  56. hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
  57. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  58. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  59. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  60. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  61. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  62. hpcflow/data/scripts/parse_t1_file_01.py +4 -0
  63. hpcflow/data/scripts/script_exit_test.py +5 -0
  64. hpcflow/data/template_components/__init__.py +1 -0
  65. hpcflow/data/template_components/command_files.yaml +26 -0
  66. hpcflow/data/template_components/environments.yaml +13 -0
  67. hpcflow/data/template_components/parameters.yaml +14 -0
  68. hpcflow/data/template_components/task_schemas.yaml +139 -0
  69. hpcflow/data/workflows/workflow_1.yaml +5 -0
  70. hpcflow/examples.ipynb +1037 -0
  71. hpcflow/sdk/__init__.py +149 -0
  72. hpcflow/sdk/app.py +4266 -0
  73. hpcflow/sdk/cli.py +1479 -0
  74. hpcflow/sdk/cli_common.py +385 -0
  75. hpcflow/sdk/config/__init__.py +5 -0
  76. hpcflow/sdk/config/callbacks.py +246 -0
  77. hpcflow/sdk/config/cli.py +388 -0
  78. hpcflow/sdk/config/config.py +1410 -0
  79. hpcflow/sdk/config/config_file.py +501 -0
  80. hpcflow/sdk/config/errors.py +272 -0
  81. hpcflow/sdk/config/types.py +150 -0
  82. hpcflow/sdk/core/__init__.py +38 -0
  83. hpcflow/sdk/core/actions.py +3857 -0
  84. hpcflow/sdk/core/app_aware.py +25 -0
  85. hpcflow/sdk/core/cache.py +224 -0
  86. hpcflow/sdk/core/command_files.py +814 -0
  87. hpcflow/sdk/core/commands.py +424 -0
  88. hpcflow/sdk/core/element.py +2071 -0
  89. hpcflow/sdk/core/enums.py +221 -0
  90. hpcflow/sdk/core/environment.py +256 -0
  91. hpcflow/sdk/core/errors.py +1043 -0
  92. hpcflow/sdk/core/execute.py +207 -0
  93. hpcflow/sdk/core/json_like.py +809 -0
  94. hpcflow/sdk/core/loop.py +1320 -0
  95. hpcflow/sdk/core/loop_cache.py +282 -0
  96. hpcflow/sdk/core/object_list.py +933 -0
  97. hpcflow/sdk/core/parameters.py +3371 -0
  98. hpcflow/sdk/core/rule.py +196 -0
  99. hpcflow/sdk/core/run_dir_files.py +57 -0
  100. hpcflow/sdk/core/skip_reason.py +7 -0
  101. hpcflow/sdk/core/task.py +3792 -0
  102. hpcflow/sdk/core/task_schema.py +993 -0
  103. hpcflow/sdk/core/test_utils.py +538 -0
  104. hpcflow/sdk/core/types.py +447 -0
  105. hpcflow/sdk/core/utils.py +1207 -0
  106. hpcflow/sdk/core/validation.py +87 -0
  107. hpcflow/sdk/core/values.py +477 -0
  108. hpcflow/sdk/core/workflow.py +4820 -0
  109. hpcflow/sdk/core/zarr_io.py +206 -0
  110. hpcflow/sdk/data/__init__.py +13 -0
  111. hpcflow/sdk/data/config_file_schema.yaml +34 -0
  112. hpcflow/sdk/data/config_schema.yaml +260 -0
  113. hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
  114. hpcflow/sdk/data/files_spec_schema.yaml +5 -0
  115. hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
  116. hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
  117. hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
  118. hpcflow/sdk/demo/__init__.py +3 -0
  119. hpcflow/sdk/demo/cli.py +242 -0
  120. hpcflow/sdk/helper/__init__.py +3 -0
  121. hpcflow/sdk/helper/cli.py +137 -0
  122. hpcflow/sdk/helper/helper.py +300 -0
  123. hpcflow/sdk/helper/watcher.py +192 -0
  124. hpcflow/sdk/log.py +288 -0
  125. hpcflow/sdk/persistence/__init__.py +18 -0
  126. hpcflow/sdk/persistence/base.py +2817 -0
  127. hpcflow/sdk/persistence/defaults.py +6 -0
  128. hpcflow/sdk/persistence/discovery.py +39 -0
  129. hpcflow/sdk/persistence/json.py +954 -0
  130. hpcflow/sdk/persistence/pending.py +948 -0
  131. hpcflow/sdk/persistence/store_resource.py +203 -0
  132. hpcflow/sdk/persistence/types.py +309 -0
  133. hpcflow/sdk/persistence/utils.py +73 -0
  134. hpcflow/sdk/persistence/zarr.py +2388 -0
  135. hpcflow/sdk/runtime.py +320 -0
  136. hpcflow/sdk/submission/__init__.py +3 -0
  137. hpcflow/sdk/submission/enums.py +70 -0
  138. hpcflow/sdk/submission/jobscript.py +2379 -0
  139. hpcflow/sdk/submission/schedulers/__init__.py +281 -0
  140. hpcflow/sdk/submission/schedulers/direct.py +233 -0
  141. hpcflow/sdk/submission/schedulers/sge.py +376 -0
  142. hpcflow/sdk/submission/schedulers/slurm.py +598 -0
  143. hpcflow/sdk/submission/schedulers/utils.py +25 -0
  144. hpcflow/sdk/submission/shells/__init__.py +52 -0
  145. hpcflow/sdk/submission/shells/base.py +229 -0
  146. hpcflow/sdk/submission/shells/bash.py +504 -0
  147. hpcflow/sdk/submission/shells/os_version.py +115 -0
  148. hpcflow/sdk/submission/shells/powershell.py +352 -0
  149. hpcflow/sdk/submission/submission.py +1402 -0
  150. hpcflow/sdk/submission/types.py +140 -0
  151. hpcflow/sdk/typing.py +194 -0
  152. hpcflow/sdk/utils/arrays.py +69 -0
  153. hpcflow/sdk/utils/deferred_file.py +55 -0
  154. hpcflow/sdk/utils/hashing.py +16 -0
  155. hpcflow/sdk/utils/patches.py +31 -0
  156. hpcflow/sdk/utils/strings.py +69 -0
  157. hpcflow/tests/api/test_api.py +32 -0
  158. hpcflow/tests/conftest.py +123 -0
  159. hpcflow/tests/data/__init__.py +0 -0
  160. hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
  161. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  162. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  163. hpcflow/tests/data/workflow_1.json +10 -0
  164. hpcflow/tests/data/workflow_1.yaml +5 -0
  165. hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
  166. hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
  167. hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
  168. hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
  169. hpcflow/tests/programs/test_programs.py +180 -0
  170. hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
  171. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  172. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
  173. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  174. hpcflow/tests/scripts/test_main_scripts.py +1361 -0
  175. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  176. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  177. hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
  178. hpcflow/tests/unit/test_action.py +1066 -0
  179. hpcflow/tests/unit/test_action_rule.py +24 -0
  180. hpcflow/tests/unit/test_app.py +132 -0
  181. hpcflow/tests/unit/test_cache.py +46 -0
  182. hpcflow/tests/unit/test_cli.py +172 -0
  183. hpcflow/tests/unit/test_command.py +377 -0
  184. hpcflow/tests/unit/test_config.py +195 -0
  185. hpcflow/tests/unit/test_config_file.py +162 -0
  186. hpcflow/tests/unit/test_element.py +666 -0
  187. hpcflow/tests/unit/test_element_iteration.py +88 -0
  188. hpcflow/tests/unit/test_element_set.py +158 -0
  189. hpcflow/tests/unit/test_group.py +115 -0
  190. hpcflow/tests/unit/test_input_source.py +1479 -0
  191. hpcflow/tests/unit/test_input_value.py +398 -0
  192. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  193. hpcflow/tests/unit/test_json_like.py +1247 -0
  194. hpcflow/tests/unit/test_loop.py +2674 -0
  195. hpcflow/tests/unit/test_meta_task.py +325 -0
  196. hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
  197. hpcflow/tests/unit/test_object_list.py +116 -0
  198. hpcflow/tests/unit/test_parameter.py +243 -0
  199. hpcflow/tests/unit/test_persistence.py +664 -0
  200. hpcflow/tests/unit/test_resources.py +243 -0
  201. hpcflow/tests/unit/test_run.py +286 -0
  202. hpcflow/tests/unit/test_run_directories.py +29 -0
  203. hpcflow/tests/unit/test_runtime.py +9 -0
  204. hpcflow/tests/unit/test_schema_input.py +372 -0
  205. hpcflow/tests/unit/test_shell.py +129 -0
  206. hpcflow/tests/unit/test_slurm.py +39 -0
  207. hpcflow/tests/unit/test_submission.py +502 -0
  208. hpcflow/tests/unit/test_task.py +2560 -0
  209. hpcflow/tests/unit/test_task_schema.py +182 -0
  210. hpcflow/tests/unit/test_utils.py +616 -0
  211. hpcflow/tests/unit/test_value_sequence.py +549 -0
  212. hpcflow/tests/unit/test_values.py +91 -0
  213. hpcflow/tests/unit/test_workflow.py +827 -0
  214. hpcflow/tests/unit/test_workflow_template.py +186 -0
  215. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  216. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  217. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  218. hpcflow/tests/unit/utils/test_patches.py +5 -0
  219. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  220. hpcflow/tests/unit/utils/test_strings.py +97 -0
  221. hpcflow/tests/workflows/__init__.py +0 -0
  222. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  223. hpcflow/tests/workflows/test_jobscript.py +355 -0
  224. hpcflow/tests/workflows/test_run_status.py +198 -0
  225. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  226. hpcflow/tests/workflows/test_submission.py +140 -0
  227. hpcflow/tests/workflows/test_workflows.py +564 -0
  228. hpcflow/tests/workflows/test_zip.py +18 -0
  229. hpcflow/viz_demo.ipynb +6794 -0
  230. hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
  231. hpcflow-0.2.0a271.dist-info/METADATA +65 -0
  232. hpcflow-0.2.0a271.dist-info/RECORD +237 -0
  233. {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
  234. hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
  235. hpcflow/api.py +0 -490
  236. hpcflow/archive/archive.py +0 -307
  237. hpcflow/archive/cloud/cloud.py +0 -45
  238. hpcflow/archive/cloud/errors.py +0 -9
  239. hpcflow/archive/cloud/providers/dropbox.py +0 -427
  240. hpcflow/archive/errors.py +0 -5
  241. hpcflow/base_db.py +0 -4
  242. hpcflow/config.py +0 -233
  243. hpcflow/copytree.py +0 -66
  244. hpcflow/data/examples/_config.yml +0 -14
  245. hpcflow/data/examples/damask/demo/1.run.yml +0 -4
  246. hpcflow/data/examples/damask/demo/2.process.yml +0 -29
  247. hpcflow/data/examples/damask/demo/geom.geom +0 -2052
  248. hpcflow/data/examples/damask/demo/load.load +0 -1
  249. hpcflow/data/examples/damask/demo/material.config +0 -185
  250. hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
  251. hpcflow/data/examples/damask/inputs/load.load +0 -1
  252. hpcflow/data/examples/damask/inputs/material.config +0 -185
  253. hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
  254. hpcflow/data/examples/damask/profiles/damask.yml +0 -4
  255. hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
  256. hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
  257. hpcflow/data/examples/damask/profiles/default.yml +0 -6
  258. hpcflow/data/examples/thinking.yml +0 -177
  259. hpcflow/errors.py +0 -2
  260. hpcflow/init_db.py +0 -37
  261. hpcflow/models.py +0 -2595
  262. hpcflow/nesting.py +0 -9
  263. hpcflow/profiles.py +0 -455
  264. hpcflow/project.py +0 -81
  265. hpcflow/scheduler.py +0 -322
  266. hpcflow/utils.py +0 -103
  267. hpcflow/validation.py +0 -166
  268. hpcflow/variables.py +0 -543
  269. hpcflow-0.1.15.dist-info/METADATA +0 -168
  270. hpcflow-0.1.15.dist-info/RECORD +0 -45
  271. hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
  272. hpcflow-0.1.15.dist-info/top_level.txt +0 -1
  273. /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
  274. /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
  275. /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
hpcflow/models.py DELETED
@@ -1,2595 +0,0 @@
1
- """`hpcflow.models.py`"""
2
-
3
-
4
- import re
5
- import os
6
- import enum
7
- from datetime import datetime
8
- from math import ceil, floor
9
- from pathlib import Path
10
- from pprint import pprint
11
- from subprocess import run, PIPE
12
- from time import sleep
13
-
14
- from sqlalchemy import (Column, Integer, DateTime, JSON, ForeignKey, Boolean,
15
- Enum, String, select, Float)
16
- from sqlalchemy.orm import relationship, deferred, Session, reconstructor
17
- from sqlalchemy.exc import IntegrityError, OperationalError
18
-
19
- from hpcflow.config import Config as CONFIG
20
- from hpcflow._version import __version__
21
- from hpcflow.archive.archive import Archive, TaskArchiveStatus
22
- from hpcflow.base_db import Base
23
- from hpcflow.archive.cloud.cloud import CloudProvider
24
- from hpcflow.nesting import NestingType
25
- from hpcflow.scheduler import SunGridEngine
26
- from hpcflow.utils import coerce_same_length, zeropad, format_time_delta, get_random_hex, datetime_to_dict, timedelta_to_dict
27
- from hpcflow.validation import validate_task_multiplicity
28
- from hpcflow.variables import (
29
- select_cmd_group_var_names, select_cmd_group_var_definitions,
30
- extract_variable_names, resolve_variable_values, UnresolvedVariableError
31
- )
32
-
33
- SCHEDULER_MAP = {
34
- 'sge': SunGridEngine,
35
- }
36
-
37
-
38
- class IterationStatus(enum.Enum):
39
-
40
- pending = 'pending'
41
- active = 'active'
42
- complete = 'complete'
43
-
44
-
45
- class Workflow(Base):
46
- """Class to represent a Workflow."""
47
-
48
- __tablename__ = 'workflow'
49
-
50
- id_ = Column('id', Integer, primary_key=True)
51
- create_time = Column(DateTime)
52
- pre_commands = Column(JSON)
53
- _directory = Column('directory', String(255))
54
- root_archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
55
- root_archive_excludes = Column(JSON, nullable=True)
56
- root_archive_directory = Column(String(255), nullable=True)
57
- _profile_files = Column('profile_files', JSON, nullable=True)
58
- loop = Column(JSON)
59
- parallel_modes = Column(JSON, nullable=True)
60
-
61
- command_groups = relationship(
62
- 'CommandGroup',
63
- back_populates='workflow',
64
- order_by='CommandGroup.exec_order',
65
- )
66
- submissions = relationship('Submission', back_populates='workflow')
67
- variable_definitions = relationship('VarDefinition', back_populates='workflow')
68
- root_archive = relationship('Archive', back_populates='workflow', uselist=False)
69
- iterations = relationship(
70
- 'Iteration', back_populates='workflow', order_by='Iteration.order_id')
71
-
72
- def __init__(self, directory, command_groups, var_definitions=None,
73
- pre_commands=None, archives=None, root_archive_idx=None,
74
- root_archive_excludes=None, profile_files=None, loop=None,
75
- parallel_modes=None):
76
- """Method to initialise a new Workflow.
77
-
78
- Parameters
79
- ----------
80
- directory : str or Path
81
- Directory in which the Workflow resides.
82
- command_groups : list of dict
83
- List of dictionaries that each represent a command group.
84
- var_definitions : dict, optional
85
- Dictionary whose keys are variable names and values are
86
- dictionaries that define variable definitions. By default, set to
87
- `None`, in which case it is assumed there are no variable
88
- references in any of the command groups.
89
- pre_commands : list of str
90
- List of commands to execute on creation of the Workflow.
91
- archives : list of dict
92
- List of dicts representing archive locations. Each dict in
93
- `command_groups` may contain keys `archive_idx` (which is an
94
- index into `archives`) and `archive_excludes` (which is a list
95
- of glob patterns to ignore when archiving). Each item in `archives`
96
- contains the following keys:
97
- name : str
98
- host : str
99
- path : str
100
- root_archive_idx : int
101
- Index into `archives` that sets the root archive for the workflow.
102
- root_archive_excludes : list of str
103
- File patterns to exclude from the root archive.
104
- profile_files : list of Path, optional
105
- If specified, the list of absolute file paths to the profile files used to
106
- generate this workflow.
107
- loop : dict, optional
108
- If specified, keys are:
109
- max_iterations : int
110
- Maximum number of loop iterations to submit.
111
- groups : list of int, optional
112
- Which command groups to include in iterations beyond the first. If not
113
- specified, all command groups are included in the loop.
114
- parallel_modes : dict, optional
115
- If specified, (case-insensitive) keys are one or more of: 'MPI', 'OpenMP'.
116
- Each is a dict with allowed keys:
117
- env : list of str
118
- Environment set up required for a given parallel mode.
119
- command : str
120
- Command to prepend to any command group commands that use this
121
- parallel mode.
122
-
123
- """
124
-
125
- if loop is None:
126
- loop = {
127
- 'max_iterations': 1,
128
- 'groups': [],
129
- }
130
-
131
- # Command group directories must be stored internally as variables:
132
- for idx, i in enumerate(command_groups):
133
-
134
- dir_var_value = '.'
135
-
136
- if 'directory' in i:
137
-
138
- var_names = extract_variable_names(
139
- i['directory'], CONFIG.get('variable_delimiters'))
140
- if len(var_names) > 1:
141
- raise NotImplementedError()
142
- elif not var_names:
143
- # Value is set but is not a variable
144
- dir_var_value = i['directory'] or dir_var_value
145
- else:
146
- # Value is already a variable; no action.
147
- continue
148
-
149
- dir_var_defn_name = CONFIG.get('default_cmd_group_dir_var_name')
150
-
151
- command_groups[idx]['directory'] = '{1:}{0:}{2:}'.format(
152
- dir_var_defn_name,
153
- *CONFIG.get('variable_delimiters')
154
- )
155
-
156
- # Add new variable definition:
157
- var_definitions.update({
158
- dir_var_defn_name: {
159
- 'value': dir_var_value,
160
- }
161
- })
162
-
163
- self._directory = str(directory)
164
- self.profile_files = [i.relative_to(self.directory) for i in profile_files]
165
- self.create_time = datetime.now()
166
- self.pre_commands = pre_commands
167
- self.variable_definitions = [
168
- VarDefinition(name=k, **v) for k, v in var_definitions.items()
169
- ]
170
-
171
- # Generate Archive objects:
172
- archive_objs = []
173
- archive_dir_names = []
174
- if archives:
175
- for i in archives:
176
- arch_i = Archive(**i)
177
- archive_objs.append(arch_i)
178
- archive_dir_names.append(arch_i.get_archive_dir(self))
179
-
180
- if root_archive_idx is not None:
181
- self.root_archive = archive_objs[root_archive_idx]
182
- self.root_archive_excludes = root_archive_excludes
183
- self.root_archive_directory = archive_dir_names[root_archive_idx]
184
-
185
- cmd_groups = []
186
- for i in command_groups:
187
-
188
- dir_var_name = extract_variable_names(
189
- i['directory'], CONFIG.get('variable_delimiters'))[0]
190
-
191
- dir_var_defn = [i for i in self.variable_definitions
192
- if i.name == dir_var_name][0]
193
-
194
- i.pop('directory')
195
- i.update({
196
- 'directory_var': dir_var_defn,
197
- })
198
- arch_idx = i.pop('archive_idx', None)
199
- if arch_idx is not None:
200
- i.update({
201
- 'archive': archive_objs[arch_idx],
202
- 'archive_directory': archive_dir_names[arch_idx],
203
- })
204
- cmd_groups.append(CommandGroup(**i))
205
-
206
- self.command_groups = cmd_groups
207
- self.parallel_modes = parallel_modes
208
-
209
- self.loop = loop
210
- for i in range(self.loop['max_iterations']):
211
- self.iterations.append(Iteration(i))
212
-
213
- self.validate(archive_objs)
214
- self._execute_pre_commands()
215
- self.do_root_archive()
216
-
217
- def __repr__(self):
218
- out = ('{}('
219
- 'id={}, '
220
- 'directory={}, '
221
- 'pre_commands={}, '
222
- 'root_archive_id={}, '
223
- 'loop={}'
224
- ')').format(
225
- self.__class__.__name__,
226
- self.id_,
227
- self.directory,
228
- self.pre_commands,
229
- self.root_archive_id,
230
- self.loop,
231
- )
232
-
233
- return out
234
-
235
- def get_variable_definition_by_name(self, variable_name):
236
- """Get the VarDefintion object using the variable name."""
237
-
238
- for i in self.variable_definitions:
239
- if i.name == variable_name:
240
- return i
241
-
242
- msg = ('Cannot find variable definition with '
243
- 'name "{}"'.format(variable_name))
244
- raise ValueError(msg)
245
-
246
- @property
247
- def first_iteration(self):
248
- return self.iterations[0]
249
-
250
- @property
251
- def profile_files(self):
252
- if self._profile_files:
253
- return [Path(i) for i in self._profile_files]
254
- else:
255
- return []
256
-
257
- @profile_files.setter
258
- def profile_files(self, profile_files):
259
- if profile_files:
260
- self._profile_files = [str(i) for i in profile_files]
261
-
262
- @property
263
- def has_alternate_scratch(self):
264
- return bool(self.all_alternate_scratch)
265
-
266
- @property
267
- def all_alternate_scratch(self):
268
- out = list(set([i.alternate_scratch for i in self.command_groups
269
- if i.alternate_scratch]))
270
- return out
271
-
272
- @property
273
- def directory(self):
274
- return Path(self._directory)
275
-
276
- def validate(self, archive_objs):
277
- cmd_group_list = []
278
- for i in self.command_groups:
279
- cmd_group_list.append({
280
- 'is_job_array': i.is_job_array,
281
- 'exec_order': i.exec_order,
282
- 'nesting': i.nesting,
283
- })
284
-
285
- err = '[Workflow instantiation error]'
286
- cmd_group_list = validate_task_multiplicity(cmd_group_list, err)
287
-
288
- for i_idx, i in enumerate(cmd_group_list):
289
- cmd_group = self.command_groups[i_idx]
290
- cmd_group.is_job_array = i['is_job_array']
291
- cmd_group.exec_order = i['exec_order']
292
- cmd_group.nesting = i['nesting']
293
-
294
- # If using an Archive with a cloud provider, check access:
295
- for i in archive_objs:
296
- if i.cloud_provider != CloudProvider.null:
297
- msg = f'Checking access to cloud storage ({i.name})...'
298
- print(msg, end='', flush=True)
299
- i.cloud_provider.check_access()
300
-
301
- def add_submission(self, project, task_range=None):
302
- """Add a new submission to this Workflow.
303
-
304
- Parameters
305
- ----------
306
- project : Project
307
- task_ranges : list, optional
308
- If specified, must be a list of length equal to the number of
309
- channels in the Workflow. Each list element specifies which tasks
310
- to submit from each Workflow channel. Each element may be either a
311
- list, a string "all", or `None`. If an element is a string "all",
312
- all tasks within the specified channel will be submitted. If an
313
- element is `None`, no tasks within the specified channel will be
314
- submitted. If an element is a list, it must have either two or
315
- three elements; if it has two elements, these signify the first and
316
- last tasks, inclusively, to submit from that channel. By default,
317
- the task step size is one, but this can be chosen as a third list
318
- entry. By default, set to `None`, in which case all tasks from all
319
- channels are included.
320
-
321
- Notes
322
- -----
323
- We are temporarily restricting the number of channels to 1, since
324
- supporting multiple channels requires some more technical work. This
325
- restriction is enforced in the `validation.validate_task_multiplicity`
326
- function.
327
-
328
- Examples
329
- --------
330
- Submit all tasks from all channels:
331
- >>> workflow.add_submission()
332
-
333
- Submit tasks 1, 2, 3, 4 and 5 from the first and only channel:
334
- >>> workflow.add_submission([[1, 5]])
335
-
336
- Submit tasks 1 and 3 from the first channel, and tasks 2, 3 and 4 from
337
- the second channel:
338
- >>> workflow.add_submission([[1, 4, 2], [2, 4]])
339
-
340
- Submit all tasks from the first channel, and tasks 2 and 7 from the
341
- second channel:
342
- >>> workflow.add_submission(['all', (2, 7, 5)])
343
-
344
- Submit all tasks from the first channel and no tasks from the second
345
- channel:
346
- >>> workflow.add_submission(['all', None])
347
-
348
-
349
- What to do:
350
- -----------
351
-
352
- 0. Firstly, resolve variable values for the first command group.
353
- 1. Need to identify which command groups must have their
354
- var_multiplicity resolved at submit time, and raise if it cannot
355
- be done. For `is_job_array=False` command groups, var_multiplicity
356
- does not need to be known at submit-time, since the number of
357
- output tasks will be known (either one [for `nesting=hold`], or
358
- equal to number of input tasks [for `nesting=None`]).
359
- 2. To do this, organise command groups into scheduler groups,
360
- which are delineated by command groups with `nesting=hold`.
361
- 3. For each scheduler group, go through the command groups in order
362
- and resolve the `var_multiplicity` if it is required. This is not
363
- the same as actually resolving the variable values. And we don't
364
- need to do that at submit-time, except for the very first command
365
- group! (Or rather, since submit-time and run-time coincide for
366
- the first command group, we have the *opportunity* to resolve
367
- variable values for the first command group; in general, variable
368
- values in a given command group may depend on the commands run in
369
- a previous command group, so this cannot be done.)
370
-
371
- """
372
-
373
- # print('Workflow.add_submission: task_range: {}'.format(task_range), flush=True)
374
-
375
- submission = Submission(self, task_range) # Generate CGSs and Tasks
376
- submission.write_submit_dirs(project.hf_dir)
377
- js_paths = submission.write_jobscripts(project.hf_dir)
378
- submission.submit_jobscripts(js_paths)
379
-
380
- return submission
381
-
382
- def get_num_channels(self, exec_order=0):
383
- """Get the number of command groups with a given execution order.
384
-
385
- Parameters
386
- ----------
387
- exec_order : int, optional
388
- The execution order at which to count command groups.
389
-
390
- Returns
391
- -------
392
- num_channels : int
393
- The number of command groups at the given execution order.
394
-
395
- """
396
-
397
- num_channels = 0
398
- for i in self.command_groups:
399
- if i.exec_order == exec_order:
400
- num_channels += 1
401
-
402
- return num_channels
403
-
404
- def _validate_task_ranges(self, task_ranges):
405
- """Validate task ranges.
406
-
407
- Parameters
408
- ----------
409
- task_ranges : list
410
-
411
- Returns
412
- -------
413
- task_ranges_valid : list
414
-
415
- """
416
-
417
- # Check length equal to num_channels:
418
- if len(task_ranges) != self.get_num_channels():
419
- msg = ('The number of task ranges specified must be equal to the '
420
- 'number of channels in the workflow, which is {}, but {} '
421
- 'task ranges were specified.')
422
- raise ValueError(msg.format(self.get_num_channels(),
423
- len(task_ranges)))
424
-
425
- task_range_msg = (
426
- 'Each task range must be specified as either a list with two or '
427
- 'three elements, representing the first and last task and '
428
- '(optionally) the step size, `None`, or the string "all".'
429
- )
430
-
431
- task_ranges_valid = []
432
- for i in task_ranges:
433
-
434
- # Validate:
435
- if isinstance(i, list):
436
- if len(i) not in [2, 3]:
437
- raise ValueError(task_range_msg)
438
- elif i not in ['all', None]:
439
- raise ValueError(task_range_msg)
440
-
441
- task_range_i = i
442
- if i == 'all':
443
- # Replace "all" with [n, m, s]
444
- task_range_i = [1, -1, 1]
445
-
446
- elif isinstance(i, list) and len(i) == 2:
447
- # Add step size of 1:
448
- task_range_i += [1]
449
-
450
- if task_range_i[1] != -1:
451
- # For known number of tasks, check m >= n >= 1:
452
- if task_range_i[0] < 1:
453
- msg = 'Starting task, `n`, must be >= 1.'
454
- raise ValueError(msg)
455
- if task_range_i[1] < task_range_i[0]:
456
- msg = 'Ending task, `m`, must be >= starting task, `n`.'
457
- raise ValueError(msg)
458
-
459
- task_ranges_valid.append(task_range_i)
460
-
461
- return task_ranges_valid
462
-
463
- def _execute_pre_commands(self):
464
-
465
- for i in self.pre_commands:
466
-
467
- proc = run(i, shell=True, stdout=PIPE, stderr=PIPE)
468
- pre_cmd_out = proc.stdout.decode()
469
- pre_cmd_err = proc.stderr.decode()
470
-
471
- def do_root_archive(self):
472
- """Copy the workflow directory to the root archive location."""
473
-
474
- if self.root_archive:
475
- self.root_archive.execute(self.root_archive_excludes,
476
- self.root_archive_directory)
477
-
478
- def get_stats(self, jsonable=True, datetime_dicts=False):
479
- """Get task statistics for this workflow."""
480
- out = {
481
- 'workflow_id': self.id_,
482
- 'submissions': [i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
483
- for i in self.submissions]
484
- }
485
- return out
486
-
487
- def kill_active(self):
488
- """Kill any active scheduled jobs associated with the workflow."""
489
-
490
- kill_scheduler_ids = []
491
- for sub in self.submissions:
492
- for cg_sub in sub.command_group_submissions:
493
- for iteration in self.iterations:
494
- cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
495
- if cg_sub_iter:
496
- if cg_sub_iter.scheduler_job_id is not None:
497
- kill_scheduler_ids.append(cg_sub_iter.scheduler_job_id)
498
- if cg_sub_iter.scheduler_stats_job_id is not None:
499
- kill_scheduler_ids.append(cg_sub_iter.scheduler_stats_job_id)
500
-
501
- print('Need to kill: {}'.format(kill_scheduler_ids))
502
- del_cmd = ['qdel'] + [str(i) for i in kill_scheduler_ids]
503
- proc = run(del_cmd, stdout=PIPE, stderr=PIPE)
504
- qdel_out = proc.stdout.decode()
505
- qdel_err = proc.stderr.decode()
506
- print(qdel_out)
507
-
508
-
509
- class CommandGroup(Base):
510
- """Class to represent a command group, which is roughly translated into a
511
- job script."""
512
-
513
- __tablename__ = 'command_group'
514
-
515
- id_ = Column('id', Integer, primary_key=True)
516
- workflow_id = Column(Integer, ForeignKey('workflow.id'))
517
- directory_variable_id = Column(Integer, ForeignKey('var_definition.id'))
518
- archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
519
-
520
- name = Column(String(255), nullable=True)
521
- stats_name = Column(String(255), nullable=True)
522
- commands = Column(JSON)
523
- is_job_array = Column(Boolean)
524
- exec_order = Column(Integer)
525
- nesting = Column(Enum(NestingType), nullable=True)
526
- environment = Column(JSON, nullable=True)
527
- _scheduler = Column('scheduler', JSON)
528
- profile_name = Column(String(255), nullable=True)
529
- profile_order = Column(Integer, nullable=True)
530
- archive_excludes = Column(JSON, nullable=True)
531
- archive_directory = Column(String(255), nullable=True)
532
- _alternate_scratch = Column('alternate_scratch', String(255), nullable=True)
533
- stats = Column(Boolean)
534
-
535
- archive = relationship('Archive', back_populates='command_groups')
536
- workflow = relationship('Workflow', back_populates='command_groups')
537
- command_group_submissions = relationship('CommandGroupSubmission',
538
- back_populates='command_group')
539
-
540
- directory_variable = relationship('VarDefinition')
541
-
542
- _scheduler_obj = None
543
-
544
- def __repr__(self):
545
- out = (
546
- '{}('
547
- 'commands={!r}, '
548
- 'is_job_array={!r}, '
549
- 'nesting={!r}'
550
- ')'
551
- ).format(
552
- self.__class__.__name__,
553
- self.commands,
554
- self.is_job_array,
555
- self.nesting,
556
- )
557
- return out
558
-
559
- def __init__(self, commands, directory_var, is_job_array=True,
560
- exec_order=None, nesting=None, environment=None, scheduler=None,
561
- profile_name=None, profile_order=None, archive=None,
562
- archive_excludes=None, archive_directory=None, alternate_scratch=None,
563
- stats=None, name=None, stats_name=None):
564
- """Method to initialise a new CommandGroup.
565
-
566
- Parameters
567
- ----------
568
- commands : list of dict
569
- List of dicts containing commands to execute.
570
- directory_var : VarDefinition
571
- The working directory for this command group. TODO...
572
- is_job_array : bool, optional
573
- If True, the command group is executed as a job array. True by
574
- default.
575
- exec_order : int, optional
576
- Execution order of this command relative to other command groups in
577
- the Workflow. By default, `None`.
578
- nesting : str, optional
579
- Either "nest" or "hold". This determines how the task multiplicity
580
- of this command group joins together with the task multiplicity of
581
- the previous command group (i.e. the command group with the lower
582
- execution order as determined by `exec_order`). If "nest", each
583
- task from the previous command group, once completed, will fork
584
- into multiple tasks in the current command group. If "hold", all
585
- tasks in the current command group will only begin once all tasks
586
- in the previous command group have completed. If `None`, the number
587
- of tasks in the previous and current command groups must match,
588
- since a given task in the current command group will only begin
589
- once its corresponding task in the previous command group has
590
- completed. By default, set to `None`.
591
- environment : list of str, optional
592
- List of commands to be run to set up the environment for the command group. By
593
- default set to `None`.
594
- scheduler : dict, optional
595
- Scheduler type and options to be passed directly to the scheduler. By default,
596
- `None`, in which case the DirectExecution scheduler is used and no additional
597
- options are passed.
598
- profile_name : str, optional
599
- If the command group was generated as part of a job profile file,
600
- the profile name should be passed here.
601
- profile_order : int, optional
602
- If the command group was generated as part of a job profile file,
603
- the profile order should be passed here.
604
- archive : Archive, optional
605
- The Archive object associated with this command group.
606
- archive_excludes : list of str
607
- List of glob patterns representing files that should be excluding
608
- when archiving this command group.
609
- archive_directory : str or Path, optional
610
- Name of the directory in which the archive for this command group will reside.
611
- alternate_scratch : str, optional
612
- Location of alternate scratch in which to run commands.
613
-
614
- TODO: document how `nesting` interacts with `is_job_array`.
615
-
616
- """
617
-
618
- self.commands = commands
619
- self.is_job_array = is_job_array
620
- self.exec_order = exec_order
621
- self.nesting = nesting
622
- self.environment = environment
623
- self.scheduler = scheduler
624
- self.directory_variable = directory_var
625
- self.profile_name = profile_name
626
- self.profile_order = profile_order
627
- self.stats = stats
628
- self.name = name
629
- self.stats_name = stats_name
630
-
631
- self.archive = archive
632
- self.archive_excludes = archive_excludes
633
- self.archive_directory = archive_directory
634
-
635
- self._alternate_scratch = alternate_scratch
636
-
637
- self.validate()
638
-
639
- @reconstructor
640
- def init_on_load(self):
641
- self.scheduler = self._scheduler
642
-
643
- def validate(self):
644
-
645
- # Check at least one command:
646
- if not self.commands:
647
- msg = 'At least one command must be specified.'
648
- raise ValueError(msg)
649
-
650
- self.nesting = NestingType[self.nesting] if self.nesting else None
651
-
652
- # Check alternate scratch exists
653
- if self.alternate_scratch:
654
- if not self.alternate_scratch.is_dir():
655
- msg = 'Alternate scratch "{}" is not an existing directory.'
656
- raise ValueError(msg.format(self.alternate_scratch))
657
-
658
- @staticmethod
659
- def get_command_lines(commands):
660
- """Get all lines in the commands list."""
661
- out = []
662
- for i in commands:
663
- if 'line' in i:
664
- out.append(i['line'])
665
- elif 'subshell' in i:
666
- out.extend(CommandGroup.get_command_lines(i['subshell']))
667
- return out
668
-
669
- @property
670
- def scheduler(self):
671
- return self._scheduler_obj
672
-
673
- @scheduler.setter
674
- def scheduler(self, scheduler):
675
-
676
- if 'name' not in scheduler:
677
- msg = 'Scheduler must have a name that is one of: {}'
678
- raise ValueError(msg.format(list(SCHEDULER_MAP.keys())))
679
-
680
- sch_name = scheduler['name']
681
- if sch_name not in SCHEDULER_MAP.keys():
682
- msg = 'Scheduler "{}" is not known.'.format(scheduler)
683
- raise ValueError(msg)
684
-
685
- sch_class = SCHEDULER_MAP[sch_name]
686
- self._scheduler_obj = sch_class(
687
- options=scheduler['options'],
688
- output_dir=scheduler['output_dir'],
689
- error_dir=scheduler['error_dir'],
690
- )
691
- self._scheduler = scheduler
692
-
693
- @property
694
- def alternate_scratch(self):
695
- if self._alternate_scratch:
696
- return Path(self._alternate_scratch)
697
- else:
698
- return None
699
-
700
- @property
701
- def variable_names(self):
702
- """Get those variable names associated with this command group."""
703
-
704
- var_names = select_cmd_group_var_names(
705
- self.get_command_lines(self.commands),
706
- self.directory_variable.value
707
- )
708
- return var_names
709
-
710
- @property
711
- def variable_definitions(self):
712
- """Get those variable definitions associated with this command group,
713
- excluding those that appear embedded within other variables."""
714
-
715
- var_names = self.variable_names
716
- var_defns = []
717
- for i in self.workflow.variable_definitions:
718
- if i.name in var_names:
719
- var_defns.append(i)
720
-
721
- return var_defns
722
-
723
- @property
724
- def variable_definitions_recursive(self):
725
- """Get those variable definitions associated with this command group,
726
- including those that appear embedded within other variables."""
727
-
728
- var_defns_dict = {
729
- i.name: {
730
- 'data': i.data,
731
- 'file_regex': i.file_regex,
732
- 'file_contents': i.file_contents,
733
- 'value': i.value,
734
- }
735
- for i in self.workflow.variable_definitions
736
- }
737
-
738
- cmd_group_var_defns = select_cmd_group_var_definitions(
739
- var_defns_dict,
740
- self.get_command_lines(self.commands),
741
- self.directory_variable.value,
742
- )
743
-
744
- var_defns = [
745
- i for i in self.workflow.variable_definitions
746
- if i.name in cmd_group_var_defns
747
- ]
748
-
749
- return var_defns
750
-
751
-
752
- class VarDefinition(Base):
753
- """Class to represent a variable definition."""
754
-
755
- __tablename__ = 'var_definition'
756
-
757
- id_ = Column('id', Integer, primary_key=True)
758
- workflow_id = Column('workflow_id', Integer, ForeignKey('workflow.id'))
759
-
760
- name = Column(String(255))
761
- data = Column(JSON, nullable=True)
762
- file_regex = Column(JSON, nullable=True)
763
- file_contents = Column(JSON, nullable=True)
764
- value = Column(String(255), nullable=True)
765
-
766
- workflow = relationship('Workflow', back_populates='variable_definitions')
767
- variable_values = relationship(
768
- 'VarValue',
769
- back_populates='variable_definition',
770
- order_by='VarValue.order_id',
771
- )
772
-
773
- def __repr__(self):
774
- out = ('{}('
775
- 'name={!r}, '
776
- 'data={!r}, '
777
- 'file_regex={!r}, '
778
- 'value={!r}'
779
- ')').format(
780
- self.__class__.__name__,
781
- self.name,
782
- self.data,
783
- self.file_regex,
784
- self.value,
785
- )
786
- return out
787
-
788
- def __init__(self, name, data=None, file_regex=None, value=None, file_contents=None):
789
-
790
- self.name = name
791
- self.data = data
792
- self.file_regex = file_regex
793
- self.file_contents = file_contents
794
- self.value = value
795
-
796
- def is_base_variable(self):
797
- """Check if the variable depends on any other variables."""
798
-
799
- if extract_variable_names(self.value,
800
- CONFIG.get('variable_delimiters')):
801
- return False
802
- else:
803
- return True
804
-
805
- def get_dependent_variable_names(self):
806
- """Get the names of variables on which this variable depends."""
807
- return extract_variable_names(self.value,
808
- CONFIG.get('variable_delimiters'))
809
-
810
- def get_multiplicity(self, submission):
811
- """Get the value multiplicity of this variable for a given
812
- submission.
813
-
814
- TODO: this should first try to get multiplicity from values (as a
815
- function of cmd group directory?)
816
-
817
- """
818
-
819
- # First check if the variable is resolved.
820
-
821
- var_values = {}
822
- for i in self.variable_values:
823
- if i.submission == submission:
824
- if i.directory_value.value not in var_values:
825
- var_values.update({i.directory_value.value: []})
826
- var_values[i.directory_value.value].append(i)
827
-
828
- var_lengths = {}
829
- for directory_path, var_vals in var_values.items():
830
-
831
- if var_vals:
832
- var_length = len(var_vals)
833
-
834
- else:
835
- var_length = None
836
-
837
- if self.data:
838
- var_length = len(self.data)
839
-
840
- elif self.file_regex:
841
-
842
- if 'subset' in self.file_regex:
843
- var_length = len(self.file_regex['subset'])
844
-
845
- elif 'expected_multiplicity' in self.file_regex:
846
- var_length = self.file_regex['expected_multiplicity']
847
-
848
- elif self.file_contents:
849
-
850
- if 'expected_multiplicity' in self.file_contents:
851
- var_length = self.file_contents['expected_multiplicity']
852
-
853
- elif self.is_base_variable():
854
- var_length = 1
855
-
856
- else:
857
- raise ValueError('bad 3!')
858
-
859
- var_lengths.update({directory_path: var_length})
860
-
861
- return var_lengths
862
-
863
- def get_values(self, directory):
864
- """Get the values of this variable.
865
-
866
- TODO: refactor repeated code blocks.
867
-
868
- Parameters
869
- ----------
870
- directory : Path
871
- Directory within which to resolve variable.
872
-
873
- Raises
874
- ------
875
- UnresolvedVariableError
876
- If the variable...
877
-
878
- """
879
-
880
- vals = []
881
-
882
- if self.file_regex:
883
-
884
- if self.file_regex.get('is_dir'):
885
-
886
- for root, _, _ in os.walk(directory):
887
- root_rel = Path(root).relative_to(directory).as_posix()
888
-
889
- match = re.search(self.file_regex['pattern'], root_rel)
890
- if match:
891
- match_groups = match.groups()
892
- if match_groups:
893
- match = match_groups[self.file_regex['group']]
894
- val_fmt = self.value.format(match)
895
- vals.append(val_fmt)
896
-
897
- else:
898
- # Search files in the given directory
899
- for i in directory.iterdir():
900
- match = re.search(self.file_regex['pattern'], i.name)
901
- if match:
902
- match_groups = match.groups()
903
- if match_groups:
904
- match = match_groups[self.file_regex['group']]
905
- val_fmt = self.value.format(match)
906
- vals.append(val_fmt)
907
-
908
- elif self.file_contents:
909
-
910
- path = Path(directory).joinpath(self.file_contents['path'])
911
- with path.open('r') as handle:
912
- for i in handle.readlines():
913
- vals.append(i.strip())
914
-
915
- elif self.data:
916
- for i in self.data:
917
- vals.append(self.value.format(i))
918
-
919
- else:
920
- vals.append(self.value)
921
-
922
- if not vals:
923
- msg = ('Cannot resolve variable value with name: {}')
924
- raise UnresolvedVariableError(msg.format(self.name))
925
-
926
- vals = sorted(vals)
927
-
928
- return vals
929
-
930
-
931
- class Submission(Base):
932
- """Class to represent the submission of (part of) a workflow."""
933
-
934
- __tablename__ = 'submission'
935
-
936
- id_ = Column('id', Integer, primary_key=True)
937
- order_id = Column(Integer)
938
- workflow_id = Column(Integer, ForeignKey('workflow.id'))
939
- submit_time = Column(DateTime)
940
- alt_scratch_dir_name = Column(String(255), nullable=True)
941
-
942
- workflow = relationship('Workflow', back_populates='submissions')
943
- command_group_submissions = relationship(
944
- 'CommandGroupSubmission',
945
- back_populates='submission',
946
- order_by='CommandGroupSubmission.command_group_exec_order',
947
- )
948
-
949
- variable_values = relationship('VarValue', back_populates='submission')
950
-
951
- def __init__(self, workflow, task_range):
952
-
953
- self.submit_time = datetime.now()
954
- self.order_id = len(workflow.submissions)
955
- self.workflow = workflow
956
-
957
- # print('Submission.__init__: task_range: {}'.format(task_range), flush=True)
958
-
959
- self.resolve_variable_values(self.workflow.directory, self.first_iteration)
960
-
961
- cg_subs = []
962
- for i in self.workflow.command_groups:
963
- task_range = [1, -1, 1] # TEMP
964
- cg_sub = CommandGroupSubmission(i, self, task_range)
965
- cg_subs.append(cg_sub)
966
-
967
- session = Session.object_session(self)
968
- session.commit()
969
-
970
- # `SchedulerGroup`s must be generated after `CommandGroupSubmission`s and
971
- # `resolve_variable_values`:
972
- self._scheduler_groups = self.get_scheduler_groups()
973
-
974
- if self.workflow.has_alternate_scratch:
975
- self._make_alternate_scratch_dirs()
976
-
977
- # `Task`s must be generated after `SchedulerGroup`s:
978
- cg_sub_iters = []
979
- for cg_sub in self.command_group_submissions:
980
-
981
- for iteration in self.workflow.iterations:
982
-
983
- if iteration.order_id > 0 and self.workflow.loop.get('groups'):
984
- # For > first iteration, not all command groups need be run:
985
- if cg_sub.command_group_exec_order not in self.workflow.loop['groups']:
986
- continue
987
-
988
- cg_sub_iter = CommandGroupSubmissionIteration(iteration, cg_sub)
989
- cg_sub_iters.append(cg_sub_iter)
990
-
991
- session.commit()
992
- # `cg_sub_iter.num_outputs` requires all cg_sub_iters to be generated:
993
- for cg_sub_iter in cg_sub_iters:
994
- for task_num in range(cg_sub_iter.num_outputs):
995
- Task(cg_sub_iter, task_num)
996
-
997
- self.first_iteration.status = IterationStatus('active')
998
-
999
- @reconstructor
1000
- def init_on_load(self):
1001
- self._scheduler_groups = self.get_scheduler_groups()
1002
-
1003
- def _make_alternate_scratch_dirs(self):
1004
- """Create a new directory on each alternate scratch for this submission."""
1005
-
1006
- alt_scratches = self.workflow.all_alternate_scratch
1007
-
1008
- # Find a suitable alternate scratch directory name for this submission:
1009
- count = 0
1010
- MAX_COUNT = 10
1011
- hex_length = 10
1012
- alt_dirname = get_random_hex(hex_length)
1013
- while True:
1014
- if all([not i.joinpath(alt_dirname).exists() for i in alt_scratches]):
1015
- break
1016
- alt_dirname = get_random_hex(hex_length)
1017
- count += 1
1018
- if count > MAX_COUNT:
1019
- msg = ('Could not find a suitable alternate scratch directory name '
1020
- 'in {} iterations.')
1021
- raise RuntimeError(msg.format(MAX_COUNT))
1022
-
1023
- # Make alternate scratch "root" directories:
1024
- for alt_scratch in alt_scratches:
1025
- alt_scratch_root = alt_scratch.joinpath(alt_dirname)
1026
- alt_scratch_root.mkdir(parents=False, exist_ok=False)
1027
-
1028
- self.alt_scratch_dir_name = alt_dirname
1029
-
1030
- def get_working_directories(self, iteration):
1031
- dirs = []
1032
- for cg_sub in self.command_group_submissions:
1033
- cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
1034
- for i in cg_sub_iter.get_directories():
1035
- if i not in dirs:
1036
- dirs.append(i)
1037
- return dirs
1038
-
1039
- @property
1040
- def first_iteration(self):
1041
- return self.workflow.first_iteration
1042
-
1043
- @property
1044
- def scheduler_groups(self):
1045
- return self._scheduler_groups
1046
-
1047
- def get_scheduler_groups(self):
1048
- """Get scheduler groups for this workflow submission."""
1049
- return SchedulerGroup.get_scheduler_groups(self)
1050
-
1051
- def get_scheduler_group_index(self, command_group_submission):
1052
- """Get the position of a command group submission within the submission's
1053
- scheduler groups.
1054
-
1055
- Parameters
1056
- ----------
1057
- command_group_submission : CommandGroupSubmission
1058
-
1059
- Returns
1060
- -------
1061
- tuple (int, int)
1062
- First integer identifies which scheduler group. Second integer identifies
1063
- the relative position of the command group within the scheduler group.
1064
-
1065
- """
1066
-
1067
- if command_group_submission not in self.command_group_submissions:
1068
- msg = 'Command group submission {} is not part of the submission.'
1069
- raise ValueError(msg.format(command_group_submission))
1070
-
1071
- for i in self.scheduler_groups:
1072
- if i.has(command_group_submission):
1073
- return (i.order_id, i.index(command_group_submission))
1074
-
1075
- msg = 'Command group submission {} is not part of the scheduler group.'
1076
- raise ValueError(msg.format(command_group_submission))
1077
-
1078
- def get_scheduler_group(self, command_group_submission):
1079
-
1080
- sch_group_idx, _ = self.get_scheduler_group_index(command_group_submission)
1081
- return self.scheduler_groups[sch_group_idx]
1082
-
1083
- def is_variable_resolved(self, variable_definition, iteration, directory_var_val=None):
1084
- """Returns True if the passed variable_definition has been resolved
1085
- for this Submission and iteration."""
1086
- # Check the variable definition is part of the workflow:
1087
- if variable_definition not in self.workflow.variable_definitions:
1088
- msg = ('Passed variable_definition object is not in the '
1089
- ' workflow of this submission.')
1090
- raise ValueError(msg)
1091
-
1092
- for i in self.variable_values:
1093
- if i.variable_definition == variable_definition:
1094
- if i.iteration == iteration:
1095
- if directory_var_val:
1096
- if i.directory_value == directory_var_val:
1097
- return True
1098
- else:
1099
- return True
1100
-
1101
- return False
1102
-
1103
- def resolve_variable_values(self, root_directory, iteration):
1104
- """Attempt to resolve as many variable values in the Workflow as
1105
- possible."""
1106
-
1107
- session = Session.object_session(self)
1108
-
1109
- # Loop through CommandGroupSubmissions in order:
1110
- for i in self.workflow.command_groups:
1111
-
1112
- dir_var = i.directory_variable
1113
-
1114
- # VarValues representing the resolved command group working directories:
1115
- cg_dirs_var_vals = []
1116
- cg_dirs_var_vals_other_val = []
1117
- for j in dir_var.variable_values:
1118
- if j.iteration == iteration:
1119
- cg_dirs_var_vals.append(j)
1120
- else:
1121
- cg_dirs_var_vals_other_val.append(j.value)
1122
-
1123
- if cg_dirs_var_vals:
1124
- pass
1125
- # print(('Submission.resolve_variable_values: found existing resolved '
1126
- # 'directory variables: {}').format(cg_dirs_var_vals), flush=True)
1127
-
1128
- else:
1129
-
1130
- # print(('Submission.resolve_variable_values: trying to resolve directory '
1131
- # 'variable values.'), flush=True)
1132
-
1133
- # Directory variable has not yet been resolved; try:
1134
- try:
1135
- dir_var_vals_dat = dir_var.get_values(root_directory)
1136
- # print(('Submission.resolve_variable_values: found directories with '
1137
- # 'values: {}.'.format(dir_var_vals_dat)), flush=True)
1138
-
1139
- except UnresolvedVariableError:
1140
- # Move on to next command group:
1141
- continue
1142
-
1143
- dir_var_vals_dat_new = [
1144
- j for j in dir_var_vals_dat
1145
- if (j not in cg_dirs_var_vals_other_val or j == '.')
1146
- ]
1147
-
1148
- # print(('Submission.resolve_variable_values: new directories are: '
1149
- # '{}.'.format(dir_var_vals_dat_new)), flush=True)
1150
-
1151
- # Add VarVals:
1152
- for val_idx, val in enumerate(dir_var_vals_dat_new):
1153
- cg_dirs_var_vals.append(
1154
- VarValue(
1155
- value=val,
1156
- order_id=val_idx,
1157
- var_definition=dir_var,
1158
- submission=self,
1159
- iteration=iteration,
1160
- )
1161
- )
1162
-
1163
- var_defns_rec = i.variable_definitions_recursive
1164
-
1165
- # print(('Submission.resolve_variable_values: cg_dirs_var_vals: '
1166
- # '{}.'.format(cg_dirs_var_vals)), flush=True)
1167
-
1168
- for j in cg_dirs_var_vals:
1169
-
1170
- # print(('Submission.resolve_variable_values: dir var val: '
1171
- # '{}.'.format(j)), flush=True)
1172
-
1173
- var_vals_dat = resolve_variable_values(
1174
- var_defns_rec,
1175
- root_directory.joinpath(j.value)
1176
- )
1177
-
1178
- # print(('Submission.resolve_variable_values: var_vals_dat: '
1179
- # '{}.'.format(var_vals_dat)), flush=True)
1180
-
1181
- for k, v in var_vals_dat.items():
1182
-
1183
- # print(('Submission.resolve_variable_values: var_vals_dat k: '
1184
- # '{}; v: {}.'.format(k, v)), flush=True)
1185
-
1186
- vals_dat = v['vals']
1187
- var_defn = self.workflow.get_variable_definition_by_name(k)
1188
-
1189
- # print(('Submission.resolve_variable_values: vals_dat '
1190
- # '{}.'.format(vals_dat)), flush=True)
1191
- # print(('Submission.resolve_variable_values: var_defn '
1192
- # '{}.'.format(var_defn)), flush=True)
1193
-
1194
- if not self.is_variable_resolved(var_defn, iteration, j):
1195
-
1196
- # print(('Submission.resolve_variable_values: {} not resolved...'.format(
1197
- # var_defn)), flush=True)
1198
-
1199
- for val_idx, val in enumerate(vals_dat):
1200
-
1201
- # print(('Submission.resolve_variable_values: val: {}...'.format(
1202
- # val)), flush=True)
1203
-
1204
- VarValue(
1205
- value=val,
1206
- order_id=val_idx,
1207
- var_definition=var_defn,
1208
- submission=self,
1209
- iteration=iteration,
1210
- directory_value=j
1211
- )
1212
- session.commit()
1213
-
1214
-     def write_submit_dirs(self, hf_dir):
-         """Write the directory structure necessary for this submission."""
-
-         # Ensure scheduler output and error directories exist, if specified:
-         for cg_sub in self.command_group_submissions:
-             root_dir = self.workflow.directory
-             out_dir = root_dir.joinpath(cg_sub.command_group.scheduler.output_dir)
-             err_dir = root_dir.joinpath(cg_sub.command_group.scheduler.error_dir)
-             if not out_dir.is_dir():
-                 out_dir.mkdir()
-             if not err_dir.is_dir():
-                 err_dir.mkdir()
-
-         # Make the workflow directory if it does not exist:
-         wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-         if not wf_path.exists():
-             wf_path.mkdir()
-
-         # Make the submit directory:
-         submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-         submit_path.mkdir()
-
-         num_dir_vals_first_iter = {}  # keys are cg_sub_idx
-         for iteration in self.workflow.iterations:
-
-             # Make the iteration directory for each iteration:
-             iter_path = submit_path.joinpath('iter_{}'.format(iteration.order_id))
-             iter_path.mkdir()
-
-             for idx, i in enumerate(self.scheduler_groups):
-
-                 max_num_tasks = i.get_max_num_tasks(self.first_iteration)
-                 step_size = i.get_step_size(self.first_iteration)
-
-                 # Make the scheduler group directory for each scheduler group:
-                 sg_path = iter_path.joinpath('scheduler_group_{}'.format(idx))
-                 sg_path.mkdir()
-
-                 # Loop through command groups in this scheduler group:
-                 for cg_sub_idx, cg_sub in enumerate(i.command_group_submissions):
-
-                     cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-                     if not cg_sub_iter:
-                         continue
-
-                     cg_sub_first_iter = cg_sub.get_command_group_submission_iteration(
-                         self.first_iteration
-                     )
-                     num_dir_vals = cg_sub_first_iter.num_directories
-                     all_dir_slots = [''] * max_num_tasks
-
-                     # Distribute the directories over the task slots:
-                     for k in range(0, max_num_tasks, step_size[cg_sub_idx]):
-                         dir_idx = round((k / max_num_tasks) * num_dir_vals)
-                         all_dir_slots[k] = 'REPLACE_WITH_DIR_{}'.format(dir_idx)
-
-                     wk_dirs_path = iter_path.joinpath('working_dirs_{}{}'.format(
-                         cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')))
-
-                     # Write the working directory template file for each command group:
-                     with wk_dirs_path.open('w') as handle:
-                         for dir_path in all_dir_slots:
-                             handle.write('{}\n'.format(dir_path))
-
-                 # Make the variable values directories for each scheduler group:
-                 var_values_path = sg_path.joinpath('var_values')
-                 var_values_path.mkdir()
-                 for j in range(1, max_num_tasks + 1):
-                     j_fmt = zeropad(j, max_num_tasks)
-                     vv_j_path = var_values_path.joinpath(j_fmt)
-                     vv_j_path.mkdir()
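For orientation, a minimal sketch of the task-directory naming used above, assuming `zeropad(n, max_n)` left-pads `n` to the digit count of `max_n` (behaviour inferred from its call site here, not a definitive implementation):

    def zeropad(n, max_n):
        # Assumed behaviour, inferred from the call above: left-pad `n`
        # with zeros to the number of digits in `max_n`.
        return str(n).zfill(len(str(max_n)))

    # With max_num_tasks == 120, the var_values task directories are named:
    # zeropad(1, 120) -> '001', zeropad(12, 120) -> '012', zeropad(120, 120) -> '120'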
-
-     def write_jobscripts(self, hf_dir):
-
-         wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-         submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-         js_paths = []
-         js_stats_paths = []
-         for cg_sub in self.command_group_submissions:
-             js_paths_i = cg_sub.write_jobscript(dir_path=submit_path)
-             js_paths.append(js_paths_i['jobscript'])
-             js_stats_paths.append(js_paths_i['stats_jobscript'])
-
-         return js_paths, js_stats_paths
-
-     def submit_jobscripts(self, jobscript_paths):
-
-         loop_groups = self.workflow.loop['groups']
-         cmd_group_idx = range(len(self.workflow.command_groups))
-
-         if loop_groups:
-
-             pre_loop_idx = [i for i in cmd_group_idx if i < min(loop_groups)]
-             post_loop_idx = [i for i in cmd_group_idx if i > max(loop_groups)]
-
-             # List of tuples mapping jobscript path index (i.e. command group order
-             # ID) to iteration index:
-             js_submissions = [(i, 0) for i in pre_loop_idx]
-
-             for iteration in self.workflow.iterations:
-                 for i in loop_groups:
-                     js_submissions.append((i, iteration.order_id))
-
-             for i in post_loop_idx:
-                 js_submissions.append((i, 0))
-
-         else:
-             js_submissions = [(i, 0) for i in cmd_group_idx]
-
-         submit_cmd = os.getenv('HPCFLOW_QSUB_CMD', 'qsub')
-         last_submit_id = None
-         js_paths, js_stat_paths = jobscript_paths
-
-         for cg_sub_idx, iter_idx in js_submissions:
-
-             iter_idx_var = 'ITER_IDX={}'.format(iter_idx)
-             cg_sub = self.command_group_submissions[cg_sub_idx]
-             iteration = self.workflow.iterations[iter_idx]
-             cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-             js_path_i, js_stat_path_i = js_paths[cg_sub_idx], js_stat_paths[cg_sub_idx]
-
-             qsub_cmd = [submit_cmd]
-
-             if last_submit_id:
-
-                 # Add conditional submission:
-                 if iteration.order_id > 0:
-                     hold_arg = '-hold_jid'
-                 elif cg_sub.command_group.nesting == NestingType('hold'):
-                     hold_arg = '-hold_jid'
-                 else:
-                     hold_arg = '-hold_jid_ad'
-
-                 qsub_cmd += [hold_arg, last_submit_id]
-
-             qsub_cmd += ['-v', iter_idx_var]
-             qsub_cmd.append(str(js_path_i))
-
-             # Submit the jobscript:
-             job_id_str = self.submit_jobscript(qsub_cmd, js_path_i, iteration)
-             cg_sub_iter.scheduler_job_id = int(job_id_str)
-             last_submit_id = job_id_str
-
-             # Submit the stats jobscript:
-             if js_stat_path_i:
-                 st_cmd = [submit_cmd, '-hold_jid_ad', last_submit_id, '-v', iter_idx_var]
-                 st_cmd.append(str(js_stat_path_i))
-
-                 job_id_str = self.submit_jobscript(st_cmd, js_stat_path_i, iteration)
-                 cg_sub_iter.scheduler_stats_job_id = int(job_id_str)
-                 last_submit_id = job_id_str
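The hold-argument choice above builds an SGE-style dependency chain: `-hold_jid` makes the whole job wait for the previous job to finish, while `-hold_jid_ad` holds each array task on the corresponding task of the previous array job. A minimal sketch of the command this loop assembles (all values hypothetical):

    submit_cmd = 'qsub'
    last_submit_id = '12345'    # job ID returned by the previous submission
    qsub_cmd = [submit_cmd, '-hold_jid_ad', last_submit_id, '-v', 'ITER_IDX=0']
    qsub_cmd.append('js_1.sh')  # hypothetical jobscript path
    # -> ['qsub', '-hold_jid_ad', '12345', '-v', 'ITER_IDX=0', 'js_1.sh']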
-     def submit_jobscript(self, cmd, js_path, iteration):
-
-         cwd = str(self.workflow.directory)
-         proc = run(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
-         qsub_out = proc.stdout.decode().strip()
-         qsub_err = proc.stderr.decode().strip()
-         if qsub_out:
-             print(qsub_out, flush=True)
-         if qsub_err:
-             print(qsub_err, flush=True)
-
-         # Extract the newly submitted job ID:
-         pattern = r'[0-9]+'
-         job_id_search = re.search(pattern, qsub_out)
-         try:
-             job_id_str = job_id_search.group()
-         except AttributeError:
-             msg = ('Could not retrieve the job ID from the submitted jobscript '
-                    'found at {}. No more jobscripts will be submitted.')
-             raise ValueError(msg.format(js_path))
-
-         return job_id_str
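The job ID is recovered from the scheduler's submission message with a plain digit search; a minimal illustration (the message text is hypothetical but follows the typical SGE shape):

    import re

    qsub_out = 'Your job-array 3523845.1-10:1 ("js_0.sh") has been submitted'
    job_id_search = re.search(r'[0-9]+', qsub_out)
    job_id_str = job_id_search.group()  # first run of digits -> '3523845'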
-
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         """Get task statistics for this submission."""
-         out = {
-             'submission_id': self.id_,
-             'command_group_submissions': [
-                 i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                 for i in self.command_group_submissions]
-         }
-         return out
-
-
- class CommandGroupSubmission(Base):
-     """Class to represent the submission of a single command group."""
-
-     __tablename__ = 'command_group_submission'
-
-     id_ = Column('id', Integer, primary_key=True)
-     command_group_id = Column(Integer, ForeignKey('command_group.id'))
-     submission_id = Column(Integer, ForeignKey('submission.id'))
-     task_start = Column(Integer)
-     task_stop = Column(Integer)
-     task_step = Column(Integer)
-     commands_written = Column(Boolean)
-
-     command_group = relationship('CommandGroup',
-                                  back_populates='command_group_submissions')
-
-     submission = relationship('Submission', back_populates='command_group_submissions')
-
-     command_group_exec_order = deferred(
-         select([CommandGroup.exec_order]).where(
-             CommandGroup.id_ == command_group_id))
-
-     is_command_writing = relationship(
-         'IsCommandWriting',
-         uselist=False,
-         cascade='all, delete, delete-orphan'
-     )
-
-     command_group_submission_iterations = relationship(
-         'CommandGroupSubmissionIteration',
-         back_populates='command_group_submission',
-     )
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'command_group={!r}, '
-             'submission_id={!r})').format(
-                 self.__class__.__name__,
-                 self.command_group,
-                 self.submission_id,
-         )
-         return out
-
-     def __init__(self, command_group, submission, task_range):
-         self.command_group = command_group
-         self.submission = submission
-         self.task_range = task_range
-
-     @property
-     def task_range(self):
-         return (self.task_start, self.task_stop, self.task_step)
-
-     @task_range.setter
-     def task_range(self, task_range):
-         self.task_start = task_range[0]
-         self.task_stop = task_range[1]
-         if len(task_range) == 3:
-             self.task_step = task_range[2]
-         else:
-             self.task_step = 1
-
-     @property
-     def task_range_idx(self):
-         return list(range(*self.task_range))
-
-     @property
-     def variable_values(self):
-         # Variable definitions in this command group that have resolved values:
-         var_values = []
-         for i in self.command_group.variable_definitions:
-             if i.variable_values:
-                 var_values.append(i)
-
-         return var_values
-
-     @property
-     def num_submitted_tasks(self):
-         """Get the number of submitted tasks based on the task range.
-
-         Returns
-         -------
-         num : int
-             If the number of tasks is as yet undetermined, `None` is returned.
-
-         """
-
-         if self.task_stop == -1:
-             return None
-
-         num = ceil((self.task_stop - (self.task_start - 1)) / self.task_step)
-
-         return num
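As a worked example of the formula above: a task range with start 1, stop 10 and step 3 covers tasks 1, 4, 7 and 10:

    from math import ceil

    task_start, task_stop, task_step = 1, 10, 3
    num = ceil((task_stop - (task_start - 1)) / task_step)  # ceil(10 / 3) -> 4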
-
-     @property
-     def scheduler_group_index(self):
-         """Get the position of this command group submission within the submission's
-         scheduler groups.
-
-         Returns
-         -------
-         tuple (int, int)
-             The first integer identifies the scheduler group; the second identifies
-             the relative position of the command group within that scheduler group.
-
-         """
-         return self.submission.get_scheduler_group_index(self)
-
-     @property
-     def scheduler_group(self):
-         """Get the scheduler group to which this command group belongs."""
-         return self.submission.get_scheduler_group(self)
-
-     def get_command_group_submission_iteration(self, iteration):
-         for i in self.command_group_submission_iterations:
-             if i.iteration == iteration:
-                 return i
-
-     @property
-     def alternate_scratch_dir(self):
-         if self.command_group.alternate_scratch:
-             return self.command_group.alternate_scratch.joinpath(
-                 self.submission.alt_scratch_dir_name)
-         else:
-             return None
-
-     def get_var_definition_by_name(self, var_name):
-         """Get a variable definition from this command group by its name."""
-         for i in self.command_group.var_definitions:
-             if i.name == var_name:
-                 return i
-
-     def write_jobscript(self, dir_path):
-         """Write the jobscript."""
-
-         cg_sub_first_iter = self.get_command_group_submission_iteration(
-             self.submission.first_iteration)
-
-         js_path = self.command_group.scheduler.write_jobscript(
-             dir_path=dir_path,
-             workflow_directory=self.submission.workflow.directory,
-             command_group_order=self.command_group_exec_order,
-             max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                 self.submission.first_iteration),
-             task_step_size=cg_sub_first_iter.step_size,
-             environment=self.command_group.environment,
-             archive=self.command_group.archive is not None,
-             alternate_scratch_dir=self.alternate_scratch_dir,
-             command_group_submission_id=self.id_,
-             name=self.command_group.name,
-         )
-
-         js_stats_path = None
-         if self.command_group.stats:
-             js_stats_path = self.command_group.scheduler.write_stats_jobscript(
-                 dir_path=dir_path,
-                 workflow_directory=self.submission.workflow.directory,
-                 command_group_order=self.command_group_exec_order,
-                 max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                     self.submission.first_iteration),
-                 task_step_size=cg_sub_first_iter.step_size,
-                 command_group_submission_id=self.id_,
-                 name=self.command_group.stats_name,
-             )
-
-         out = {
-             'jobscript': js_path,
-             'stats_jobscript': js_stats_path,
-         }
-
-         return out
-
-     def write_runtime_files(self, project, task_idx, iter_idx):
-         iteration = self.get_iteration(iter_idx)
-         self.queue_write_command_file(project, task_idx, iteration)
-         self.write_variable_files(project, task_idx, iteration)
-
-     def queue_write_command_file(self, project, task_idx, iteration):
-         """Ensure the command file for this command group submission is written, ready
-         to be invoked by the jobscript, and also refresh the resolved variable values
-         so that, when the variable files are written, they are up to date."""
-
-         session = Session.object_session(self)
-
-         sleep_time = 5
-         context = 'CommandGroupSubmission.write_cmd'
-         block_msg = ('{{}} {}: Writing command file blocked. Sleeping for {} '
-                      'seconds.'.format(context, sleep_time))
-         unblock_msg = ('{{}} {}: Commands not yet written and writing is available. '
-                        'Writing command file.'.format(context))
-         written_msg = '{{}} {}: Command files already written.'.format(context)
-         refresh_vals_msg = '{{}} {}: Refreshing resolved variable values.'.format(context)
-         write_dirs_msg = ('{{}} {}: Writing working directory files for '
-                           'iteration {}.').format(context, iteration)
-         write_as_msg = ('{{}} {}: Writing alternate scratch exclusion list for '
-                         'task_idx {}.').format(context, task_idx)
-         make_alt_msg = ('{{}} {}: Making alternate scratch working '
-                         'directories.'.format(context))
-
-         blocked = True
-         while blocked:
-
-             try:
-                 session.refresh(self)
-             except OperationalError:
-                 # Database is likely locked:
-                 print(block_msg.format(datetime.now()), flush=True)
-                 sleep(sleep_time)
-                 continue
-
-             if self.is_command_writing:
-                 print(block_msg.format(datetime.now()), flush=True)
-                 sleep(sleep_time)
-
-             else:
-                 try:
-                     self.is_command_writing = IsCommandWriting()
-                     session.commit()
-                     blocked = False
-
-                 except IntegrityError:
-                     # Another process has already set `is_command_writing`:
-                     session.rollback()
-                     print(block_msg.format(datetime.now()), flush=True)
-                     sleep(sleep_time)
-
-                 except OperationalError:
-                     # Database is likely locked:
-                     session.rollback()
-                     print(block_msg.format(datetime.now()), flush=True)
-                     sleep(sleep_time)
-
-         if not blocked:
-
-             if iteration.status == IterationStatus('pending'):
-                 iteration.status = IterationStatus('active')
-
-             # This needs to happen once *per task* per command group submission:
-             print(refresh_vals_msg.format(datetime.now()), flush=True)
-             self.submission.resolve_variable_values(project.dir_path, iteration)
-
-             # This needs to happen once *per task* per command group submission (if
-             # it has an alternate scratch):
-             if self.command_group.alternate_scratch:
-                 print(write_as_msg.format(datetime.now()), flush=True)
-                 task = self.get_task(task_idx, iteration)
-                 self.write_alt_scratch_exclusion_list(project, task, iteration)
-
-             cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-             if not cg_sub_iter.working_dirs_written:
-
-                 # These need to happen once *per iteration* per command group
-                 # submission:
-                 print(write_dirs_msg.format(datetime.now()), flush=True)
-                 cg_sub_iter.write_working_directories(project)
-
-                 if self.command_group.alternate_scratch:
-                     print(make_alt_msg.format(datetime.now()), flush=True)
-                     self.make_alternate_scratch_dirs(project, iteration)
-
-                 cg_sub_iter.working_dirs_written = True
-
-             if not self.commands_written:
-                 # This needs to happen once per command group submission:
-                 print(unblock_msg.format(datetime.now()), flush=True)
-                 self.write_command_file(project)
-                 self.commands_written = True
-             else:
-                 print(written_msg.format(datetime.now()), flush=True)
-
-             self.is_command_writing = None
-             session.commit()
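The `is_command_writing` row functions as a cross-process mutex: its primary key is the command group submission ID with a UNIQUE constraint, so only one process can insert it, and a concurrent inserter receives an `IntegrityError` and backs off. A minimal sketch of that pattern in isolation (names follow the code above; this is an illustration, not the method itself):

    from time import sleep
    from sqlalchemy.exc import IntegrityError

    def acquire_write_lock(session, cg_sub, sleep_time=5):
        # Keep trying to insert the unique lock row; whoever commits it
        # first holds the "lock", everyone else rolls back and retries.
        while True:
            try:
                cg_sub.is_command_writing = IsCommandWriting()
                session.commit()
                return
            except IntegrityError:
                session.rollback()
                sleep(sleep_time)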
-
-     def write_variable_files(self, project, task_idx, iteration):
-
-         task = self.get_task(task_idx, iteration)
-         var_vals_normed = task.get_variable_values_normed()
-
-         print('CGS.write_variable_files: task: {}'.format(task), flush=True)
-         print('CGS.write_variable_files: var_vals_normed: {}'.format(
-             var_vals_normed), flush=True)
-
-         max_num_tasks = self.scheduler_group.get_max_num_tasks(
-             self.submission.first_iteration)
-
-         var_values_task_dir = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'iter_{}'.format(iteration.order_id),
-             'scheduler_group_{}'.format(self.scheduler_group_index[0]),
-             'var_values',
-             zeropad(task.scheduler_id, max_num_tasks),
-         )
-
-         for var_name, var_val_all in var_vals_normed.items():
-             var_fn = 'var_{}{}'.format(var_name, CONFIG.get('variable_file_ext'))
-             var_file_path = var_values_task_dir.joinpath(var_fn)
-             with var_file_path.open('w') as handle:
-                 for i in var_val_all:
-                     handle.write('{}\n'.format(i))
-
-     @staticmethod
-     def get_formatted_commands(commands, num_cores, parallel_modes, indent=''):
-
-         # TODO: what about the parallel mode environment?
-         delims = CONFIG.get('variable_delimiters')
-         lns_cmd = []
-         for i in commands:
-             if 'line' in i:
-                 cmd_ln = indent
-                 para_mode = i.get('parallel_mode')
-                 if para_mode:
-                     # TODO: raise a meaningful error if the parallel mode is not
-                     # configured:
-                     para_mode_config = parallel_modes.get(para_mode.lower())
-                     para_command = para_mode_config.get('command')
-                     if para_command:
-                         cmd_ln += para_command.replace('<<num_cores>>', num_cores) + ' '
-                 line = i['line']
-                 for var_name in extract_variable_names(line, delims):
-                     line = line.replace(delims[0] + var_name + delims[1], f'${var_name}')
-                 cmd_ln += line
-                 lns_cmd.append(cmd_ln)
-             elif 'subshell' in i:
-                 sub_cmds = CommandGroupSubmission.get_formatted_commands(
-                     i['subshell'],
-                     num_cores,
-                     parallel_modes,
-                     indent=(indent + '\t'),
-                 )
-                 lns_cmd.extend([f'{indent}('] + sub_cmds + [f'{indent})'])
-
-         return lns_cmd
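A minimal illustration of the variable substitution performed above, assuming delimiters of `<<` and `>>` (the actual delimiters come from `CONFIG`, and the variable names would be found by `extract_variable_names`):

    delims = ('<<', '>>')  # assumed delimiter configuration
    line = 'process <<infile>> --out <<outfile>>'
    for var_name in ('infile', 'outfile'):
        line = line.replace(delims[0] + var_name + delims[1], f'${var_name}')
    # -> 'process $infile --out $outfile'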
-
-     def write_command_file(self, project):
-
-         lns_cmd = self.get_formatted_commands(
-             self.command_group.commands,
-             num_cores=self.command_group.scheduler.NUM_CORES_VAR,
-             parallel_modes=self.command_group.workflow.parallel_modes,
-             indent=('\t' if self.command_group.variable_definitions else ''),
-         )
-
-         lns_while_start = ['while true', 'do']
-         lns_while_end = ['done \\']
-
-         dt_stamp = datetime.now().strftime(r'%Y.%m.%d at %H:%M:%S')
-         about_msg = ['# --- commands file generated by `hpcflow` (version: {}) '
-                      'on {} ---'.format(__version__, dt_stamp)]
-
-         max_num_tasks = self.scheduler_group.get_max_num_tasks(
-             self.submission.first_iteration)
-
-         # Zero-pad the scheduler task ID to the digit count of the largest task ID:
-         lns_task_id_pad = [
-             'MAX_NUM_TASKS={}'.format(max_num_tasks),
-             'MAX_NUM_DIGITS="${#MAX_NUM_TASKS}"',
-             'ZEROPAD_TASK_ID=$(printf "%0${MAX_NUM_DIGITS}d" $SGE_TASK_ID)',
-         ]
-
-         # Read each variable file on its own file descriptor (starting at 3), in
-         # lockstep with the `while` loop:
-         lns_read = []
-         lns_fds = []
-
-         for idx, i in enumerate(self.command_group.variable_definitions):
-
-             fd_idx = idx + 3
-
-             var_fn = 'var_{}{}'.format(i.name, CONFIG.get('variable_file_ext'))
-             var_file_path = ('$ITER_DIR/scheduler_group_{}/var_values'
-                              '/$ZEROPAD_TASK_ID/{}').format(
-                                  self.scheduler_group_index[0], var_fn)
-
-             lns_read.append('\tread -u{} {} || break'.format(fd_idx, i.name))
-
-             if idx > 0:
-                 lns_fds[-1] += ' \\'
-
-             lns_fds.append('\t{}< {}'.format(fd_idx, var_file_path))
-
-         lns_cmd_print = ['printf "Running command: \\"{}\\"\\n" >> $LOG_PATH 2>&1'.format(
-             i.strip('\t').replace('"', r'\\\\\"')) for i in lns_cmd]
-
-         if self.command_group.variable_definitions:
-             lns_cmd_print = ['\t{}'.format(i) for i in lns_cmd_print]
-             cmd_lns = (about_msg + [''] +
-                        lns_task_id_pad + [''] +
-                        lns_while_start + [''] +
-                        lns_read + [''] +
-                        lns_cmd_print + [''] +
-                        lns_cmd + [''] +
-                        lns_while_end +
-                        lns_fds + [''])
-         else:
-             cmd_lns = (about_msg + [''] +
-                        lns_cmd_print + [''] +
-                        lns_cmd + [''])
-
-         cmd_lns = '\n'.join(cmd_lns)
-
-         cmd_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'cmd_{}{}'.format(self.command_group_exec_order, CONFIG.get('jobscript_ext')),
-         )
-         with cmd_path.open('w') as handle:
-             handle.write(cmd_lns)
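The generated file is a shell script that reads one value per variable per loop pass via dedicated file descriptors. A rough illustration of its shape for a single variable `infile`, assuming a `.txt` variable file extension (the real extension comes from `CONFIG`):

    # Shape of the generated cmd_0.sh (illustrative only):
    #
    #   MAX_NUM_TASKS=10
    #   MAX_NUM_DIGITS="${#MAX_NUM_TASKS}"
    #   ZEROPAD_TASK_ID=$(printf "%0${MAX_NUM_DIGITS}d" $SGE_TASK_ID)
    #
    #   while true
    #   do
    #       read -u3 infile || break
    #       printf "Running command: \"process $infile\"\n" >> $LOG_PATH 2>&1
    #       process $infile
    #   done \
    #       3< $ITER_DIR/scheduler_group_0/var_values/$ZEROPAD_TASK_ID/var_infile.txt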
-
-     def write_alt_scratch_exclusion_list(self, project, task, iteration):
-         """Write alternate scratch exclusion files (for e.g. rsync)."""
-
-         # List of paths to exclude, relative to `self.submission.workflow.directory`:
-         excluded_paths = [
-             Path(CONFIG.get('hpcflow_directory'))] + self.submission.workflow.profile_files
-
-         out_dir = Path(self.command_group.scheduler.output_dir)
-         err_dir = Path(self.command_group.scheduler.error_dir)
-         if out_dir not in excluded_paths:
-             excluded_paths.append(out_dir)
-         if err_dir not in excluded_paths:
-             excluded_paths.append(err_dir)
-
-         # Keep only the excluded paths that fall within the task's working directory:
-         working_dir_path = Path(task.get_working_directory_value())
-         alt_scratch_exclusions = []
-         for exc_path in excluded_paths:
-             try:
-                 exc_path.relative_to(working_dir_path)
-             except ValueError:
-                 continue
-             alt_scratch_exclusions.append(exc_path)
-
-         exc_list_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'iter_{}'.format(iteration.order_id),
-             '{}_{}_{}{}'.format(
-                 CONFIG.get('alt_scratch_exc_file'),
-                 self.command_group_exec_order,
-                 task.order_id,
-                 CONFIG.get('alt_scratch_exc_file_ext'),
-             ),
-         )
-
-         working_dir_abs = self.submission.workflow.directory.joinpath(working_dir_path)
-         about = (
-             '# Alternate scratch exclusion list. Patterns are relative '
-             'to the task #{} working directory:\n'
-             '# "{}"\n\n'
-         )
-         with exc_list_path.open('w') as handle:
-             handle.write(about.format(task.order_id, working_dir_abs))
-             for exc_path in alt_scratch_exclusions:
-                 handle.write(str(exc_path) + '\n')
-
-     def make_alternate_scratch_dirs(self, project, iteration):
-         """Generate task working directories on the alternate scratch."""
-
-         # Get the task working directories:
-         cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-         working_dirs = [task.get_working_directory() for task in cg_sub_iter.tasks]
-
-         alt_scratch_root = self.command_group.alternate_scratch.joinpath(
-             self.submission.alt_scratch_dir_name)
-
-         for working_dir in working_dirs:
-             if working_dir.value == '.':
-                 # The "root" directory has already been made:
-                 continue
-             alt_scratch_w_dir = alt_scratch_root.joinpath(working_dir.value)
-             alt_scratch_w_dir.mkdir(parents=True, exist_ok=True)
-
-     def get_iteration(self, iter_idx):
-         for i in self.submission.workflow.iterations:
-             if i.order_id == iter_idx:
-                 return i
-
-     def get_task(self, task_idx, iteration):
-         cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-         for i in cg_sub_iter.tasks:
-             if i.order_id == task_idx and i.iteration == iteration:
-                 return i
-
-     def set_task_start(self, task_idx, iter_idx):
-         context = 'CommandGroupSubmission.set_task_start'
-         msg = '{{}} {}: Task index {} started.'.format(context, task_idx)
-         start_time = datetime.now()
-         print(msg.format(start_time), flush=True)
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         task.start_time = start_time
-         print('task: {}'.format(task), flush=True)
-
-     def set_task_end(self, task_idx, iter_idx):
-         context = 'CommandGroupSubmission.set_task_end'
-         msg = '{{}} {}: Task index {} ended.'.format(context, task_idx)
-         end_time = datetime.now()
-         print(msg.format(end_time), flush=True)
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         task.end_time = end_time
-         print('task: {}'.format(task), flush=True)
-
-     def do_archive(self, task_idx, iter_idx):
-         """Archive the working directory associated with a given task in this command
-         group submission."""
-
-         # Adding a small delay increases the chance that `Task.is_archive_required`
-         # will be False (and so saves some time overall) in the case where all tasks
-         # start at roughly the same time:
-         sleep(10)
-
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         self.command_group.archive.execute_with_lock(task)
-
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         """Get task statistics for this command group submission."""
-         out = {
-             'command_group_submission_id': self.id_,
-             'command_group_id': self.command_group.id_,
-             'commands': self.command_group.commands,
-             'name': self.command_group.name,
-             'tasks': [task.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                       for cgsub_iter in self.command_group_submission_iterations
-                       for task in cgsub_iter.tasks
-                       if task.iteration.status != IterationStatus('pending')]
-         }
-         return out
-
-     def get_scheduler_stats(self, task_idx, iter_idx):
-
-         # Get the scheduler job ID and scheduler task ID:
-         iteration = self.get_iteration(iter_idx)
-         cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-         scheduler_job_id = cg_sub_iter.scheduler_job_id
-         task = self.get_task(task_idx, iteration)
-         task_id = task.scheduler_id
-
-         info = self.command_group.scheduler.get_scheduler_stats(scheduler_job_id, task_id)
-
-         # Note: the stored value keeps the unit reported by the scheduler (MB or GB):
-         if 'MB' in info['maxvmem']:
-             maxvmem = float(info['maxvmem'].split('MB')[0])
-         elif 'GB' in info['maxvmem']:
-             maxvmem = float(info['maxvmem'].split('GB')[0])
-         else:
-             # Unrecognised unit; record no memory value rather than fail:
-             maxvmem = None
-         hostname = info['hostname']
-         wallclock = int(info['ru_wallclock'].split('s')[0])
-
-         task.memory = maxvmem
-         task.hostname = hostname
-         task.wallclock = wallclock
-
-
- class VarValue(Base):
-     """Class to represent the evaluated value of a variable."""
-
-     __tablename__ = 'var_value'
-
-     id_ = Column('id', Integer, primary_key=True)
-     var_definition_id = Column(
-         Integer,
-         ForeignKey('var_definition.id'),
-     )
-     submission_id = Column(Integer, ForeignKey('submission.id'))
-     value = Column(String(255))
-     order_id = Column(Integer)
-     directory_value_id = Column('directory_value_id', Integer, ForeignKey('var_value.id'))
-     iteration_id = Column(Integer, ForeignKey('iteration.id'))
-
-     variable_definition = relationship('VarDefinition', back_populates='variable_values')
-     submission = relationship('Submission', back_populates='variable_values')
-     directory_value = relationship('VarValue', uselist=False, remote_side=id_)
-     iteration = relationship('Iteration', uselist=False)
-
-     def __init__(self, value, order_id, var_definition, submission, iteration,
-                  directory_value=None):
-
-         self.value = value
-         self.order_id = order_id
-         self.iteration = iteration
-         self.variable_definition = var_definition
-         self.submission = submission
-         self.directory_value = directory_value
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'variable_name={}, '
-             'value={}, '
-             'order_id={}, '
-             'iteration={}, '
-             'directory={}'
-             ')').format(
-                 self.__class__.__name__,
-                 self.variable_definition.name,
-                 self.value,
-                 self.order_id,
-                 self.iteration,
-                 self.directory_value.value if self.directory_value else None,
-         )
-         return out
-
-
- class IsCommandWriting(Base):
-     """Class to represent active writing of a command file."""
-
-     __tablename__ = 'is_command_writing'
-
-     command_group_submission_id = Column(
-         Integer,
-         ForeignKey('command_group_submission.id'),
-         primary_key=True,
-         unique=True
-     )
-
-
- class Task(Base):
-     """Class to represent a single task."""
-
-     __tablename__ = 'task'
-
-     id_ = Column('id', Integer, primary_key=True)
-     order_id = Column(Integer, nullable=False)
-     start_time = Column(DateTime)
-     end_time = Column(DateTime)
-     memory = Column(Float)
-     hostname = Column(String(255))
-     wallclock = Column(Integer)
-     archive_status = Column(Enum(TaskArchiveStatus), nullable=True)
-     _archive_start_time = Column('archive_start_time', DateTime, nullable=True)
-     _archive_end_time = Column('archive_end_time', DateTime, nullable=True)
-     archived_task_id = Column(Integer, ForeignKey('task.id'), nullable=True)
-
-     command_group_submission_iteration_id = Column(
-         Integer, ForeignKey('command_group_submission_iteration.id'))
-
-     command_group_submission_iteration = relationship(
-         'CommandGroupSubmissionIteration', back_populates='tasks', uselist=False)
-
-     archived_task = relationship('Task', uselist=False, remote_side=id_)
-
-     def __init__(self, command_group_submission_iteration, order_id):
-         self.order_id = order_id
-         self.command_group_submission_iteration = command_group_submission_iteration
-         self.start_time = None
-         self.end_time = None
-
-         if self.command_group_submission_iteration.command_group_submission.command_group.archive:
-             self.archive_status = TaskArchiveStatus('pending')
-
-     @property
-     def iteration(self):
-         return self.command_group_submission_iteration.iteration
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'order_id={}, '
-             'command_group_submission_iteration_id={}, '
-             'start_time={}, '
-             'end_time={}'
-             ')').format(
-                 self.__class__.__name__,
-                 self.order_id,
-                 self.command_group_submission_iteration_id,
-                 self.start_time,
-                 self.end_time,
-         )
-         return out
-
-     @property
-     def duration(self):
-         if self.start_time and self.end_time:
-             return self.end_time - self.start_time
-         else:
-             return None
-
-     @property
-     def scheduler_id(self):
-         """Get the task ID, as understood by the scheduler."""
-         num_tasks = self.command_group_submission_iteration.num_outputs
-         step_size = self.command_group_submission_iteration.step_size
-         scheduler_range = range(1, 1 + (num_tasks * step_size), step_size)
-         scheduler_id = scheduler_range[self.order_id]
-
-         return scheduler_id
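As a worked example of the mapping above: with three outputs and a step size of two, the scheduler task IDs run 1, 3, 5:

    num_tasks, step_size = 3, 2
    scheduler_range = range(1, 1 + (num_tasks * step_size), step_size)  # range(1, 7, 2)
    scheduler_id = scheduler_range[1]  # order_id 1 -> scheduler task ID 3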
-
-     @property
-     def archive_start_time(self):
-         if self.archived_task:
-             # Archiving for this task was handled by another task with the same
-             # working directory:
-             return self.archived_task.archive_start_time
-         else:
-             return self._archive_start_time
-
-     @archive_start_time.setter
-     def archive_start_time(self, start_time):
-         self._archive_start_time = start_time
-
-     @property
-     def archive_end_time(self):
-         if self.archived_task:
-             # Archiving for this task was handled by another task with the same
-             # working directory:
-             return self.archived_task.archive_end_time
-         else:
-             return self._archive_end_time
-
-     @archive_end_time.setter
-     def archive_end_time(self, end_time):
-         self._archive_end_time = end_time
-
-     @property
-     def archive_duration(self):
-         if self.archive_start_time and self.archive_end_time:
-             return self.archive_end_time - self.archive_start_time
-         else:
-             return None
-
-     def get_working_directory(self):
-         """Get the "working directory" of this task."""
-         dir_vals = self.command_group_submission_iteration.get_directories()
-         dirs_per_task = (len(dir_vals) /
-                          self.command_group_submission_iteration.num_outputs)
-         dir_idx = floor(self.order_id * dirs_per_task)
-         working_dir = dir_vals[dir_idx]
-
-         return working_dir
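For example, six directory values shared among three tasks gives two directories per task, and the task with `order_id == 2` maps to the fifth directory:

    from math import floor

    dirs_per_task = 6 / 3               # -> 2.0
    dir_idx = floor(2 * dirs_per_task)  # order_id 2 -> dir_vals[4]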
-
-     def get_working_directory_value(self):
-         return self.get_working_directory().value
-
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         """Get statistics for this task."""
-         out = {
-             'task_id': self.id_,
-             'order_id': self.order_id,
-             'scheduler_id': self.scheduler_id,
-             'start_time': self.start_time,
-             'end_time': self.end_time,
-             'duration': self.duration,
-             'archive_start_time': self.archive_start_time,
-             'archive_end_time': self.archive_end_time,
-             'archive_duration': self.archive_duration,
-             'archived_task_id': self.archived_task_id,
-             'memory': self.memory,
-             'hostname': self.hostname,
-             'wallclock': self.wallclock,
-             'working_directory': self.get_working_directory_value(),
-             'archive_status': self.archive_status,
-             'iteration': self.iteration.order_id,
-         }
-
-         if datetime_dicts:
-             if self.duration:
-                 out['duration'] = timedelta_to_dict(out['duration'])
-             if self.archive_duration:
-                 out['archive_duration'] = timedelta_to_dict(out['archive_duration'])
-             if self.start_time:
-                 out['start_time'] = datetime_to_dict(out['start_time'])
-             if self.end_time:
-                 out['end_time'] = datetime_to_dict(out['end_time'])
-             if self.archive_start_time:
-                 out['archive_start_time'] = datetime_to_dict(out['archive_start_time'])
-             if self.archive_end_time:
-                 out['archive_end_time'] = datetime_to_dict(out['archive_end_time'])
-
-         if jsonable:
-
-             if not datetime_dicts:
-
-                 if self.duration:
-                     out['duration'] = format_time_delta(out['duration'])
-                 if self.archive_duration:
-                     out['archive_duration'] = format_time_delta(out['archive_duration'])
-
-                 dt_fmt = r'%Y.%m.%d %H:%M:%S'
-
-                 if self.start_time:
-                     out['start_time'] = out['start_time'].strftime(dt_fmt)
-                 if self.end_time:
-                     out['end_time'] = out['end_time'].strftime(dt_fmt)
-                 if self.archive_start_time:
-                     out['archive_start_time'] = out['archive_start_time'].strftime(dt_fmt)
-                 if self.archive_end_time:
-                     out['archive_end_time'] = out['archive_end_time'].strftime(dt_fmt)
-
-             if self.archive_status:
-                 out['archive_status'] = self.archive_status.value
-
-         return out
-
-     def get_same_directory_tasks(self):
-         """Get a list of other tasks within the same command group that share the same
-         working directory and iteration."""
-         same_dir_tasks = []
-         for i in self.command_group_submission_iteration.tasks:
-             if i is self:
-                 continue
-             elif i.iteration == self.iteration:
-                 if i.get_working_directory() is self.get_working_directory():
-                     same_dir_tasks.append(i)
-
-         print('Task.get_same_directory_tasks: same_dir_tasks: {}'.format(same_dir_tasks),
-               flush=True)
-
-         return same_dir_tasks
-
-     def is_archive_required(self):
-         """Check if an archive of this task is required. It is not required if a
-         different task in the same command group submission with the same working
-         directory began its own archive after this task's commands completed."""
-
-         if not self.end_time:
-             msg = ('`Task.is_archive_required` should not be called until the task '
-                    'has completed; {} has not completed.'.format(self))
-             raise RuntimeError(msg)
-
-         for i in self.get_same_directory_tasks():
-             print('Checking if the archive of other task {} started after this task '
-                   '({}) finished.'.format(i, self), flush=True)
-             if i.archive_start_time:
-                 if i.archive_start_time > self.end_time:
-                     self.archived_task = i
-                     return False
-
-         return True
-
-     def get_variable_values(self):
-         """Get the values of variables that are resolved in this task's working
-         directory.
-
-         Returns
-         -------
-         var_vals : dict of (str: list of str)
-             Keys are the variable definition names and values are lists of variable
-             values as strings.
-
-         """
-
-         task_directory = self.get_working_directory()
-         cg_sub = self.command_group_submission_iteration.command_group_submission
-         sub_var_vals = cg_sub.submission.variable_values
-         cmd_group_var_names = cg_sub.command_group.variable_names
-         var_vals = {}
-
-         print('Task.get_variable_values: sub_var_vals:', flush=True)
-         pprint(sub_var_vals)
-
-         print('Task.get_variable_values: cmd_group_var_names:', flush=True)
-         pprint(cmd_group_var_names)
-
-         for i in sub_var_vals:
-             if i.directory_value == task_directory:
-                 var_defn_name = i.variable_definition.name
-                 if var_defn_name in cmd_group_var_names:
-                     if var_defn_name in var_vals:
-                         var_vals[var_defn_name].append(i.value)
-                     else:
-                         var_vals.update({var_defn_name: [i.value]})
-
-         return var_vals
-
-     def get_variable_values_normed(self):
-         """Get the values of variables that are resolved in this task's working
-         directory, where all variable values have the same, normalised multiplicity.
-
-         Returns
-         -------
-         var_vals_normed : dict of (str: list of str)
-             Keys are the variable definition names and values are lists of variable
-             values as strings. The list of variable values is the same length for
-             each variable definition name.
-
-         """
-
-         var_vals = self.get_variable_values()
-         if not var_vals:
-             return {}
-
-         only_names, only_vals = zip(*var_vals.items())
-         only_vals_uniform = coerce_same_length(list(only_vals))
-
-         # For a job array, each task takes a single value per variable:
-         cg_sub = self.command_group_submission_iteration.command_group_submission
-         if cg_sub.command_group.is_job_array:
-             val_idx = self.order_id % len(only_vals_uniform[0])
-             only_vals_uniform = [[i[val_idx]] for i in only_vals_uniform]
-
-         var_vals_normed = dict(zip(only_names, only_vals_uniform))
-
-         return var_vals_normed
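A small illustration of the normalisation above, assuming `coerce_same_length` broadcasts length-one lists up to the longest list (behaviour inferred from its use here):

    var_vals = {'infile': ['a.in', 'b.in'], 'opts': ['--fast']}  # hypothetical values
    only_names, only_vals = zip(*var_vals.items())
    # coerce_same_length([['a.in', 'b.in'], ['--fast']])
    # -> [['a.in', 'b.in'], ['--fast', '--fast']]
    # For a job-array command group, the task with order_id 1 then takes:
    # val_idx = 1 % 2 -> {'infile': ['b.in'], 'opts': ['--fast']}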
-
-
- class Iteration(Base):
-     """Class to represent a workflow iteration."""
-
-     __tablename__ = 'iteration'
-
-     id_ = Column('id', Integer, primary_key=True)
-     workflow_id = Column(Integer, ForeignKey('workflow.id'))
-     order_id = Column(Integer)
-     status = Column(Enum(IterationStatus), default=IterationStatus('pending'))
-
-     workflow = relationship('Workflow', back_populates='iterations', uselist=False)
-     command_group_submission_iterations = relationship(
-         'CommandGroupSubmissionIteration',
-         back_populates='iteration',
-     )
-
-     def __init__(self, order_id):
-         self.order_id = order_id
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'id={}, '
-             'workflow_id={}, '
-             'order_id={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.id_,
-             self.workflow_id,
-             self.order_id,
-         )
-         return out
-
-
- class CommandGroupSubmissionIteration(Base):
-
-     __tablename__ = 'command_group_submission_iteration'
-
-     id_ = Column('id', Integer, primary_key=True)
-     working_dirs_written = Column(Boolean, default=False)
-     iteration_id = Column(Integer, ForeignKey('iteration.id'))
-     scheduler_job_id = Column(Integer, nullable=True)
-     scheduler_stats_job_id = Column(Integer, nullable=True)
-     command_group_submission_id = Column(
-         Integer, ForeignKey('command_group_submission.id'))
-
-     iteration = relationship(
-         'Iteration',
-         back_populates='command_group_submission_iterations',
-         uselist=False,
-     )
-     command_group_submission = relationship(
-         'CommandGroupSubmission',
-         back_populates='command_group_submission_iterations',
-     )
-     tasks = relationship('Task', back_populates='command_group_submission_iteration')
-
-     def __init__(self, iteration, command_group_submission):
-         self.iteration = iteration
-         self.command_group_submission = command_group_submission
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'iteration_id={}, '
-             'command_group_submission_id={}, '
-             'scheduler_job_id={}, '
-             'scheduler_stats_job_id={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.iteration_id,
-             self.command_group_submission_id,
-             self.scheduler_job_id,
-             self.scheduler_stats_job_id,
-         )
-         return out
-
-     def get_directory_values(self):
-         dir_vals = [i.value for i in self.get_directories()]
-         return dir_vals
-
-     def get_directories(self):
-         """Get the directory variable values associated with this command group
-         submission and iteration."""
-
-         dir_vars_all = self.command_group_submission.command_group.directory_variable.variable_values
-
-         # Keep only those with the correct submission and iteration:
-         dirs = []
-         for i in dir_vars_all:
-             if i.iteration == self.iteration:
-                 if i.submission == self.command_group_submission.submission:
-                     dirs.append(i)
-
-         return dirs
-
-     @property
-     def num_directories(self):
-         return len(self.get_directories())
-
-     def get_task_multiplicity(self):
-         """Get the number of tasks associated with this command group submission, as
-         a function of working directory."""
-
-         dirs = self.get_directory_values()
-
-         sub = self.command_group_submission.submission
-
-         # Multiplicity of each variable, per directory:
-         var_lengths = {}
-         for directory in dirs:
-             var_lengths.update({directory: {}})
-             for i in self.command_group_submission.command_group.variable_definitions:
-                 var_lengths_i = i.get_multiplicity(sub)  # as a function of directory
-                 for var_dir, num in var_lengths_i.items():
-                     if var_dir == directory:
-                         var_lengths[directory].update({i.name: num})
-
-         # Combine the per-variable multiplicities in each directory. Lengths are
-         # compatible if they are all equal, or if there are exactly two distinct
-         # lengths of which the smaller is one (in which case it is broadcast):
-         var_lengths_combined = {}
-         for directory, var_nums in var_lengths.items():
-             if var_nums:
-                 uniq_lens = set(var_nums.values())
-                 num_uniq_lens = len(uniq_lens)
-                 if num_uniq_lens == 1:
-                     combined_len = min(uniq_lens)
-                 elif num_uniq_lens == 2:
-                     if min(uniq_lens) != 1:
-                         raise ValueError(
-                             'Incompatible variable multiplicities {} in directory '
-                             '"{}".'.format(sorted(uniq_lens), directory))
-                     combined_len = max(uniq_lens)
-                 else:
-                     raise ValueError(
-                         'Incompatible variable multiplicities {} in directory '
-                         '"{}".'.format(sorted(uniq_lens), directory))
-             else:
-                 combined_len = 1
-
-             var_lengths_combined.update({directory: combined_len})
-
-         return var_lengths_combined
-
-     @property
-     def num_outputs(self):
-         """Get the number of outputs for this command group submission."""
-         iteration = self.command_group_submission.submission.workflow.first_iteration
-         return self.command_group_submission.scheduler_group.get_num_outputs(iteration)[
-             self.command_group_submission.scheduler_group_index[1]]
-
-     @property
-     def step_size(self):
-         """Get the scheduler step size for this command group submission."""
-         iteration = self.command_group_submission.submission.workflow.first_iteration
-         return self.command_group_submission.scheduler_group.get_step_size(iteration)[
-             self.command_group_submission.scheduler_group_index[1]]
-
-     @property
-     def num_tasks(self):
-         return len(self.tasks)
-
-     def write_working_directories(self, project):
-         """Replace lines in the working_dirs files with actual directory paths."""
-
-         dir_vals = self.get_directories()
-
-         cg_sub = self.command_group_submission
-
-         wk_dirs_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(cg_sub.submission.workflow.id_),
-             'submit_{}'.format(cg_sub.submission.order_id),
-             'iter_{}'.format(self.iteration.order_id),
-             'working_dirs_{}{}'.format(
-                 cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')),
-         )
-
-         with wk_dirs_path.open() as handle:
-             file_lns = handle.readlines()
-
-         for idx, i in enumerate(file_lns):
-             new_val = i.strip()
-             if 'REPLACE_WITH_DIR_' in i:
-                 dir_idx = int(i.split('REPLACE_WITH_DIR_')[1])
-                 new_val = dir_vals[dir_idx].value
-             file_lns[idx] = new_val
-
-         with wk_dirs_path.open('w') as handle:
-             for i in file_lns:
-                 handle.write(i + '\n')
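A minimal illustration of the placeholder substitution above, with two hypothetical directory values standing in for the `VarValue` objects:

    dir_vals = ['sim_A', 'sim_B']  # hypothetical directory variable values
    file_lns = ['REPLACE_WITH_DIR_0\n', '\n', 'REPLACE_WITH_DIR_1\n', '\n']
    for idx, i in enumerate(file_lns):
        new_val = i.strip()
        if 'REPLACE_WITH_DIR_' in i:
            new_val = dir_vals[int(i.split('REPLACE_WITH_DIR_')[1])]
        file_lns[idx] = new_val
    # -> ['sim_A', '', 'sim_B', '']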
-
-
- class SchedulerGroup(object):
-     """Class to represent a collection of consecutive command group submissions that
-     have the same scheduler task range."""
-
-     def __init__(self, order_id, command_group_submissions):
-         self.order_id = order_id
-         self.command_group_submissions = command_group_submissions
-
-     def __repr__(self):
-         out = ('{}('
-                'order_id={}, '
-                'command_group_submissions={}'
-                ')').format(
-                    self.__class__.__name__,
-                    self.order_id,
-                    self.command_group_submissions,
-         )
-         return out
-
-     def get_max_num_tasks(self, iteration):
-         return max(self.get_num_outputs(iteration))
-
-     def get_step_size(self, iteration):
-         return [int(self.get_max_num_tasks(iteration) / i)
-                 for i in self.get_num_outputs(iteration)]
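For example, if the command groups in a scheduler group produce [4, 2, 1] outputs, the scheduler array size is the maximum (4) and each group's step size divides it down:

    num_outputs = [4, 2, 1]           # hypothetical per-group output counts
    max_num_tasks = max(num_outputs)  # -> 4
    step_sizes = [int(max_num_tasks / i) for i in num_outputs]  # -> [1, 2, 4]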
-
-     def get_num_outputs(self, iteration):
-
-         num_outs = 1
-         num_outs_prev = num_outs
-         num_outs_all = []
-
-         # Get the number of outputs of each command group submission in this
-         # scheduler group:
-         for idx, cg_sub in enumerate(self.command_group_submissions):
-
-             cg_sub_iter = None
-             for i in cg_sub.command_group_submission_iterations:
-                 if i.iteration == iteration:
-                     cg_sub_iter = i
-                     break
-             if not cg_sub_iter:
-                 raise ValueError('Could not find CommandGroupSubmissionIteration object.')
-
-             # The number of outputs depends on the task multiplicity, `is_job_array`
-             # and `nesting`:
-             is_job_array = cg_sub.command_group.is_job_array
-             nesting = cg_sub.command_group.nesting
-
-             if nesting == NestingType('nest'):
-                 num_outs = num_outs_prev
-             elif nesting == NestingType('hold'):
-                 num_outs = 1
-             elif nesting is None:
-                 num_outs = 1
-
-             if is_job_array:
-                 # `get_task_multiplicity` returns a dict keyed by directory:
-                 num_outs *= sum(cg_sub_iter.get_task_multiplicity().values())
-
-             num_outs_all.append(num_outs)
-             num_outs_prev = num_outs
-
-         return num_outs_all
-
-     def has(self, command_group_submission):
-         return command_group_submission in self.command_group_submissions
-
-     def index(self, command_group_submission):
-         if not self.has(command_group_submission):
-             msg = '{} is not in the scheduler group.'
-             raise ValueError(msg.format(command_group_submission))
-         return self.command_group_submissions.index(command_group_submission)
-
-     @classmethod
-     def get_scheduler_groups(cls, submission):
-         """Split the command group submissions up into scheduler groups."""
-
-         cmd_groups_split = []
-         sch_group_idx = 0
-
-         for cg_sub in submission.command_group_submissions:
-
-             if cg_sub.command_group.nesting == NestingType('hold'):
-                 sch_group_idx += 1
-             if len(cmd_groups_split) == sch_group_idx + 1:
-                 cmd_groups_split[sch_group_idx].append(cg_sub)
-             else:
-                 cmd_groups_split.append([cg_sub])
-
-         return [cls(idx, i) for idx, i in enumerate(cmd_groups_split)]
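To make the splitting rule concrete, a small sketch with nesting values standing in for command group submissions (a 'hold' command group always starts a new scheduler group):

    def split_on_hold(nestings):
        # Mirror of the loop above: 'hold' bumps the group index, which
        # forces a new group to be started for that command group.
        groups = []
        idx = 0
        for n in nestings:
            if n == 'hold':
                idx += 1
            if len(groups) == idx + 1:
                groups[idx].append(n)
            else:
                groups.append([n])
        return groups

    split_on_hold([None, 'nest', 'hold', 'nest'])
    # -> [[None, 'nest'], ['hold', 'nest']]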