hpcflow 0.1.9__py3-none-any.whl → 0.2.0a271__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. hpcflow/__init__.py +2 -11
  2. hpcflow/__pyinstaller/__init__.py +5 -0
  3. hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
  4. hpcflow/_version.py +1 -1
  5. hpcflow/app.py +43 -0
  6. hpcflow/cli.py +2 -462
  7. hpcflow/data/demo_data_manifest/__init__.py +3 -0
  8. hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
  9. hpcflow/data/jinja_templates/test/test_template.txt +8 -0
  10. hpcflow/data/programs/hello_world/README.md +1 -0
  11. hpcflow/data/programs/hello_world/hello_world.c +87 -0
  12. hpcflow/data/programs/hello_world/linux/hello_world +0 -0
  13. hpcflow/data/programs/hello_world/macos/hello_world +0 -0
  14. hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
  15. hpcflow/data/scripts/__init__.py +1 -0
  16. hpcflow/data/scripts/bad_script.py +2 -0
  17. hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
  18. hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
  19. hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
  20. hpcflow/data/scripts/do_nothing.py +2 -0
  21. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  22. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  23. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  24. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  25. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  26. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  27. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  28. hpcflow/data/scripts/generate_t1_file_01.py +7 -0
  29. hpcflow/data/scripts/import_future_script.py +7 -0
  30. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  31. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  32. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  33. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  34. hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
  35. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  36. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  37. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  38. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  39. hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
  40. hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
  41. hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
  42. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  43. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  44. hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
  45. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
  46. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  47. hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
  48. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
  49. hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
  50. hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
  51. hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
  52. hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
  53. hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
  54. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  55. hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
  56. hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
  57. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  58. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  59. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  60. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  61. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  62. hpcflow/data/scripts/parse_t1_file_01.py +4 -0
  63. hpcflow/data/scripts/script_exit_test.py +5 -0
  64. hpcflow/data/template_components/__init__.py +1 -0
  65. hpcflow/data/template_components/command_files.yaml +26 -0
  66. hpcflow/data/template_components/environments.yaml +13 -0
  67. hpcflow/data/template_components/parameters.yaml +14 -0
  68. hpcflow/data/template_components/task_schemas.yaml +139 -0
  69. hpcflow/data/workflows/workflow_1.yaml +5 -0
  70. hpcflow/examples.ipynb +1037 -0
  71. hpcflow/sdk/__init__.py +149 -0
  72. hpcflow/sdk/app.py +4266 -0
  73. hpcflow/sdk/cli.py +1479 -0
  74. hpcflow/sdk/cli_common.py +385 -0
  75. hpcflow/sdk/config/__init__.py +5 -0
  76. hpcflow/sdk/config/callbacks.py +246 -0
  77. hpcflow/sdk/config/cli.py +388 -0
  78. hpcflow/sdk/config/config.py +1410 -0
  79. hpcflow/sdk/config/config_file.py +501 -0
  80. hpcflow/sdk/config/errors.py +272 -0
  81. hpcflow/sdk/config/types.py +150 -0
  82. hpcflow/sdk/core/__init__.py +38 -0
  83. hpcflow/sdk/core/actions.py +3857 -0
  84. hpcflow/sdk/core/app_aware.py +25 -0
  85. hpcflow/sdk/core/cache.py +224 -0
  86. hpcflow/sdk/core/command_files.py +814 -0
  87. hpcflow/sdk/core/commands.py +424 -0
  88. hpcflow/sdk/core/element.py +2071 -0
  89. hpcflow/sdk/core/enums.py +221 -0
  90. hpcflow/sdk/core/environment.py +256 -0
  91. hpcflow/sdk/core/errors.py +1043 -0
  92. hpcflow/sdk/core/execute.py +207 -0
  93. hpcflow/sdk/core/json_like.py +809 -0
  94. hpcflow/sdk/core/loop.py +1320 -0
  95. hpcflow/sdk/core/loop_cache.py +282 -0
  96. hpcflow/sdk/core/object_list.py +933 -0
  97. hpcflow/sdk/core/parameters.py +3371 -0
  98. hpcflow/sdk/core/rule.py +196 -0
  99. hpcflow/sdk/core/run_dir_files.py +57 -0
  100. hpcflow/sdk/core/skip_reason.py +7 -0
  101. hpcflow/sdk/core/task.py +3792 -0
  102. hpcflow/sdk/core/task_schema.py +993 -0
  103. hpcflow/sdk/core/test_utils.py +538 -0
  104. hpcflow/sdk/core/types.py +447 -0
  105. hpcflow/sdk/core/utils.py +1207 -0
  106. hpcflow/sdk/core/validation.py +87 -0
  107. hpcflow/sdk/core/values.py +477 -0
  108. hpcflow/sdk/core/workflow.py +4820 -0
  109. hpcflow/sdk/core/zarr_io.py +206 -0
  110. hpcflow/sdk/data/__init__.py +13 -0
  111. hpcflow/sdk/data/config_file_schema.yaml +34 -0
  112. hpcflow/sdk/data/config_schema.yaml +260 -0
  113. hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
  114. hpcflow/sdk/data/files_spec_schema.yaml +5 -0
  115. hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
  116. hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
  117. hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
  118. hpcflow/sdk/demo/__init__.py +3 -0
  119. hpcflow/sdk/demo/cli.py +242 -0
  120. hpcflow/sdk/helper/__init__.py +3 -0
  121. hpcflow/sdk/helper/cli.py +137 -0
  122. hpcflow/sdk/helper/helper.py +300 -0
  123. hpcflow/sdk/helper/watcher.py +192 -0
  124. hpcflow/sdk/log.py +288 -0
  125. hpcflow/sdk/persistence/__init__.py +18 -0
  126. hpcflow/sdk/persistence/base.py +2817 -0
  127. hpcflow/sdk/persistence/defaults.py +6 -0
  128. hpcflow/sdk/persistence/discovery.py +39 -0
  129. hpcflow/sdk/persistence/json.py +954 -0
  130. hpcflow/sdk/persistence/pending.py +948 -0
  131. hpcflow/sdk/persistence/store_resource.py +203 -0
  132. hpcflow/sdk/persistence/types.py +309 -0
  133. hpcflow/sdk/persistence/utils.py +73 -0
  134. hpcflow/sdk/persistence/zarr.py +2388 -0
  135. hpcflow/sdk/runtime.py +320 -0
  136. hpcflow/sdk/submission/__init__.py +3 -0
  137. hpcflow/sdk/submission/enums.py +70 -0
  138. hpcflow/sdk/submission/jobscript.py +2379 -0
  139. hpcflow/sdk/submission/schedulers/__init__.py +281 -0
  140. hpcflow/sdk/submission/schedulers/direct.py +233 -0
  141. hpcflow/sdk/submission/schedulers/sge.py +376 -0
  142. hpcflow/sdk/submission/schedulers/slurm.py +598 -0
  143. hpcflow/sdk/submission/schedulers/utils.py +25 -0
  144. hpcflow/sdk/submission/shells/__init__.py +52 -0
  145. hpcflow/sdk/submission/shells/base.py +229 -0
  146. hpcflow/sdk/submission/shells/bash.py +504 -0
  147. hpcflow/sdk/submission/shells/os_version.py +115 -0
  148. hpcflow/sdk/submission/shells/powershell.py +352 -0
  149. hpcflow/sdk/submission/submission.py +1402 -0
  150. hpcflow/sdk/submission/types.py +140 -0
  151. hpcflow/sdk/typing.py +194 -0
  152. hpcflow/sdk/utils/arrays.py +69 -0
  153. hpcflow/sdk/utils/deferred_file.py +55 -0
  154. hpcflow/sdk/utils/hashing.py +16 -0
  155. hpcflow/sdk/utils/patches.py +31 -0
  156. hpcflow/sdk/utils/strings.py +69 -0
  157. hpcflow/tests/api/test_api.py +32 -0
  158. hpcflow/tests/conftest.py +123 -0
  159. hpcflow/tests/data/__init__.py +0 -0
  160. hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
  161. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  162. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  163. hpcflow/tests/data/workflow_1.json +10 -0
  164. hpcflow/tests/data/workflow_1.yaml +5 -0
  165. hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
  166. hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
  167. hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
  168. hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
  169. hpcflow/tests/programs/test_programs.py +180 -0
  170. hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
  171. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  172. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
  173. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  174. hpcflow/tests/scripts/test_main_scripts.py +1361 -0
  175. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  176. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  177. hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
  178. hpcflow/tests/unit/test_action.py +1066 -0
  179. hpcflow/tests/unit/test_action_rule.py +24 -0
  180. hpcflow/tests/unit/test_app.py +132 -0
  181. hpcflow/tests/unit/test_cache.py +46 -0
  182. hpcflow/tests/unit/test_cli.py +172 -0
  183. hpcflow/tests/unit/test_command.py +377 -0
  184. hpcflow/tests/unit/test_config.py +195 -0
  185. hpcflow/tests/unit/test_config_file.py +162 -0
  186. hpcflow/tests/unit/test_element.py +666 -0
  187. hpcflow/tests/unit/test_element_iteration.py +88 -0
  188. hpcflow/tests/unit/test_element_set.py +158 -0
  189. hpcflow/tests/unit/test_group.py +115 -0
  190. hpcflow/tests/unit/test_input_source.py +1479 -0
  191. hpcflow/tests/unit/test_input_value.py +398 -0
  192. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  193. hpcflow/tests/unit/test_json_like.py +1247 -0
  194. hpcflow/tests/unit/test_loop.py +2674 -0
  195. hpcflow/tests/unit/test_meta_task.py +325 -0
  196. hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
  197. hpcflow/tests/unit/test_object_list.py +116 -0
  198. hpcflow/tests/unit/test_parameter.py +243 -0
  199. hpcflow/tests/unit/test_persistence.py +664 -0
  200. hpcflow/tests/unit/test_resources.py +243 -0
  201. hpcflow/tests/unit/test_run.py +286 -0
  202. hpcflow/tests/unit/test_run_directories.py +29 -0
  203. hpcflow/tests/unit/test_runtime.py +9 -0
  204. hpcflow/tests/unit/test_schema_input.py +372 -0
  205. hpcflow/tests/unit/test_shell.py +129 -0
  206. hpcflow/tests/unit/test_slurm.py +39 -0
  207. hpcflow/tests/unit/test_submission.py +502 -0
  208. hpcflow/tests/unit/test_task.py +2560 -0
  209. hpcflow/tests/unit/test_task_schema.py +182 -0
  210. hpcflow/tests/unit/test_utils.py +616 -0
  211. hpcflow/tests/unit/test_value_sequence.py +549 -0
  212. hpcflow/tests/unit/test_values.py +91 -0
  213. hpcflow/tests/unit/test_workflow.py +827 -0
  214. hpcflow/tests/unit/test_workflow_template.py +186 -0
  215. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  216. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  217. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  218. hpcflow/tests/unit/utils/test_patches.py +5 -0
  219. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  220. hpcflow/tests/unit/utils/test_strings.py +97 -0
  221. hpcflow/tests/workflows/__init__.py +0 -0
  222. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  223. hpcflow/tests/workflows/test_jobscript.py +355 -0
  224. hpcflow/tests/workflows/test_run_status.py +198 -0
  225. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  226. hpcflow/tests/workflows/test_submission.py +140 -0
  227. hpcflow/tests/workflows/test_workflows.py +564 -0
  228. hpcflow/tests/workflows/test_zip.py +18 -0
  229. hpcflow/viz_demo.ipynb +6794 -0
  230. hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
  231. hpcflow-0.2.0a271.dist-info/METADATA +65 -0
  232. hpcflow-0.2.0a271.dist-info/RECORD +237 -0
  233. {hpcflow-0.1.9.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
  234. hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
  235. hpcflow/api.py +0 -458
  236. hpcflow/archive/archive.py +0 -308
  237. hpcflow/archive/cloud/cloud.py +0 -47
  238. hpcflow/archive/cloud/errors.py +0 -9
  239. hpcflow/archive/cloud/providers/dropbox.py +0 -432
  240. hpcflow/archive/errors.py +0 -5
  241. hpcflow/base_db.py +0 -4
  242. hpcflow/config.py +0 -232
  243. hpcflow/copytree.py +0 -66
  244. hpcflow/data/examples/_config.yml +0 -14
  245. hpcflow/data/examples/damask/demo/1.run.yml +0 -4
  246. hpcflow/data/examples/damask/demo/2.process.yml +0 -29
  247. hpcflow/data/examples/damask/demo/geom.geom +0 -2052
  248. hpcflow/data/examples/damask/demo/load.load +0 -1
  249. hpcflow/data/examples/damask/demo/material.config +0 -185
  250. hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
  251. hpcflow/data/examples/damask/inputs/load.load +0 -1
  252. hpcflow/data/examples/damask/inputs/material.config +0 -185
  253. hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
  254. hpcflow/data/examples/damask/profiles/damask.yml +0 -4
  255. hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
  256. hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
  257. hpcflow/data/examples/damask/profiles/default.yml +0 -6
  258. hpcflow/data/examples/thinking.yml +0 -177
  259. hpcflow/errors.py +0 -2
  260. hpcflow/init_db.py +0 -37
  261. hpcflow/models.py +0 -2549
  262. hpcflow/nesting.py +0 -9
  263. hpcflow/profiles.py +0 -455
  264. hpcflow/project.py +0 -81
  265. hpcflow/scheduler.py +0 -323
  266. hpcflow/utils.py +0 -103
  267. hpcflow/validation.py +0 -167
  268. hpcflow/variables.py +0 -544
  269. hpcflow-0.1.9.dist-info/METADATA +0 -168
  270. hpcflow-0.1.9.dist-info/RECORD +0 -45
  271. hpcflow-0.1.9.dist-info/entry_points.txt +0 -8
  272. hpcflow-0.1.9.dist-info/top_level.txt +0 -1
  273. /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
  274. /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
  275. /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
hpcflow/models.py DELETED
@@ -1,2549 +0,0 @@
1
- """`hpcflow.models.py`"""
2
-
3
-
4
- import re
5
- import os
6
- import enum
7
- from datetime import datetime
8
- from math import ceil, floor
9
- from pathlib import Path
10
- from pprint import pprint
11
- from subprocess import run, PIPE
12
- from time import sleep
13
-
14
- from sqlalchemy import (Column, Integer, DateTime, JSON, ForeignKey, Boolean,
15
- Enum, String, select, Float)
16
- from sqlalchemy.orm import relationship, deferred, Session, reconstructor
17
- from sqlalchemy.exc import IntegrityError, OperationalError
18
-
19
- from hpcflow.config import Config as CONFIG
20
- from hpcflow._version import __version__
21
- from hpcflow.archive.archive import Archive, TaskArchiveStatus
22
- from hpcflow.base_db import Base
23
- from hpcflow.archive.cloud.cloud import CloudProvider
24
- from hpcflow.nesting import NestingType
25
- from hpcflow.scheduler import SunGridEngine
26
- from hpcflow.utils import coerce_same_length, zeropad, format_time_delta, get_random_hex, datetime_to_dict, timedelta_to_dict
27
- from hpcflow.validation import validate_task_multiplicity
28
- from hpcflow.variables import (
29
- select_cmd_group_var_names, select_cmd_group_var_definitions,
30
- extract_variable_names, resolve_variable_values, UnresolvedVariableError
31
- )
32
-
33
- SCHEDULER_MAP = {
34
- 'sge': SunGridEngine,
35
- }
36
-
37
-
38
- class IterationStatus(enum.Enum):
39
-
40
- pending = 'pending'
41
- active = 'active'
42
- complete = 'complete'
43
-
44
-
45
- class Workflow(Base):
46
- """Class to represent a Workflow."""
47
-
48
- __tablename__ = 'workflow'
49
-
50
- id_ = Column('id', Integer, primary_key=True)
51
- create_time = Column(DateTime)
52
- pre_commands = Column(JSON)
53
- _directory = Column('directory', String(255))
54
- root_archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
55
- root_archive_excludes = Column(JSON, nullable=True)
56
- root_archive_directory = Column(String(255), nullable=True)
57
- _profile_files = Column('profile_files', JSON, nullable=True)
58
- loop = Column(JSON)
59
- parallel_modes = Column(JSON, nullable=True)
60
-
61
- command_groups = relationship(
62
- 'CommandGroup',
63
- back_populates='workflow',
64
- order_by='CommandGroup.exec_order',
65
- )
66
- submissions = relationship('Submission', back_populates='workflow')
67
- variable_definitions = relationship('VarDefinition', back_populates='workflow')
68
- root_archive = relationship('Archive', back_populates='workflow', uselist=False)
69
- iterations = relationship(
70
- 'Iteration', back_populates='workflow', order_by='Iteration.order_id')
71
-
72
- def __init__(self, directory, command_groups, var_definitions=None,
73
- pre_commands=None, archives=None, root_archive_idx=None,
74
- root_archive_excludes=None, profile_files=None, loop=None,
75
- parallel_modes=None):
76
- """Method to initialise a new Workflow.
77
-
78
- Parameters
79
- ----------
80
- directory : str or Path
81
- Directory in which the Workflow resides.
82
- command_groups : list of dict
83
- List of dictionaries that each represent a command group.
84
- var_definitions : dict, optional
85
- Dictionary whose keys are variable names and values are
86
- dictionaries that define variable definitions. By default, set to
87
- `None`, in which case it is assumed there are no variable
88
- references in any of the command groups.
89
- pre_commands : list of str
90
- List of commands to execute on creation of the Workflow.
91
- archives : list of dict
92
- List of dicts representing archive locations. Each dict in
93
- `command_groups` may contain keys `archive_idx` (which is an
94
- index into `archives`) and `archive_excludes` (which is a list
95
- of glob patterns to ignore when archiving). Each item in `archives`
96
- contains the following keys:
97
- name : str
98
- host : str
99
- path : str
100
- root_archive_idx : int
101
- Index into `archives` that sets the root archive for the workflow.
102
- root_archive_excludes : list of str
103
- File patterns to exclude from the root archive.
104
- profile_files : list of Path, optional
105
- If specified, the list of absolute file paths to the profile files used to
106
- generate this workflow.
107
- loop : dict, optional
108
- If specified, keys are:
109
- max_iterations : int
110
- Maximum number of loop iterations to submit.
111
- groups : list of int, optional
112
- Which command groups to include in iterations beyond the first. If not
113
- specified, all command groups are included in the loop.
114
- parallel_modes : dict, optional
115
- If specified, (case-insensitive) keys are one or more of: 'MPI', 'OpenMP'.
116
- Each is a dict with allowed keys:
117
- env : list of str
118
- Environment set up required for a given parallel mode.
119
- command : str
120
- Command to prepend to any command group commands that use this
121
- parallel mode.
122
-
123
- """
124
-
125
- if loop is None:
126
- loop = {
127
- 'max_iterations': 1,
128
- }
129
-
130
- # Command group directories must be stored internally as variables:
131
- for idx, i in enumerate(command_groups):
132
-
133
- dir_var_value = '.'
134
-
135
- if 'directory' in i:
136
-
137
- var_names = extract_variable_names(
138
- i['directory'], CONFIG.get('variable_delimiters'))
139
- if len(var_names) > 1:
140
- raise NotImplementedError()
141
- elif not var_names:
142
- # Value is set but is not a variable
143
- dir_var_value = i['directory'] or dir_var_value
144
- else:
145
- # Value is already a variable; no action.
146
- continue
147
-
148
- dir_var_defn_name = CONFIG.get('default_cmd_group_dir_var_name')
149
-
150
- command_groups[idx]['directory'] = '{1:}{0:}{2:}'.format(
151
- dir_var_defn_name,
152
- *CONFIG.get('variable_delimiters')
153
- )
154
-
155
- # Add new variable definition:
156
- var_definitions.update({
157
- dir_var_defn_name: {
158
- 'value': dir_var_value,
159
- }
160
- })
161
-
162
- self._directory = str(directory)
163
- self.profile_files = [i.relative_to(self.directory) for i in profile_files]
164
- self.create_time = datetime.now()
165
- self.pre_commands = pre_commands
166
- self.variable_definitions = [
167
- VarDefinition(name=k, **v) for k, v in var_definitions.items()
168
- ]
169
-
170
- # Generate Archive objects:
171
- archive_objs = []
172
- archive_dir_names = []
173
- if archives:
174
- for i in archives:
175
- arch_i = Archive(**i)
176
- archive_objs.append(arch_i)
177
- archive_dir_names.append(arch_i.get_archive_dir(self))
178
-
179
- if root_archive_idx is not None:
180
- self.root_archive = archive_objs[root_archive_idx]
181
- self.root_archive_excludes = root_archive_excludes
182
- self.root_archive_directory = archive_dir_names[root_archive_idx]
183
-
184
- cmd_groups = []
185
- for i in command_groups:
186
-
187
- dir_var_name = extract_variable_names(
188
- i['directory'], CONFIG.get('variable_delimiters'))[0]
189
-
190
- dir_var_defn = [i for i in self.variable_definitions
191
- if i.name == dir_var_name][0]
192
-
193
- i.pop('directory')
194
- i.update({
195
- 'directory_var': dir_var_defn,
196
- })
197
- arch_idx = i.pop('archive_idx', None)
198
- if arch_idx is not None:
199
- i.update({
200
- 'archive': archive_objs[arch_idx],
201
- 'archive_directory': archive_dir_names[arch_idx],
202
- })
203
- cmd_groups.append(CommandGroup(**i))
204
-
205
- self.command_groups = cmd_groups
206
- self.parallel_modes = parallel_modes
207
-
208
- self.loop = loop
209
- for i in range(self.loop['max_iterations']):
210
- self.iterations.append(Iteration(i))
211
-
212
- self.validate(archive_objs)
213
- self._execute_pre_commands()
214
- self.do_root_archive()
215
-
216
- def __repr__(self):
217
- out = ('{}('
218
- 'id={}, '
219
- 'directory={}, '
220
- 'pre_commands={}, '
221
- 'root_archive_id={}, '
222
- 'loop={}'
223
- ')').format(
224
- self.__class__.__name__,
225
- self.id_,
226
- self.directory,
227
- self.pre_commands,
228
- self.root_archive_id,
229
- self.loop,
230
- )
231
-
232
- return out
233
-
234
- def get_variable_definition_by_name(self, variable_name):
235
- """Get the VarDefintion object using the variable name."""
236
-
237
- for i in self.variable_definitions:
238
- if i.name == variable_name:
239
- return i
240
-
241
- msg = ('Cannot find variable definition with '
242
- 'name "{}"'.format(variable_name))
243
- raise ValueError(msg)
244
-
245
- @property
246
- def first_iteration(self):
247
- return self.iterations[0]
248
-
249
- @property
250
- def profile_files(self):
251
- if self._profile_files:
252
- return [Path(i) for i in self._profile_files]
253
- else:
254
- return []
255
-
256
- @profile_files.setter
257
- def profile_files(self, profile_files):
258
- if profile_files:
259
- self._profile_files = [str(i) for i in profile_files]
260
-
261
- @property
262
- def has_alternate_scratch(self):
263
- return bool(self.all_alternate_scratch)
264
-
265
- @property
266
- def all_alternate_scratch(self):
267
- return [i.alternate_scratch for i in self.command_groups if i.alternate_scratch]
268
-
269
- @property
270
- def directory(self):
271
- return Path(self._directory)
272
-
273
- def validate(self, archive_objs):
274
- cmd_group_list = []
275
- for i in self.command_groups:
276
- cmd_group_list.append({
277
- 'is_job_array': i.is_job_array,
278
- 'exec_order': i.exec_order,
279
- 'nesting': i.nesting,
280
- })
281
-
282
- err = '[Workflow instantiation error]'
283
- cmd_group_list = validate_task_multiplicity(cmd_group_list, err)
284
-
285
- for i_idx, i in enumerate(cmd_group_list):
286
- cmd_group = self.command_groups[i_idx]
287
- cmd_group.is_job_array = i['is_job_array']
288
- cmd_group.exec_order = i['exec_order']
289
- cmd_group.nesting = i['nesting']
290
-
291
- # If using an Archive with a cloud provider, check access:
292
- for i in archive_objs:
293
- if i.cloud_provider != CloudProvider.null:
294
- msg = f'Checking access to cloud storage ({i.name})...'
295
- print(msg, end='', flush=True)
296
- i.cloud_provider.check_access()
297
-
298
- def add_submission(self, project, task_range=None):
299
- """Add a new submission to this Workflow.
300
-
301
- Parameters
302
- ----------
303
- project : Project
304
- task_ranges : list, optional
305
- If specified, must be a list of length equal to the number of
306
- channels in the Workflow. Each list element specifies which tasks
307
- to submit from each Workflow channel. Each element may be either a
308
- list, a string "all", or `None`. If an element is a string "all",
309
- all tasks within the specified channel will be submitted. If an
310
- element is `None`, no tasks within the specified channel will be
311
- submitted. If an element is a list, it must have either two or
312
- three elements; if it has two elements, these signify the first and
313
- last tasks, inclusively, to submit from that channel. By default,
314
- the task step size is one, but this can be chosen as a third list
315
- entry. By default, set to `None`, in which case all tasks from all
316
- channels are included.
317
-
318
- Notes
319
- -----
320
- We are temporarily restricting the number of channels to 1, since
321
- supporting multiple channels requires some more technical work. This
322
- restriction is enforced in the `validation.validate_task_multiplicity`
323
- function.
324
-
325
- Examples
326
- --------
327
- Submit all tasks from all channels:
328
- >>> workflow.add_submission()
329
-
330
- Submit tasks 1, 2, 3, 4 and 5 from the first and only channel:
331
- >>> workflow.add_submission([[1, 5]])
332
-
333
- Submit tasks 1 and 3 from the first channel, and tasks 2, 3 and 4 from
334
- the second channel:
335
- >>> workflow.add_submission([[1, 4, 2], [2, 4]])
336
-
337
- Submit all tasks from the first channel, and tasks 2 and 7 from the
338
- second channel:
339
- >>> workflow.add_submission(['all', (2, 7, 5)])
340
-
341
- Submit all tasks from the first channel and no tasks from the second
342
- channel:
343
- >>> workflow.add_submission(['all', None])
344
-
345
-
346
- What to do:
347
- -----------
348
-
349
- 0. Firstly, resolve variable values for the first command group.
350
- 1. Need to identify which command groups must have their
351
- var_multiplicity resolved at submit time, and raise if it cannot
352
- be done. For `is_job_array=False` command groups, var_multiplicity
353
- does not need to be known at submit-time, since the number of
354
- output tasks will be known (either one [for `nesting=hold`], or
355
- equal to number of input tasks [for `nesting=None`]).
356
- 2. To do this, organise command groups into scheduler groups,
357
- which are delineated by command groups with `nesting=hold`.
358
- 3. For each scheduler group, go through the command groups in order
359
- and resolve the `var_multiplicity` if it is required. This is not
360
- the same as actually resolving the variable values. And we don't
361
- need to do that at submit-time, except for the very first command
362
- group! (Or rather, since submit-time and run-time coincide for
363
- the first command group, we have the *opportunity* to resolve
364
- variable values for the first command group; in general, variable
365
- values in a given command group may depend on the commands run in
366
- a previous command group, so this cannot be done.)
367
-
368
- """
369
-
370
- # print('Workflow.add_submission: task_range: {}'.format(task_range), flush=True)
371
-
372
- submission = Submission(self, task_range) # Generate CGSs and Tasks
373
- submission.write_submit_dirs(project.hf_dir)
374
- js_paths = submission.write_jobscripts(project.hf_dir)
375
- submission.submit_jobscripts(js_paths)
376
-
377
- return submission
378
-
379
- def get_num_channels(self, exec_order=0):
380
- """Get the number of command groups with a given execution order.
381
-
382
- Parameters
383
- ----------
384
- exec_order : int, optional
385
- The execution order at which to count command groups.
386
-
387
- Returns
388
- -------
389
- num_channels : int
390
- The number of command groups at the given execution order.
391
-
392
- """
393
-
394
- num_channels = 0
395
- for i in self.command_groups:
396
- if i.exec_order == exec_order:
397
- num_channels += 1
398
-
399
- return num_channels
400
-
401
- def _validate_task_ranges(self, task_ranges):
402
- """Validate task ranges.
403
-
404
- Parameters
405
- ----------
406
- task_ranges : list
407
-
408
- Returns
409
- -------
410
- task_ranges_valid : list
411
-
412
- """
413
-
414
- # Check length equal to num_channels:
415
- if len(task_ranges) != self.get_num_channels():
416
- msg = ('The number of task ranges specified must be equal to the '
417
- 'number of channels in the workflow, which is {}, but {} '
418
- 'task ranges were specified.')
419
- raise ValueError(msg.format(self.get_num_channels(),
420
- len(task_ranges)))
421
-
422
- task_range_msg = (
423
- 'Each task range must be specified as either a list with two or '
424
- 'three elements, representing the first and last task and '
425
- '(optionally) the step size, `None`, or the string "all".'
426
- )
427
-
428
- task_ranges_valid = []
429
- for i in task_ranges:
430
-
431
- # Validate:
432
- if isinstance(i, list):
433
- if len(i) not in [2, 3]:
434
- raise ValueError(task_range_msg)
435
- elif i not in ['all', None]:
436
- raise ValueError(task_range_msg)
437
-
438
- task_range_i = i
439
- if i == 'all':
440
- # Replace "all" with [n, m, s]
441
- task_range_i = [1, -1, 1]
442
-
443
- elif isinstance(i, list) and len(i) == 2:
444
- # Add step size of 1:
445
- task_range_i += [1]
446
-
447
- if task_range_i[1] != -1:
448
- # For known number of tasks, check m >= n >= 1:
449
- if task_range_i[0] < 1:
450
- msg = 'Starting task, `n`, must be >= 1.'
451
- raise ValueError(msg)
452
- if task_range_i[1] < task_range_i[0]:
453
- msg = 'Ending task, `m`, must be >= starting task, `n`.'
454
- raise ValueError(msg)
455
-
456
- task_ranges_valid.append(task_range_i)
457
-
458
- return task_ranges_valid
459
-
460
- def _execute_pre_commands(self):
461
-
462
- for i in self.pre_commands:
463
-
464
- proc = run(i, shell=True, stdout=PIPE, stderr=PIPE)
465
- pre_cmd_out = proc.stdout.decode()
466
- pre_cmd_err = proc.stderr.decode()
467
-
468
- def do_root_archive(self):
469
- """Copy the workflow directory to the root archive location."""
470
-
471
- if self.root_archive:
472
- self.root_archive.execute(self.root_archive_excludes,
473
- self.root_archive_directory)
474
-
475
- def get_stats(self, jsonable=True, datetime_dicts=False):
476
- 'Get task statistics for this workflow.'
477
- out = {
478
- 'workflow_id': self.id_,
479
- 'submissions': [i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
480
- for i in self.submissions]
481
- }
482
- return out
483
-
484
- def kill_active(self):
485
- 'Kill any active scheduled jobs associated with the workflow.'
486
-
487
- kill_scheduler_ids = []
488
- for sub in self.submissions:
489
- for cg_sub in sub.command_group_submissions:
490
- for iteration in self.iterations:
491
- cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
492
- if cg_sub_iter.scheduler_job_id is not None:
493
- kill_scheduler_ids.append(cg_sub_iter.scheduler_job_id)
494
-
495
- print('Need to kill: {}'.format(kill_scheduler_ids))
496
- del_cmd = ['qdel'] + [str(i) for i in kill_scheduler_ids]
497
- proc = run(del_cmd, stdout=PIPE, stderr=PIPE)
498
- qdel_out = proc.stdout.decode()
499
- qdel_err = proc.stderr.decode()
500
- print(qdel_out)
501
-
502
-
503
- class CommandGroup(Base):
504
- """Class to represent a command group, which is roughly translated into a
505
- job script."""
506
-
507
- __tablename__ = 'command_group'
508
-
509
- id_ = Column('id', Integer, primary_key=True)
510
- workflow_id = Column(Integer, ForeignKey('workflow.id'))
511
- directory_variable_id = Column(Integer, ForeignKey('var_definition.id'))
512
- archive_id = Column(Integer, ForeignKey('archive.id'), nullable=True)
513
-
514
- name = Column(String(255), nullable=True)
515
- stats_name = Column(String(255), nullable=True)
516
- commands = Column(JSON)
517
- is_job_array = Column(Boolean)
518
- exec_order = Column(Integer)
519
- nesting = Column(Enum(NestingType), nullable=True)
520
- environment = Column(JSON, nullable=True)
521
- _scheduler = Column('scheduler', JSON)
522
- profile_name = Column(String(255), nullable=True)
523
- profile_order = Column(Integer, nullable=True)
524
- archive_excludes = Column(JSON, nullable=True)
525
- archive_directory = Column(String(255), nullable=True)
526
- _alternate_scratch = Column('alternate_scratch', String(255), nullable=True)
527
- stats = Column(Boolean)
528
-
529
- archive = relationship('Archive', back_populates='command_groups')
530
- workflow = relationship('Workflow', back_populates='command_groups')
531
- command_group_submissions = relationship('CommandGroupSubmission',
532
- back_populates='command_group')
533
-
534
- directory_variable = relationship('VarDefinition')
535
-
536
- _scheduler_obj = None
537
-
538
- def __repr__(self):
539
- out = (
540
- '{}('
541
- 'commands={!r}, '
542
- 'is_job_array={!r}, '
543
- 'nesting={!r}'
544
- ')'
545
- ).format(
546
- self.__class__.__name__,
547
- self.commands,
548
- self.is_job_array,
549
- self.nesting,
550
- )
551
- return out
552
-
553
- def __init__(self, commands, directory_var, is_job_array=True,
554
- exec_order=None, nesting=None, environment=None, scheduler=None,
555
- profile_name=None, profile_order=None, archive=None,
556
- archive_excludes=None, archive_directory=None, alternate_scratch=None,
557
- stats=None, name=None, stats_name=None):
558
- """Method to initialise a new CommandGroup.
559
-
560
- Parameters
561
- ----------
562
- commands : list of dict
563
- List of dicts containing commands to execute.
564
- directory_var : VarDefinition
565
- The working directory for this command group. TODO...
566
- is_job_array : bool, optional
567
- If True, the command group is executed as a job array. True by
568
- default.
569
- exec_order : int, optional
570
- Execution order of this command relative to other command groups in
571
- the Workflow. By default, `None`.
572
- nesting : str, optional
573
- Either "nest" or "hold". This determines how the task multiplicity
574
- of this command group joins together with the task multiplicity of
575
- the previous command group (i.e. the command group with the lower
576
- execution order as determined by `exec_order`). If "nest", each
577
- task from the previous command group, once completed, will fork
578
- into multiple tasks in the current command group. If "hold", all
579
- tasks in the current command group will only begin once all tasks
580
- in the previous command group have completed. If `None`, the number
581
- of tasks in the previous and current command groups must match,
582
- since a given task in the current command group will only begin
583
- once its corresponding task in the previous command group has
584
- completed. By default, set to `None`.
585
- environment : list of str, optional
586
- List of commands to be run to set up the environment for the command group. By
587
- default set to `None`.
588
- scheduler : dict, optional
589
- Scheduler type and options to be passed directly to the scheduler. By default,
590
- `None`, in which case the DirectExecution scheduler is used and no additional
591
- options are passed.
592
- profile_name : str, optional
593
- If the command group was generated as part of a job profile file,
594
- the profile name should be passed here.
595
- profile_order : int, optional
596
- If the command group was generated as part of a job profile file,
597
- the profile order should be passed here.
598
- archive : Archive, optional
599
- The Archive object associated with this command group.
600
- archive_excludes : list of str
601
- List of glob patterns representing files that should be excluding
602
- when archiving this command group.
603
- archive_directory : str or Path, optional
604
- Name of the directory in which the archive for this command group will reside.
605
- alternate_scratch : str, optional
606
- Location of alternate scratch in which to run commands.
607
-
608
- TODO: document how `nesting` interacts with `is_job_array`.
609
-
610
- """
611
-
612
- self.commands = commands
613
- self.is_job_array = is_job_array
614
- self.exec_order = exec_order
615
- self.nesting = nesting
616
- self.environment = environment
617
- self.scheduler = scheduler
618
- self.directory_variable = directory_var
619
- self.profile_name = profile_name
620
- self.profile_order = profile_order
621
- self.stats = stats
622
- self.name = name
623
- self.stats_name = stats_name
624
-
625
- self.archive = archive
626
- self.archive_excludes = archive_excludes
627
- self.archive_directory = archive_directory
628
-
629
- self._alternate_scratch = alternate_scratch
630
-
631
- self.validate()
632
-
633
- @reconstructor
634
- def init_on_load(self):
635
- self.scheduler = self._scheduler
636
-
637
- def validate(self):
638
-
639
- # Check at least one command:
640
- if not self.commands:
641
- msg = 'At least one command must be specified.'
642
- raise ValueError(msg)
643
-
644
- self.nesting = NestingType[self.nesting] if self.nesting else None
645
-
646
- # Check alternate scratch exists
647
- if self.alternate_scratch:
648
- if not self.alternate_scratch.is_dir():
649
- msg = 'Alternate scratch "{}" is not an existing directory.'
650
- raise ValueError(msg.format(self.alternate_scratch))
651
-
652
- @staticmethod
653
- def get_command_lines(commands):
654
- 'Get all lines in the commands list.'
655
- out = []
656
- for i in commands:
657
- if 'line' in i:
658
- out.append(i['line'])
659
- elif 'subshell' in i:
660
- out.extend(CommandGroup.get_command_lines(i['subshell']))
661
- return out
662
-
663
- @property
664
- def scheduler(self):
665
- return self._scheduler_obj
666
-
667
- @scheduler.setter
668
- def scheduler(self, scheduler):
669
-
670
- if 'name' not in scheduler:
671
- msg = 'Scheduler must have a name that is one of: {}'
672
- raise ValueError(msg.format(list(SCHEDULER_MAP.keys())))
673
-
674
- sch_name = scheduler['name']
675
- if sch_name not in SCHEDULER_MAP.keys():
676
- msg = 'Scheduler "{}" is not known.'.format(scheduler)
677
- raise ValueError(msg)
678
-
679
- sch_class = SCHEDULER_MAP[sch_name]
680
- self._scheduler_obj = sch_class(
681
- options=scheduler['options'],
682
- output_dir=scheduler['output_dir'],
683
- error_dir=scheduler['error_dir'],
684
- )
685
- self._scheduler = scheduler
686
-
687
- @property
688
- def alternate_scratch(self):
689
- if self._alternate_scratch:
690
- return Path(self._alternate_scratch)
691
- else:
692
- return None
693
-
694
- @property
695
- def variable_names(self):
696
- """Get those variable names associated with this command group."""
697
-
698
- var_names = select_cmd_group_var_names(
699
- self.get_command_lines(self.commands),
700
- self.directory_variable.value
701
- )
702
- return var_names
703
-
704
- @property
705
- def variable_definitions(self):
706
- """Get those variable definitions associated with this command group,
707
- excluding those that appear embedded within other variables."""
708
-
709
- var_names = self.variable_names
710
- var_defns = []
711
- for i in self.workflow.variable_definitions:
712
- if i.name in var_names:
713
- var_defns.append(i)
714
-
715
- return var_defns
716
-
717
- @property
718
- def variable_definitions_recursive(self):
719
- """Get those variable definitions associated with this command group,
720
- including those that appear embedded within other variables."""
721
-
722
- var_defns_dict = {
723
- i.name: {
724
- 'data': i.data,
725
- 'file_regex': i.file_regex,
726
- 'file_contents': i.file_contents,
727
- 'value': i.value,
728
- }
729
- for i in self.workflow.variable_definitions
730
- }
731
-
732
- cmd_group_var_defns = select_cmd_group_var_definitions(
733
- var_defns_dict,
734
- self.get_command_lines(self.commands),
735
- self.directory_variable.value,
736
- )
737
-
738
- var_defns = [
739
- i for i in self.workflow.variable_definitions
740
- if i.name in cmd_group_var_defns
741
- ]
742
-
743
- return var_defns
744
-
745
-
746
- class VarDefinition(Base):
747
- """Class to represent a variable definition."""
748
-
749
- __tablename__ = 'var_definition'
750
-
751
- id_ = Column('id', Integer, primary_key=True)
752
- workflow_id = Column('workflow_id', Integer, ForeignKey('workflow.id'))
753
-
754
- name = Column(String(255))
755
- data = Column(JSON, nullable=True)
756
- file_regex = Column(JSON, nullable=True)
757
- file_contents = Column(JSON, nullable=True)
758
- value = Column(String(255), nullable=True)
759
-
760
- workflow = relationship('Workflow', back_populates='variable_definitions')
761
- variable_values = relationship(
762
- 'VarValue',
763
- back_populates='variable_definition',
764
- order_by='VarValue.order_id',
765
- )
766
-
767
- def __repr__(self):
768
- out = ('{}('
769
- 'name={!r}, '
770
- 'data={!r}, '
771
- 'file_regex={!r}, '
772
- 'value={!r}'
773
- ')').format(
774
- self.__class__.__name__,
775
- self.name,
776
- self.data,
777
- self.file_regex,
778
- self.value,
779
- )
780
- return out
781
-
782
- def __init__(self, name, data=None, file_regex=None, value=None, file_contents=None):
783
-
784
- self.name = name
785
- self.data = data
786
- self.file_regex = file_regex
787
- self.file_contents = file_contents
788
- self.value = value
789
-
790
- def is_base_variable(self):
791
- """Check if the variable depends on any other variables."""
792
-
793
- if extract_variable_names(self.value,
794
- CONFIG.get('variable_delimiters')):
795
- return False
796
- else:
797
- return True
798
-
799
- def get_dependent_variable_names(self):
800
- """Get the names of variables on which this variable depends."""
801
- return extract_variable_names(self.value,
802
- CONFIG.get('variable_delimiters'))
803
-
804
- def get_multiplicity(self, submission):
805
- """Get the value multiplicity of this variable for a given
806
- submission.
807
-
808
- TODO: this should first try to get multiplicity from values (as a
809
- function of cmd group directory?)
810
-
811
- """
812
-
813
- # First check if the variable is resolved.
814
-
815
- var_values = {}
816
- for i in self.variable_values:
817
- if i.submission == submission:
818
- if i.directory_value.value not in var_values:
819
- var_values.update({i.directory_value.value: []})
820
- var_values[i.directory_value.value].append(i)
821
-
822
- var_lengths = {}
823
- for directory_path, var_vals in var_values.items():
824
-
825
- if var_vals:
826
- var_length = len(var_vals)
827
-
828
- else:
829
- var_length = None
830
-
831
- if self.data:
832
- var_length = len(self.data)
833
-
834
- elif self.file_regex:
835
-
836
- if 'subset' in self.file_regex:
837
- var_length = len(self.file_regex['subset'])
838
-
839
- elif 'expected_multiplicity' in self.file_regex:
840
- var_length = self.file_regex['expected_multiplicity']
841
-
842
- elif self.file_contents:
843
-
844
- if 'expected_multiplicity' in self.file_contents:
845
- var_length = self.file_contents['expected_multiplicity']
846
-
847
- elif self.is_base_variable():
848
- var_length = 1
849
-
850
- else:
851
- raise ValueError('bad 3!')
852
-
853
- var_lengths.update({directory_path: var_length})
854
-
855
- return var_lengths
856
-
857
- def get_values(self, directory):
858
- """Get the values of this variable.
859
-
860
- TODO: refactor repeated code blocks.
861
-
862
- Parameters
863
- ----------
864
- directory : Path
865
- Directory within which to resolve variable.
866
-
867
- Raises
868
- ------
869
- UnresolvedVariableError
870
- If the variable...
871
-
872
- """
873
-
874
- vals = []
875
-
876
- if self.file_regex:
877
-
878
- if self.file_regex.get('is_dir'):
879
-
880
- for root, _, _ in os.walk(directory):
881
- root_rel = Path(root).relative_to(directory).as_posix()
882
-
883
- match = re.search(self.file_regex['pattern'], root_rel)
884
- if match:
885
- match_groups = match.groups()
886
- if match_groups:
887
- match = match_groups[self.file_regex['group']]
888
- val_fmt = self.value.format(match)
889
- vals.append(val_fmt)
890
-
891
- else:
892
- # Search files in the given directory
893
- for i in directory.iterdir():
894
- match = re.search(self.file_regex['pattern'], i.name)
895
- if match:
896
- match_groups = match.groups()
897
- if match_groups:
898
- match = match_groups[self.file_regex['group']]
899
- val_fmt = self.value.format(match)
900
- vals.append(val_fmt)
901
-
902
- elif self.file_contents:
903
-
904
- path = Path(directory).joinpath(self.file_contents['path'])
905
- with path.open('r') as handle:
906
- for i in handle.readlines():
907
- vals.append(i.strip())
908
-
909
- elif self.data:
910
- for i in self.data:
911
- vals.append(self.value.format(i))
912
-
913
- else:
914
- vals.append(self.value)
915
-
916
- if not vals:
917
- msg = ('Cannot resolve variable value with name: {}')
918
- raise UnresolvedVariableError(msg.format(self.name))
919
-
920
- vals = sorted(vals)
921
-
922
- return vals
923
-
924
-
925
- class Submission(Base):
926
- """Class to represent the submission of (part of) a workflow."""
927
-
928
- __tablename__ = 'submission'
929
-
930
- id_ = Column('id', Integer, primary_key=True)
931
- order_id = Column(Integer)
932
- workflow_id = Column(Integer, ForeignKey('workflow.id'))
933
- submit_time = Column(DateTime)
934
- alt_scratch_dir_name = Column(String(255), nullable=True)
935
-
936
- workflow = relationship('Workflow', back_populates='submissions')
937
- command_group_submissions = relationship(
938
- 'CommandGroupSubmission',
939
- back_populates='submission',
940
- order_by='CommandGroupSubmission.command_group_exec_order',
941
- )
942
-
943
- variable_values = relationship('VarValue', back_populates='submission')
944
-
945
- def __init__(self, workflow, task_range):
946
-
947
- self.submit_time = datetime.now()
948
- self.order_id = len(workflow.submissions)
949
- self.workflow = workflow
950
-
951
- # print('Submission.__init__: task_range: {}'.format(task_range), flush=True)
952
-
953
- self.resolve_variable_values(self.workflow.directory, self.first_iteration)
954
-
955
- cg_subs = []
956
- for i in self.workflow.command_groups:
957
- task_range = [1, -1, 1] # TEMP
958
- cg_sub = CommandGroupSubmission(i, self, task_range)
959
- cg_subs.append(cg_sub)
960
-
961
- session = Session.object_session(self)
962
- session.commit()
963
-
964
- # `SchedulerGroup`s must be generated after `CommandGroupSubmission`s and
965
- # `resolve_variable_values`:
966
- self._scheduler_groups = self.get_scheduler_groups()
967
-
968
- if self.workflow.has_alternate_scratch:
969
- self._make_alternate_scratch_dirs()
970
-
971
- # `Task`s must be generated after `SchedulerGroup`s:
972
- cg_sub_iters = []
973
- for cg_sub in self.command_group_submissions:
974
-
975
- for iteration in self.workflow.iterations:
976
-
977
- if iteration.order_id > 0 and self.workflow.loop.get('groups'):
978
- # For > first iteration, not all command groups need be run:
979
- if cg_sub.command_group_exec_order not in self.workflow.loop['groups']:
980
- continue
981
-
982
- cg_sub_iter = CommandGroupSubmissionIteration(iteration, cg_sub)
983
- cg_sub_iters.append(cg_sub_iter)
984
-
985
- # `cg_sub_iter.num_outputs` requires all cg_sub_iters to be generated:
986
- for cg_sub_iter in cg_sub_iters:
987
- for task_num in range(cg_sub_iter.num_outputs):
988
- Task(cg_sub_iter, task_num)
989
-
990
- self.first_iteration.status = IterationStatus('active')
991
-
992
- @reconstructor
993
- def init_on_load(self):
994
- self._scheduler_groups = self.get_scheduler_groups()
995
-
996
- def _make_alternate_scratch_dirs(self):
997
- 'Create a new directory on each alternate scratch for this submission.'
998
-
999
- alt_scratches = self.workflow.all_alternate_scratch
1000
-
1001
- # Find a suitable alternate scratch directory name for this submission:
1002
- count = 0
1003
- MAX_COUNT = 10
1004
- hex_length = 10
1005
- alt_dirname = get_random_hex(hex_length)
1006
- while True:
1007
- if all([not i.joinpath(alt_dirname).exists() for i in alt_scratches]):
1008
- break
1009
- alt_dirname = get_random_hex(hex_length)
1010
- count += 1
1011
- if count > MAX_COUNT:
1012
- msg = ('Could not find a suitable alternate scratch directory name '
1013
- 'in {} iterations.')
1014
- raise RuntimeError(msg.format(MAX_COUNT))
1015
-
1016
- # Make alternate scratch "root" directories:
1017
- for alt_scratch in alt_scratches:
1018
- alt_scratch_root = alt_scratch.joinpath(alt_dirname)
1019
- alt_scratch_root.mkdir(parents=False, exist_ok=False)
1020
-
1021
- self.alt_scratch_dir_name = alt_dirname
1022
-
1023
- def get_working_directories(self, iteration):
1024
- dirs = []
1025
- for cg_sub in self.command_group_submissions:
1026
- cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
1027
- for i in cg_sub_iter.get_directories():
1028
- if i not in dirs:
1029
- dirs.append(i)
1030
- return dirs
1031
-
1032
- @property
1033
- def first_iteration(self):
1034
- return self.workflow.first_iteration
1035
-
1036
- @property
1037
- def scheduler_groups(self):
1038
- return self._scheduler_groups
1039
-
1040
- def get_scheduler_groups(self):
1041
- 'Get scheduler groups for this workflow submission.'
1042
- return SchedulerGroup.get_scheduler_groups(self)
1043
-
1044
- def get_scheduler_group_index(self, command_group_submission):
1045
- """Get the position of a command group submission within the submission's
1046
- scheduler groups.
1047
-
1048
- Parameters
1049
- ----------
1050
- command_group_submission : CommandGroupSubmission
1051
-
1052
- Returns
1053
- -------
1054
- tuple (int, int)
1055
- First integer identifies which scheduler group. Second integer identifies
1056
- the relative position of the command group within the scheduler group.
1057
-
1058
- """
1059
-
1060
- if command_group_submission not in self.command_group_submissions:
1061
- msg = 'Command group submission {} is not part of the submission.'
1062
- raise ValueError(msg.format(command_group_submission))
1063
-
1064
- for i in self.scheduler_groups:
1065
- if i.has(command_group_submission):
1066
- return (i.order_id, i.index(command_group_submission))
1067
-
1068
- msg = 'Command group submission {} is not part of the scheduler group.'
1069
- raise ValueError(msg.format(command_group_submission))
1070
-
1071
- def get_scheduler_group(self, command_group_submission):
1072
-
1073
- sch_group_idx, _ = self.get_scheduler_group_index(command_group_submission)
1074
- return self.scheduler_groups[sch_group_idx]
1075
-
1076
- def is_variable_resolved(self, variable_definition, iteration, directory_var_val=None):
1077
- """Returns True if the passed variable_definition has been resolved
1078
- for this Submission and iteration."""
1079
- # Check the variable definition is part of the workflow:
1080
- if variable_definition not in self.workflow.variable_definitions:
1081
- msg = ('Passed variable_definition object is not in the '
1082
- ' workflow of this submission.')
1083
- raise ValueError(msg)
1084
-
1085
- for i in self.variable_values:
1086
- if i.variable_definition == variable_definition:
1087
- if i.iteration == iteration:
1088
- if directory_var_val:
1089
- if i.directory_value == directory_var_val:
1090
- return True
1091
- else:
1092
- return True
1093
-
1094
- return False
1095
-
1096
- def resolve_variable_values(self, root_directory, iteration):
1097
- """Attempt to resolve as many variable values in the Workflow as
1098
- possible."""
1099
-
1100
- session = Session.object_session(self)
1101
-
1102
- # Loop through CommandGroupSubmissions in order:
1103
- for i in self.workflow.command_groups:
1104
-
1105
- dir_var = i.directory_variable
1106
-
1107
- # VarValues representing the resolved command group working directories:
1108
- cg_dirs_var_vals = []
1109
- cg_dirs_var_vals_other_val = []
1110
- for j in dir_var.variable_values:
1111
- if j.iteration == iteration:
1112
- cg_dirs_var_vals.append(j)
1113
- else:
1114
- cg_dirs_var_vals_other_val.append(j.value)
1115
-
1116
- if cg_dirs_var_vals:
1117
- pass
1118
- # print(('Submission.resolve_variable_values: found existing resolved '
1119
- # 'directory variables: {}').format(cg_dirs_var_vals), flush=True)
1120
-
1121
- else:
1122
-
1123
- # print(('Submission.resolve_variable_values: trying to resolve directory '
1124
- # 'variable values.'), flush=True)
1125
-
1126
- # Directory variable has not yet been resolved; try:
1127
- try:
1128
- dir_var_vals_dat = dir_var.get_values(root_directory)
1129
- # print(('Submission.resolve_variable_values: found directories with '
1130
- # 'values: {}.'.format(dir_var_vals_dat)), flush=True)
1131
-
1132
- except UnresolvedVariableError:
1133
- # Move on to next command group:
1134
- continue
1135
-
1136
- dir_var_vals_dat_new = [
1137
- j for j in dir_var_vals_dat
1138
- if (j not in cg_dirs_var_vals_other_val or j == '.')
1139
- ]
1140
-
1141
- # print(('Submission.resolve_variable_values: new directories are: '
1142
- # '{}.'.format(dir_var_vals_dat_new)), flush=True)
1143
-
1144
- # Add VarVals:
1145
- for val_idx, val in enumerate(dir_var_vals_dat_new):
1146
- cg_dirs_var_vals.append(
1147
- VarValue(
1148
- value=val,
1149
- order_id=val_idx,
1150
- var_definition=dir_var,
1151
- submission=self,
1152
- iteration=iteration,
1153
- )
1154
- )
1155
-
1156
- var_defns_rec = i.variable_definitions_recursive
1157
-
1158
- # print(('Submission.resolve_variable_values: cg_dirs_var_vals: '
1159
- # '{}.'.format(cg_dirs_var_vals)), flush=True)
1160
-
1161
- for j in cg_dirs_var_vals:
1162
-
1163
- # print(('Submission.resolve_variable_values: dir var val: '
1164
- # '{}.'.format(j)), flush=True)
1165
-
1166
- var_vals_dat = resolve_variable_values(
1167
- var_defns_rec,
1168
- root_directory.joinpath(j.value)
1169
- )
1170
-
1171
- # print(('Submission.resolve_variable_values: var_vals_dat: '
1172
- # '{}.'.format(var_vals_dat)), flush=True)
1173
-
1174
- for k, v in var_vals_dat.items():
1175
-
1176
- # print(('Submission.resolve_variable_values: var_vals_dat k: '
1177
- # '{}; v: {}.'.format(k, v)), flush=True)
1178
-
1179
- vals_dat = v['vals']
1180
- var_defn = self.workflow.get_variable_definition_by_name(k)
1181
-
1182
- # print(('Submission.resolve_variable_values: vals_dat '
1183
- # '{}.'.format(vals_dat)), flush=True)
1184
- # print(('Submission.resolve_variable_values: var_defn '
1185
- # '{}.'.format(var_defn)), flush=True)
1186
-
1187
- if not self.is_variable_resolved(var_defn, iteration, j):
1188
-
1189
- # print(('Submission.resolve_variable_values: {} not resolved...'.format(
1190
- # var_defn)), flush=True)
1191
-
1192
- for val_idx, val in enumerate(vals_dat):
1193
-
1194
- # print(('Submission.resolve_variable_values: val: {}...'.format(
1195
- # val)), flush=True)
1196
-
1197
- VarValue(
1198
- value=val,
1199
- order_id=val_idx,
1200
- var_definition=var_defn,
1201
- submission=self,
1202
- iteration=iteration,
1203
- directory_value=j
1204
- )
1205
- session.commit()
1206
-
1207
-     def write_submit_dirs(self, hf_dir):
-         """Write the directory structure necessary for this submission."""
-
-         # Ensure scheduler output and error directories exist, if specified:
-         for cg_sub in self.command_group_submissions:
-             root_dir = self.workflow.directory
-             out_dir = root_dir.joinpath(cg_sub.command_group.scheduler.output_dir)
-             err_dir = root_dir.joinpath(cg_sub.command_group.scheduler.error_dir)
-             if not out_dir.is_dir():
-                 out_dir.mkdir()
-             if not err_dir.is_dir():
-                 err_dir.mkdir()
-
-         # Make the workflow directory if it does not exist:
-         wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-         if not wf_path.exists():
-             wf_path.mkdir()
-
-         # Make the submit directory:
-         submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-         submit_path.mkdir()
-
-         for iteration in self.workflow.iterations:
-
-             # Make the iteration directory for each iteration:
-             iter_path = submit_path.joinpath('iter_{}'.format(iteration.order_id))
-             iter_path.mkdir()
-
-             for idx, i in enumerate(self.scheduler_groups):
-
-                 max_num_tasks = i.get_max_num_tasks(self.first_iteration)
-                 step_size = i.get_step_size(self.first_iteration)
-
-                 # Make the scheduler group directory for each scheduler group:
-                 sg_path = iter_path.joinpath('scheduler_group_{}'.format(idx))
-                 sg_path.mkdir()
-
-                 # Loop through command groups in this scheduler group:
-                 for cg_sub_idx, cg_sub in enumerate(i.command_group_submissions):
-
-                     cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-
-                     num_dir_vals = cg_sub_iter.num_directories
-                     all_dir_slots = [''] * max_num_tasks
-
-                     # Distribute the directories over the task slots:
-                     for k in range(0, max_num_tasks, step_size[cg_sub_idx]):
-                         dir_idx = floor((k / max_num_tasks) * num_dir_vals)
-                         all_dir_slots[k] = 'REPLACE_WITH_DIR_{}'.format(dir_idx)
-
-                     wk_dirs_path = iter_path.joinpath('working_dirs_{}{}'.format(
-                         cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')))
-
-                     # Write the working directory template file for each command group:
-                     with wk_dirs_path.open('w') as handle:
-                         for dir_path in all_dir_slots:
-                             handle.write('{}\n'.format(dir_path))
-
-                 # Make the variable values directories for each scheduler group:
-                 var_values_path = sg_path.joinpath('var_values')
-                 var_values_path.mkdir()
-                 for j in range(1, max_num_tasks + 1):
-                     j_fmt = zeropad(j, max_num_tasks + 1)
-                     vv_j_path = var_values_path.joinpath(j_fmt)
-                     vv_j_path.mkdir()
-
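For illustration, a worked example of the slot-distribution loop in `write_submit_dirs` above, with arbitrarily chosen values:

from math import floor

# Hypothetical values: 8 scheduler task slots, a step size of 2 for this
# command group, and 4 working directories to distribute:
max_num_tasks, step_size, num_dir_vals = 8, 2, 4
all_dir_slots = [''] * max_num_tasks
for k in range(0, max_num_tasks, step_size):
    dir_idx = floor((k / max_num_tasks) * num_dir_vals)
    all_dir_slots[k] = 'REPLACE_WITH_DIR_{}'.format(dir_idx)

# Every `step_size`-th slot gets a placeholder; the rest stay blank:
# ['REPLACE_WITH_DIR_0', '', 'REPLACE_WITH_DIR_1', '', 'REPLACE_WITH_DIR_2',
#  '', 'REPLACE_WITH_DIR_3', '']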
-     def write_jobscripts(self, hf_dir):
-         """Write the jobscript (and stats jobscript, if any) for each command group
-         submission."""
-
-         wf_path = hf_dir.joinpath('workflow_{}'.format(self.workflow_id))
-         submit_path = wf_path.joinpath('submit_{}'.format(self.order_id))
-         js_paths = []
-         js_stats_paths = []
-         for cg_sub in self.command_group_submissions:
-             js_paths_i = cg_sub.write_jobscript(dir_path=submit_path)
-             js_paths.append(js_paths_i['jobscript'])
-             js_stats_paths.append(js_paths_i['stats_jobscript'])
-
-         return js_paths, js_stats_paths
-
-     def submit_jobscripts(self, jobscript_paths):
-         """Submit the jobscripts, holding each on the previous submission as
-         required."""
-
-         js_paths, js_stat_paths = jobscript_paths
-
-         submit_cmd = os.getenv('HPCFLOW_QSUB_CMD', 'qsub')
-         last_submit_id = None
-         for iteration in self.workflow.iterations:
-
-             iter_idx_var = 'ITER_IDX={}'.format(iteration.order_id)
-
-             for js_path_i, js_stat_path_i, cg_sub in zip(
-                     js_paths, js_stat_paths, self.command_group_submissions):
-
-                 cg_sub_iter = cg_sub.get_command_group_submission_iteration(iteration)
-                 if cg_sub_iter is None:
-                     continue
-
-                 qsub_cmd = [submit_cmd]
-
-                 if last_submit_id:
-
-                     # Add conditional submission:
-                     if iteration.order_id > 0:
-                         hold_arg = '-hold_jid'
-                     elif cg_sub.command_group.nesting == NestingType('hold'):
-                         hold_arg = '-hold_jid'
-                     else:
-                         hold_arg = '-hold_jid_ad'
-
-                     qsub_cmd += [hold_arg, last_submit_id]
-
-                 qsub_cmd += ['-v', iter_idx_var]
-                 qsub_cmd.append(str(js_path_i))
-
-                 # Submit the jobscript:
-                 job_id_str = self.submit_jobscript(qsub_cmd, js_path_i, iteration)
-                 cg_sub_iter.scheduler_job_id = int(job_id_str)
-                 last_submit_id = job_id_str
-
-                 # Submit the stats jobscript:
-                 if js_stat_path_i:
-                     st_cmd = [submit_cmd, '-hold_jid_ad',
-                               last_submit_id, '-v', iter_idx_var]
-                     st_cmd.append(str(js_stat_path_i))
-
-                     job_id_str = self.submit_jobscript(st_cmd, js_stat_path_i, iteration)
-                     last_submit_id = job_id_str
-
-     def submit_jobscript(self, cmd, js_path, iteration):
-         """Submit a single jobscript and return the scheduler job ID as a string."""
-
-         cwd = str(self.workflow.directory)
-         proc = run(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
-         qsub_out = proc.stdout.decode().strip()
-         qsub_err = proc.stderr.decode().strip()
-         if qsub_out:
-             print(qsub_out, flush=True)
-         if qsub_err:
-             print(qsub_err, flush=True)
-
-         # Extract the newly submitted job ID:
-         pattern = r'[0-9]+'
-         job_id_search = re.search(pattern, qsub_out)
-         try:
-             job_id_str = job_id_search.group()
-         except AttributeError:
-             msg = ('Could not retrieve the job ID from the submitted jobscript '
-                    'found at {}. No more jobscripts will be submitted.')
-             raise ValueError(msg.format(js_path))
-
-         return job_id_str
-
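`submit_jobscript` assumes the first run of digits in the scheduler's stdout is the job ID. A quick sketch of that extraction; the qsub output line is a plausible SGE-style example, not a captured log:

import re

# Plausible SGE-style qsub output (illustrative, not a recorded log):
qsub_out = 'Your job-array 123456.1-8:1 ("js_0.sh") has been submitted'
job_id_search = re.search(r'[0-9]+', qsub_out)
job_id_str = job_id_search.group() if job_id_search else None
print(job_id_str)  # '123456' -- only the first run of digits is taken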
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         'Get task statistics for this submission.'
-         out = {
-             'submission_id': self.id_,
-             'command_group_submissions': [
-                 i.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                 for i in self.command_group_submissions]
-         }
-         return out
-
-
- class CommandGroupSubmission(Base):
-     """Class to represent the submission of a single command group."""
-
-     __tablename__ = 'command_group_submission'
-
-     id_ = Column('id', Integer, primary_key=True)
-     command_group_id = Column(Integer, ForeignKey('command_group.id'))
-     submission_id = Column(Integer, ForeignKey('submission.id'))
-     task_start = Column(Integer)
-     task_stop = Column(Integer)
-     task_step = Column(Integer)
-     commands_written = Column(Boolean)
-     # _task_multiplicity = Column('task_multiplicity', Integer, nullable=True)
-
-     command_group = relationship('CommandGroup',
-                                  back_populates='command_group_submissions')
-
-     submission = relationship('Submission', back_populates='command_group_submissions')
-
-     command_group_exec_order = deferred(
-         select([CommandGroup.exec_order]).where(
-             CommandGroup.id_ == command_group_id))
-
-     is_command_writing = relationship(
-         'IsCommandWriting',
-         uselist=False,
-         cascade='all, delete, delete-orphan'
-     )
-
-     command_group_submission_iterations = relationship(
-         'CommandGroupSubmissionIteration',
-         back_populates='command_group_submission',
-     )
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'command_group={!r}, '
-             'submission_id={!r})'
-         ).format(
-             self.__class__.__name__,
-             self.command_group,
-             self.submission_id,
-         )
-         return out
-
-     def __init__(self, command_group, submission, task_range):
-
-         # print('CommandGroupSubmission.__init__: task_range: {}'.format(task_range), flush=True)
-
-         self.command_group = command_group
-         self.submission = submission
-         self.task_range = task_range
-
-     @property
-     def task_range(self):
-         return (self.task_start, self.task_stop, self.task_step)
-
-     @task_range.setter
-     def task_range(self, task_range):
-         self.task_start = task_range[0]
-         self.task_stop = task_range[1]
-         if len(task_range) == 3:
-             self.task_step = task_range[2]
-         else:
-             self.task_step = 1
-
-     @property
-     def task_range_idx(self):
-         return list(range(*self.task_range))
-
-     @property
-     def variable_values(self):
-
-         var_values = []
-         for i in self.command_group.variable_definitions:
-             if i.variable_values:
-                 var_values.append(i)
-
-         return var_values
-
-     @property
-     def num_submitted_tasks(self):
-         """Get the number of submitted tasks based on the task range.
-
-         Returns
-         -------
-         num : int
-             If the number of tasks is as yet undetermined, `None` is returned.
-
-         """
-
-         if self.task_stop == -1:
-             return None
-
-         num = ceil((self.task_stop - (self.task_start - 1)) / self.task_step)
-
-         return num
-
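A worked example of the `num_submitted_tasks` formula above: for a task range of (1, 10, 3), the submitted tasks are 1, 4, 7 and 10:

from math import ceil

task_start, task_stop, task_step = 1, 10, 3  # hypothetical task range
num = ceil((task_stop - (task_start - 1)) / task_step)
print(num)  # 4
print(list(range(task_start, task_stop + 1, task_step)))  # [1, 4, 7, 10]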
-     @property
-     def scheduler_group_index(self):
-         """Get the position of this command group submission within the submission's
-         scheduler groups.
-
-         Returns
-         -------
-         tuple (int, int)
-             First integer identifies which scheduler group. Second integer identifies
-             the relative position of the command group within the scheduler group.
-
-         """
-         return self.submission.get_scheduler_group_index(self)
-
-     @property
-     def scheduler_group(self):
-         'Get the scheduler group to which this command group belongs.'
-         return self.submission.get_scheduler_group(self)
-
-     def get_command_group_submission_iteration(self, iteration):
-         for i in iteration.command_group_submission_iterations:
-             if i.command_group_submission == self:
-                 return i
-
-     @property
-     def alternate_scratch_dir(self):
-         if self.command_group.alternate_scratch:
-             return self.command_group.alternate_scratch.joinpath(
-                 self.submission.alt_scratch_dir_name)
-         else:
-             return None
-
-     def get_var_definition_by_name(self, var_name):
-         'Get a variable definition from this command group by its name.'
-
-         for i in self.command_group.variable_definitions:
-             if i.name == var_name:
-                 return i
-
-     def write_jobscript(self, dir_path):
-         """Write the jobscript."""
-
-         cg_sub_first_iter = self.get_command_group_submission_iteration(
-             self.submission.first_iteration)
-
-         js_path = self.command_group.scheduler.write_jobscript(
-             dir_path=dir_path,
-             workflow_directory=self.submission.workflow.directory,
-             command_group_order=self.command_group_exec_order,
-             max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                 self.submission.first_iteration),
-             task_step_size=cg_sub_first_iter.step_size,
-             environment=self.command_group.environment,
-             archive=self.command_group.archive is not None,
-             alternate_scratch_dir=self.alternate_scratch_dir,
-             command_group_submission_id=self.id_,
-             name=self.command_group.name,
-         )
-
-         js_stats_path = None
-         if self.command_group.stats:
-             js_stats_path = self.command_group.scheduler.write_stats_jobscript(
-                 dir_path=dir_path,
-                 workflow_directory=self.submission.workflow.directory,
-                 command_group_order=self.command_group_exec_order,
-                 max_num_tasks=self.scheduler_group.get_max_num_tasks(
-                     self.submission.first_iteration),
-                 task_step_size=cg_sub_first_iter.step_size,
-                 command_group_submission_id=self.id_,
-                 name=self.command_group.stats_name,
-             )
-
-         out = {
-             'jobscript': js_path,
-             'stats_jobscript': js_stats_path,
-         }
-
-         return out
-
-     def write_runtime_files(self, project, task_idx, iter_idx):
-         iteration = self.get_iteration(iter_idx)
-         self.queue_write_command_file(project, task_idx, iteration)
-         self.write_variable_files(project, task_idx, iteration)
-
-     def queue_write_command_file(self, project, task_idx, iteration):
-         """Ensure the command file for this command group submission is written, ready
-         to be invoked by the jobscript, and also refresh the resolved variable values
-         so that when the variable files are written, they are up to date."""
-
-         session = Session.object_session(self)
-
-         sleep_time = 5
-         context = 'CommandGroupSubmission.write_cmd'
-         block_msg = ('{{}} {}: Writing command file blocked. Sleeping for {} '
-                      'seconds'.format(context, sleep_time))
-         unblock_msg = ('{{}} {}: Commands not written and writing available. Writing '
-                        'command file.'.format(context))
-         written_msg = '{{}} {}: Command files already written.'.format(context)
-         refresh_vals_msg = '{{}} {}: Refreshing resolved variable values.'.format(context)
-         write_dirs_msg = ('{{}} {}: Writing working directory files for '
-                           'iteration {}').format(context, iteration)
-         write_as_msg = ('{{}} {}: Writing alternate scratch exclusion list for '
-                         'task_idx {}.').format(context, task_idx)
-         make_alt_msg = ('{{}} {}: Making alternate scratch working '
-                         'directories.'.format(context))
-
-         blocked = True
-         while blocked:
-
-             session.refresh(self)
-
-             if self.is_command_writing:
-                 print(block_msg.format(datetime.now()), flush=True)
-                 sleep(sleep_time)
-
-             else:
-                 try:
-                     self.is_command_writing = IsCommandWriting()
-                     session.commit()
-                     blocked = False
-
-                 except IntegrityError:
-                     # Another process has already set `is_command_writing`:
-                     session.rollback()
-                     print(block_msg.format(datetime.now()), flush=True)
-                     sleep(sleep_time)
-
-                 except OperationalError:
-                     # Database is likely locked:
-                     session.rollback()
-                     print(block_msg.format(datetime.now()), flush=True)
-                     sleep(sleep_time)
-
-         if not blocked:
-
-             if iteration.status == IterationStatus('pending'):
-                 iteration.status = IterationStatus('active')
-
-             # This needs to happen once *per task* per CGS:
-             print(refresh_vals_msg.format(datetime.now()), flush=True)
-             self.submission.resolve_variable_values(project.dir_path, iteration)
-
-             # This needs to happen once *per task* per CGS (if it has an alternate scratch):
-             if self.command_group.alternate_scratch:
-                 print(write_as_msg.format(datetime.now()), flush=True)
-                 task = self.get_task(task_idx, iteration)
-                 self.write_alt_scratch_exclusion_list(project, task, iteration)
-
-             cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-             if not cg_sub_iter.working_dirs_written:
-
-                 # These need to happen once *per iteration* per CGS:
-
-                 print(write_dirs_msg.format(datetime.now()), flush=True)
-                 cg_sub_iter.write_working_directories(project)
-
-                 if self.command_group.alternate_scratch:
-                     print(make_alt_msg.format(datetime.now()), flush=True)
-                     self.make_alternate_scratch_dirs(project, iteration)
-
-                 cg_sub_iter.working_dirs_written = True
-
-             if not self.commands_written:
-                 # This needs to happen once per CGS:
-                 print(unblock_msg.format(datetime.now()), flush=True)
-                 self.write_command_file(project)
-                 self.commands_written = True
-             else:
-                 print(written_msg.format(datetime.now()), flush=True)
-
-             self.is_command_writing = None
-             session.commit()
-
-     def write_variable_files(self, project, task_idx, iteration):
-
-         task = self.get_task(task_idx, iteration)
-         var_vals_normed = task.get_variable_values_normed()
-
-         print('CGS.write_variable_files: task: {}'.format(task), flush=True)
-         print('CGS.write_variable_files: var_vals_normed: {}'.format(
-             var_vals_normed), flush=True)
-
-         max_num_tasks = self.scheduler_group.get_max_num_tasks(
-             self.submission.first_iteration)
-
-         var_values_task_dir = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'iter_{}'.format(iteration.order_id),
-             'scheduler_group_{}'.format(self.scheduler_group_index[0]),
-             'var_values',
-             zeropad(task.scheduler_id, max_num_tasks),
-         )
-
-         for var_name, var_val_all in var_vals_normed.items():
-             var_fn = 'var_{}{}'.format(var_name, CONFIG.get('variable_file_ext'))
-             var_file_path = var_values_task_dir.joinpath(var_fn)
-             with var_file_path.open('w') as handle:
-                 for i in var_val_all:
-                     handle.write('{}\n'.format(i))
-
-     @staticmethod
-     def get_formatted_commands(commands, num_cores, parallel_modes, indent=''):
-         'Format command lines, substituting variables and expanding subshells.'
-
-         # TODO: what about the parallel mode environment?
-         delims = CONFIG.get('variable_delimiters')
-         lns_cmd = []
-         for i in commands:
-             if 'line' in i:
-                 cmd_ln = indent
-                 para_mode = i.get('parallel_mode')
-                 if para_mode:
-                     para_mode_config = parallel_modes.get(
-                         para_mode.lower())  # TODO: raise on a missing parallel mode
-                     para_command = para_mode_config.get('command')
-                     if para_command:
-                         cmd_ln += para_command.replace('<<num_cores>>', num_cores) + ' '
-                 line = i['line']
-                 for var_name in extract_variable_names(line, delims):
-                     line = line.replace(delims[0] + var_name + delims[1], f'${var_name}')
-                 cmd_ln += line
-                 lns_cmd.append(cmd_ln)
-             elif 'subshell' in i:
-                 sub_cmds = CommandGroupSubmission.get_formatted_commands(
-                     i['subshell'],
-                     num_cores,
-                     parallel_modes,
-                     indent=(indent + '\t'),
-                 )
-                 lns_cmd.extend([f'{indent}('] + sub_cmds + [f'{indent})'])
-
-         return lns_cmd
-
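A usage sketch of `get_formatted_commands` above, assuming the configured variable delimiters are `<<` and `>>` and that `extract_variable_names` returns the delimited names (both assumptions about configuration and helpers not shown in this diff):

# Hypothetical input commands; '<<...>>' delimiters are assumed:
commands = [
    {'line': 'echo <<greeting>>'},
    {'subshell': [{'line': 'cat <<infile>>'}]},
]
# get_formatted_commands(commands, num_cores='$NSLOTS', parallel_modes={})
# would then produce, per the logic above:
# ['echo $greeting', '(', '\tcat $infile', ')']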
-     def write_command_file(self, project):
-         """Write the commands file that is invoked by the jobscript."""
-
-         lns_cmd = self.get_formatted_commands(
-             self.command_group.commands,
-             num_cores=self.command_group.scheduler.NUM_CORES_VAR,
-             parallel_modes=self.command_group.workflow.parallel_modes,
-             indent=('\t' if self.command_group.variable_definitions else ''),
-         )
-
-         lns_while_start = ['while true', 'do']
-         lns_while_end = ['done \\']
-
-         dt_stamp = datetime.now().strftime(r'%Y.%m.%d at %H:%M:%S')
-         about_msg = ['# --- commands file generated by `hpcflow` (version: {}) '
-                      'on {} ---'.format(__version__, dt_stamp)]
-
-         max_num_tasks = self.scheduler_group.get_max_num_tasks(
-             self.submission.first_iteration)
-
-         lns_task_id_pad = [
-             'MAX_NUM_TASKS={}'.format(max_num_tasks),
-             'MAX_NUM_DIGITS="${#MAX_NUM_TASKS}"',
-             'ZEROPAD_TASK_ID=$(printf "%0${MAX_NUM_DIGITS}d" $SGE_TASK_ID)',
-         ]
-
-         lns_read = []
-         lns_fds = []
-
-         for idx, i in enumerate(self.command_group.variable_definitions):
-
-             fd_idx = idx + 3
-
-             var_fn = 'var_{}{}'.format(i.name, CONFIG.get('variable_file_ext'))
-             var_file_path = ('$ITER_DIR/scheduler_group_{}/var_values'
-                              '/$ZEROPAD_TASK_ID/{}').format(
-                                  self.scheduler_group_index[0], var_fn)
-
-             lns_read.append('\tread -u{} {} || break'.format(fd_idx, i.name))
-
-             if idx > 0:
-                 lns_fds[-1] += ' \\'
-
-             lns_fds.append('\t{}< {}'.format(fd_idx, var_file_path))
-
-         lns_cmd_print = ['printf "Running command: \\"{}\\"\\n" >> $LOG_PATH 2>&1'.format(
-             i.strip('\t').replace('"', r'\\\\\"')) for i in lns_cmd]
-
-         if self.command_group.variable_definitions:
-             lns_cmd_print = ['\t{}'.format(i) for i in lns_cmd_print]
-             cmd_lns = (about_msg + [''] +
-                        lns_task_id_pad + [''] +
-                        lns_while_start + [''] +
-                        lns_read + [''] +
-                        lns_cmd_print + [''] +
-                        lns_cmd + [''] +
-                        lns_while_end +
-                        lns_fds + [''])
-         else:
-             cmd_lns = (about_msg + [''] +
-                        lns_cmd_print + [''] +
-                        lns_cmd + [''])
-
-         cmd_lns = '\n'.join(cmd_lns)
-
-         cmd_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'cmd_{}{}'.format(self.command_group_exec_order, CONFIG.get('jobscript_ext')),
-         )
-         with cmd_path.open('w') as handle:
-             handle.write(cmd_lns)
-
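The commands file assembled by `write_command_file` above is a bash snippet that reads one value per variable per loop pass from numbered file descriptors, starting at fd 3. For a single variable `greeting` in scheduler group 0, and assuming a `.txt` variable-file extension (an assumption about the configured extension), the generated file has roughly this shape:

# Approximate shape of a generated commands file (illustrative only):
#
#   # --- commands file generated by `hpcflow` (version: ...) on ... ---
#
#   MAX_NUM_TASKS=8
#   MAX_NUM_DIGITS="${#MAX_NUM_TASKS}"
#   ZEROPAD_TASK_ID=$(printf "%0${MAX_NUM_DIGITS}d" $SGE_TASK_ID)
#
#   while true
#   do
#
#   	read -u3 greeting || break
#
#   	printf "Running command: \"echo $greeting\"\n" >> $LOG_PATH 2>&1
#
#   	echo $greeting
#
#   done \
#   	3< $ITER_DIR/scheduler_group_0/var_values/$ZEROPAD_TASK_ID/var_greeting.txt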
-     def write_alt_scratch_exclusion_list(self, project, task, iteration):
-         'Write the alternate scratch exclusion list file (e.g. for `rsync`).'
-
-         # List of Paths to exclude, relative to `self.submission.workflow.directory`:
-         excluded_paths = [
-             Path(CONFIG.get('hpcflow_directory'))] + self.submission.workflow.profile_files
-
-         out_dir = Path(self.command_group.scheduler.output_dir)
-         err_dir = Path(self.command_group.scheduler.error_dir)
-         if out_dir not in excluded_paths:
-             excluded_paths.append(out_dir)
-         if err_dir not in excluded_paths:
-             excluded_paths.append(err_dir)
-
-         working_dir_path = Path(task.get_working_directory_value())
-         alt_scratch_exclusions = []
-         for exc_path in excluded_paths:
-             try:
-                 exc_path.relative_to(working_dir_path)
-             except ValueError:
-                 # Not within the working directory, so no need to exclude it:
-                 continue
-             alt_scratch_exclusions.append(exc_path)
-
-         exc_list_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(self.submission.workflow.id_),
-             'submit_{}'.format(self.submission.order_id),
-             'iter_{}'.format(iteration.order_id),
-             '{}_{}_{}{}'.format(
-                 CONFIG.get('alt_scratch_exc_file'),
-                 self.command_group_exec_order,
-                 task.order_id,
-                 CONFIG.get('alt_scratch_exc_file_ext'),
-             ),
-         )
-
-         working_dir_abs = self.submission.workflow.directory.joinpath(working_dir_path)
-         about = (
-             '# Alternate scratch exclusion list. Patterns are relative '
-             'to the task #{} working directory:\n'
-             '# "{}"\n\n'
-         )
-         with exc_list_path.open('w') as handle:
-             handle.write(about.format(task.order_id, working_dir_abs))
-             for exc_path in alt_scratch_exclusions:
-                 handle.write(str(exc_path) + '\n')
-
-     def make_alternate_scratch_dirs(self, project, iteration):
-         'Generate task working directories on the alternate scratch.'
-
-         # Get task working directories:
-         working_dirs = [task.get_working_directory()
-                         for task in self.tasks if task.iteration == iteration]
-
-         alt_scratch_root = self.command_group.alternate_scratch.joinpath(
-             self.submission.alt_scratch_dir_name)
-
-         for working_dir in working_dirs:
-             if working_dir.value == '.':
-                 # Already made the "root" directory:
-                 continue
-             alt_scratch_w_dir = alt_scratch_root.joinpath(working_dir.value)
-             alt_scratch_w_dir.mkdir(parents=True, exist_ok=False)
-
-     def get_iteration(self, iter_idx):
-         for i in self.submission.workflow.iterations:
-             if i.order_id == iter_idx:
-                 return i
-
-     def get_task(self, task_idx, iteration):
-         cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-         for i in cg_sub_iter.tasks:
-             if i.order_id == task_idx and i.iteration == iteration:
-                 return i
-
-     def set_task_start(self, task_idx, iter_idx):
-         context = 'CommandGroupSubmission.set_task_start'
-         msg = '{{}} {}: Task index {} started.'.format(context, task_idx)
-         start_time = datetime.now()
-         print(msg.format(start_time), flush=True)
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         task.start_time = start_time
-         print('task: {}'.format(task))
-
-     def set_task_end(self, task_idx, iter_idx):
-         context = 'CommandGroupSubmission.set_task_end'
-         msg = '{{}} {}: Task index {} ended.'.format(context, task_idx)
-         end_time = datetime.now()
-         print(msg.format(end_time), flush=True)
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         task.end_time = end_time
-         print('task: {}'.format(task))
-
-     def do_archive(self, task_idx, iter_idx):
-         """Archive the working directory associated with a given task in this command
-         group submission."""
-
-         # Adding a small delay increases the chance that `Task.is_archive_required`
-         # will be False (and so saves some time overall), in the case where all tasks
-         # start at roughly the same time:
-         sleep(10)
-
-         iteration = self.get_iteration(iter_idx)
-         task = self.get_task(task_idx, iteration)
-         self.command_group.archive.execute_with_lock(task)
-
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         'Get task statistics for this command group submission.'
-         out = {
-             'command_group_submission_id': self.id_,
-             'command_group_id': self.command_group.id_,
-             'commands': self.command_group.commands,
-             'name': self.command_group.name,
-             'tasks': [task.get_stats(jsonable=jsonable, datetime_dicts=datetime_dicts)
-                       for cgsub_iter in self.command_group_submission_iterations
-                       for task in cgsub_iter.tasks
-                       if task.iteration.status != IterationStatus('pending')]
-         }
-         return out
-
-     def get_scheduler_stats(self, task_idx, iter_idx):
-         'Record scheduler statistics (memory, hostname, wallclock) on the task.'
-
-         # Get scheduler job ID and scheduler task ID:
-         iteration = self.get_iteration(iter_idx)
-         cg_sub_iter = self.get_command_group_submission_iteration(iteration)
-         scheduler_job_id = cg_sub_iter.scheduler_job_id
-         task = self.get_task(task_idx, iteration)
-         task_id = task.scheduler_id
-
-         info = self.command_group.scheduler.get_scheduler_stats(scheduler_job_id, task_id)
-
-         if 'MB' in info['maxvmem']:
-             maxvmem = float(info['maxvmem'].split('MB')[0])
-         elif 'GB' in info['maxvmem']:
-             maxvmem = float(info['maxvmem'].split('GB')[0])
-         else:
-             # Guard against an unexpected unit, which would otherwise leave
-             # `maxvmem` undefined below:
-             maxvmem = None
-         hostname = info['hostname']
-         wallclock = int(info['ru_wallclock'].split('s')[0])
-
-         task.memory = maxvmem
-         task.hostname = hostname
-         task.wallclock = wallclock
-
-
- class VarValue(Base):
-     """Class to represent the evaluated value of a variable."""
-
-     __tablename__ = 'var_value'
-
-     id_ = Column('id', Integer, primary_key=True)
-     var_definition_id = Column(
-         Integer,
-         ForeignKey('var_definition.id'),
-     )
-     submission_id = Column(Integer, ForeignKey('submission.id'))
-     value = Column(String(255))
-     order_id = Column(Integer)
-     directory_value_id = Column('directory_value_id', Integer, ForeignKey('var_value.id'))
-     iteration_id = Column(Integer, ForeignKey('iteration.id'))
-
-     variable_definition = relationship('VarDefinition', back_populates='variable_values')
-     submission = relationship('Submission', back_populates='variable_values')
-     directory_value = relationship('VarValue', uselist=False, remote_side=id_)
-     iteration = relationship('Iteration', uselist=False)
-
-     def __init__(self, value, order_id, var_definition, submission, iteration,
-                  directory_value=None):
-
-         self.value = value
-         self.order_id = order_id
-         self.iteration = iteration
-         self.variable_definition = var_definition
-         self.submission = submission
-         self.directory_value = directory_value
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'variable_name={}, '
-             'value={}, '
-             'order_id={}, '
-             'iteration={}, '
-             'directory={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.variable_definition.name,
-             self.value,
-             self.order_id,
-             self.iteration,
-             self.directory_value.value if self.directory_value else None,
-         )
-         return out
-
-
- class IsCommandWriting(Base):
-     """Class to represent active writing of a command file."""
-
-     __tablename__ = 'is_command_writing'
-
-     command_group_submission_id = Column(
-         Integer,
-         ForeignKey('command_group_submission.id'),
-         primary_key=True,
-         unique=True
-     )
-
-
- class Task(Base):
-     'Class to represent a single task.'
-
-     __tablename__ = 'task'
-
-     id_ = Column('id', Integer, primary_key=True)
-     order_id = Column(Integer, nullable=False)
-     start_time = Column(DateTime)
-     end_time = Column(DateTime)
-     memory = Column(Float)
-     hostname = Column(String(255))
-     wallclock = Column(Integer)
-     archive_status = Column(Enum(TaskArchiveStatus), nullable=True)
-     _archive_start_time = Column('archive_start_time', DateTime, nullable=True)
-     _archive_end_time = Column('archive_end_time', DateTime, nullable=True)
-     archived_task_id = Column(Integer, ForeignKey('task.id'), nullable=True)
-
-     command_group_submission_iteration_id = Column(
-         Integer, ForeignKey('command_group_submission_iteration.id'))
-
-     command_group_submission_iteration = relationship(
-         'CommandGroupSubmissionIteration', back_populates='tasks', uselist=False)
-
-     archived_task = relationship('Task', uselist=False, remote_side=id_)
-
-     def __init__(self, command_group_submission_iteration, order_id):
-         self.order_id = order_id
-         self.command_group_submission_iteration = command_group_submission_iteration
-         self.start_time = None
-         self.end_time = None
-
-         if self.command_group_submission_iteration.command_group_submission.command_group.archive:
-             self.archive_status = TaskArchiveStatus('pending')
-
-     @property
-     def iteration(self):
-         return self.command_group_submission_iteration.iteration
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'order_id={}, '
-             'command_group_submission_iteration_id={}, '
-             'start_time={}, '
-             'end_time={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.order_id,
-             self.command_group_submission_iteration_id,
-             self.start_time,
-             self.end_time,
-         )
-         return out
-
-     @property
-     def duration(self):
-         if self.start_time and self.end_time:
-             return self.end_time - self.start_time
-         else:
-             return None
-
-     @property
-     def scheduler_id(self):
-         'Get the task ID, as understood by the scheduler.'
-         num_tasks = self.command_group_submission_iteration.num_outputs
-         step_size = self.command_group_submission_iteration.step_size
-         scheduler_range = range(1, 1 + (num_tasks * step_size), step_size)
-         scheduler_id = scheduler_range[self.order_id]
-
-         return scheduler_id
-
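A worked example of the `scheduler_id` property above: with four outputs and a step size of 2, the tasks occupy scheduler IDs 1, 3, 5 and 7:

num_tasks, step_size = 4, 2  # hypothetical values
scheduler_range = range(1, 1 + (num_tasks * step_size), step_size)
print(list(scheduler_range))  # [1, 3, 5, 7]
print(scheduler_range[2])     # 5: the scheduler ID of the task with order_id 2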
-     @property
-     def archive_start_time(self):
-         if self.archived_task:
-             # Archive for this task was handled by another task with the same working dir:
-             return self.archived_task.archive_start_time
-         else:
-             return self._archive_start_time
-
-     @archive_start_time.setter
-     def archive_start_time(self, start_time):
-         self._archive_start_time = start_time
-
-     @property
-     def archive_end_time(self):
-         if self.archived_task:
-             # Archive for this task was handled by another task with the same working dir:
-             return self.archived_task.archive_end_time
-         else:
-             return self._archive_end_time
-
-     @archive_end_time.setter
-     def archive_end_time(self, end_time):
-         self._archive_end_time = end_time
-
-     @property
-     def archive_duration(self):
-         if self.archive_start_time and self.archive_end_time:
-             return self.archive_end_time - self.archive_start_time
-         else:
-             return None
-
-     def get_working_directory(self):
-         'Get the "working directory" of this task.'
-         dir_vals = self.command_group_submission_iteration.get_directories()
-         dirs_per_task = (len(dir_vals) /
-                          self.command_group_submission_iteration.num_outputs)
-         dir_idx = floor(self.order_id * dirs_per_task)
-         working_dir = dir_vals[dir_idx]
-
-         return working_dir
-
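A worked example of the directory mapping in `get_working_directory` above: with two directories and four outputs, consecutive pairs of tasks share a directory:

from math import floor

dir_vals = ['sim_A', 'sim_B']  # hypothetical directory values
num_outputs = 4
dirs_per_task = len(dir_vals) / num_outputs  # 0.5
for order_id in range(num_outputs):
    print(order_id, dir_vals[floor(order_id * dirs_per_task)])
# 0 sim_A
# 1 sim_A
# 2 sim_B
# 3 sim_B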
-     def get_working_directory_value(self):
-         return self.get_working_directory().value
-
-     def get_stats(self, jsonable=True, datetime_dicts=False):
-         'Get statistics for this task.'
-         out = {
-             'task_id': self.id_,
-             'order_id': self.order_id,
-             'scheduler_id': self.scheduler_id,
-             'start_time': self.start_time,
-             'end_time': self.end_time,
-             'duration': self.duration,
-             'archive_start_time': self.archive_start_time,
-             'archive_end_time': self.archive_end_time,
-             'archive_duration': self.archive_duration,
-             'archived_task_id': self.archived_task_id,
-             'memory': self.memory,
-             'hostname': self.hostname,
-             'wallclock': self.wallclock,
-             'working_directory': self.get_working_directory_value(),
-             'archive_status': self.archive_status,
-             'iteration': self.iteration.order_id,
-         }
-
-         if datetime_dicts:
-             if self.duration:
-                 out['duration'] = timedelta_to_dict(out['duration'])
-             if self.archive_duration:
-                 out['archive_duration'] = timedelta_to_dict(out['archive_duration'])
-             if self.start_time:
-                 out['start_time'] = datetime_to_dict(out['start_time'])
-             if self.end_time:
-                 out['end_time'] = datetime_to_dict(out['end_time'])
-             if self.archive_start_time:
-                 out['archive_start_time'] = datetime_to_dict(out['archive_start_time'])
-             if self.archive_end_time:
-                 out['archive_end_time'] = datetime_to_dict(out['archive_end_time'])
-
-         if jsonable:
-
-             if not datetime_dicts:
-
-                 if self.duration:
-                     out['duration'] = format_time_delta(out['duration'])
-                 if self.archive_duration:
-                     out['archive_duration'] = format_time_delta(out['archive_duration'])
-
-                 dt_fmt = r'%Y.%m.%d %H:%M:%S'
-
-                 if self.start_time:
-                     out['start_time'] = out['start_time'].strftime(dt_fmt)
-                 if self.end_time:
-                     out['end_time'] = out['end_time'].strftime(dt_fmt)
-                 if self.archive_start_time:
-                     out['archive_start_time'] = out['archive_start_time'].strftime(dt_fmt)
-                 if self.archive_end_time:
-                     out['archive_end_time'] = out['archive_end_time'].strftime(dt_fmt)
-
-             if self.archive_status:
-                 out['archive_status'] = self.archive_status.value
-
-         return out
-
-     def get_same_directory_tasks(self):
-         """Get a list of other Tasks within the same command group that share the same
-         working directory and iteration."""
-         same_dir_tasks = []
-         for i in self.command_group_submission_iteration.tasks:
-             if i is self:
-                 continue
-             elif i.iteration == self.iteration:
-                 if i.get_working_directory() is self.get_working_directory():
-                     same_dir_tasks.append(i)
-
-         print('Task.get_same_directory_tasks: same_dir_tasks: {}'.format(same_dir_tasks),
-               flush=True)
-
-         return same_dir_tasks
-
-     def is_archive_required(self):
-         """Check if an archive of this task is required. It is not required if a
-         different task in the same command group submission with the same working
-         directory began its own archive after the commands of this task completed."""
-
-         if not self.end_time:
-             msg = ('`Task.is_archive_required` should not be called until the task has '
-                    'completed; {} has not completed.'.format(self))
-             raise RuntimeError(msg)
-
-         for i in self.get_same_directory_tasks():
-             print('Checking if the archive of other task {} started after this task '
-                   '({}) finished.'.format(i, self), flush=True)
-             if i.archive_start_time:
-                 if i.archive_start_time > self.end_time:
-                     self.archived_task = i
-                     return False
-
-         return True
-
-     def get_variable_values(self):
-         """Get the values of variables that are resolved in this task's working
-         directory.
-
-         Returns
-         -------
-         var_vals : dict of (str: list of str)
-             Keys are the variable definition names and values are lists of variable
-             values as strings.
-
-         """
-
-         task_directory = self.get_working_directory()
-         cg_sub = self.command_group_submission_iteration.command_group_submission
-         sub_var_vals = cg_sub.submission.variable_values
-         cmd_group_var_names = cg_sub.command_group.variable_names
-         var_vals = {}
-
-         print('Task.get_variable_values: sub_var_vals:', flush=True)
-         pprint(sub_var_vals)
-
-         print('Task.get_variable_values: cmd_group_var_names:', flush=True)
-         pprint(cmd_group_var_names)
-
-         for i in sub_var_vals:
-             if i.directory_value == task_directory:
-                 var_defn_name = i.variable_definition.name
-                 if var_defn_name in cmd_group_var_names:
-                     if var_defn_name in var_vals:
-                         var_vals[var_defn_name].append(i.value)
-                     else:
-                         var_vals.update({var_defn_name: [i.value]})
-
-         return var_vals
-
-     def get_variable_values_normed(self):
-         """Get the values of variables that are resolved in this task's working
-         directory, where all variable values have the same, normalised multiplicity.
-
-         Returns
-         -------
-         var_vals_normed : dict of (str: list of str)
-             Keys are the variable definition names and values are lists of variable
-             values as strings. The list of variable values is the same length for
-             each variable definition name.
-
-         """
-
-         var_vals = self.get_variable_values()
-         if not var_vals:
-             return {}
-
-         only_names, only_vals = zip(*var_vals.items())
-         only_vals_uniform = coerce_same_length(list(only_vals))
-
-         cg_sub = self.command_group_submission_iteration.command_group_submission
-         if cg_sub.command_group.is_job_array:
-             val_idx = self.order_id % len(only_vals_uniform[0])
-             only_vals_uniform = [[i[val_idx]] for i in only_vals_uniform]
-
-         var_vals_normed = dict(zip(only_names, only_vals_uniform))
-
-         return var_vals_normed
-
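To illustrate `get_variable_values_normed` above, assuming `coerce_same_length` repeats length-one lists to match the longest list (an assumption about that helper, which is not shown in this diff):

# Illustrative only; `coerce_same_length` behaviour is assumed:
var_vals = {'infile': ['a.in', 'b.in', 'c.in'], 'mode': ['fast']}
# After coercion, every variable has three values:
#   {'infile': ['a.in', 'b.in', 'c.in'], 'mode': ['fast', 'fast', 'fast']}
# For a job-array command group, the task with order_id == 1 then takes the
# single value at index 1 % 3 == 1 from each list:
#   {'infile': ['b.in'], 'mode': ['fast']}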
-
- class Iteration(Base):
-     'Class to represent a workflow iteration.'
-
-     __tablename__ = 'iteration'
-
-     id_ = Column('id', Integer, primary_key=True)
-     workflow_id = Column(Integer, ForeignKey('workflow.id'))
-     order_id = Column(Integer)
-     status = Column(Enum(IterationStatus), default=IterationStatus('pending'))
-
-     workflow = relationship('Workflow', back_populates='iterations', uselist=False)
-     command_group_submission_iterations = relationship(
-         'CommandGroupSubmissionIteration',
-         back_populates='iteration',
-     )
-
-     def __init__(self, order_id):
-         self.order_id = order_id
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'workflow_id={}, '
-             'order_id={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.workflow_id,
-             self.order_id,
-         )
-         return out
-
-
- class CommandGroupSubmissionIteration(Base):
-     """Class to represent a command group submission within a single workflow
-     iteration."""
-
-     __tablename__ = 'command_group_submission_iteration'
-
-     id_ = Column('id', Integer, primary_key=True)
-     working_dirs_written = Column(Boolean, default=False)
-     iteration_id = Column(Integer, ForeignKey('iteration.id'))
-     scheduler_job_id = Column(Integer, nullable=True)
-     command_group_submission_id = Column(
-         Integer, ForeignKey('command_group_submission.id'))
-
-     iteration = relationship(
-         'Iteration',
-         back_populates='command_group_submission_iterations',
-         uselist=False,
-     )
-     command_group_submission = relationship(
-         'CommandGroupSubmission',
-         back_populates='command_group_submission_iterations',
-     )
-     tasks = relationship('Task', back_populates='command_group_submission_iteration')
-
-     def __init__(self, iteration, command_group_submission):
-         self.iteration = iteration
-         self.command_group_submission = command_group_submission
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'iteration_id={}, '
-             'command_group_submission_id={}, '
-             'scheduler_job_id={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.iteration_id,
-             self.command_group_submission_id,
-             self.scheduler_job_id,
-         )
-         return out
-
-     def get_directory_values(self):
-         'Get the directory variable values as strings.'
-         dir_vals = [i.value for i in self.get_directories()]
-         return dir_vals
-
-     def get_directories(self):
-         """Get the directory variable values associated with this command group
-         submission and iteration."""
-
-         dir_vars_all = self.command_group_submission.command_group.directory_variable.variable_values
-
-         # Get only those with the correct submission and iteration:
-         dirs = []
-         for i in dir_vars_all:
-             if i.iteration == self.iteration:
-                 if i.submission == self.command_group_submission.submission:
-                     dirs.append(i)
-
-         # dirs = [i for idx, i in enumerate(dirs) if idx in self.task_range_idx]
-
-         return dirs
-
-     @property
-     def num_directories(self):
-         return len(self.get_directories())
-
-     def get_task_multiplicity(self):
-         """Get the number of tasks associated with each working directory of this
-         command group submission iteration."""
-
-         dirs = self.get_directory_values()
-
-         sub = self.command_group_submission.submission
-
-         var_lengths = {}
-         for directory in dirs:
-             var_lengths.update({directory: {}})
-             for i in self.command_group_submission.command_group.variable_definitions:
-                 var_lengths_i = i.get_multiplicity(sub)  # as a function of directory
-                 for var_dir, num in var_lengths_i.items():
-                     if var_dir == directory:
-                         var_lengths[directory].update({i.name: num})
-
-         var_lengths_combined = {}
-         for directory, var_nums in var_lengths.items():
-             if var_nums:
-                 uniq_lens = set(var_nums.values())
-                 num_uniq_lens = len(uniq_lens)
-                 if num_uniq_lens == 1:
-                     combined_len = min(uniq_lens)
-                 elif num_uniq_lens == 2:
-                     if min(uniq_lens) != 1:
-                         raise ValueError(
-                             'Cannot combine variable multiplicities {} for directory '
-                             '"{}".'.format(var_nums, directory))
-                     combined_len = max(uniq_lens)
-                 else:
-                     raise ValueError(
-                         'Cannot combine variable multiplicities {} for directory '
-                         '"{}".'.format(var_nums, directory))
-             else:
-                 combined_len = 1
-
-             var_lengths_combined.update({directory: combined_len})
-
-         return var_lengths_combined
-
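A worked example of the combination rule in `get_task_multiplicity` above: within a directory, the variable multiplicities must either all agree, or differ only by also including 1 (single values are broadcast):

# Per-directory variable multiplicities (hypothetical):
var_nums = {'infile': 3, 'mode': 1}
uniq_lens = set(var_nums.values())  # {1, 3}
# Two distinct lengths are allowed only if one of them is 1; the combined
# multiplicity is then the larger one:
combined_len = max(uniq_lens)       # 3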
-     @property
-     def num_outputs(self):
-         'Get the number of outputs for this command group submission.'
-         return self.command_group_submission.scheduler_group.get_num_outputs(self.iteration)[
-             self.command_group_submission.scheduler_group_index[1]]
-
-     @property
-     def step_size(self):
-         'Get the scheduler step size for this command group submission.'
-         return self.command_group_submission.scheduler_group.get_step_size(self.iteration)[
-             self.command_group_submission.scheduler_group_index[1]]
-
-     @property
-     def num_tasks(self):
-         return len(self.tasks)
-
-     def write_working_directories(self, project):
-         'Replace lines in the working_dirs files with actual directory paths.'
-
-         dir_vals = self.get_directories()
-
-         cg_sub = self.command_group_submission
-
-         wk_dirs_path = project.hf_dir.joinpath(
-             'workflow_{}'.format(cg_sub.submission.workflow.id_),
-             'submit_{}'.format(cg_sub.submission.order_id),
-             'iter_{}'.format(self.iteration.order_id),
-             'working_dirs_{}{}'.format(
-                 cg_sub.command_group_exec_order, CONFIG.get('working_dirs_file_ext')),
-         )
-
-         with wk_dirs_path.open() as handle:
-             file_lns = handle.readlines()
-
-         for idx, i in enumerate(file_lns):
-             new_val = i.strip()
-             if 'REPLACE_WITH_DIR_' in i:
-                 dir_idx = int(i.split('REPLACE_WITH_DIR_')[1])
-                 new_val = dir_vals[dir_idx].value
-             file_lns[idx] = new_val
-
-         with wk_dirs_path.open('w') as handle:
-             for i in file_lns:
-                 handle.write(i + '\n')
-
-
- class SchedulerGroup(object):
-     """Class to represent a collection of consecutive command group submissions that
-     have the same scheduler task range."""
-
-     def __init__(self, order_id, command_group_submissions):
-
-         self.order_id = order_id
-         self.command_group_submissions = command_group_submissions
-
-     def __repr__(self):
-         out = (
-             '{}('
-             'order_id={}, '
-             'command_group_submissions={}'
-             ')'
-         ).format(
-             self.__class__.__name__,
-             self.order_id,
-             self.command_group_submissions,
-         )
-         return out
-
-     def get_max_num_tasks(self, iteration):
-         return max(self.get_num_outputs(iteration))
-
-     def get_step_size(self, iteration):
-         return [int(self.get_max_num_tasks(iteration) / i)
-                 for i in self.get_num_outputs(iteration)]
-
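A worked example of `get_max_num_tasks` and `get_step_size` above, for hypothetical per-command-group output counts:

num_outputs = [1, 4, 2]           # hypothetical per-command-group outputs
max_num_tasks = max(num_outputs)  # 4
step_size = [int(max_num_tasks / i) for i in num_outputs]
print(step_size)                  # [4, 1, 2]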
-     def get_num_outputs(self, iteration):
-         'Get the number of outputs for each command group submission in this group.'
-
-         num_outs = 1
-         num_outs_prev = num_outs
-         num_outs_all = []
-
-         # Get num_outputs for all command group submissions in this scheduler group:
-         for idx, cg_sub in enumerate(self.command_group_submissions):
-
-             # print('SchedulerGroup.get_num_outputs: cg_sub idx: {}'.format(idx), flush=True)
-
-             # print('SchedulerGroup.get_num_outputs: cg_sub_iters: ')
-             # pprint(cg_sub.command_group_submission_iterations)
-
-             cg_sub_iter = None
-             for i in cg_sub.command_group_submission_iterations:
-                 if i.iteration == iteration:
-                     cg_sub_iter = i
-                     break
-             if not cg_sub_iter:
-                 raise ValueError('Could not find CommandGroupSubmissionIteration object.')
-
-             # The number of outputs depends on the task multiplicity, `is_job_array`
-             # and `nesting`:
-             is_job_array = cg_sub.command_group.is_job_array
-             nesting = cg_sub.command_group.nesting
-
-             # print('SchedulerGroup.get_num_outputs: is_job_array: {}'.format(
-             #     is_job_array), flush=True)
-             # print('SchedulerGroup.get_num_outputs: nesting: {}'.format(nesting), flush=True)
-
-             if nesting == NestingType('nest'):  # or first_cmd_group:
-                 num_outs = num_outs_prev
-             elif nesting == NestingType('hold'):
-                 num_outs = 1
-             elif nesting is None:
-                 num_outs = 1
-
-             if is_job_array:
-                 # if nesting in [NestingType('hold'), None]:
-                 #     num_outs *= cg_sub.num_directories
-                 # print('SchedulerGroup._get_num_outputs: cg_sub.num_directories: {}'.format(
-                 #     cg_sub.num_directories), flush=True)
-
-                 # `get_task_multiplicity` returns a dict keyed by directory:
-                 num_outs *= sum(cg_sub_iter.get_task_multiplicity().values())
-
-                 # print('SchedulerGroup.get_num_outputs: cg_sub_iter.task_multiplicity: {}'.format(
-                 #     cg_sub_iter.get_task_multiplicity()), flush=True)
-
-             # print('SchedulerGroup.get_num_outputs: num_outs: {}'.format(num_outs), flush=True)
-
-             num_outs_all.append(num_outs)
-             num_outs_prev = num_outs
-
-         # print('SchedulerGroup.get_num_outputs: num_outs_all: {}'.format(
-         #     num_outs_all), flush=True)
-
-         return num_outs_all
-
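A worked example of the `get_num_outputs` rules above: 'nest' carries the previous count forward, 'hold' or None resets to 1, and a job array multiplies by the total task multiplicity. Plain strings and hypothetical multiplicities stand in for the real objects here:

# Hypothetical group: (nesting, is_job_array, total task multiplicity)
cmd_groups = [(None, True, 4), ('nest', False, 1), ('hold', True, 2)]
num_outs, num_outs_all = 1, []
for nesting, is_job_array, multiplicity in cmd_groups:
    num_outs = num_outs if nesting == 'nest' else 1
    if is_job_array:
        num_outs *= multiplicity
    num_outs_all.append(num_outs)
print(num_outs_all)  # [4, 4, 2]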
-     def has(self, command_group_submission):
-         return command_group_submission in self.command_group_submissions
-
-     def index(self, command_group_submission):
-         if not self.has(command_group_submission):
-             msg = '{} is not in the scheduler group.'
-             raise ValueError(msg.format(command_group_submission))
-         return self.command_group_submissions.index(command_group_submission)
-
-     @classmethod
-     def get_scheduler_groups(cls, submission):
-         'Split the command group submissions up into scheduler groups.'
-
-         cmd_groups_split = []
-         sch_group_idx = 0
-
-         for cg_sub in submission.command_group_submissions:
-
-             if cg_sub.command_group.nesting == NestingType('hold'):
-                 sch_group_idx += 1
-             if len(cmd_groups_split) == sch_group_idx + 1:
-                 cmd_groups_split[sch_group_idx].append(cg_sub)
-             else:
-                 cmd_groups_split.append([cg_sub])
-
-         return [cls(idx, i) for idx, i in enumerate(cmd_groups_split)]
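A worked example of `get_scheduler_groups` above: a new scheduler group starts at every command group submission whose nesting is 'hold'. Integers stand in for the command group submissions:

# Hypothetical nesting values for four consecutive command group submissions:
nestings = [None, 'nest', 'hold', 'nest']
groups, idx = [], 0
for i, nesting in enumerate(nestings):
    if nesting == 'hold':
        idx += 1
    if len(groups) == idx + 1:
        groups[idx].append(i)
    else:
        groups.append([i])
print(groups)  # [[0, 1], [2, 3]]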