runzi-0.9.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. runzi-0.9.0/PKG-INFO +61 -0
  2. runzi-0.9.0/README.md +31 -0
  3. runzi-0.9.0/pyproject.toml +118 -0
  4. runzi-0.9.0/runzi/__init__.py +1 -0
  5. runzi-0.9.0/runzi/arguments.py +119 -0
  6. runzi-0.9.0/runzi/automation/__init__.py +0 -0
  7. runzi-0.9.0/runzi/automation/azimuthal_rupture_set_builder_task.py +180 -0
  8. runzi-0.9.0/runzi/automation/file_utils.py +178 -0
  9. runzi-0.9.0/runzi/automation/inversion_hazard_report_task.py +272 -0
  10. runzi-0.9.0/runzi/automation/local_config.py +72 -0
  11. runzi-0.9.0/runzi/automation/master.py +20 -0
  12. runzi-0.9.0/runzi/automation/opensha_task_factory.py +236 -0
  13. runzi-0.9.0/runzi/automation/python_task_factory.py +151 -0
  14. runzi-0.9.0/runzi/automation/schedule_tasks.py +50 -0
  15. runzi-0.9.0/runzi/automation/task_config.py +25 -0
  16. runzi-0.9.0/runzi/automation/task_utils.py +52 -0
  17. runzi-0.9.0/runzi/automation/toshi_api/__init__.py +2 -0
  18. runzi-0.9.0/runzi/automation/toshi_api/aggregate_inversion_solution.py +159 -0
  19. runzi-0.9.0/runzi/automation/toshi_api/automation_task.py +159 -0
  20. runzi-0.9.0/runzi/automation/toshi_api/general_task.py +209 -0
  21. runzi-0.9.0/runzi/automation/toshi_api/inversion_solution.py +149 -0
  22. runzi-0.9.0/runzi/automation/toshi_api/inversion_solution_nrml.py +99 -0
  23. runzi-0.9.0/runzi/automation/toshi_api/openquake_hazard/__init__.py +4 -0
  24. runzi-0.9.0/runzi/automation/toshi_api/openquake_hazard/openquake_hazard_config.py +82 -0
  25. runzi-0.9.0/runzi/automation/toshi_api/openquake_hazard/openquake_hazard_solution.py +88 -0
  26. runzi-0.9.0/runzi/automation/toshi_api/openquake_hazard/openquake_hazard_task.py +131 -0
  27. runzi-0.9.0/runzi/automation/toshi_api/scaled_inversion_solution.py +127 -0
  28. runzi-0.9.0/runzi/automation/toshi_api/time_dependent_inversion_solution.py +111 -0
  29. runzi-0.9.0/runzi/automation/toshi_api/toshi_api.py +303 -0
  30. runzi-0.9.0/runzi/automation/worker.py +20 -0
  31. runzi-0.9.0/runzi/aws/__init__.py +1 -0
  32. runzi-0.9.0/runzi/aws/aws.py +208 -0
  33. runzi-0.9.0/runzi/aws/s3_folder_upload.py +89 -0
  34. runzi-0.9.0/runzi/build_tasks.py +99 -0
  35. runzi-0.9.0/runzi/cli/__init__.py +1 -0
  36. runzi-0.9.0/runzi/cli/hazard_cli.py +39 -0
  37. runzi-0.9.0/runzi/cli/inversion_cli.py +40 -0
  38. runzi-0.9.0/runzi/cli/inversion_post_process_cli.py +58 -0
  39. runzi-0.9.0/runzi/cli/reports_cli.py +46 -0
  40. runzi-0.9.0/runzi/cli/runzi_cli.py +16 -0
  41. runzi-0.9.0/runzi/cli/rupture_sets_cli.py +36 -0
  42. runzi-0.9.0/runzi/cli/save_bg_file_archives.py +72 -0
  43. runzi-0.9.0/runzi/cli/save_distseis_mastertable.py +67 -0
  44. runzi-0.9.0/runzi/cli/utils_cli.py +48 -0
  45. runzi-0.9.0/runzi/job_runner.py +133 -0
  46. runzi-0.9.0/runzi/tasks/__init__.py +0 -0
  47. runzi-0.9.0/runzi/tasks/average_solutions/__init__.py +2 -0
  48. runzi-0.9.0/runzi/tasks/average_solutions/average_solutions_runner.py +43 -0
  49. runzi-0.9.0/runzi/tasks/average_solutions/average_solutions_task.py +245 -0
  50. runzi-0.9.0/runzi/tasks/coulomb_rupture_sets/__init__.py +2 -0
  51. runzi-0.9.0/runzi/tasks/coulomb_rupture_sets/coulomb_rupture_set_builder_task.py +297 -0
  52. runzi-0.9.0/runzi/tasks/coulomb_rupture_sets/coulomb_rupture_sets_runner.py +25 -0
  53. runzi-0.9.0/runzi/tasks/get_config.py +25 -0
  54. runzi-0.9.0/runzi/tasks/inversion/__init__.py +4 -0
  55. runzi-0.9.0/runzi/tasks/inversion/crustal_inversion_runner.py +25 -0
  56. runzi-0.9.0/runzi/tasks/inversion/crustal_inversion_solution_task.py +205 -0
  57. runzi-0.9.0/runzi/tasks/inversion/inversion_solution_builder.py +476 -0
  58. runzi-0.9.0/runzi/tasks/inversion/inversion_sub_solution_task.py +156 -0
  59. runzi-0.9.0/runzi/tasks/inversion/subduction_inversion_runner.py +25 -0
  60. runzi-0.9.0/runzi/tasks/inversion/subduction_inversion_solution_task.py +109 -0
  61. runzi-0.9.0/runzi/tasks/inversion_report/__init__.py +2 -0
  62. runzi-0.9.0/runzi/tasks/inversion_report/inversion_diags_report_task.py +159 -0
  63. runzi-0.9.0/runzi/tasks/inversion_report/inversion_report_runner.py +39 -0
  64. runzi-0.9.0/runzi/tasks/oq_hazard/__init__.py +3 -0
  65. runzi-0.9.0/runzi/tasks/oq_hazard/execute_openquake.py +133 -0
  66. runzi-0.9.0/runzi/tasks/oq_hazard/hazard_args.py +257 -0
  67. runzi-0.9.0/runzi/tasks/oq_hazard/oq_disagg_runner.py +134 -0
  68. runzi-0.9.0/runzi/tasks/oq_hazard/oq_disagg_task.py +334 -0
  69. runzi-0.9.0/runzi/tasks/oq_hazard/oq_hazard_runner.py +111 -0
  70. runzi-0.9.0/runzi/tasks/oq_hazard/oq_hazard_task.py +328 -0
  71. runzi-0.9.0/runzi/tasks/oq_opensha_convert/__init__.py +2 -0
  72. runzi-0.9.0/runzi/tasks/oq_opensha_convert/oq_convert_solution_runner.py +30 -0
  73. runzi-0.9.0/runzi/tasks/oq_opensha_convert/oq_opensha_convert_task.py +199 -0
  74. runzi-0.9.0/runzi/tasks/rupset_report/__init__.py +2 -0
  75. runzi-0.9.0/runzi/tasks/rupset_report/ruptset_diags_report_task.py +116 -0
  76. runzi-0.9.0/runzi/tasks/rupset_report/rupture_set_report_runner.py +37 -0
  77. runzi-0.9.0/runzi/tasks/scale_solution/__init__.py +2 -0
  78. runzi-0.9.0/runzi/tasks/scale_solution/scale_solution_runner.py +29 -0
  79. runzi-0.9.0/runzi/tasks/scale_solution/scale_solution_task.py +202 -0
  80. runzi-0.9.0/runzi/tasks/subduction_rupture_sets/__init__.py +2 -0
  81. runzi-0.9.0/runzi/tasks/subduction_rupture_sets/subduction_rupture_set_builder_task.py +210 -0
  82. runzi-0.9.0/runzi/tasks/subduction_rupture_sets/subduction_rupture_sets_runner.py +23 -0
  83. runzi-0.9.0/runzi/tasks/time_dependent_solution/__init__.py +2 -0
  84. runzi-0.9.0/runzi/tasks/time_dependent_solution/time_dependent_solution_runner.py +44 -0
  85. runzi-0.9.0/runzi/tasks/time_dependent_solution/time_dependent_solution_task.py +181 -0
  86. runzi-0.9.0/runzi/tasks/toshi_utils.py +32 -0
  87. runzi-0.9.0/runzi/tasks/utils/__init__.py +2 -0
  88. runzi-0.9.0/runzi/tasks/utils/build_manual_index.py +240 -0
  89. runzi-0.9.0/runzi/tasks/utils/save_file_archive.py +138 -0
  90. runzi-0.9.0/runzi/utils.py +20 -0
runzi-0.9.0/PKG-INFO ADDED
@@ -0,0 +1,61 @@
+ Metadata-Version: 2.4
+ Name: runzi
+ Version: 0.9.0
+ Summary: scripting and cli for the NSHM
+ License-Expression: GPL-3.0-only
+ Author: Chris DiCaprio
+ Author-email: christopher.dicaprio@gmail.com
+ Requires-Python: >=3.11,<3.12
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Requires-Dist: boto3 (>=1.26.43)
+ Requires-Dist: click (>=8.1.7)
+ Requires-Dist: gitpython (>=3.1.46)
+ Requires-Dist: lxml (>=4.9.2)
+ Requires-Dist: nshm-toshi-client (>=1.1.0,<2.0.0)
+ Requires-Dist: nzshm-common[geometry] (>=0.8.4)
+ Requires-Dist: nzshm-hazlab (>=0.1.1,<0.2.0)
+ Requires-Dist: nzshm-model (>=0.13.6)
+ Requires-Dist: py4j (==0.10.9.1)
+ Requires-Dist: pydantic (>=2.12)
+ Requires-Dist: python-dateutil (>=2.8.2)
+ Requires-Dist: python-dotenv (>=1.1.0)
+ Requires-Dist: pytz (>=2025.1)
+ Requires-Dist: solvis (>=1.2.0,<2.0.0)
+ Requires-Dist: toshi-hazard-store (>=1.2.3,<2.0.0)
+ Requires-Dist: tqdm (>=4.67.3)
+ Requires-Dist: typer (>=0.17.4,<0.18.0)
+ Description-Content-Type: text/markdown
+
+ # nzshm-runzi
+
+
+ [![pypi](https://img.shields.io/pypi/v/nzshm-runzi.svg)](https://pypi.org/project/nzshm-runzi/)
+ [![python](https://img.shields.io/pypi/pyversions/nzshm-runzi.svg)](https://pypi.org/project/nzshm-runzi/)
+ [![Build Status](https://github.com/GNS-Science/nzshm-runzi/actions/workflows/dev.yml/badge.svg)](https://github.com/GNS-Science/nzshm-runzi/actions/workflows/dev.yml)
+ [![codecov](https://codecov.io/gh/GNS-Science/nzshm-runzi/branch/main/graphs/badge.svg)](https://codecov.io/github/GNS-Science/nzshm-runzi)
+
+ * Documentation: <https://GNS-Science.github.io/nzshm-runzi>
+ * GitHub: <https://github.com/GNS-Science/nzshm-runzi>
+ * PyPI: <https://pypi.org/project/nzshm-runzi/>
+ * Free software: GPL-3.0-only
+
+ Python application for running, scheduling, and collecting inputs & outputs of NZSHM jobs on workstations, AWS cloud, and HPC cluster
+
+ runzi is used by the ESNZ NSHM programme to run OpenSHA style inversions, hazard calculations, and other computational tasks.
+
+ - Provides a CLI for launching jobs locally or using AWS EC2 services (HPC is currently unsupported after the move from PBS to Slurm).
+ - Coordinates with [toshi API](https://github.com/GNS-Science/nshm-toshi-api) and [toshi-hazard-store](https://github.com/GNS-Science/toshi-hazard-store) to look up and store results and metadata.
+
+ ## Run
+ ```console
+ $ runzi [OPTIONS] COMMAND [ARGS]...
+ ```
+
+ ```console
+ $ runzi --help
+ ```
+
+
+
+
runzi-0.9.0/README.md ADDED
@@ -0,0 +1,31 @@
+ # nzshm-runzi
+
+
+ [![pypi](https://img.shields.io/pypi/v/nzshm-runzi.svg)](https://pypi.org/project/nzshm-runzi/)
+ [![python](https://img.shields.io/pypi/pyversions/nzshm-runzi.svg)](https://pypi.org/project/nzshm-runzi/)
+ [![Build Status](https://github.com/GNS-Science/nzshm-runzi/actions/workflows/dev.yml/badge.svg)](https://github.com/GNS-Science/nzshm-runzi/actions/workflows/dev.yml)
+ [![codecov](https://codecov.io/gh/GNS-Science/nzshm-runzi/branch/main/graphs/badge.svg)](https://codecov.io/github/GNS-Science/nzshm-runzi)
+
+ * Documentation: <https://GNS-Science.github.io/nzshm-runzi>
+ * GitHub: <https://github.com/GNS-Science/nzshm-runzi>
+ * PyPI: <https://pypi.org/project/nzshm-runzi/>
+ * Free software: GPL-3.0-only
+
+ Python application for running, scheduling, and collecting inputs & outputs of NZSHM jobs on workstations, AWS cloud, and HPC cluster
+
+ runzi is used by the ESNZ NSHM programme to run OpenSHA style inversions, hazard calculations, and other computational tasks.
+
+ - Provides a CLI for launching jobs locally or using AWS EC2 services (HPC is currently unsupported after the move from PBS to Slurm).
+ - Coordinates with [toshi API](https://github.com/GNS-Science/nshm-toshi-api) and [toshi-hazard-store](https://github.com/GNS-Science/toshi-hazard-store) to look up and store results and metadata.
+
+ ## Run
+ ```console
+ $ runzi [OPTIONS] COMMAND [ARGS]...
+ ```
+
+ ```console
+ $ runzi --help
+ ```
+
+
+
runzi-0.9.0/pyproject.toml ADDED
@@ -0,0 +1,118 @@
+ [project]
+ name = "runzi"
+ version = "0.9.0"
+ readme = "README.md"
+ authors = [
+     {name = "Chris DiCaprio", email = "christopher.dicaprio@gmail.com"},
+     {name = "Chris Chamberlain", email = "chrisbc@artisan.co.nz"},
+     {name = "Oakley Jurgens", email = "o.jurgens@gns.cri.nz"},
+ ]
+ description = "scripting and cli for the NSHM"
+ license = "GPL-3.0-only"
+ requires-python = ">=3.11,<3.12"
+ packages = [
+     { include = "runzi" },
+     { include = "tests", format = "sdist" },
+ ]
+ dependencies = [
+     "gitpython (>=3.1.46)",
+     "py4j (==0.10.9.1)",
+     "boto3 (>=1.26.43)",
+     "lxml (>=4.9.2)",
+     "tqdm (>=4.67.3)",
+     "click (>=8.1.7)",
+     "python-dateutil (>=2.8.2)",
+     "nzshm-common[geometry] (>=0.8.4)",
+     "pytz (>=2025.1)",
+     "pydantic (>=2.12)",
+     "nzshm-model (>=0.13.6)",
+     "python-dotenv (>=1.1.0)",
+     "toshi-hazard-store (>=1.2.3,<2.0.0)",
+     "typer (>=0.17.4,<0.18.0)",
+     "solvis (>=1.2.0,<2.0.0)",
+     "nshm-toshi-client (>=1.1.0,<2.0.0)",
+     "nzshm-hazlab (>=0.1.1,<0.2.0)",
+ ]
+
+
+
+
+ [project.scripts]
+ runzi = 'runzi.cli.runzi_cli:app'
+
+
+ [dependency-groups]
+ dev = [
+     "pytest",
+     "flake8",
+     "black",
+     "isort",
+     "bump2version",
+     "tox",
+     "mypy",
+     "pytest-cov",
+     "types-requests",
+     "types-pytz",
+     "types-python-dateutil",
+     "pytest-mock",
+     "vulture",
+     "safety",
+     "pip-audit",
+     "chardet (<6)",
+ ]
+
+
+ doc = [
+     "mkdocs",
+     "mkdocs-material",
+     "mkdocs-include-markdown-plugin",
+     "mkdocstrings",
+     "mkdocstrings-python",
+     "griffe (>=2.0.0,<3.0.0)",
+     "griffe-pydantic (>=1.3.1,<2.0.0)",
+     "griffe-fieldz (>=0.4.0,<0.5.0)",
+     "mkdocs-gen-files (>=0.6.0,<0.7.0)",
+ ]
+
+
+
+ [tool.isort]
+ multi_line_output = 3
+ include_trailing_comma = true
+ force_grid_wrap = 0
+ use_parentheses = true
+ ensure_newline_before_comments = true
+ line_length = 120
+ skip_gitignore = true
+
+ [tool.black]
+ line-length = 120
+ skip-string-normalization = true
+ target-version = ['py311']
+ include = '\.pyi?$'
+ exclude = '''
+ /(
+     \.eggs
+     | \.git
+     | \.hg
+     | \.mypy_cache
+     | \.tox
+     | \.venv
+     | _build
+     | buck-out
+     | build
+     | dist
+     | runzi/CONFIG
+ )/
+ '''
+
+ [tool.mypy]
+ ignore_missing_imports = true
+
+ [tool.poetry.requires-plugins]
+ poetry-plugin-export = ">=1.8"
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
+
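The `[project.scripts]` table above maps the `runzi` console command to the `app` object exported by `runzi.cli.runzi_cli` (presumably a Typer application, given the `typer` dependency). A minimal sketch of what that entry point resolves to once the package is installed; the `__main__` guard here is illustrative, not part of the package:

```python
# Hypothetical equivalent of the generated `runzi` console script:
# it imports the callable named in [project.scripts] and invokes it.
from runzi.cli.runzi_cli import app

if __name__ == "__main__":
    app()  # same effect as typing `runzi` in a shell
```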
runzi-0.9.0/runzi/__init__.py ADDED
@@ -0,0 +1 @@
+ __version__ = '0.9.0'
runzi-0.9.0/runzi/arguments.py ADDED
@@ -0,0 +1,119 @@
+ import copy
+ import json
+ from enum import Enum
+ from pathlib import Path
+ from typing import Any, Generator, Optional, Sequence
+
+ from pydantic import BaseModel
+ from typing_extensions import Self
+
+ from runzi.aws import BatchEnvironmentSetting
+
+
+ class TaskLanguage(Enum):
+     PYTHON = 'python'
+     JAVA = 'java'
+
+
+ class SystemArgs(BaseModel):
+
+     task_language: TaskLanguage
+     general_task_id: Optional[str] = None
+     task_count: int = 0
+     use_api: bool
+
+     java_threads: Optional[int] = None  # only used for pbs mode, which is no longer supported
+     jvm_heap_max: Optional[int] = None
+     java_gateway_port: Optional[int] = None
+
+     ecs_max_job_time_min: int
+     ecs_memory: int
+     ecs_vcpu: int
+     ecs_job_definition: str
+     ecs_job_queue: str
+     ecs_extra_env: Optional[list[BatchEnvironmentSetting]] = None
+
+
+ class ArgSweeper:
+     """Class to hold argument prototype and swept arguments."""
+
+     def __init__(
+         self,
+         prototype_args: BaseModel,
+         swept_args: dict[str, Sequence[Any]],
+         title: str,
+         description: str,
+         sys_arg_overrides: Optional[dict[str, Any]] = None,
+     ):
+         """Initialize an ArgSweeper instance.
+
+         Args:
+             prototype_args: The prototype job argument object.
+             swept_args: A dictionary of argument names to lists of values to be swept.
+             title: The title for the job.
+             description: The description for the job.
+             sys_arg_overrides: System arguments to override from the default of the JobRunner.
+         """
+
+         self.prototype_args = prototype_args
+         self.swept_args = swept_args
+         self.title = title
+         self.description = description
+         self.sys_arg_overrides = sys_arg_overrides or {}
+
+     @classmethod
+     def from_config_file(cls, config_file: Path | str, args_class: type[BaseModel]) -> Self:
+         """Create a prototype job argument object and a dict of arguments to be swept.
+
+         Config files are json format and can optionally contain a "swept_args" object that specifies the names and
+         list of values for an argument to take in the jobs to be created. The prototype object is generated from the
+         first value from each of the swept arguments. The dict keys are the argument names and values are lists of
+         argument values.
+
+         Args:
+             config_file: File-like object or path to configuration file.
+             args_class: The type (class) of the configuration/arguments object.
+
+         Returns:
+             An ArgSweeper holding the prototype config object and the dictionary of arguments to be swept.
+         """
+
+         json_str = Path(config_file).read_text()
+         data = json.loads(json_str)
+         title = data.pop("title")
+         description = data.pop("description")
+         swept_args = data.pop("swept_args", {})
+         sys_arg_overrides = data.pop("sys_arg_overrides", {})
+
+         if swept_args:
+             for k, v in swept_args.items():
+                 if k in data:
+                     raise ValueError(f"Swept argument '{k}' also specified in unswept arguments")
+                 if not all(isinstance(item, type(v[0])) for item in v):
+                     raise ValueError(f"All values for swept argument '{k}' must be of the same type")
+                 data[k] = v[0]
+         # we include the base_path context so that any args_class that needs to
+         # resolve absolute paths can (e.g., used by HazardArgs)
+         prototype = args_class.model_validate(
+             data, extra='forbid', context={"base_path": Path(config_file).parent.resolve()}
+         )
+
+         return cls(prototype, swept_args, title, description, sys_arg_overrides)
+
+     def get_tasks(self) -> Generator[BaseModel, None, None]:
+         """Generate all combinations of swept arguments as job argument objects.
+
+         Yields:
+             Job argument objects for each combination of swept arguments.
+         """
+         from itertools import product
+
+         if not self.swept_args:
+             yield self.prototype_args
+             return
+
+         prototype_data = self.prototype_args.model_dump()
+         for values in product(*self.swept_args.values()):
+             update_data = dict(zip(self.swept_args.keys(), values))
+             prototype_data_copy = copy.deepcopy(prototype_data)
+             yield self.prototype_args.model_validate(prototype_data_copy | update_data)
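`ArgSweeper.get_tasks` expands `swept_args` into the Cartesian product of all swept values, overlaying each combination on the prototype before re-validating. A self-contained sketch of that expansion using plain dicts; the field names and values here are illustrative, not taken from a real runzi config:

```python
# Sketch of the sweep expansion performed by ArgSweeper.get_tasks(),
# using plain dicts instead of pydantic models.
from itertools import product

prototype = {"fault_model": "CFM_0_3_SANSTVZ", "max_jump_distance": 5.0, "thinning_factor": 0.0}
swept = {"max_jump_distance": [5.0, 10.0, 15.0], "thinning_factor": [0.0, 0.1]}

# one argument set per combination of swept values, overlaid on the prototype
tasks = [prototype | dict(zip(swept, values)) for values in product(*swept.values())]
assert len(tasks) == 6  # 3 values x 2 values
```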
runzi-0.9.0/runzi/automation/__init__.py ADDED
File without changes
runzi-0.9.0/runzi/automation/azimuthal_rupture_set_builder_task.py ADDED
@@ -0,0 +1,180 @@
+ import argparse
+ import datetime as dt
+ import json
+ import os
+ import platform
+ import time
+ from pathlib import PurePath
+
+ import git
+ from dateutil.tz import tzutc
+ from nshm_toshi_client.general_task import GeneralTask
+ from nshm_toshi_client.rupture_generation_task import RuptureGenerationTask
+ from nshm_toshi_client.task_relation import TaskRelation
+ from py4j.java_gateway import GatewayParameters, JavaGateway
+
+ API_URL = os.getenv('NZSHM22_TOSHI_API_URL', "http://127.0.0.1:5000/graphql")
+ API_KEY = os.getenv('NZSHM22_TOSHI_API_KEY', "")
+ S3_URL = os.getenv('NZSHM22_TOSHI_S3_URL', "http://localhost:4569")
+
+
+ class RuptureSetBuilderTask:
+     """
+     The python client for a RuptureSetBuildTask
+     """
+
+     def __init__(self, job_args):
+
+         self.use_api = job_args.get('use_api', False)
+
+         # set up the java gateway binding
+         gateway = JavaGateway(gateway_parameters=GatewayParameters(port=job_args['java_gateway_port']))
+         app = gateway.entry_point
+         self._builder = app.getAzimuthalRuptureSetBuilder()
+
+         # get the root path for the task local data
+         # root_folder = PurePath(os.getcwd())
+
+         repos = ["opensha", "nshm-nz-opensha"]
+         # repo_root = root_folder
+         self._output_folder = PurePath(
+             job_args.get('working_path')
+         )  # .joinpath('tmp').joinpath(dt.datetime.utcnow().isoformat().replace(':','-'))
+         # os.mkdir(self._output_folder)
+
+         # set up the csv (backup) task recorder
+         self._writer = None  # CSVResultWriter(open(self._output_folder.joinpath('results.csv'), 'w'), repos)
+         self._repoheads = get_repo_heads(PurePath(job_args['root_folder']), repos)
+
+         if self.use_api:
+             headers = {"x-api-key": API_KEY}
+             self._ruptgen_api = RuptureGenerationTask(
+                 API_URL, S3_URL, None, with_schema_validation=True, headers=headers
+             )
+             self._general_api = GeneralTask(API_URL, S3_URL, None, with_schema_validation=True, headers=headers)
+             self._task_relation_api = TaskRelation(API_URL, None, with_schema_validation=True, headers=headers)
+
+     def ruptureSetMetrics(self):
+         metrics = {}
+         metrics["subsection_count"] = self._builder.getSubSections().size()
+         metrics["rupture_count"] = self._builder.getRuptures().size()
+         # metrics["possible_cluster_connections"] = conf.getConnectionStrategy().getClusterConnectionCount()
+
+         # get info from the configuration
+         conf = self._builder.getPlausibilityConfig()
+         conf_diags = json.loads(conf.toJSON())
+         conns = 0
+         for cluster in conf_diags['connectionStrategy']['clusters']:
+             conns += len(cluster.get('connections', []))
+         metrics["cluster_connections"] = conns
+
+         return metrics
+
+     def run(self, task_arguments, job_arguments):
+
+         # print(task_arguments)
+         # print(job_arguments)
+
+         t0 = dt.datetime.utcnow()
+
+         environment = {
+             "host": platform.node(),
+             "gitref_opensha": self._repoheads['opensha'],
+             "gitref_nshm-nz-opensha": self._repoheads['nshm-nz-opensha'],
+         }
+
+         if self.use_api:
+             # create new task in toshi_api
+             task_id = self._ruptgen_api.create_task(
+                 dict(created=dt.datetime.now(tzutc()).isoformat()), arguments=task_arguments, environment=environment
+             )
+
+             # link task to the parent task
+             self._task_relation_api.create_task_relation(job_arguments['general_task_id'], task_id)
+             # # link task to the input datafile (*.XML)
+             # self._ruptgen_api.link_task_file(task_id, crustal_id, 'READ')
+
+         else:
+             task_id = None
+
+         # Run the task....
+         ta = task_arguments
+         # for crustal
+         self._builder.setMaxFaultSections(int(ta["max_sections"])).setMaxJumpDistance(
+             float(ta["max_jump_distance"])
+         ).setPermutationStrategy(ta["connection_strategy"]).setMaxSubSectionLength(
+             float(ta["down_dip_width"])
+         ).setMinSubSectsPerParent(
+             int(ta["min_sub_sects_per_parent"])
+         ).setMinSubSections(
+             int(ta["min_sub_sections"])
+         ).setMaxCumulativeAzimuthChange(
+             float(ta["max_cumulative_azimuth"])
+         ).setThinningFactor(
+             float(ta["thinning_factor"])
+         ).setFaultModel(
+             ta["fault_model"]
+         )
+
+         # name the output file
+         outputfile = self._output_folder.joinpath(self._builder.getDescriptiveName() + ".zip")
+         print("building %s started at %s" % (outputfile, dt.datetime.utcnow().isoformat()), end=' ')
+
+         self._builder.setNumThreads(int(job_arguments["java_threads"])).buildRuptureSet()
+
+         # capture task metrics
+         duration = (dt.datetime.utcnow() - t0).total_seconds()
+         metrics = self.ruptureSetMetrics()
+
+         # write the result
+         self._builder.writeRuptureSet(str(outputfile))
+
+         if self.use_api:
+             # record the completed task
+             done_args = {
+                 'task_id': task_id,
+                 'duration': duration,
+                 'result': "SUCCESS",
+                 'state': "DONE",
+             }
+             self._ruptgen_api.complete_task(done_args, metrics)
+
+             # upload the task output
+             self._ruptgen_api.upload_task_file(task_id, outputfile, 'WRITE', meta=task_arguments)
+
+             # and the log files, why not
+             java_log_file = self._output_folder.joinpath(f"java_app.{job_arguments['java_gateway_port']}.log")
+             self._ruptgen_api.upload_task_file(task_id, java_log_file, 'WRITE')
+             pyth_log_file = self._output_folder.joinpath(f"python_script.{job_arguments['java_gateway_port']}.log")
+             self._ruptgen_api.upload_task_file(task_id, pyth_log_file, 'WRITE')
+
+         print("; took %s secs" % (dt.datetime.utcnow() - t0).total_seconds())
+
+
+ def get_repo_heads(rootdir, repos):
+     result = {}
+     for reponame in repos:
+         repo = git.Repo(rootdir.joinpath(reponame))
+         headcommit = repo.head.commit
+         result[reponame] = headcommit.hexsha
+     return result
+
+
+ if __name__ == "__main__":
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("config")
+     args = parser.parse_args()
+
+     config_file = args.config
+     f = open(config_file, 'r', encoding='utf-8')
+     config = json.load(f)
+
+     # maybe the JVM App is a little slow to get listening
+     time.sleep(5)
+     # Wait for some more time, scaled by taskid to avoid S3 consistency issue
+     time.sleep(config['job_arguments']['task_id'] * 5)
+
+     # print(config)
+     task = RuptureSetBuilderTask(config['job_arguments'])
+     task.run(**config)
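The `__main__` block above expects a JSON config containing `job_arguments` and `task_arguments` objects, which `task.run(**config)` unpacks as keyword arguments. A hedged sketch of that shape, with keys inferred from the reads in the task code and all values purely illustrative:

```python
# Inferred shape of the JSON config consumed by the __main__ block above;
# keys come from the code, values are placeholders only.
config = {
    "job_arguments": {
        "task_id": 0,                     # used to stagger startup sleeps
        "use_api": False,                 # skip toshi API record-keeping
        "general_task_id": None,          # parent task, needed when use_api is True
        "java_gateway_port": 25333,       # py4j default gateway port
        "java_threads": 4,
        "working_path": "/tmp/runzi",
        "root_folder": "/path/to/repos",  # contains opensha and nshm-nz-opensha checkouts
    },
    "task_arguments": {
        "max_sections": 1000,
        "max_jump_distance": 15.0,
        "connection_strategy": "UCERF3",  # placeholder permutation strategy name
        "down_dip_width": 0.5,
        "min_sub_sects_per_parent": 2,
        "min_sub_sections": 2,
        "max_cumulative_azimuth": 560.0,
        "thinning_factor": 0.0,
        "fault_model": "CFM_0_3_SANSTVZ",
    },
}
```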
runzi-0.9.0/runzi/automation/file_utils.py ADDED
@@ -0,0 +1,178 @@
+ #!python3
+ """
+ helpers for upstream file retrieval
+
+ """
+
+ import os
+ from pathlib import Path, PurePath
+ from typing import TYPE_CHECKING, Any, Generator, Iterable
+
+ import requests
+
+ if TYPE_CHECKING:
+     from runzi.automation.toshi_api import ToshiApi
+
+
+ def get_output_file_ids(general_task_api, upstream_task_id, file_extension='zip'):
+
+     api_result = general_task_api.get_subtask_files(upstream_task_id)
+     for subtask in api_result['children']['edges']:
+
+         # get rupture set fault model
+         fault_model = ""
+         for filenode in subtask['node']['child']['files']['edges']:
+             # print("FN:", filenode)
+             if filenode['node']['role'] == 'READ' and filenode['node']['file']['file_name'][-3:] == file_extension:
+                 for kv in filenode['node']['file'].get('meta', []):
+                     if kv.get('k') == 'fault_model':
+                         fault_model = kv.get('v')
+                         break
+
+         # get rupture set max jump distance
+         max_jump_distance = ""
+         for filenode in subtask['node']['child']['files']['edges']:
+             # print("FN:", filenode)
+             if filenode['node']['file'].get('meta', []):
+                 for kv in filenode['node']['file'].get('meta', []):
+                     if kv.get('k') == 'max_jump_distance':
+                         max_jump_distance = kv.get('v')
+                         break
+
+         for filenode in subtask['node']['child']['files']['edges']:
+             # skip task inputs
+             if filenode['node']['role'] == 'READ':
+                 continue
+             if filenode['node']['file']['file_name'][-3:] == file_extension:
+                 # inversion_meta = dict()  ## this relies on order of
+                 # for kv in filenode['node']['file']['meta']:
+                 #     inversion_meta[kv['k']] = kv['v']
+                 res = dict(
+                     id=filenode['node']['file']['id'],
+                     file_name=filenode['node']['file']['file_name'],
+                     file_size=filenode['node']['file']['file_size'],
+                 )
+
+                 if fault_model:
+                     res['fault_model'] = fault_model
+                 if max_jump_distance:
+                     res['max_jump_distance'] = max_jump_distance
+                 yield res
+                 # TESTING
+                 # return
+
+
+ def get_output_file_id(file_api: 'ToshiApi', single_file_id: str) -> Generator[dict[str, Any], None, None]:
+
+     api_result = file_api.get_file_detail(single_file_id)
+     fault_model = ""
+     max_jump_distance = ""
+
+     print("FN:", api_result)
+     if api_result['file_name'][-3:] == "zip":
+         res = dict(id=api_result['id'], file_name=api_result['file_name'], file_size=api_result['file_size'])
+
+         if api_result.get('meta'):
+             for kv in api_result['meta']:
+                 if kv.get('k') == 'fault_model':
+                     fault_model = kv.get('v')
+
+             for kv in api_result['meta']:
+                 if kv.get('k') == 'max_jump_distance':
+                     max_jump_distance = kv.get('v')
+
+         if fault_model:
+             res['fault_model'] = fault_model
+         if max_jump_distance:
+             res['max_jump_distance'] = max_jump_distance
+         yield res  # yep, yield just the one
+
+     return
+
+
+ def get_file_meta(file_api, single_file_id):
+
+     api_result = file_api.get_file_detail(single_file_id)
+     # return api_result.get('meta')
+     res = dict()
+
+     if api_result.get('meta'):
+         for kv in api_result['meta']:
+             res[kv['k']] = kv['v']
+         return res
+     else:
+         return None
+
+
+ def get_download_info(file_api: 'ToshiApi', file_infos: Iterable[dict[str, Any]]) -> Generator[dict, None, None]:
+     """
+     [{'id': 'RmlsZToyOS4wRUVjV0E=',
+     'file_name':
+     'RupSet_Cl_FM(CFM_0_3_SANSTVZ)_noInP(T)_slRtP(0.05)_slInL(F)_cfFr(0.75)_cfRN(2)_cfRTh(0.5)_cfRP(0.01)_
+     fvJm(T)_jmPTh(0.001)_cmRkTh(360)_mxJmD(15)_plCn(T)_adMnD(6)_adScFr(0)_bi(F)_stGrSp(2)_coFr(0.5).zip',
+     'file_size': 2498443,
+     'short_name': None}]
+     """
+     # file_info = {}
+     for itm in file_infos:
+         api_result = file_api.get_file_download_url(itm['id'])
+         # print(api_result)
+         yield dict(dict(file_url=api_result['file_url']), **itm)  # merge the dicts
+
+
+ def download_files(
+     file_api: 'ToshiApi',
+     file_generator: Iterable[dict[str, Any]],
+     dest_folder: str,
+     id_suffix: bool = False,
+     overwrite: bool = False,
+     skip_existing: bool = False,
+     skip_download: bool = False,
+ ) -> dict[str, dict]:
+     """
+     file_generator = get_output_file_ids(general_api, upstream_task_id)  # for files by upstream task ID
+
+     or
+
+     file_generator = get_output_file_id(file_api, file_id)  # for file by file ID
+     """
+     downloads = dict()
+
+     for info in get_download_info(file_api, file_generator):
+
+         folder = Path(dest_folder, 'downloads', info['id'])
+         folder.mkdir(parents=True, exist_ok=True)
+
+         # we can skip if file exists and has correct file_size
+         file_path: str | PurePath = PurePath(folder, info['file_name'])
+
+         if id_suffix:
+             file_path = str(file_path).replace('.zip', f"_{info['id']}.zip")
+
+         # shortname = info['short_name'] or info['id']
+         if skip_existing and os.path.isfile(file_path):
+             print(f"Don't reprocess existing file: {file_path}")
+             continue
+
+         downloads[info['id']] = dict(id=info['id'], filepath=str(file_path), info=info)
+
+         if not overwrite and os.path.isfile(file_path):
+             print(f"Skip DL for existing file: {file_path}")
+             continue
+
+         if skip_download:
+             print("Skipping download -> aws mode")
+             continue
+
+         # here we pull the file
+         # print(info['file_url'])
+         # r0 = requests.head(info['file_url'])
+         r1 = requests.get(info['file_url'])
+         with open(str(file_path), 'wb') as f:
+             f.write(r1.content)
+             f.flush()
+         print("downloaded input file:", file_path, f)
+         if os.path.getsize(file_path) != info['file_size']:
+             raise RuntimeError("downloaded file size mismatch")
+
+     return downloads
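Tying these helpers together, the `download_files` docstring names two entry points: a generator of file records (by upstream task ID via `get_output_file_ids`, or by single file ID via `get_output_file_id`) piped into the downloader. A hedged usage sketch, assuming an already-constructed `ToshiApi` client; the client's constructor is omitted here because its signature is not shown in this diff (see `runzi/automation/toshi_api/toshi_api.py`):

```python
# Usage sketch based on the download_files docstring; `file_api` is an
# already-configured ToshiApi client supplied by the caller.
from typing import Any

from runzi.automation.file_utils import download_files, get_output_file_id


def fetch_one(file_api: Any, file_id: str, dest_folder: str) -> dict:
    """Download a single toshi file by ID and return the local download records."""
    file_generator = get_output_file_id(file_api, file_id)  # yields at most one record
    return download_files(file_api, file_generator, dest_folder, skip_existing=True)
```

Each returned record maps the toshi file ID to its verified local `filepath`, so callers can hand the downloaded archives straight to the next task in the pipeline.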