databricks-dbt-factory 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,156 @@
1
+ # macos
2
+
3
+ .DS_Store
4
+ *.DS_Store
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ *.out
64
+
65
+ # Django stuff:
66
+ *.log
67
+ local_settings.py
68
+ db.sqlite3
69
+ db.sqlite3-journal
70
+
71
+ # Flask stuff:
72
+ instance/
73
+ .webassets-cache
74
+
75
+ # Scrapy stuff:
76
+ .scrapy
77
+
78
+ # Sphinx documentation
79
+ docs/_build/
80
+
81
+ # PyBuilder
82
+ .pybuilder/
83
+ target/
84
+
85
+ # Jupyter Notebook
86
+ .ipynb_checkpoints
87
+
88
+ # IPython
89
+ profile_default/
90
+ ipython_config.py
91
+
92
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
93
+ __pypackages__/
94
+
95
+ # Celery stuff
96
+ celerybeat-schedule
97
+ celerybeat.pid
98
+
99
+ # SageMath parsed files
100
+ *.sage.py
101
+
102
+ # Environments
103
+ .env.admin
104
+ .venv
105
+ .env.*
106
+ env/
107
+ venv/
108
+ ENV/
109
+ env.bak/
110
+ venv.bak/
111
+ .env
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # pytype static type analyzer
132
+ .pytype/
133
+
134
+ # Cython debug symbols
135
+ cython_debug/
136
+
137
+ # PyCharm
138
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
139
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
140
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
141
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
142
+ .idea/
143
+
144
+ # ruff
145
+ .ruff_cache
146
+ /scratch
147
+
148
+ # dev files and scratches
149
+ dev/cleanup.py
150
+
151
+ .databricks
152
+ .vscode
153
+
154
+ .python-version
155
+ .databricks-login.json
156
+ .local-dev
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-present mwojtyczka <wojtyczka.marcin@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.3
2
+ Name: databricks-dbt-factory
3
+ Version: 0.0.1
4
+ Summary: Databricks dbt factory library for creating Databricks Job definition where individual models are run as separate tasks.
5
+ Project-URL: Documentation, https://github.com/mwojtyczka/databricks-dbt-factory#readme
6
+ Project-URL: Issues, https://github.com/mwojtyczka/databricks-dbt-factory/issues
7
+ Project-URL: Source, https://github.com/mwojtyczka/databricks-dbt-factory
8
+ Author-email: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
9
+ License: MIT
10
+ Keywords: Databricks,dbt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Framework :: Pytest
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: Operating System :: MacOS
17
+ Classifier: Operating System :: Microsoft :: Windows
18
+ Classifier: Programming Language :: Python
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: Implementation :: CPython
23
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
24
+ Classifier: Topic :: Software Development :: Libraries
25
+ Classifier: Topic :: Utilities
26
+ Requires-Python: >=3.10
27
+ Requires-Dist: pyyaml~=6.0.1
28
+ Description-Content-Type: text/markdown
29
+
30
+ Databricks dbt factory
31
+ ===
32
+
33
+ Databricks dbt factory is a simple library to generate Databricks Job tasks based on a dbt manifest.
34
+ The tool can overwrite the tasks in an existing Databricks job definition, either updating the file in place or writing a new definition file.
35
+
36
+ [![PyPI - Version](https://img.shields.io/pypi/v/databricks-dbt-factory.svg)](https://pypi.org/project/databricks-dbt-factory)
37
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/databricks-dbt-factory.svg)](https://pypi.org/project/databricks-dbt-factory)
38
+
39
+ -----
40
+
41
+ **Table of Contents**
42
+
43
+ - [Installation](#installation)
44
+ - [Usage](#usage)
45
+ - [Contribution](#contribution)
46
+ - [License](#license)
47
+
48
+ # Motivation
49
+
50
+ The current integration of dbt with Databricks Workflows treats the entire dbt project as a single execution unit (black box), limiting flexibility and debugging options.
51
+
52
+ This project breaks down each dbt object (seed/snapshot/model/test) into a separate Workflow task, offering several key benefits:
53
+ * Simplified Troubleshooting: Isolating tasks makes it easier to identify and resolve issues specific to a single model
54
+ * Enhanced Logging and Notifications: Provides more detailed logs and precise error alerts, improving debugging efficiency
55
+ * Better Retriability: Enables retrying only the failed model tasks, saving time and resources compared to rerunning the entire project
56
+ * Seamless Testing: Allows running dbt data tests on tables immediately after a model completes, ensuring faster validation and feedback
57
+
58
+ ### Databricks Workflows run all dbt objects at once:
59
+ ![before](docs/before.png?)
60
+
61
+ ![dbt_task](docs/dbt_task.png?)
62
+
63
+ ### The tool generates workflows where dbt objects are run as individual Databricks Workflow tasks:
64
+ ![after](docs/after.png?)
65
+
66
+ ![workflow](docs/workflow.png?)
67
+
68
+ # Installation
69
+
70
+ ```shell
71
+ pip install databricks-dbt-factory
72
+ ```
73
+
74
+ # Usage
75
+
76
+ Update tasks in the existing Databricks job definition and write the results to `job_definition_new.yaml`:
77
+ ```shell
78
+ databricks_dbt_factory \
79
+ --dbt-manifest-path tests/test_data/manifest.json \
80
+ --input-job-spec-path tests/test_data/job_definition_template.yaml \
81
+ --target-job-spec-path job_definition_new.yaml \
82
+ --source GIT \
83
+ --target dev
84
+ ```
85
+
86
+ **Arguments:**
87
+ - `--new-job-name` (type: str, optional, default: None): Optional job name. If provided, the existing job name in the job spec is updated.
88
+ - `--dbt-manifest-path` (type: str, required): Path to the manifest file.
89
+ - `--input-job-spec-path` (type: str, required): Path to the input job spec file.
90
+ - `--target-job-spec-path` (type: str, required): Path to the target job spec file.
91
+ - `--target` (type: str, required): dbt target to use.
92
+ - `--source` (type: str, optional, default: None): Optional project source. If not provided, WORKSPACE will be used.
93
+ - `--warehouse_id` (type: str, optional, default: None): Optional SQL Warehouse to run dbt models on.
94
+ - `--schema` (type: str, optional, default: None): Optional schema to write to.
95
+ - `--catalog` (type: str, optional, default: None): Optional catalog to write to.
96
+ - `--profiles-directory` (type: str, optional, default: None): Optional (relative) path to the profiles directory.
97
+ - `--project-directory` (type: str, optional, default: None): Optional (relative) path to the project directory.
98
+ - `--environment-key` (type: str, optional, default: Default): Optional key of an environment.
99
+ - `--extra-dbt-command-options` (type: str, optional, default: ""): Optional additional dbt command options.
100
+ - `--run-tests` (type: bool, optional, default: True): Whether to run data tests after the model. Enabled by default.
101
+ - `--dry-run` (type: bool, optional, default: False): Print generated tasks without updating the job spec file. Disabled by default.
102
+
103
+ You can also check all input arguments by running `databricks_dbt_factory --help`.
104
+
105
+ Demo of the tool can be found [here](https://github.com/mwojtyczka/dbt-demo).
106
+
107
+ # Contribution
108
+
109
+ See contribution guidance [here](CONTRIBUTING.md).
110
+
111
+ # License
112
+
113
+ `databricks-dbt-factory` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
@@ -0,0 +1,84 @@
1
+ Databricks dbt factory
2
+ ===
3
+
4
+ Databricks dbt factory is a simple library to generate Databricks Job tasks based on a dbt manifest.
5
+ The tool can overwrite the tasks in an existing Databricks job definition, either updating the file in place or writing a new definition file.
6
+
7
+ [![PyPI - Version](https://img.shields.io/pypi/v/databricks-dbt-factory.svg)](https://pypi.org/project/databricks-dbt-factory)
8
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/databricks-dbt-factory.svg)](https://pypi.org/project/databricks-dbt-factory)
9
+
10
+ -----
11
+
12
+ **Table of Contents**
13
+
14
+ - [Installation](#installation)
15
+ - [Usage](#usage)
16
+ - [Contribution](#contribution)
17
+ - [License](#license)
18
+
19
+ # Motivation
20
+
21
+ The current integration of dbt with Databricks Workflows treats the entire dbt project as a single execution unit (black box), limiting flexibility and debugging options.
22
+
23
+ This project breaks down each dbt object (seed/snapshot/model/test) into a separate Workflow task, offering several key benefits:
24
+ * Simplified Troubleshooting: Isolating tasks makes it easier to identify and resolve issues specific to a single model
25
+ * Enhanced Logging and Notifications: Provides more detailed logs and precise error alerts, improving debugging efficiency
26
+ * Better Retriability: Enables retrying only the failed model tasks, saving time and resources compared to rerunning the entire project
27
+ * Seamless Testing: Allows running dbt data tests on tables immediately after a model completes, ensuring faster validation and feedback
28
+
29
+ ### Databricks Workflows run all dbt objects at once:
30
+ ![before](docs/before.png?)
31
+
32
+ ![dbt_task](docs/dbt_task.png?)
33
+
34
+ ### The tool generates workflows where dbt objects are run as individual Databricks Workflow tasks:
35
+ ![after](docs/after.png?)
36
+
37
+ ![workflow](docs/workflow.png?)
38
+
39
+ # Installation
40
+
41
+ ```shell
42
+ pip install databricks-dbt-factory
43
+ ```
44
+
45
+ # Usage
46
+
47
+ Update tasks in the existing Databricks job definition and write the results to `job_definition_new.yaml`:
48
+ ```shell
49
+ databricks_dbt_factory \
50
+ --dbt-manifest-path tests/test_data/manifest.json \
51
+ --input-job-spec-path tests/test_data/job_definition_template.yaml \
52
+ --target-job-spec-path job_definition_new.yaml \
53
+ --source GIT \
54
+ --target dev
55
+ ```
56
+
57
+ **Arguments:**
58
+ - `--new-job-name` (type: str, optional, default: None): Optional job name. If provided, the existing job name in the job spec is updated.
59
+ - `--dbt-manifest-path` (type: str, required): Path to the manifest file.
60
+ - `--input-job-spec-path` (type: str, required): Path to the input job spec file.
61
+ - `--target-job-spec-path` (type: str, required): Path to the target job spec file.
62
+ - `--target` (type: str, required): dbt target to use.
63
+ - `--source` (type: str, optional, default: None): Optional project source. If not provided, WORKSPACE will be used.
64
+ - `--warehouse_id` (type: str, optional, default: None): Optional SQL Warehouse to run dbt models on.
65
+ - `--schema` (type: str, optional, default: None): Optional schema to write to.
66
+ - `--catalog` (type: str, optional, default: None): Optional catalog to write to.
67
+ - `--profiles-directory` (type: str, optional, default: None): Optional (relative) path to the profiles directory.
68
+ - `--project-directory` (type: str, optional, default: None): Optional (relative) path to the project directory.
69
+ - `--environment-key` (type: str, optional, default: Default): Optional key of an environment.
70
+ - `--extra-dbt-command-options` (type: str, optional, default: ""): Optional additional dbt command options.
71
+ - `--run-tests` (type: bool, optional, default: True): Whether to run data tests after the model. Enabled by default.
72
+ - `--dry-run` (type: bool, optional, default: False): Print generated tasks without updating the job spec file. Disabled by default.
73
+
74
+ You can also check all input arguments by running `databricks_dbt_factory --help`.
75
+
76
+ Demo of the tool can be found [here](https://github.com/mwojtyczka/dbt-demo).
77
+
78
+ # Contribution
79
+
80
+ See contribution guidance [here](CONTRIBUTING.md).
81
+
82
+ # License
83
+
84
+ `databricks-dbt-factory` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
File without changes
@@ -0,0 +1,84 @@
1
+ from databricks_dbt_factory import TaskFactory
2
+ from databricks_dbt_factory.SpecsHandler import SpecsHandler
3
+ from databricks_dbt_factory.DbtTask import DbtTask
4
+
5
+
6
class DbtFactory:
    """Builds Databricks job task definitions out of the nodes of a dbt manifest."""

    def __init__(self, file_handler: SpecsHandler, task_factories: dict[str, TaskFactory]):
        """
        Store the collaborators used to read specs and to build tasks.

        Args:
            file_handler (SpecsHandler): Reads the dbt manifest and writes the job specification.
            task_factories (dict[str, TaskFactory]): Task factory to use, keyed by dbt resource type.
        """
        self.task_factories = task_factories
        self.file_handler = file_handler

    def create_tasks_and_update_job_spec(
        self,
        dbt_manifest_path: str,
        input_job_spec_path: str,
        target_job_spec_path: str,
        new_job_name: str | None = None,
        dry_run: bool = False,
    ):
        """
        Build job tasks from a dbt manifest and store them in a job specification file.

        Args:
            dbt_manifest_path (str): Path of the dbt manifest to read.
            input_job_spec_path (str): Path of the job specification whose tasks get replaced.
            target_job_spec_path (str): Path the updated job specification is written to.
            new_job_name (str, optional): Forwarded to the file handler as the new job name. Defaults to None.
            dry_run (bool, optional): When True the tasks are only printed and no file is written.
                Defaults to False.
        """
        task_dicts = self.create_tasks(self.file_handler.read_dbt_manifest(dbt_manifest_path))

        if dry_run:
            # Dry run: show the generated tasks and leave every file untouched.
            print(task_dicts)
            return

        self.file_handler.replace_tasks_in_job_spec(
            input_job_spec_path, task_dicts, target_job_spec_path, new_job_name
        )

    def create_tasks(self, dbt_manifest: dict) -> list[dict]:
        """
        Build the job tasks for a dbt manifest as plain dictionaries.

        Args:
            dbt_manifest (dict): Parsed dbt manifest.

        Returns:
            list[dict]: One dictionary per generated task, in manifest node order.
        """
        return [dbt_task.to_dict() for dbt_task in self._create_tasks(dbt_manifest)]

    def _create_tasks(self, dbt_manifest: dict) -> list[DbtTask]:
        """
        Build one task per manifest node whose resource type has a registered factory.

        Args:
            dbt_manifest (dict): Parsed dbt manifest; nodes are read from its 'nodes' entry.

        Returns:
            list[DbtTask]: The tasks returned by the matching factories, in manifest node order.
        """
        factories = self.task_factories

        # Nodes whose resource type has no registered factory are silently skipped.
        supported_nodes = (
            (full_name, info)
            for full_name, info in dbt_manifest.get('nodes', {}).items()
            if info['resource_type'] in factories
        )

        return [
            factories[info['resource_type']].create_task(
                info['name'],
                info,
                full_name.replace('.', '_'),  # dots are replaced so the name can serve as a task key
            )
            for full_name, info in supported_nodes
        ]
@@ -0,0 +1,80 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any
3
+
4
+
5
+ @dataclass(frozen=True)
6
+ class DbtTaskOptions:
7
+ environment_key: str = "Default" # serverless env
8
+ """The key of an environment. It has to be unique within a job."""
9
+
10
+ catalog: str | None = None
11
+ """Optional name of the catalog to use. The value is the top level in the 3-level namespace of
12
+ Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a
13
+ warehouse_id is specified. Requires dbt-databricks >= 1.1.1."""
14
+
15
+ profiles_directory: str | None = None
16
+ """Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is
17
+ specified. If no warehouse_id is specified and this folder is unset, the root directory is used."""
18
+
19
+ project_directory: str | None = None
20
+ """Path to the project directory. Optional for Git sourced tasks, in which case if no value is
21
+ provided, the root of the Git repository is used."""
22
+
23
+ schema: str | None = None
24
+ """Optional schema to write to. This parameter is only used when a warehouse_id is also provided.
25
+ If not provided, the `default` schema is used."""
26
+
27
+ source: str | None = None
28
+ """Optional location type of the project directory. When set to `WORKSPACE`, the project will be
29
+ retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved
30
+ from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if
31
+ `git_source` is defined and `WORKSPACE` otherwise.
32
+
33
+ * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud
34
+ Git provider."""
35
+
36
+ warehouse_id: str | None = None
37
+ """ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the
38
+ profile and connection details to dbt. It can be overridden on a per-command basis by using the
39
+ `--profiles-dir` command line argument."""
40
+
41
+
42
@dataclass(frozen=True)
class DbtTask:
    """A single dbt task entry of a Databricks job definition."""

    task_key: str
    commands: list[str]
    options: DbtTaskOptions
    depends_on: list[str] | None = None

    def to_dict(self) -> dict:
        """Render the task as the dictionary used in the job definition.

        Returns:
            dict: Mapping with the keys 'task_key', 'dbt_task', 'environment_key' and
            'depends_on'. 'dbt_task' always holds 'commands'; the optional settings are
            added to it only when they are set to a truthy value.
        """
        opts = self.options
        dbt_task: dict[str, Any] = {'commands': self.commands}

        # Optional settings, emitted in this fixed order and only when truthy:
        #   warehouse_id       - not required if using "None (Manual) / Serverless"
        #   catalog            - can only be specified if warehouse_id is specified
        #   profiles_directory - only if no warehouse_id is specified
        optional_settings = (
            'source',
            'project_directory',
            'schema',
            'warehouse_id',
            'catalog',
            'profiles_directory',
        )
        for setting in optional_settings:
            value = getattr(opts, setting)
            if value:
                dbt_task[setting] = value

        # A missing dependency list is rendered as an empty list.
        upstream_refs = [{'task_key': upstream} for upstream in (self.depends_on or [])]

        return {
            'task_key': self.task_key,
            'dbt_task': dbt_task,
            'environment_key': opts.environment_key,
            'depends_on': upstream_refs,
        }
@@ -0,0 +1,68 @@
1
+ import json
2
+ import yaml
3
+
4
+
5
+ class SpecsHandler:
6
+ """Handles reading and writing files for dbt manifests and databricks job definitions."""
7
+
8
+ @staticmethod
9
+ def read_dbt_manifest(path: str) -> dict:
10
+ """
11
+ Reads a JSON manifest file and returns its content as a dictionary.
12
+
13
+ Args:
14
+ path (str): Path to the manifest file.
15
+
16
+ Returns:
17
+ dict: Parsed content of the manifest file.
18
+
19
+ Raises:
20
+ FileNotFoundError: If the file does not exist.
21
+ ValueError: If the file is not a valid manifest file.
22
+ """
23
+ try:
24
+ with open(path, 'r', encoding="utf-8") as file:
25
+ return json.load(file)
26
+ except FileNotFoundError as e:
27
+ raise FileNotFoundError(f"Manifest file not found: {path}. Details: {e}") from e
28
+ except json.JSONDecodeError as e:
29
+ raise ValueError(f"Error parsing JSON from manifest file: {path}. Details: {e}") from e
30
+
31
+ @staticmethod
32
+ def replace_tasks_in_job_spec(
33
+ input_job_spec_path: str,
34
+ new_tasks: list[dict],
35
+ target_job_spec_path: str,
36
+ new_job_name: str | None = None,
37
+ ) -> None:
38
+ """Replace the tasks field in a Databricks job definition YAML file. The first job is only updated.
39
+ Args:
40
+ input_job_spec_path (str): Path to the job definition YAML file.
41
+ new_tasks (dict): New tasks to replace the existing tasks in the job definition file.
42
+ target_job_spec_path (str): Path to save the updated job definition file.
43
+ new_job_name (str, optional): The name of the job to update. Defaults to None.
44
+
45
+ Raises:
46
+ KeyError: If no jobs are found in the provided YAML file.
47
+ """
48
+ with open(input_job_spec_path, 'r', encoding="utf-8") as file:
49
+ job_definition = yaml.safe_load(file)
50
+
51
+ jobs = job_definition.get('resources', {}).get('jobs', {})
52
+
53
+ if jobs is None:
54
+ raise KeyError("No jobs found in the provided YAML file.")
55
+
56
+ # replaces the first job only!
57
+ first_job_key = next(iter(jobs))
58
+ if new_job_name:
59
+ jobs[new_job_name] = jobs.pop(first_job_key)
60
+ first_job_key = new_job_name
61
+
62
+ first_job = jobs[first_job_key]
63
+ if new_job_name:
64
+ first_job['name'] = new_job_name
65
+ first_job['tasks'] = new_tasks # Replace tasks field
66
+
67
+ with open(target_job_spec_path, 'w', encoding="utf-8") as file:
68
+ yaml.dump(job_definition, file, sort_keys=False, width=1000)