apache-airflow-providers-dbt-cloud 3.2.3rc1__tar.gz → 3.3.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache-airflow-providers-dbt-cloud-3.2.3rc1/apache_airflow_providers_dbt_cloud.egg-info → apache-airflow-providers-dbt-cloud-3.3.0rc2}/PKG-INFO +13 -11
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/README.rst +9 -8
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/__init__.py +1 -1
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/get_provider_info.py +1 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/hooks/dbt.py +52 -19
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/operators/dbt.py +24 -4
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/sensors/dbt.py +28 -6
- apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/__init__.py +16 -0
- apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/openlineage.py +140 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2/apache_airflow_providers_dbt_cloud.egg-info}/PKG-INFO +13 -11
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt +2 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/requires.txt +3 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/pyproject.toml +7 -5
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/setup.cfg +3 -3
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/setup.py +5 -2
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/LICENSE +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/MANIFEST.in +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/NOTICE +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/hooks/__init__.py +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/operators/__init__.py +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/sensors/__init__.py +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/triggers/__init__.py +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/triggers/dbt.py +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/dependency_links.txt +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/entry_points.txt +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/not-zip-safe +0 -0
- {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/top_level.txt +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-dbt-cloud
|
|
3
|
-
Version: 3.2.3rc1
|
|
3
|
+
Version: 3.3.0rc2
|
|
4
4
|
Summary: Provider for Apache Airflow. Implements apache-airflow-providers-dbt-cloud package
|
|
5
5
|
Home-page: https://airflow.apache.org/
|
|
6
6
|
Download-URL: https://archive.apache.org/dist/airflow/providers
|
|
7
7
|
Author: Apache Software Foundation
|
|
8
8
|
Author-email: dev@airflow.apache.org
|
|
9
9
|
License: Apache License 2.0
|
|
10
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
|
|
11
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
|
|
10
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
|
|
11
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
|
|
12
12
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
13
13
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
14
14
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
@@ -30,6 +30,7 @@ Classifier: Topic :: System :: Monitoring
|
|
|
30
30
|
Requires-Python: ~=3.8
|
|
31
31
|
Description-Content-Type: text/x-rst
|
|
32
32
|
Provides-Extra: http
|
|
33
|
+
Provides-Extra: openlineage
|
|
33
34
|
License-File: LICENSE
|
|
34
35
|
License-File: NOTICE
|
|
35
36
|
|
|
@@ -71,7 +72,7 @@ License-File: NOTICE
|
|
|
71
72
|
|
|
72
73
|
Package ``apache-airflow-providers-dbt-cloud``
|
|
73
74
|
|
|
74
|
-
Release: ``3.2.3rc1``
|
|
75
|
+
Release: ``3.3.0rc2``
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
`dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
|
|
@@ -84,7 +85,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
|
|
|
84
85
|
are in ``airflow.providers.dbt.cloud`` python package.
|
|
85
86
|
|
|
86
87
|
You can find package information and changelog for the provider
|
|
87
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
|
|
88
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.
|
|
88
89
|
|
|
89
90
|
|
|
90
91
|
Installation
|
|
@@ -121,11 +122,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
|
|
|
121
122
|
pip install apache-airflow-providers-dbt-cloud[http]
|
|
122
123
|
|
|
123
124
|
|
|
124
|
-
|
|
125
|
-
Dependent package
|
|
126
|
-
|
|
127
|
-
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_
|
|
128
|
-
|
|
125
|
+
============================================================================================================== ===============
|
|
126
|
+
Dependent package Extra
|
|
127
|
+
============================================================================================================== ===============
|
|
128
|
+
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
|
|
129
|
+
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
130
|
+
============================================================================================================== ===============
|
|
129
131
|
|
|
130
132
|
The changelog for the provider package can be found in the
|
|
131
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
|
|
133
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
|
|
37
37
|
Package ``apache-airflow-providers-dbt-cloud``
|
|
38
38
|
|
|
39
|
-
Release: ``3.2.3rc1``
|
|
39
|
+
Release: ``3.3.0rc2``
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
`dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
|
|
@@ -49,7 +49,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
|
|
|
49
49
|
are in ``airflow.providers.dbt.cloud`` python package.
|
|
50
50
|
|
|
51
51
|
You can find package information and changelog for the provider
|
|
52
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
|
|
52
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
Installation
|
|
@@ -86,11 +86,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
|
|
|
86
86
|
pip install apache-airflow-providers-dbt-cloud[http]
|
|
87
87
|
|
|
88
88
|
|
|
89
|
-
|
|
90
|
-
Dependent package
|
|
91
|
-
|
|
92
|
-
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_
|
|
93
|
-
|
|
89
|
+
============================================================================================================== ===============
|
|
90
|
+
Dependent package Extra
|
|
91
|
+
============================================================================================================== ===============
|
|
92
|
+
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
|
|
93
|
+
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
94
|
+
============================================================================================================== ===============
|
|
94
95
|
|
|
95
96
|
The changelog for the provider package can be found in the
|
|
96
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
|
|
97
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
+
import asyncio
|
|
19
20
|
import json
|
|
20
21
|
import time
|
|
21
22
|
from enum import Enum
|
|
@@ -211,12 +212,11 @@ class DbtCloudHook(HttpHook):
|
|
|
211
212
|
async def get_headers_tenants_from_connection(self) -> tuple[dict[str, Any], str]:
|
|
212
213
|
"""Get Headers, tenants from the connection details."""
|
|
213
214
|
headers: dict[str, Any] = {}
|
|
214
|
-
|
|
215
|
-
tenant = self._get_tenant_domain(connection)
|
|
215
|
+
tenant = self._get_tenant_domain(self.connection)
|
|
216
216
|
package_name, provider_version = _get_provider_info()
|
|
217
217
|
headers["User-Agent"] = f"{package_name}-v{provider_version}"
|
|
218
218
|
headers["Content-Type"] = "application/json"
|
|
219
|
-
headers["Authorization"] = f"Token {connection.password}"
|
|
219
|
+
headers["Authorization"] = f"Token {self.connection.password}"
|
|
220
220
|
return headers, tenant
|
|
221
221
|
|
|
222
222
|
@provide_account_id
|
|
@@ -234,13 +234,14 @@ class DbtCloudHook(HttpHook):
|
|
|
234
234
|
endpoint = f"{account_id}/runs/{run_id}/"
|
|
235
235
|
headers, tenant = await self.get_headers_tenants_from_connection()
|
|
236
236
|
url, params = self.get_request_url_params(tenant, endpoint, include_related)
|
|
237
|
-
async with aiohttp.ClientSession(headers=headers) as session
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
237
|
+
async with aiohttp.ClientSession(headers=headers) as session, session.get(
|
|
238
|
+
url, params=params
|
|
239
|
+
) as response:
|
|
240
|
+
try:
|
|
241
|
+
response.raise_for_status()
|
|
242
|
+
return await response.json()
|
|
243
|
+
except ClientResponseError as e:
|
|
244
|
+
raise AirflowException(f"{e.status}:{e.message}")
|
|
244
245
|
|
|
245
246
|
async def get_job_status(
|
|
246
247
|
self, run_id: int, account_id: int | None = None, include_related: list[str] | None = None
|
|
@@ -253,15 +254,10 @@ class DbtCloudHook(HttpHook):
|
|
|
253
254
|
:param include_related: Optional. List of related fields to pull with the run.
|
|
254
255
|
Valid values are "trigger", "job", "repository", and "environment".
|
|
255
256
|
"""
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
)
|
|
261
|
-
job_run_status: int = response["data"]["status"]
|
|
262
|
-
return job_run_status
|
|
263
|
-
except Exception as e:
|
|
264
|
-
raise e
|
|
257
|
+
self.log.info("Getting the status of job run %s.", run_id)
|
|
258
|
+
response = await self.get_job_details(run_id, account_id=account_id, include_related=include_related)
|
|
259
|
+
job_run_status: int = response["data"]["status"]
|
|
260
|
+
return job_run_status
|
|
265
261
|
|
|
266
262
|
@cached_property
|
|
267
263
|
def connection(self) -> Connection:
|
|
@@ -599,6 +595,43 @@ class DbtCloudHook(HttpHook):
|
|
|
599
595
|
endpoint=f"{account_id}/runs/{run_id}/artifacts/{path}", payload={"step": step}
|
|
600
596
|
)
|
|
601
597
|
|
|
598
|
+
@fallback_to_default_account
|
|
599
|
+
async def get_job_run_artifacts_concurrently(
|
|
600
|
+
self,
|
|
601
|
+
run_id: int,
|
|
602
|
+
artifacts: list[str],
|
|
603
|
+
account_id: int | None = None,
|
|
604
|
+
step: int | None = None,
|
|
605
|
+
):
|
|
606
|
+
"""
|
|
607
|
+
Retrieves a list of chosen artifact files generated for a step in completed run of a dbt Cloud job.
|
|
608
|
+
|
|
609
|
+
By default, this returns artifacts from the last step in the run.
|
|
610
|
+
This takes advantage of the asynchronous calls to speed up the retrieval.
|
|
611
|
+
|
|
612
|
+
:param run_id: The ID of a dbt Cloud job run.
|
|
613
|
+
:param step: The index of the Step in the Run to query for artifacts. The first step in the
|
|
614
|
+
run has the index 1. If the step parameter is omitted, artifacts for the last step in the run will
|
|
615
|
+
be returned.
|
|
616
|
+
:param path: The file path related to the artifact file. Paths are rooted at the target/ directory.
|
|
617
|
+
Use "manifest.json", "catalog.json", or "run_results.json" to download dbt-generated artifacts
|
|
618
|
+
for the run.
|
|
619
|
+
:param account_id: Optional. The ID of a dbt Cloud account.
|
|
620
|
+
|
|
621
|
+
:return: The request response.
|
|
622
|
+
"""
|
|
623
|
+
tasks = {
|
|
624
|
+
artifact: sync_to_async(self.get_job_run_artifact)(
|
|
625
|
+
run_id,
|
|
626
|
+
path=artifact,
|
|
627
|
+
account_id=account_id,
|
|
628
|
+
step=step,
|
|
629
|
+
)
|
|
630
|
+
for artifact in artifacts
|
|
631
|
+
}
|
|
632
|
+
results = await asyncio.gather(*tasks.values())
|
|
633
|
+
return {filename: result.json() for filename, result in zip(tasks.keys(), results)}
|
|
634
|
+
|
|
602
635
|
def test_connection(self) -> tuple[bool, str]:
|
|
603
636
|
"""Test dbt Cloud connection."""
|
|
604
637
|
try:
|
|
@@ -19,6 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
import json
|
|
20
20
|
import time
|
|
21
21
|
import warnings
|
|
22
|
+
from functools import cached_property
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
from typing import TYPE_CHECKING, Any
|
|
24
25
|
|
|
@@ -32,8 +33,10 @@ from airflow.providers.dbt.cloud.hooks.dbt import (
|
|
|
32
33
|
JobRunInfo,
|
|
33
34
|
)
|
|
34
35
|
from airflow.providers.dbt.cloud.triggers.dbt import DbtCloudRunJobTrigger
|
|
36
|
+
from airflow.providers.dbt.cloud.utils.openlineage import generate_openlineage_events_from_dbt_cloud_run
|
|
35
37
|
|
|
36
38
|
if TYPE_CHECKING:
|
|
39
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
37
40
|
from airflow.utils.context import Context
|
|
38
41
|
|
|
39
42
|
|
|
@@ -114,8 +117,7 @@ class DbtCloudRunJobOperator(BaseOperator):
|
|
|
114
117
|
self.timeout = timeout
|
|
115
118
|
self.check_interval = check_interval
|
|
116
119
|
self.additional_run_config = additional_run_config or {}
|
|
117
|
-
self.
|
|
118
|
-
self.run_id: int
|
|
120
|
+
self.run_id: int | None = None
|
|
119
121
|
self.deferrable = deferrable
|
|
120
122
|
|
|
121
123
|
def execute(self, context: Context):
|
|
@@ -124,7 +126,6 @@ class DbtCloudRunJobOperator(BaseOperator):
|
|
|
124
126
|
f"Triggered via Apache Airflow by task {self.task_id!r} in the {self.dag.dag_id} DAG."
|
|
125
127
|
)
|
|
126
128
|
|
|
127
|
-
self.hook = DbtCloudHook(self.dbt_cloud_conn_id)
|
|
128
129
|
trigger_job_response = self.hook.trigger_job_run(
|
|
129
130
|
account_id=self.account_id,
|
|
130
131
|
job_id=self.job_id,
|
|
@@ -134,12 +135,13 @@ class DbtCloudRunJobOperator(BaseOperator):
|
|
|
134
135
|
additional_run_config=self.additional_run_config,
|
|
135
136
|
)
|
|
136
137
|
self.run_id = trigger_job_response.json()["data"]["id"]
|
|
138
|
+
print(self.run_id)
|
|
137
139
|
job_run_url = trigger_job_response.json()["data"]["href"]
|
|
138
140
|
# Push the ``job_run_url`` value to XCom regardless of what happens during execution so that the job
|
|
139
141
|
# run can be monitored via the operator link.
|
|
140
142
|
context["ti"].xcom_push(key="job_run_url", value=job_run_url)
|
|
141
143
|
|
|
142
|
-
if self.wait_for_termination:
|
|
144
|
+
if self.wait_for_termination and isinstance(self.run_id, int):
|
|
143
145
|
if self.deferrable is False:
|
|
144
146
|
self.log.info("Waiting for job run %s to terminate.", str(self.run_id))
|
|
145
147
|
|
|
@@ -196,6 +198,7 @@ class DbtCloudRunJobOperator(BaseOperator):
|
|
|
196
198
|
if event["status"] == "error":
|
|
197
199
|
raise AirflowException(event["message"])
|
|
198
200
|
self.log.info(event["message"])
|
|
201
|
+
self.run_id = event["run_id"]
|
|
199
202
|
return int(event["run_id"])
|
|
200
203
|
|
|
201
204
|
def on_kill(self) -> None:
|
|
@@ -211,6 +214,23 @@ class DbtCloudRunJobOperator(BaseOperator):
|
|
|
211
214
|
):
|
|
212
215
|
self.log.info("Job run %s has been cancelled successfully.", str(self.run_id))
|
|
213
216
|
|
|
217
|
+
@cached_property
|
|
218
|
+
def hook(self):
|
|
219
|
+
"""Returns DBT Cloud hook."""
|
|
220
|
+
return DbtCloudHook(self.dbt_cloud_conn_id)
|
|
221
|
+
|
|
222
|
+
def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
|
|
223
|
+
"""
|
|
224
|
+
Implementing _on_complete because job_run needs to be triggered first in execute method.
|
|
225
|
+
|
|
226
|
+
This should send additional events only if operator `wait_for_termination` is set to True.
|
|
227
|
+
"""
|
|
228
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
229
|
+
|
|
230
|
+
if isinstance(self.run_id, int) and self.wait_for_termination is True:
|
|
231
|
+
return generate_openlineage_events_from_dbt_cloud_run(operator=self, task_instance=task_instance)
|
|
232
|
+
return OperatorLineage()
|
|
233
|
+
|
|
214
234
|
|
|
215
235
|
class DbtCloudGetJobRunArtifactOperator(BaseOperator):
|
|
216
236
|
"""
|
|
@@ -18,15 +18,18 @@ from __future__ import annotations
|
|
|
18
18
|
|
|
19
19
|
import time
|
|
20
20
|
import warnings
|
|
21
|
+
from functools import cached_property
|
|
21
22
|
from typing import TYPE_CHECKING, Any
|
|
22
23
|
|
|
23
24
|
from airflow.configuration import conf
|
|
24
|
-
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
|
25
|
+
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
|
|
25
26
|
from airflow.providers.dbt.cloud.hooks.dbt import DbtCloudHook, DbtCloudJobRunException, DbtCloudJobRunStatus
|
|
26
27
|
from airflow.providers.dbt.cloud.triggers.dbt import DbtCloudRunJobTrigger
|
|
28
|
+
from airflow.providers.dbt.cloud.utils.openlineage import generate_openlineage_events_from_dbt_cloud_run
|
|
27
29
|
from airflow.sensors.base import BaseSensorOperator
|
|
28
30
|
|
|
29
31
|
if TYPE_CHECKING:
|
|
32
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
30
33
|
from airflow.utils.context import Context
|
|
31
34
|
|
|
32
35
|
|
|
@@ -78,15 +81,27 @@ class DbtCloudJobRunSensor(BaseSensorOperator):
|
|
|
78
81
|
|
|
79
82
|
self.deferrable = deferrable
|
|
80
83
|
|
|
84
|
+
@cached_property
|
|
85
|
+
def hook(self):
|
|
86
|
+
"""Returns DBT Cloud hook."""
|
|
87
|
+
return DbtCloudHook(self.dbt_cloud_conn_id)
|
|
88
|
+
|
|
81
89
|
def poke(self, context: Context) -> bool:
|
|
82
|
-
|
|
83
|
-
job_run_status = hook.get_job_run_status(run_id=self.run_id, account_id=self.account_id)
|
|
90
|
+
job_run_status = self.hook.get_job_run_status(run_id=self.run_id, account_id=self.account_id)
|
|
84
91
|
|
|
85
92
|
if job_run_status == DbtCloudJobRunStatus.ERROR.value:
|
|
86
|
-
|
|
93
|
+
# TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
|
|
94
|
+
message = f"Job run {self.run_id} has failed."
|
|
95
|
+
if self.soft_fail:
|
|
96
|
+
raise AirflowSkipException(message)
|
|
97
|
+
raise DbtCloudJobRunException(message)
|
|
87
98
|
|
|
88
99
|
if job_run_status == DbtCloudJobRunStatus.CANCELLED.value:
|
|
89
|
-
|
|
100
|
+
# TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
|
|
101
|
+
message = f"Job run {self.run_id} has been cancelled."
|
|
102
|
+
if self.soft_fail:
|
|
103
|
+
raise AirflowSkipException(message)
|
|
104
|
+
raise DbtCloudJobRunException(message)
|
|
90
105
|
|
|
91
106
|
return job_run_status == DbtCloudJobRunStatus.SUCCESS.value
|
|
92
107
|
|
|
@@ -121,10 +136,17 @@ class DbtCloudJobRunSensor(BaseSensorOperator):
|
|
|
121
136
|
execution was successful.
|
|
122
137
|
"""
|
|
123
138
|
if event["status"] in ["error", "cancelled"]:
|
|
124
|
-
|
|
139
|
+
message = f"Error in dbt: {event['message']}"
|
|
140
|
+
if self.soft_fail:
|
|
141
|
+
raise AirflowSkipException(message)
|
|
142
|
+
raise AirflowException()
|
|
125
143
|
self.log.info(event["message"])
|
|
126
144
|
return int(event["run_id"])
|
|
127
145
|
|
|
146
|
+
def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
|
|
147
|
+
"""Implementing _on_complete because job_run needs to be triggered first in execute method."""
|
|
148
|
+
return generate_openlineage_events_from_dbt_cloud_run(operator=self, task_instance=task_instance)
|
|
149
|
+
|
|
128
150
|
|
|
129
151
|
class DbtCloudJobRunAsyncSensor(DbtCloudJobRunSensor):
|
|
130
152
|
"""This class is deprecated.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import re
|
|
21
|
+
from contextlib import suppress
|
|
22
|
+
from typing import TYPE_CHECKING
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from airflow.models.taskinstance import TaskInstance
|
|
26
|
+
from airflow.providers.dbt.cloud.operators.dbt import DbtCloudRunJobOperator
|
|
27
|
+
from airflow.providers.dbt.cloud.sensors.dbt import DbtCloudJobRunSensor
|
|
28
|
+
from airflow.providers.openlineage.extractors.base import OperatorLineage
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def generate_openlineage_events_from_dbt_cloud_run(
|
|
32
|
+
operator: DbtCloudRunJobOperator | DbtCloudJobRunSensor, task_instance: TaskInstance
|
|
33
|
+
) -> OperatorLineage:
|
|
34
|
+
"""
|
|
35
|
+
Common method generating OpenLineage events from the DBT Cloud run.
|
|
36
|
+
|
|
37
|
+
This function retrieves information about a DBT Cloud run, including the associated job,
|
|
38
|
+
project, and execution details. It processes the run's artifacts, such as the manifest and run results,
|
|
39
|
+
in parallel for many steps.
|
|
40
|
+
Then it generates and emits OpenLineage events based on the executed DBT tasks.
|
|
41
|
+
|
|
42
|
+
:param operator: Instance of DBT Cloud operator that executed DBT tasks.
|
|
43
|
+
It already should have run_id and dbt cloud hook.
|
|
44
|
+
:param task_instance: Currently executed task instance
|
|
45
|
+
|
|
46
|
+
:return: An empty OperatorLineage object indicating the completion of events generation.
|
|
47
|
+
"""
|
|
48
|
+
from openlineage.common.provider.dbt import DbtCloudArtifactProcessor, ParentRunMetadata
|
|
49
|
+
|
|
50
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
51
|
+
from airflow.providers.openlineage.plugins.adapter import (
|
|
52
|
+
_DAG_NAMESPACE,
|
|
53
|
+
_PRODUCER,
|
|
54
|
+
OpenLineageAdapter,
|
|
55
|
+
)
|
|
56
|
+
from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
|
|
57
|
+
|
|
58
|
+
# if no account_id set this will fallback
|
|
59
|
+
job_run = operator.hook.get_job_run(
|
|
60
|
+
run_id=operator.run_id, account_id=operator.account_id, include_related=["run_steps,job"]
|
|
61
|
+
).json()["data"]
|
|
62
|
+
job = job_run["job"]
|
|
63
|
+
# retrieve account_id from job and use that starting from this line
|
|
64
|
+
account_id = job["account_id"]
|
|
65
|
+
project = operator.hook.get_project(project_id=job["project_id"], account_id=account_id).json()["data"]
|
|
66
|
+
connection = project["connection"]
|
|
67
|
+
execute_steps = job["execute_steps"]
|
|
68
|
+
run_steps = job_run["run_steps"]
|
|
69
|
+
|
|
70
|
+
# filter only dbt invocation steps
|
|
71
|
+
steps = []
|
|
72
|
+
for run_step in run_steps:
|
|
73
|
+
name = run_step["name"]
|
|
74
|
+
if name.startswith("Invoke dbt with `"):
|
|
75
|
+
regex_pattern = "Invoke dbt with `([^`.]*)`"
|
|
76
|
+
m = re.search(regex_pattern, name)
|
|
77
|
+
if m and m.group(1) in execute_steps:
|
|
78
|
+
steps.append(run_step["index"])
|
|
79
|
+
|
|
80
|
+
# catalog is available only if docs are generated
|
|
81
|
+
catalog = None
|
|
82
|
+
with suppress(Exception):
|
|
83
|
+
catalog = operator.hook.get_job_run_artifact(operator.run_id, path="catalog.json").json()["data"]
|
|
84
|
+
|
|
85
|
+
async def get_artifacts_for_steps(steps, artifacts):
|
|
86
|
+
"""Gets artifacts for a list of steps concurrently."""
|
|
87
|
+
tasks = [
|
|
88
|
+
operator.hook.get_job_run_artifacts_concurrently(
|
|
89
|
+
run_id=operator.run_id,
|
|
90
|
+
account_id=account_id,
|
|
91
|
+
step=step,
|
|
92
|
+
artifacts=artifacts,
|
|
93
|
+
)
|
|
94
|
+
for step in steps
|
|
95
|
+
]
|
|
96
|
+
return await asyncio.gather(*tasks)
|
|
97
|
+
|
|
98
|
+
# get artifacts for steps concurrently
|
|
99
|
+
step_artifacts = asyncio.run(
|
|
100
|
+
get_artifacts_for_steps(steps=steps, artifacts=["manifest.json", "run_results.json"])
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
# process each step in loop, sending generated events in the same order as steps
|
|
104
|
+
for artifacts in step_artifacts:
|
|
105
|
+
# process manifest
|
|
106
|
+
manifest = artifacts["manifest.json"]
|
|
107
|
+
|
|
108
|
+
if not artifacts.get("run_results.json", None):
|
|
109
|
+
continue
|
|
110
|
+
|
|
111
|
+
processor = DbtCloudArtifactProcessor(
|
|
112
|
+
producer=_PRODUCER,
|
|
113
|
+
job_namespace=_DAG_NAMESPACE,
|
|
114
|
+
skip_errors=False,
|
|
115
|
+
logger=operator.log,
|
|
116
|
+
manifest=manifest,
|
|
117
|
+
run_result=artifacts["run_results.json"],
|
|
118
|
+
profile=connection,
|
|
119
|
+
catalog=catalog,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# generate same run id of current task instance
|
|
123
|
+
parent_run_id = OpenLineageAdapter.build_task_instance_run_id(
|
|
124
|
+
operator.task_id, task_instance.execution_date, task_instance.try_number - 1
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
parent_job = ParentRunMetadata(
|
|
128
|
+
run_id=parent_run_id,
|
|
129
|
+
job_name=f"{task_instance.dag_id}.{task_instance.task_id}",
|
|
130
|
+
job_namespace=_DAG_NAMESPACE,
|
|
131
|
+
)
|
|
132
|
+
processor.dbt_run_metadata = parent_job
|
|
133
|
+
|
|
134
|
+
events = processor.parse().events()
|
|
135
|
+
|
|
136
|
+
client = get_openlineage_listener().adapter.get_or_create_openlineage_client()
|
|
137
|
+
|
|
138
|
+
for event in events:
|
|
139
|
+
client.emit(event=event)
|
|
140
|
+
return OperatorLineage()
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-dbt-cloud
|
|
3
|
-
Version: 3.2.3rc1
|
|
3
|
+
Version: 3.3.0rc2
|
|
4
4
|
Summary: Provider for Apache Airflow. Implements apache-airflow-providers-dbt-cloud package
|
|
5
5
|
Home-page: https://airflow.apache.org/
|
|
6
6
|
Download-URL: https://archive.apache.org/dist/airflow/providers
|
|
7
7
|
Author: Apache Software Foundation
|
|
8
8
|
Author-email: dev@airflow.apache.org
|
|
9
9
|
License: Apache License 2.0
|
|
10
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
|
|
11
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
|
|
10
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
|
|
11
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
|
|
12
12
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
13
13
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
14
14
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
@@ -30,6 +30,7 @@ Classifier: Topic :: System :: Monitoring
|
|
|
30
30
|
Requires-Python: ~=3.8
|
|
31
31
|
Description-Content-Type: text/x-rst
|
|
32
32
|
Provides-Extra: http
|
|
33
|
+
Provides-Extra: openlineage
|
|
33
34
|
License-File: LICENSE
|
|
34
35
|
License-File: NOTICE
|
|
35
36
|
|
|
@@ -71,7 +72,7 @@ License-File: NOTICE
|
|
|
71
72
|
|
|
72
73
|
Package ``apache-airflow-providers-dbt-cloud``
|
|
73
74
|
|
|
74
|
-
Release: ``3.2.3rc1``
|
|
75
|
+
Release: ``3.3.0rc2``
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
`dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
|
|
@@ -84,7 +85,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
|
|
|
84
85
|
are in ``airflow.providers.dbt.cloud`` python package.
|
|
85
86
|
|
|
86
87
|
You can find package information and changelog for the provider
|
|
87
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
|
|
88
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.
|
|
88
89
|
|
|
89
90
|
|
|
90
91
|
Installation
|
|
@@ -121,11 +122,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
|
|
|
121
122
|
pip install apache-airflow-providers-dbt-cloud[http]
|
|
122
123
|
|
|
123
124
|
|
|
124
|
-
|
|
125
|
-
Dependent package
|
|
126
|
-
|
|
127
|
-
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_
|
|
128
|
-
|
|
125
|
+
============================================================================================================== ===============
|
|
126
|
+
Dependent package Extra
|
|
127
|
+
============================================================================================================== ===============
|
|
128
|
+
`apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
|
|
129
|
+
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
130
|
+
============================================================================================================== ===============
|
|
129
131
|
|
|
130
132
|
The changelog for the provider package can be found in the
|
|
131
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
|
|
133
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
|
|
@@ -15,6 +15,8 @@ airflow/providers/dbt/cloud/sensors/__init__.py
|
|
|
15
15
|
airflow/providers/dbt/cloud/sensors/dbt.py
|
|
16
16
|
airflow/providers/dbt/cloud/triggers/__init__.py
|
|
17
17
|
airflow/providers/dbt/cloud/triggers/dbt.py
|
|
18
|
+
airflow/providers/dbt/cloud/utils/__init__.py
|
|
19
|
+
airflow/providers/dbt/cloud/utils/openlineage.py
|
|
18
20
|
apache_airflow_providers_dbt_cloud.egg-info/PKG-INFO
|
|
19
21
|
apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt
|
|
20
22
|
apache_airflow_providers_dbt_cloud.egg-info/dependency_links.txt
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
[tool.black]
|
|
18
18
|
line-length = 110
|
|
19
|
-
target-version = ['py37', 'py38', 'py39', 'py310']
|
|
19
|
+
target-version = ['py38', 'py39', 'py310', 'py311']
|
|
20
20
|
|
|
21
21
|
# Editable installs are currently broken using setuptools 64.0.0 and above. The problem is tracked in
|
|
22
22
|
# https://github.com/pypa/setuptools/issues/3548. We're also discussing how we could potentially fix
|
|
@@ -28,10 +28,8 @@ target-version = ['py37', 'py38', 'py39', 'py310']
|
|
|
28
28
|
requires = ['setuptools==67.2.0']
|
|
29
29
|
build-backend = "setuptools.build_meta"
|
|
30
30
|
|
|
31
|
-
[project]
|
|
32
|
-
requires-python = ">=3.8"
|
|
33
|
-
|
|
34
31
|
[tool.ruff]
|
|
32
|
+
target-version = "py38"
|
|
35
33
|
typing-modules = ["airflow.typing_compat"]
|
|
36
34
|
line-length = 110
|
|
37
35
|
extend-exclude = [
|
|
@@ -62,7 +60,9 @@ extend-select = [
|
|
|
62
60
|
"D402",
|
|
63
61
|
"D403",
|
|
64
62
|
"D412",
|
|
65
|
-
"D419"
|
|
63
|
+
"D419",
|
|
64
|
+
"TCH001", # typing-only-first-party-import
|
|
65
|
+
"TCH002", # typing-only-third-party-import
|
|
66
66
|
]
|
|
67
67
|
extend-ignore = [
|
|
68
68
|
"D203",
|
|
@@ -169,3 +169,5 @@ exclude_also = [
|
|
|
169
169
|
"@(typing(_extensions)?\\.)?overload",
|
|
170
170
|
"if TYPE_CHECKING:"
|
|
171
171
|
]
|
|
172
|
+
[tool.ruff.flake8-type-checking]
|
|
173
|
+
exempt-modules = ["typing", "typing_extensions"]
|
|
@@ -27,8 +27,8 @@ classifiers =
|
|
|
27
27
|
Programming Language :: Python :: 3.11
|
|
28
28
|
Topic :: System :: Monitoring
|
|
29
29
|
project_urls =
|
|
30
|
-
Documentation=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
|
|
31
|
-
Changelog=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
|
|
30
|
+
Documentation=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
|
|
31
|
+
Changelog=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
|
|
32
32
|
Bug Tracker=https://github.com/apache/airflow/issues
|
|
33
33
|
Source Code=https://github.com/apache/airflow
|
|
34
34
|
Slack Chat=https://s.apache.org/airflow-slack
|
|
@@ -60,6 +60,6 @@ apache_airflow_provider =
|
|
|
60
60
|
packages = airflow.providers.dbt.cloud
|
|
61
61
|
|
|
62
62
|
[egg_info]
|
|
63
|
-
tag_build = rc1
|
|
63
|
+
tag_build = rc2
|
|
64
64
|
tag_date = 0
|
|
65
65
|
|
{apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/setup.py
RENAMED
|
@@ -26,14 +26,17 @@
|
|
|
26
26
|
|
|
27
27
|
from setuptools import find_namespace_packages, setup
|
|
28
28
|
|
|
29
|
-
version = "3.2.3"
|
|
29
|
+
version = "3.3.0"
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def do_setup():
|
|
33
33
|
"""Perform the package apache-airflow-providers-dbt-cloud setup."""
|
|
34
34
|
setup(
|
|
35
35
|
version=version,
|
|
36
|
-
extras_require={"http": ["apache-airflow-providers-http"]},
|
|
36
|
+
extras_require={
|
|
37
|
+
"http": ["apache-airflow-providers-http"],
|
|
38
|
+
"openlineage": ["apache-airflow-providers-openlineage"],
|
|
39
|
+
},
|
|
37
40
|
packages=find_namespace_packages(
|
|
38
41
|
include=[
|
|
39
42
|
"airflow.providers.dbt.cloud",
|
{apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
{apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|