apache-airflow-providers-dbt-cloud 3.2.3rc1.tar.gz → 3.3.0rc2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {apache-airflow-providers-dbt-cloud-3.2.3rc1/apache_airflow_providers_dbt_cloud.egg-info → apache-airflow-providers-dbt-cloud-3.3.0rc2}/PKG-INFO +13 -11
  2. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/README.rst +9 -8
  3. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/__init__.py +1 -1
  4. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/get_provider_info.py +1 -0
  5. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/hooks/dbt.py +52 -19
  6. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/operators/dbt.py +24 -4
  7. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/sensors/dbt.py +28 -6
  8. apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/__init__.py +16 -0
  9. apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/openlineage.py +140 -0
  10. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2/apache_airflow_providers_dbt_cloud.egg-info}/PKG-INFO +13 -11
  11. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt +2 -0
  12. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/requires.txt +3 -0
  13. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/pyproject.toml +7 -5
  14. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/setup.cfg +3 -3
  15. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/setup.py +5 -2
  16. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/LICENSE +0 -0
  17. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/MANIFEST.in +0 -0
  18. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/NOTICE +0 -0
  19. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/hooks/__init__.py +0 -0
  20. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/operators/__init__.py +0 -0
  21. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/sensors/__init__.py +0 -0
  22. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/triggers/__init__.py +0 -0
  23. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/airflow/providers/dbt/cloud/triggers/dbt.py +0 -0
  24. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/dependency_links.txt +0 -0
  25. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/entry_points.txt +0 -0
  26. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/not-zip-safe +0 -0
  27. {apache-airflow-providers-dbt-cloud-3.2.3rc1 → apache-airflow-providers-dbt-cloud-3.3.0rc2}/apache_airflow_providers_dbt_cloud.egg-info/top_level.txt +0 -0
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/apache_airflow_providers_dbt_cloud.egg-info/PKG-INFO
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/PKG-INFO
@@ -1,14 +1,14 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-dbt-cloud
- Version: 3.2.3rc1
+ Version: 3.3.0rc2
  Summary: Provider for Apache Airflow. Implements apache-airflow-providers-dbt-cloud package
  Home-page: https://airflow.apache.org/
  Download-URL: https://archive.apache.org/dist/airflow/providers
  Author: Apache Software Foundation
  Author-email: dev@airflow.apache.org
  License: Apache License 2.0
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -30,6 +30,7 @@ Classifier: Topic :: System :: Monitoring
  Requires-Python: ~=3.8
  Description-Content-Type: text/x-rst
  Provides-Extra: http
+ Provides-Extra: openlineage
  License-File: LICENSE
  License-File: NOTICE

@@ -71,7 +72,7 @@ License-File: NOTICE

  Package ``apache-airflow-providers-dbt-cloud``

- Release: ``3.2.3rc1``
+ Release: ``3.3.0rc2``


  `dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
@@ -84,7 +85,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
  are in ``airflow.providers.dbt.cloud`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.


  Installation
@@ -121,11 +122,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
      pip install apache-airflow-providers-dbt-cloud[http]


- ================================================================================================ ========
- Dependent package                                                                                Extra
- ================================================================================================ ========
- `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
- ================================================================================================ ========
+ ============================================================================================================== ===============
+ Dependent package                                                                                              Extra
+ ============================================================================================================== ===============
+ `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_               ``http``
+ `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+ ============================================================================================================== ===============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
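Note: the ``openlineage`` extra is new in 3.3.0 and installs the same way as the existing ``http`` extra, for example ``pip install apache-airflow-providers-dbt-cloud[openlineage]``. It pulls in ``apache-airflow-providers-openlineage``, which the lineage utilities added later in this diff require at runtime.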
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/README.rst
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/README.rst
@@ -36,7 +36,7 @@

  Package ``apache-airflow-providers-dbt-cloud``

- Release: ``3.2.3rc1``
+ Release: ``3.3.0rc2``


  `dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
@@ -49,7 +49,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
  are in ``airflow.providers.dbt.cloud`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.


  Installation
@@ -86,11 +86,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
      pip install apache-airflow-providers-dbt-cloud[http]


- ================================================================================================ ========
- Dependent package                                                                                Extra
- ================================================================================================ ========
- `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
- ================================================================================================ ========
+ ============================================================================================================== ===============
+ Dependent package                                                                                              Extra
+ ============================================================================================================== ===============
+ `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_               ``http``
+ `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+ ============================================================================================================== ===============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/airflow/providers/dbt/cloud/__init__.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/__init__.py
@@ -28,7 +28,7 @@ import packaging.version

  __all__ = ["__version__"]

- __version__ = "3.2.3"
+ __version__ = "3.3.0"

  try:
      from airflow import __version__ as airflow_version
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/airflow/providers/dbt/cloud/get_provider_info.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/get_provider_info.py
@@ -29,6 +29,7 @@ def get_provider_info():
          "description": "`dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__\n",
          "suspended": False,
          "versions": [
+             "3.3.0",
              "3.2.3",
              "3.2.2",
              "3.2.1",
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/airflow/providers/dbt/cloud/hooks/dbt.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/hooks/dbt.py
@@ -16,6 +16,7 @@
  # under the License.
  from __future__ import annotations

+ import asyncio
  import json
  import time
  from enum import Enum
@@ -211,12 +212,11 @@ class DbtCloudHook(HttpHook):
      async def get_headers_tenants_from_connection(self) -> tuple[dict[str, Any], str]:
          """Get Headers, tenants from the connection details."""
          headers: dict[str, Any] = {}
-         connection: Connection = await sync_to_async(self.get_connection)(self.dbt_cloud_conn_id)
-         tenant = self._get_tenant_domain(connection)
+         tenant = self._get_tenant_domain(self.connection)
          package_name, provider_version = _get_provider_info()
          headers["User-Agent"] = f"{package_name}-v{provider_version}"
          headers["Content-Type"] = "application/json"
-         headers["Authorization"] = f"Token {connection.password}"
+         headers["Authorization"] = f"Token {self.connection.password}"
          return headers, tenant

      @provide_account_id
@@ -234,13 +234,14 @@
          endpoint = f"{account_id}/runs/{run_id}/"
          headers, tenant = await self.get_headers_tenants_from_connection()
          url, params = self.get_request_url_params(tenant, endpoint, include_related)
-         async with aiohttp.ClientSession(headers=headers) as session:
-             async with session.get(url, params=params) as response:
-                 try:
-                     response.raise_for_status()
-                     return await response.json()
-                 except ClientResponseError as e:
-                     raise AirflowException(str(e.status) + ":" + e.message)
+         async with aiohttp.ClientSession(headers=headers) as session, session.get(
+             url, params=params
+         ) as response:
+             try:
+                 response.raise_for_status()
+                 return await response.json()
+             except ClientResponseError as e:
+                 raise AirflowException(f"{e.status}:{e.message}")

      async def get_job_status(
          self, run_id: int, account_id: int | None = None, include_related: list[str] | None = None
@@ -253,15 +254,10 @@
          :param include_related: Optional. List of related fields to pull with the run.
              Valid values are "trigger", "job", "repository", and "environment".
          """
-         try:
-             self.log.info("Getting the status of job run %s.", run_id)
-             response = await self.get_job_details(
-                 run_id, account_id=account_id, include_related=include_related
-             )
-             job_run_status: int = response["data"]["status"]
-             return job_run_status
-         except Exception as e:
-             raise e
+         self.log.info("Getting the status of job run %s.", run_id)
+         response = await self.get_job_details(run_id, account_id=account_id, include_related=include_related)
+         job_run_status: int = response["data"]["status"]
+         return job_run_status

      @cached_property
      def connection(self) -> Connection:
@@ -599,6 +595,43 @@
              endpoint=f"{account_id}/runs/{run_id}/artifacts/{path}", payload={"step": step}
          )

+     @fallback_to_default_account
+     async def get_job_run_artifacts_concurrently(
+         self,
+         run_id: int,
+         artifacts: list[str],
+         account_id: int | None = None,
+         step: int | None = None,
+     ):
+         """
+         Retrieve a set of chosen artifact files generated for a step in a completed run of a dbt Cloud job.
+
+         By default, this returns artifacts from the last step in the run.
+         The artifacts are fetched with concurrent asynchronous calls to speed up the retrieval.
+
+         :param run_id: The ID of a dbt Cloud job run.
+         :param artifacts: The list of artifact file names to retrieve. Paths are rooted at the target/
+             directory. Use "manifest.json", "catalog.json", or "run_results.json" to download
+             dbt-generated artifacts for the run.
+         :param account_id: Optional. The ID of a dbt Cloud account.
+         :param step: The index of the Step in the Run to query for artifacts. The first step in the
+             run has the index 1. If the step parameter is omitted, artifacts for the last step in the
+             run will be returned.
+
+         :return: A dictionary mapping each artifact file name to its parsed JSON payload.
+         """
+         tasks = {
+             artifact: sync_to_async(self.get_job_run_artifact)(
+                 run_id,
+                 path=artifact,
+                 account_id=account_id,
+                 step=step,
+             )
+             for artifact in artifacts
+         }
+         results = await asyncio.gather(*tasks.values())
+         return {filename: result.json() for filename, result in zip(tasks.keys(), results)}
+
      def test_connection(self) -> tuple[bool, str]:
          """Test dbt Cloud connection."""
          try:
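Note: the new get_job_run_artifacts_concurrently coroutine fans out one get_job_run_artifact call per requested artifact and gathers the results. A minimal usage sketch, assuming a configured "dbt_cloud_default" connection; the run id is hypothetical and must belong to a finished run:

    import asyncio

    from airflow.providers.dbt.cloud.hooks.dbt import DbtCloudHook

    hook = DbtCloudHook(dbt_cloud_conn_id="dbt_cloud_default")
    # run_id 12345 is a hypothetical, already-finished dbt Cloud run
    artifacts = asyncio.run(
        hook.get_job_run_artifacts_concurrently(
            run_id=12345,
            artifacts=["manifest.json", "run_results.json"],
        )
    )
    # the result is keyed by artifact name and holds the parsed JSON payloads
    manifest = artifacts["manifest.json"]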
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/airflow/providers/dbt/cloud/operators/dbt.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/operators/dbt.py
@@ -19,6 +19,7 @@ from __future__ import annotations
  import json
  import time
  import warnings
+ from functools import cached_property
  from pathlib import Path
  from typing import TYPE_CHECKING, Any

@@ -32,8 +33,10 @@ from airflow.providers.dbt.cloud.hooks.dbt import (
      JobRunInfo,
  )
  from airflow.providers.dbt.cloud.triggers.dbt import DbtCloudRunJobTrigger
+ from airflow.providers.dbt.cloud.utils.openlineage import generate_openlineage_events_from_dbt_cloud_run

  if TYPE_CHECKING:
+     from airflow.providers.openlineage.extractors import OperatorLineage
      from airflow.utils.context import Context


@@ -114,8 +117,7 @@ class DbtCloudRunJobOperator(BaseOperator):
          self.timeout = timeout
          self.check_interval = check_interval
          self.additional_run_config = additional_run_config or {}
-         self.hook: DbtCloudHook
-         self.run_id: int
+         self.run_id: int | None = None
          self.deferrable = deferrable

      def execute(self, context: Context):
@@ -124,7 +126,6 @@
              f"Triggered via Apache Airflow by task {self.task_id!r} in the {self.dag.dag_id} DAG."
          )

-         self.hook = DbtCloudHook(self.dbt_cloud_conn_id)
          trigger_job_response = self.hook.trigger_job_run(
              account_id=self.account_id,
              job_id=self.job_id,
@@ -134,12 +135,13 @@
              additional_run_config=self.additional_run_config,
          )
          self.run_id = trigger_job_response.json()["data"]["id"]
+         print(self.run_id)
          job_run_url = trigger_job_response.json()["data"]["href"]
          # Push the ``job_run_url`` value to XCom regardless of what happens during execution so that the job
          # run can be monitored via the operator link.
          context["ti"].xcom_push(key="job_run_url", value=job_run_url)

-         if self.wait_for_termination:
+         if self.wait_for_termination and isinstance(self.run_id, int):
              if self.deferrable is False:
                  self.log.info("Waiting for job run %s to terminate.", str(self.run_id))

@@ -196,6 +198,7 @@
          if event["status"] == "error":
              raise AirflowException(event["message"])
          self.log.info(event["message"])
+         self.run_id = event["run_id"]
          return int(event["run_id"])

      def on_kill(self) -> None:
@@ -211,6 +214,23 @@
          ):
              self.log.info("Job run %s has been cancelled successfully.", str(self.run_id))

+     @cached_property
+     def hook(self):
+         """Returns DBT Cloud hook."""
+         return DbtCloudHook(self.dbt_cloud_conn_id)
+
+     def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
+         """
+         Implement _on_complete because the job run needs to be triggered first, in the execute method.
+
+         This should send additional events only if the operator's ``wait_for_termination`` is set to True.
+         """
+         from airflow.providers.openlineage.extractors import OperatorLineage
+
+         if isinstance(self.run_id, int) and self.wait_for_termination is True:
+             return generate_openlineage_events_from_dbt_cloud_run(operator=self, task_instance=task_instance)
+         return OperatorLineage()
+

  class DbtCloudGetJobRunArtifactOperator(BaseOperator):
      """
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/airflow/providers/dbt/cloud/sensors/dbt.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/sensors/dbt.py
@@ -18,15 +18,18 @@ from __future__ import annotations

  import time
  import warnings
+ from functools import cached_property
  from typing import TYPE_CHECKING, Any

  from airflow.configuration import conf
- from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+ from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
  from airflow.providers.dbt.cloud.hooks.dbt import DbtCloudHook, DbtCloudJobRunException, DbtCloudJobRunStatus
  from airflow.providers.dbt.cloud.triggers.dbt import DbtCloudRunJobTrigger
+ from airflow.providers.dbt.cloud.utils.openlineage import generate_openlineage_events_from_dbt_cloud_run
  from airflow.sensors.base import BaseSensorOperator

  if TYPE_CHECKING:
+     from airflow.providers.openlineage.extractors import OperatorLineage
      from airflow.utils.context import Context


@@ -78,15 +81,27 @@

          self.deferrable = deferrable

+     @cached_property
+     def hook(self):
+         """Returns DBT Cloud hook."""
+         return DbtCloudHook(self.dbt_cloud_conn_id)
+
      def poke(self, context: Context) -> bool:
-         hook = DbtCloudHook(self.dbt_cloud_conn_id)
-         job_run_status = hook.get_job_run_status(run_id=self.run_id, account_id=self.account_id)
+         job_run_status = self.hook.get_job_run_status(run_id=self.run_id, account_id=self.account_id)

          if job_run_status == DbtCloudJobRunStatus.ERROR.value:
-             raise DbtCloudJobRunException(f"Job run {self.run_id} has failed.")
+             # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
+             message = f"Job run {self.run_id} has failed."
+             if self.soft_fail:
+                 raise AirflowSkipException(message)
+             raise DbtCloudJobRunException(message)

          if job_run_status == DbtCloudJobRunStatus.CANCELLED.value:
-             raise DbtCloudJobRunException(f"Job run {self.run_id} has been cancelled.")
+             # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
+             message = f"Job run {self.run_id} has been cancelled."
+             if self.soft_fail:
+                 raise AirflowSkipException(message)
+             raise DbtCloudJobRunException(message)

          return job_run_status == DbtCloudJobRunStatus.SUCCESS.value

@@ -121,10 +136,17 @@
              execution was successful.
          """
          if event["status"] in ["error", "cancelled"]:
-             raise AirflowException("Error in dbt: " + event["message"])
+             message = f"Error in dbt: {event['message']}"
+             if self.soft_fail:
+                 raise AirflowSkipException(message)
+             raise AirflowException(message)
          self.log.info(event["message"])
          return int(event["run_id"])

+     def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
+         """Implement _on_complete because the job run needs to be triggered first, in the execute method."""
+         return generate_openlineage_events_from_dbt_cloud_run(operator=self, task_instance=task_instance)
+

  class DbtCloudJobRunAsyncSensor(DbtCloudJobRunSensor):
      """This class is deprecated.
--- /dev/null
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/__init__.py
@@ -0,0 +1,16 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
--- /dev/null
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/airflow/providers/dbt/cloud/utils/openlineage.py
@@ -0,0 +1,140 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ from __future__ import annotations
+
+ import asyncio
+ import re
+ from contextlib import suppress
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from airflow.models.taskinstance import TaskInstance
+     from airflow.providers.dbt.cloud.operators.dbt import DbtCloudRunJobOperator
+     from airflow.providers.dbt.cloud.sensors.dbt import DbtCloudJobRunSensor
+     from airflow.providers.openlineage.extractors.base import OperatorLineage
+
+
+ def generate_openlineage_events_from_dbt_cloud_run(
+     operator: DbtCloudRunJobOperator | DbtCloudJobRunSensor, task_instance: TaskInstance
+ ) -> OperatorLineage:
+     """
+     Common method generating OpenLineage events from a dbt Cloud run.
+
+     This function retrieves information about a dbt Cloud run, including the associated job,
+     project, and execution details. It processes the run's artifacts, such as the manifest and
+     run results, in parallel across steps.
+     Then it generates and emits OpenLineage events based on the executed dbt tasks.
+
+     :param operator: Instance of a dbt Cloud operator that executed dbt tasks.
+         It should already have a run_id and a dbt Cloud hook.
+     :param task_instance: Currently executed task instance
+
+     :return: An empty OperatorLineage object indicating the completion of events generation.
+     """
+     from openlineage.common.provider.dbt import DbtCloudArtifactProcessor, ParentRunMetadata
+
+     from airflow.providers.openlineage.extractors import OperatorLineage
+     from airflow.providers.openlineage.plugins.adapter import (
+         _DAG_NAMESPACE,
+         _PRODUCER,
+         OpenLineageAdapter,
+     )
+     from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
+
+     # if no account_id is set, this will fall back to the default account
+     job_run = operator.hook.get_job_run(
+         run_id=operator.run_id, account_id=operator.account_id, include_related=["run_steps,job"]
+     ).json()["data"]
+     job = job_run["job"]
+     # retrieve account_id from the job and use it from this point on
+     account_id = job["account_id"]
+     project = operator.hook.get_project(project_id=job["project_id"], account_id=account_id).json()["data"]
+     connection = project["connection"]
+     execute_steps = job["execute_steps"]
+     run_steps = job_run["run_steps"]
+
+     # filter only dbt invocation steps
+     steps = []
+     for run_step in run_steps:
+         name = run_step["name"]
+         if name.startswith("Invoke dbt with `"):
+             regex_pattern = "Invoke dbt with `([^`.]*)`"
+             m = re.search(regex_pattern, name)
+             if m and m.group(1) in execute_steps:
+                 steps.append(run_step["index"])
+
+     # the catalog is available only if docs are generated
+     catalog = None
+     with suppress(Exception):
+         catalog = operator.hook.get_job_run_artifact(operator.run_id, path="catalog.json").json()["data"]
+
+     async def get_artifacts_for_steps(steps, artifacts):
+         """Get artifacts for a list of steps concurrently."""
+         tasks = [
+             operator.hook.get_job_run_artifacts_concurrently(
+                 run_id=operator.run_id,
+                 account_id=account_id,
+                 step=step,
+                 artifacts=artifacts,
+             )
+             for step in steps
+         ]
+         return await asyncio.gather(*tasks)
+
+     # get artifacts for all relevant steps concurrently
+     step_artifacts = asyncio.run(
+         get_artifacts_for_steps(steps=steps, artifacts=["manifest.json", "run_results.json"])
+     )
+
+     # process each step in a loop, sending the generated events in the same order as the steps
+     for artifacts in step_artifacts:
+         # process the manifest
+         manifest = artifacts["manifest.json"]
+
+         if not artifacts.get("run_results.json", None):
+             continue
+
+         processor = DbtCloudArtifactProcessor(
+             producer=_PRODUCER,
+             job_namespace=_DAG_NAMESPACE,
+             skip_errors=False,
+             logger=operator.log,
+             manifest=manifest,
+             run_result=artifacts["run_results.json"],
+             profile=connection,
+             catalog=catalog,
+         )
+
+         # generate the same run id as the current task instance's
+         parent_run_id = OpenLineageAdapter.build_task_instance_run_id(
+             operator.task_id, task_instance.execution_date, task_instance.try_number - 1
+         )
+
+         parent_job = ParentRunMetadata(
+             run_id=parent_run_id,
+             job_name=f"{task_instance.dag_id}.{task_instance.task_id}",
+             job_namespace=_DAG_NAMESPACE,
+         )
+         processor.dbt_run_metadata = parent_job
+
+         events = processor.parse().events()
+
+         client = get_openlineage_listener().adapter.get_or_create_openlineage_client()
+
+         for event in events:
+             client.emit(event=event)
+     return OperatorLineage()
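Note: the step filter in the function above keeps only run steps whose names embed one of the job's configured execute_steps. A small, self-contained illustration of that regex logic; the step names here are hypothetical:

    import re

    execute_steps = ["dbt run", "dbt test"]  # hypothetical job configuration
    run_step_names = [
        "Clone repository",
        "Invoke dbt with `dbt run`",
        "Invoke dbt with `dbt docs generate`",
    ]
    pattern = "Invoke dbt with `([^`.]*)`"
    for name in run_step_names:
        m = re.search(pattern, name)
        if m and m.group(1) in execute_steps:
            print(name)  # only "Invoke dbt with `dbt run`" matches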
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/PKG-INFO
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/apache_airflow_providers_dbt_cloud.egg-info/PKG-INFO
@@ -1,14 +1,14 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-dbt-cloud
- Version: 3.2.3rc1
+ Version: 3.3.0rc2
  Summary: Provider for Apache Airflow. Implements apache-airflow-providers-dbt-cloud package
  Home-page: https://airflow.apache.org/
  Download-URL: https://archive.apache.org/dist/airflow/providers
  Author: Apache Software Foundation
  Author-email: dev@airflow.apache.org
  License: Apache License 2.0
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -30,6 +30,7 @@ Classifier: Topic :: System :: Monitoring
  Requires-Python: ~=3.8
  Description-Content-Type: text/x-rst
  Provides-Extra: http
+ Provides-Extra: openlineage
  License-File: LICENSE
  License-File: NOTICE

@@ -71,7 +72,7 @@ License-File: NOTICE

  Package ``apache-airflow-providers-dbt-cloud``

- Release: ``3.2.3rc1``
+ Release: ``3.3.0rc2``


  `dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
@@ -84,7 +85,7 @@ This is a provider package for ``dbt.cloud`` provider. All classes for this prov
  are in ``airflow.providers.dbt.cloud`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/>`_.


  Installation
@@ -121,11 +122,12 @@ You can install such cross-provider dependencies when installing from PyPI. For
      pip install apache-airflow-providers-dbt-cloud[http]


- ================================================================================================ ========
- Dependent package                                                                                Extra
- ================================================================================================ ========
- `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_ ``http``
- ================================================================================================ ========
+ ============================================================================================================== ===============
+ Dependent package                                                                                              Extra
+ ============================================================================================================== ===============
+ `apache-airflow-providers-http <https://airflow.apache.org/docs/apache-airflow-providers-http>`_               ``http``
+ `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+ ============================================================================================================== ===============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html>`_.
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt
@@ -15,6 +15,8 @@ airflow/providers/dbt/cloud/sensors/__init__.py
  airflow/providers/dbt/cloud/sensors/dbt.py
  airflow/providers/dbt/cloud/triggers/__init__.py
  airflow/providers/dbt/cloud/triggers/dbt.py
+ airflow/providers/dbt/cloud/utils/__init__.py
+ airflow/providers/dbt/cloud/utils/openlineage.py
  apache_airflow_providers_dbt_cloud.egg-info/PKG-INFO
  apache_airflow_providers_dbt_cloud.egg-info/SOURCES.txt
  apache_airflow_providers_dbt_cloud.egg-info/dependency_links.txt
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/apache_airflow_providers_dbt_cloud.egg-info/requires.txt
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/apache_airflow_providers_dbt_cloud.egg-info/requires.txt
@@ -5,3 +5,6 @@ asgiref

  [http]
  apache-airflow-providers-http
+
+ [openlineage]
+ apache-airflow-providers-openlineage
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/pyproject.toml
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/pyproject.toml
@@ -16,7 +16,7 @@
  # under the License.
  [tool.black]
  line-length = 110
- target-version = ['py37', 'py38', 'py39', 'py310']
+ target-version = ['py38', 'py39', 'py310', 'py311']

  # Editable installs are currently broken using setuptools 64.0.0 and above. The problem is tracked in
  # https://github.com/pypa/setuptools/issues/3548. We're also discussing how we could potentially fix
@@ -28,10 +28,8 @@ target-version = ['py37', 'py38', 'py39', 'py310']
  requires = ['setuptools==67.2.0']
  build-backend = "setuptools.build_meta"

- [project]
- requires-python = ">=3.8"
-
  [tool.ruff]
+ target-version = "py38"
  typing-modules = ["airflow.typing_compat"]
  line-length = 110
  extend-exclude = [
@@ -62,7 +60,9 @@ extend-select = [
      "D402",
      "D403",
      "D412",
-     "D419"
+     "D419",
+     "TCH001", # typing-only-first-party-import
+     "TCH002", # typing-only-third-party-import
  ]
  extend-ignore = [
      "D203",
@@ -169,3 +169,5 @@ exclude_also = [
      "@(typing(_extensions)?\\.)?overload",
      "if TYPE_CHECKING:"
  ]
+ [tool.ruff.flake8-type-checking]
+ exempt-modules = ["typing", "typing_extensions"]
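Note: the newly enabled TCH001/TCH002 Ruff rules require first- and third-party imports used only in type annotations to live under an `if TYPE_CHECKING:` block, which is exactly how the provider modules above now treat `OperatorLineage`. A minimal sketch of the pattern the rules enforce:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # typing-only import: no runtime cost, and it satisfies TCH001/TCH002
        from airflow.utils.context import Context


    def execute(context: Context) -> None:
        ...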
--- apache-airflow-providers-dbt-cloud-3.2.3rc1/setup.cfg
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/setup.cfg
@@ -27,8 +27,8 @@ classifiers =
      Programming Language :: Python :: 3.11
      Topic :: System :: Monitoring
  project_urls =
-     Documentation=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/
-     Changelog=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.2.3/changelog.html
+     Documentation=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/
+     Changelog=https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/3.3.0/changelog.html
      Bug Tracker=https://github.com/apache/airflow/issues
      Source Code=https://github.com/apache/airflow
      Slack Chat=https://s.apache.org/airflow-slack
@@ -60,6 +60,6 @@ apache_airflow_provider =
  packages = airflow.providers.dbt.cloud

  [egg_info]
- tag_build = rc1
+ tag_build = rc2
  tag_date = 0

--- apache-airflow-providers-dbt-cloud-3.2.3rc1/setup.py
+++ apache-airflow-providers-dbt-cloud-3.3.0rc2/setup.py
@@ -26,14 +26,17 @@

  from setuptools import find_namespace_packages, setup

- version = "3.2.3"
+ version = "3.3.0"


  def do_setup():
      """Perform the package apache-airflow-providers-dbt-cloud setup."""
      setup(
          version=version,
-         extras_require={"http": ["apache-airflow-providers-http"]},
+         extras_require={
+             "http": ["apache-airflow-providers-http"],
+             "openlineage": ["apache-airflow-providers-openlineage"],
+         },
          packages=find_namespace_packages(
              include=[
                  "airflow.providers.dbt.cloud",