apache-airflow-providers-google 12.0.0rc2__py3-none-any.whl → 14.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/LICENSE +0 -52
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +27 -13
- airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
- airflow/providers/google/assets/bigquery.py +17 -0
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
- airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
- airflow/providers/google/cloud/hooks/automl.py +10 -4
- airflow/providers/google/cloud/hooks/bigquery.py +125 -22
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +2 -3
- airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
- airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -3
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
- airflow/providers/google/cloud/hooks/dataflow.py +12 -12
- airflow/providers/google/cloud/hooks/dataform.py +2 -3
- airflow/providers/google/cloud/hooks/datafusion.py +2 -2
- airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
- airflow/providers/google/cloud/hooks/dataproc.py +4 -5
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
- airflow/providers/google/cloud/hooks/dlp.py +3 -4
- airflow/providers/google/cloud/hooks/gcs.py +7 -6
- airflow/providers/google/cloud/hooks/kms.py +2 -3
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
- airflow/providers/google/cloud/hooks/natural_language.py +2 -3
- airflow/providers/google/cloud/hooks/os_login.py +2 -3
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
- airflow/providers/google/cloud/hooks/spanner.py +2 -2
- airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
- airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
- airflow/providers/google/cloud/hooks/tasks.py +3 -4
- airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
- airflow/providers/google/cloud/hooks/translate.py +236 -5
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
- airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
- airflow/providers/google/cloud/hooks/vision.py +3 -4
- airflow/providers/google/cloud/hooks/workflows.py +2 -3
- airflow/providers/google/cloud/links/alloy_db.py +46 -0
- airflow/providers/google/cloud/links/bigquery.py +25 -0
- airflow/providers/google/cloud/links/dataplex.py +172 -2
- airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
- airflow/providers/google/cloud/links/managed_kafka.py +104 -0
- airflow/providers/google/cloud/links/translate.py +28 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
- airflow/providers/google/cloud/openlineage/facets.py +67 -0
- airflow/providers/google/cloud/openlineage/mixins.py +438 -173
- airflow/providers/google/cloud/openlineage/utils.py +394 -61
- airflow/providers/google/cloud/operators/alloy_db.py +980 -69
- airflow/providers/google/cloud/operators/automl.py +83 -245
- airflow/providers/google/cloud/operators/bigquery.py +377 -74
- airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
- airflow/providers/google/cloud/operators/bigtable.py +1 -3
- airflow/providers/google/cloud/operators/cloud_base.py +1 -2
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
- airflow/providers/google/cloud/operators/cloud_build.py +3 -5
- airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
- airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
- airflow/providers/google/cloud/operators/cloud_run.py +6 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
- airflow/providers/google/cloud/operators/compute.py +3 -4
- airflow/providers/google/cloud/operators/datacatalog.py +9 -11
- airflow/providers/google/cloud/operators/dataflow.py +1 -112
- airflow/providers/google/cloud/operators/dataform.py +3 -5
- airflow/providers/google/cloud/operators/datafusion.py +1 -1
- airflow/providers/google/cloud/operators/dataplex.py +2046 -7
- airflow/providers/google/cloud/operators/dataproc.py +102 -17
- airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
- airflow/providers/google/cloud/operators/dlp.py +17 -19
- airflow/providers/google/cloud/operators/gcs.py +14 -17
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
- airflow/providers/google/cloud/operators/natural_language.py +3 -5
- airflow/providers/google/cloud/operators/pubsub.py +39 -7
- airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
- airflow/providers/google/cloud/operators/stackdriver.py +3 -5
- airflow/providers/google/cloud/operators/tasks.py +4 -6
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
- airflow/providers/google/cloud/operators/translate.py +414 -5
- airflow/providers/google/cloud/operators/translate_speech.py +2 -4
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
- airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
- airflow/providers/google/cloud/operators/vision.py +4 -6
- airflow/providers/google/cloud/operators/workflows.py +5 -7
- airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/sensors/bigtable.py +2 -3
- airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
- airflow/providers/google/cloud/sensors/dataplex.py +4 -6
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
- airflow/providers/google/cloud/sensors/gcs.py +2 -4
- airflow/providers/google/cloud/sensors/pubsub.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +3 -5
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
- airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
- airflow/providers/google/cloud/triggers/dataflow.py +2 -3
- airflow/providers/google/cloud/triggers/dataplex.py +1 -2
- airflow/providers/google/cloud/triggers/dataproc.py +2 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +1 -2
- airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
- airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
- airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/consts.py +1 -2
- airflow/providers/google/common/hooks/base_google.py +8 -7
- airflow/providers/google/get_provider_info.py +186 -134
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/METADATA +41 -58
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/RECORD +157 -159
- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
- airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
- airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
- airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/entry_points.txt +0 -0
@@ -30,11 +30,6 @@ from enum import Enum
|
|
30
30
|
from functools import cached_property
|
31
31
|
from typing import TYPE_CHECKING, Any
|
32
32
|
|
33
|
-
from google.api_core.exceptions import AlreadyExists, NotFound
|
34
|
-
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
35
|
-
from google.api_core.retry import Retry, exponential_sleep_generator
|
36
|
-
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
37
|
-
|
38
33
|
from airflow.configuration import conf
|
39
34
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
40
35
|
from airflow.providers.google.cloud.hooks.dataproc import (
|
@@ -54,9 +49,6 @@ from airflow.providers.google.cloud.links.dataproc import (
|
|
54
49
|
DataprocWorkflowLink,
|
55
50
|
DataprocWorkflowTemplateLink,
|
56
51
|
)
|
57
|
-
from airflow.providers.google.cloud.openlineage.utils import (
|
58
|
-
inject_openlineage_properties_into_dataproc_job,
|
59
|
-
)
|
60
52
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
61
53
|
from airflow.providers.google.cloud.triggers.dataproc import (
|
62
54
|
DataprocBatchTrigger,
|
@@ -69,16 +61,19 @@ from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
|
|
69
61
|
from airflow.providers.google.common.deprecated import deprecated
|
70
62
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
71
63
|
from airflow.utils import timezone
|
64
|
+
from google.api_core.exceptions import AlreadyExists, NotFound
|
65
|
+
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
66
|
+
from google.api_core.retry import Retry, exponential_sleep_generator
|
67
|
+
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
72
68
|
|
73
69
|
if TYPE_CHECKING:
|
70
|
+
from airflow.utils.context import Context
|
74
71
|
from google.api_core import operation
|
75
72
|
from google.api_core.retry_async import AsyncRetry
|
76
73
|
from google.protobuf.duration_pb2 import Duration
|
77
74
|
from google.protobuf.field_mask_pb2 import FieldMask
|
78
75
|
from google.type.interval_pb2 import Interval
|
79
76
|
|
80
|
-
from airflow.utils.context import Context
|
81
|
-
|
82
77
|
|
83
78
|
class PreemptibilityType(Enum):
|
84
79
|
"""Contains possible Type values of Preemptibility applicable for every secondary worker of Cluster."""
|
@@ -1824,6 +1819,12 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
1824
1819
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
1825
1820
|
polling_interval_seconds: int = 10,
|
1826
1821
|
cancel_on_kill: bool = True,
|
1822
|
+
openlineage_inject_parent_job_info: bool = conf.getboolean(
|
1823
|
+
"openlineage", "spark_inject_parent_job_info", fallback=False
|
1824
|
+
),
|
1825
|
+
openlineage_inject_transport_info: bool = conf.getboolean(
|
1826
|
+
"openlineage", "spark_inject_transport_info", fallback=False
|
1827
|
+
),
|
1827
1828
|
**kwargs,
|
1828
1829
|
) -> None:
|
1829
1830
|
super().__init__(**kwargs)
|
@@ -1843,11 +1844,17 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
1843
1844
|
self.polling_interval_seconds = polling_interval_seconds
|
1844
1845
|
self.cancel_on_kill = cancel_on_kill
|
1845
1846
|
self.operation_name: str | None = None
|
1847
|
+
self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
|
1848
|
+
self.openlineage_inject_transport_info = openlineage_inject_transport_info
|
1846
1849
|
|
1847
1850
|
def execute(self, context: Context):
|
1848
1851
|
self.log.info("Instantiating Inline Template")
|
1849
1852
|
hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
1850
1853
|
project_id = self.project_id or hook.project_id
|
1854
|
+
if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
1855
|
+
self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
1856
|
+
self._inject_openlineage_properties_into_dataproc_workflow_template(context)
|
1857
|
+
|
1851
1858
|
operation = hook.instantiate_inline_workflow_template(
|
1852
1859
|
template=self.template,
|
1853
1860
|
project_id=project_id,
|
@@ -1903,6 +1910,25 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
1903
1910
|
hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
1904
1911
|
hook.get_operations_client(region=self.region).cancel_operation(name=self.operation_name)
|
1905
1912
|
|
1913
|
+
def _inject_openlineage_properties_into_dataproc_workflow_template(self, context: Context) -> None:
|
1914
|
+
try:
|
1915
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
1916
|
+
inject_openlineage_properties_into_dataproc_workflow_template,
|
1917
|
+
)
|
1918
|
+
|
1919
|
+
self.template = inject_openlineage_properties_into_dataproc_workflow_template(
|
1920
|
+
template=self.template,
|
1921
|
+
context=context,
|
1922
|
+
inject_parent_job_info=self.openlineage_inject_parent_job_info,
|
1923
|
+
inject_transport_info=self.openlineage_inject_transport_info,
|
1924
|
+
)
|
1925
|
+
except Exception as e:
|
1926
|
+
self.log.warning(
|
1927
|
+
"An error occurred while trying to inject OpenLineage information. "
|
1928
|
+
"Dataproc template has not been modified by OpenLineage.",
|
1929
|
+
exc_info=e,
|
1930
|
+
)
|
1931
|
+
|
1906
1932
|
|
1907
1933
|
class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
1908
1934
|
"""
|
@@ -1968,6 +1994,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
1968
1994
|
openlineage_inject_parent_job_info: bool = conf.getboolean(
|
1969
1995
|
"openlineage", "spark_inject_parent_job_info", fallback=False
|
1970
1996
|
),
|
1997
|
+
openlineage_inject_transport_info: bool = conf.getboolean(
|
1998
|
+
"openlineage", "spark_inject_transport_info", fallback=False
|
1999
|
+
),
|
1971
2000
|
**kwargs,
|
1972
2001
|
) -> None:
|
1973
2002
|
super().__init__(**kwargs)
|
@@ -1990,15 +2019,15 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
1990
2019
|
self.job_id: str | None = None
|
1991
2020
|
self.wait_timeout = wait_timeout
|
1992
2021
|
self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
|
2022
|
+
self.openlineage_inject_transport_info = openlineage_inject_transport_info
|
1993
2023
|
|
1994
2024
|
def execute(self, context: Context):
|
1995
2025
|
self.log.info("Submitting job")
|
1996
2026
|
self.hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
1997
|
-
if self.openlineage_inject_parent_job_info:
|
2027
|
+
if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
1998
2028
|
self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
1999
|
-
self.
|
2000
|
-
|
2001
|
-
)
|
2029
|
+
self._inject_openlineage_properties_into_dataproc_job(context)
|
2030
|
+
|
2002
2031
|
job_object = self.hook.submit_job(
|
2003
2032
|
project_id=self.project_id,
|
2004
2033
|
region=self.region,
|
@@ -2072,6 +2101,25 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
2072
2101
|
if self.job_id and self.cancel_on_kill:
|
2073
2102
|
self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id, region=self.region)
|
2074
2103
|
|
2104
|
+
def _inject_openlineage_properties_into_dataproc_job(self, context: Context) -> None:
|
2105
|
+
try:
|
2106
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
2107
|
+
inject_openlineage_properties_into_dataproc_job,
|
2108
|
+
)
|
2109
|
+
|
2110
|
+
self.job = inject_openlineage_properties_into_dataproc_job(
|
2111
|
+
job=self.job,
|
2112
|
+
context=context,
|
2113
|
+
inject_parent_job_info=self.openlineage_inject_parent_job_info,
|
2114
|
+
inject_transport_info=self.openlineage_inject_transport_info,
|
2115
|
+
)
|
2116
|
+
except Exception as e:
|
2117
|
+
self.log.warning(
|
2118
|
+
"An error occurred while trying to inject OpenLineage information. "
|
2119
|
+
"Dataproc job has not been modified by OpenLineage.",
|
2120
|
+
exc_info=e,
|
2121
|
+
)
|
2122
|
+
|
2075
2123
|
|
2076
2124
|
class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
2077
2125
|
"""
|
@@ -2425,6 +2473,12 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2425
2473
|
asynchronous: bool = False,
|
2426
2474
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
2427
2475
|
polling_interval_seconds: int = 5,
|
2476
|
+
openlineage_inject_parent_job_info: bool = conf.getboolean(
|
2477
|
+
"openlineage", "spark_inject_parent_job_info", fallback=False
|
2478
|
+
),
|
2479
|
+
openlineage_inject_transport_info: bool = conf.getboolean(
|
2480
|
+
"openlineage", "spark_inject_transport_info", fallback=False
|
2481
|
+
),
|
2428
2482
|
**kwargs,
|
2429
2483
|
):
|
2430
2484
|
super().__init__(**kwargs)
|
@@ -2446,6 +2500,8 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2446
2500
|
self.asynchronous = asynchronous
|
2447
2501
|
self.deferrable = deferrable
|
2448
2502
|
self.polling_interval_seconds = polling_interval_seconds
|
2503
|
+
self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
|
2504
|
+
self.openlineage_inject_transport_info = openlineage_inject_transport_info
|
2449
2505
|
|
2450
2506
|
def execute(self, context: Context):
|
2451
2507
|
if self.asynchronous and self.deferrable:
|
@@ -2468,6 +2524,10 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2468
2524
|
else:
|
2469
2525
|
self.log.info("Starting batch. The batch ID will be generated since it was not provided.")
|
2470
2526
|
|
2527
|
+
if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
2528
|
+
self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
2529
|
+
self._inject_openlineage_properties_into_dataproc_batch(context)
|
2530
|
+
|
2471
2531
|
try:
|
2472
2532
|
self.operation = self.hook.create_batch(
|
2473
2533
|
region=self.region,
|
@@ -2483,7 +2543,10 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2483
2543
|
self.log.info("Batch with given id already exists.")
|
2484
2544
|
self.log.info("Attaching to the job %s if it is still running.", batch_id)
|
2485
2545
|
else:
|
2486
|
-
|
2546
|
+
if self.operation and self.operation.metadata:
|
2547
|
+
batch_id = self.operation.metadata.batch.split("/")[-1]
|
2548
|
+
else:
|
2549
|
+
raise AirflowException("Operation metadata is not available.")
|
2487
2550
|
self.log.info("The batch %s was created.", batch_id)
|
2488
2551
|
|
2489
2552
|
DataprocBatchLink.persist(
|
@@ -2609,8 +2672,11 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2609
2672
|
self.log.info("Batch with given id already exists.")
|
2610
2673
|
self.log.info("Attaching to the job %s if it is still running.", self.batch_id)
|
2611
2674
|
else:
|
2612
|
-
|
2613
|
-
|
2675
|
+
if self.operation and self.operation.metadata:
|
2676
|
+
batch_id = self.operation.metadata.batch.split("/")[-1]
|
2677
|
+
self.log.info("The batch %s was created.", batch_id)
|
2678
|
+
else:
|
2679
|
+
raise AirflowException("Operation metadata is not available.")
|
2614
2680
|
|
2615
2681
|
self.log.info("Waiting for the completion of batch job %s", batch_id)
|
2616
2682
|
batch = self.hook.wait_for_batch(
|
@@ -2623,6 +2689,25 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
2623
2689
|
)
|
2624
2690
|
return batch, batch_id
|
2625
2691
|
|
2692
|
+
def _inject_openlineage_properties_into_dataproc_batch(self, context: Context) -> None:
|
2693
|
+
try:
|
2694
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
2695
|
+
inject_openlineage_properties_into_dataproc_batch,
|
2696
|
+
)
|
2697
|
+
|
2698
|
+
self.batch = inject_openlineage_properties_into_dataproc_batch(
|
2699
|
+
batch=self.batch,
|
2700
|
+
context=context,
|
2701
|
+
inject_parent_job_info=self.openlineage_inject_parent_job_info,
|
2702
|
+
inject_transport_info=self.openlineage_inject_transport_info,
|
2703
|
+
)
|
2704
|
+
except Exception as e:
|
2705
|
+
self.log.warning(
|
2706
|
+
"An error occurred while trying to inject OpenLineage information. "
|
2707
|
+
"Dataproc batch has not been modified by OpenLineage.",
|
2708
|
+
exc_info=e,
|
2709
|
+
)
|
2710
|
+
|
2626
2711
|
|
2627
2712
|
class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
2628
2713
|
"""
|
@@ -23,25 +23,23 @@ import time
|
|
23
23
|
from collections.abc import Sequence
|
24
24
|
from typing import TYPE_CHECKING
|
25
25
|
|
26
|
-
from google.api_core.exceptions import AlreadyExists
|
27
|
-
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
28
|
-
from google.api_core.retry import Retry, exponential_sleep_generator
|
29
|
-
from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
|
30
|
-
from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
|
31
|
-
from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
|
32
|
-
|
33
26
|
from airflow.exceptions import AirflowException
|
34
27
|
from airflow.models import BaseOperator, BaseOperatorLink
|
35
28
|
from airflow.models.xcom import XCom
|
36
29
|
from airflow.providers.google.cloud.hooks.dataproc_metastore import DataprocMetastoreHook
|
37
30
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
38
31
|
from airflow.providers.google.common.links.storage import StorageLink
|
32
|
+
from google.api_core.exceptions import AlreadyExists
|
33
|
+
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
34
|
+
from google.api_core.retry import Retry, exponential_sleep_generator
|
35
|
+
from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
|
36
|
+
from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
|
37
|
+
from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
|
39
38
|
|
40
39
|
if TYPE_CHECKING:
|
41
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
42
|
-
|
43
40
|
from airflow.models.taskinstancekey import TaskInstanceKey
|
44
41
|
from airflow.utils.context import Context
|
42
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
45
43
|
|
46
44
|
|
47
45
|
BASE_LINK = "https://console.cloud.google.com"
|
@@ -22,6 +22,22 @@ from __future__ import annotations
|
|
22
22
|
from collections.abc import Sequence
|
23
23
|
from typing import TYPE_CHECKING
|
24
24
|
|
25
|
+
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
|
26
|
+
from airflow.providers.google.cloud.links.data_loss_prevention import (
|
27
|
+
CloudDLPDeidentifyTemplateDetailsLink,
|
28
|
+
CloudDLPDeidentifyTemplatesListLink,
|
29
|
+
CloudDLPInfoTypeDetailsLink,
|
30
|
+
CloudDLPInfoTypesListLink,
|
31
|
+
CloudDLPInspectTemplateDetailsLink,
|
32
|
+
CloudDLPInspectTemplatesListLink,
|
33
|
+
CloudDLPJobDetailsLink,
|
34
|
+
CloudDLPJobsListLink,
|
35
|
+
CloudDLPJobTriggerDetailsLink,
|
36
|
+
CloudDLPJobTriggersListLink,
|
37
|
+
CloudDLPPossibleInfoTypesListLink,
|
38
|
+
)
|
39
|
+
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
40
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
25
41
|
from google.api_core.exceptions import AlreadyExists, InvalidArgument, NotFound
|
26
42
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
27
43
|
from google.cloud.dlp_v2.types import (
|
@@ -45,29 +61,11 @@ from google.cloud.dlp_v2.types import (
|
|
45
61
|
StoredInfoTypeConfig,
|
46
62
|
)
|
47
63
|
|
48
|
-
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
|
49
|
-
from airflow.providers.google.cloud.links.data_loss_prevention import (
|
50
|
-
CloudDLPDeidentifyTemplateDetailsLink,
|
51
|
-
CloudDLPDeidentifyTemplatesListLink,
|
52
|
-
CloudDLPInfoTypeDetailsLink,
|
53
|
-
CloudDLPInfoTypesListLink,
|
54
|
-
CloudDLPInspectTemplateDetailsLink,
|
55
|
-
CloudDLPInspectTemplatesListLink,
|
56
|
-
CloudDLPJobDetailsLink,
|
57
|
-
CloudDLPJobsListLink,
|
58
|
-
CloudDLPJobTriggerDetailsLink,
|
59
|
-
CloudDLPJobTriggersListLink,
|
60
|
-
CloudDLPPossibleInfoTypesListLink,
|
61
|
-
)
|
62
|
-
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
63
|
-
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
64
|
-
|
65
64
|
if TYPE_CHECKING:
|
65
|
+
from airflow.utils.context import Context
|
66
66
|
from google.api_core.retry import Retry
|
67
67
|
from google.protobuf.field_mask_pb2 import FieldMask
|
68
68
|
|
69
|
-
from airflow.utils.context import Context
|
70
|
-
|
71
69
|
|
72
70
|
class CloudDLPCancelDLPJobOperator(GoogleCloudBaseOperator):
|
73
71
|
"""
|
@@ -35,14 +35,13 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from airflow.utils.context import Context
|
37
37
|
|
38
|
-
from google.api_core.exceptions import Conflict
|
39
|
-
from google.cloud.exceptions import GoogleCloudError
|
40
|
-
|
41
38
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
42
39
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
43
40
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
44
41
|
from airflow.providers.google.common.links.storage import FileDetailsLink, StorageLink
|
45
42
|
from airflow.utils import timezone
|
43
|
+
from google.api_core.exceptions import Conflict
|
44
|
+
from google.cloud.exceptions import GoogleCloudError
|
46
45
|
|
47
46
|
|
48
47
|
class GCSCreateBucketOperator(GoogleCloudBaseOperator):
|
@@ -787,22 +786,20 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
787
786
|
|
788
787
|
def execute(self, context: Context) -> list[str]:
|
789
788
|
# Define intervals and prefixes.
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
789
|
+
orig_start = context["data_interval_start"]
|
790
|
+
orig_end = context["data_interval_end"]
|
791
|
+
|
792
|
+
if orig_start is None or orig_end is None:
|
793
|
+
raise RuntimeError("`data_interval_start` & `data_interval_end` must not be None")
|
794
|
+
|
795
|
+
if not isinstance(orig_start, pendulum.DateTime):
|
796
|
+
orig_start = pendulum.instance(orig_start)
|
797
|
+
|
798
|
+
if not isinstance(orig_end, pendulum.DateTime):
|
799
|
+
orig_end = pendulum.instance(orig_end)
|
800
800
|
|
801
801
|
timespan_start = orig_start
|
802
|
-
if
|
803
|
-
self.log.warning("No following schedule found, setting timespan end to max %s", orig_end)
|
804
|
-
timespan_end = pendulum.instance(datetime.datetime.max)
|
805
|
-
elif orig_start >= orig_end: # Airflow 2.2 sets start == end for non-perodic schedules.
|
802
|
+
if orig_start >= orig_end: # Airflow 2.2 sets start == end for non-perodic schedules.
|
806
803
|
self.log.warning("DAG schedule not periodic, setting timespan end to max %s", orig_end)
|
807
804
|
timespan_end = pendulum.instance(datetime.datetime.max)
|
808
805
|
else:
|
@@ -24,7 +24,6 @@ from collections.abc import Sequence
|
|
24
24
|
from functools import cached_property
|
25
25
|
from typing import TYPE_CHECKING, Any
|
26
26
|
|
27
|
-
from google.api_core.exceptions import AlreadyExists
|
28
27
|
from kubernetes.client import V1JobList, models as k8s
|
29
28
|
from packaging.version import parse as parse_version
|
30
29
|
|
@@ -61,6 +60,7 @@ from airflow.providers.google.common.deprecated import deprecated
|
|
61
60
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
62
61
|
from airflow.providers_manager import ProvidersManager
|
63
62
|
from airflow.utils.timezone import utcnow
|
63
|
+
from google.api_core.exceptions import AlreadyExists
|
64
64
|
|
65
65
|
try:
|
66
66
|
from airflow.providers.cncf.kubernetes.operators.job import KubernetesDeleteJobOperator
|
@@ -73,11 +73,11 @@ except ImportError:
|
|
73
73
|
)
|
74
74
|
|
75
75
|
if TYPE_CHECKING:
|
76
|
-
from google.cloud.container_v1.types import Cluster
|
77
76
|
from kubernetes.client.models import V1Job
|
78
77
|
from pendulum import DateTime
|
79
78
|
|
80
79
|
from airflow.utils.context import Context
|
80
|
+
from google.cloud.container_v1.types import Cluster
|
81
81
|
|
82
82
|
KUBE_CONFIG_ENV_VAR = "KUBECONFIG"
|
83
83
|
|