apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -20,24 +20,33 @@
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
import json
|
|
23
|
-
from collections.abc import Iterable, Sequence
|
|
23
|
+
from collections.abc import Collection, Iterable, Sequence
|
|
24
24
|
from datetime import datetime, timedelta
|
|
25
25
|
from functools import cached_property
|
|
26
26
|
from typing import TYPE_CHECKING
|
|
27
27
|
|
|
28
28
|
from dateutil import parser
|
|
29
|
+
from google.api_core.exceptions import NotFound
|
|
29
30
|
from google.cloud.orchestration.airflow.service_v1.types import Environment, ExecuteAirflowCommandResponse
|
|
30
31
|
|
|
31
32
|
from airflow.configuration import conf
|
|
32
|
-
from airflow.
|
|
33
|
+
from airflow.providers.common.compat.sdk import AirflowException, AirflowSkipException, BaseSensorOperator
|
|
33
34
|
from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerHook
|
|
34
|
-
from airflow.providers.google.cloud.triggers.cloud_composer import
|
|
35
|
+
from airflow.providers.google.cloud.triggers.cloud_composer import (
|
|
36
|
+
CloudComposerDAGRunTrigger,
|
|
37
|
+
CloudComposerExternalTaskTrigger,
|
|
38
|
+
)
|
|
35
39
|
from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
|
|
36
|
-
from airflow.
|
|
37
|
-
|
|
40
|
+
from airflow.providers.standard.exceptions import (
|
|
41
|
+
DuplicateStateError,
|
|
42
|
+
ExternalDagFailedError,
|
|
43
|
+
ExternalTaskFailedError,
|
|
44
|
+
ExternalTaskGroupFailedError,
|
|
45
|
+
)
|
|
46
|
+
from airflow.utils.state import State, TaskInstanceState
|
|
38
47
|
|
|
39
48
|
if TYPE_CHECKING:
|
|
40
|
-
from airflow.
|
|
49
|
+
from airflow.providers.common.compat.sdk import Context
|
|
41
50
|
|
|
42
51
|
|
|
43
52
|
class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
@@ -56,6 +65,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
56
65
|
Or [datetime(2024,3,22,0,0,0)] in this case sensor will check for states from specific time in the
|
|
57
66
|
past till current time execution.
|
|
58
67
|
Default value datetime.timedelta(days=1).
|
|
68
|
+
:param composer_dag_run_id: The Run ID of executable task. The 'execution_range' param is ignored, if both specified.
|
|
59
69
|
:param gcp_conn_id: The connection ID to use when fetching connection info.
|
|
60
70
|
:param impersonation_chain: Optional service account to impersonate using short-term
|
|
61
71
|
credentials, or chained list of accounts required to get the access_token
|
|
@@ -86,10 +96,12 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
86
96
|
composer_dag_id: str,
|
|
87
97
|
allowed_states: Iterable[str] | None = None,
|
|
88
98
|
execution_range: timedelta | list[datetime] | None = None,
|
|
99
|
+
composer_dag_run_id: str | None = None,
|
|
89
100
|
gcp_conn_id: str = "google_cloud_default",
|
|
90
101
|
impersonation_chain: str | Sequence[str] | None = None,
|
|
91
102
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
|
92
103
|
poll_interval: int = 10,
|
|
104
|
+
use_rest_api: bool = False,
|
|
93
105
|
**kwargs,
|
|
94
106
|
) -> None:
|
|
95
107
|
super().__init__(**kwargs)
|
|
@@ -99,21 +111,35 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
99
111
|
self.composer_dag_id = composer_dag_id
|
|
100
112
|
self.allowed_states = list(allowed_states) if allowed_states else [TaskInstanceState.SUCCESS.value]
|
|
101
113
|
self.execution_range = execution_range
|
|
114
|
+
self.composer_dag_run_id = composer_dag_run_id
|
|
102
115
|
self.gcp_conn_id = gcp_conn_id
|
|
103
116
|
self.impersonation_chain = impersonation_chain
|
|
104
117
|
self.deferrable = deferrable
|
|
105
118
|
self.poll_interval = poll_interval
|
|
119
|
+
self.use_rest_api = use_rest_api
|
|
120
|
+
|
|
121
|
+
if self.composer_dag_run_id and self.execution_range:
|
|
122
|
+
self.log.warning(
|
|
123
|
+
"The composer_dag_run_id parameter and execution_range parameter do not work together. This run will ignore execution_range parameter and count only specified composer_dag_run_id parameter."
|
|
124
|
+
)
|
|
106
125
|
|
|
107
126
|
def _get_logical_dates(self, context) -> tuple[datetime, datetime]:
|
|
127
|
+
logical_date = context.get("logical_date", None)
|
|
128
|
+
if logical_date is None:
|
|
129
|
+
raise RuntimeError(
|
|
130
|
+
"logical_date is None. Please make sure the sensor is not used in an asset-triggered Dag. "
|
|
131
|
+
"CloudComposerDAGRunSensor was designed to be used in time-based scheduled Dags only, "
|
|
132
|
+
"and asset-triggered Dags do not have logical_date. "
|
|
133
|
+
)
|
|
108
134
|
if isinstance(self.execution_range, timedelta):
|
|
109
135
|
if self.execution_range < timedelta(0):
|
|
110
|
-
return
|
|
111
|
-
return
|
|
136
|
+
return logical_date, logical_date - self.execution_range
|
|
137
|
+
return logical_date - self.execution_range, logical_date
|
|
112
138
|
if isinstance(self.execution_range, list) and len(self.execution_range) > 0:
|
|
113
139
|
return self.execution_range[0], self.execution_range[1] if len(
|
|
114
140
|
self.execution_range
|
|
115
|
-
) > 1 else
|
|
116
|
-
return
|
|
141
|
+
) > 1 else logical_date
|
|
142
|
+
return logical_date - timedelta(1), logical_date
|
|
117
143
|
|
|
118
144
|
def poke(self, context: Context) -> bool:
|
|
119
145
|
start_date, end_date = self._get_logical_dates(context)
|
|
@@ -123,6 +149,20 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
123
149
|
|
|
124
150
|
dag_runs = self._pull_dag_runs()
|
|
125
151
|
|
|
152
|
+
if len(dag_runs) == 0:
|
|
153
|
+
self.log.info("Dag runs are empty. Sensor waits for dag runs...")
|
|
154
|
+
return False
|
|
155
|
+
|
|
156
|
+
if self.composer_dag_run_id:
|
|
157
|
+
self.log.info(
|
|
158
|
+
"Sensor waits for allowed states %s for specified RunID: %s",
|
|
159
|
+
self.allowed_states,
|
|
160
|
+
self.composer_dag_run_id,
|
|
161
|
+
)
|
|
162
|
+
composer_dag_run_id_status = self._check_composer_dag_run_id_states(
|
|
163
|
+
dag_runs=dag_runs,
|
|
164
|
+
)
|
|
165
|
+
return composer_dag_run_id_status
|
|
126
166
|
self.log.info("Sensor waits for allowed states: %s", self.allowed_states)
|
|
127
167
|
allowed_states_status = self._check_dag_runs_states(
|
|
128
168
|
dag_runs=dag_runs,
|
|
@@ -134,26 +174,51 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
134
174
|
|
|
135
175
|
def _pull_dag_runs(self) -> list[dict]:
|
|
136
176
|
"""Pull the list of dag runs."""
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
177
|
+
if self.use_rest_api:
|
|
178
|
+
try:
|
|
179
|
+
environment = self.hook.get_environment(
|
|
180
|
+
project_id=self.project_id,
|
|
181
|
+
region=self.region,
|
|
182
|
+
environment_id=self.environment_id,
|
|
183
|
+
timeout=self.timeout,
|
|
184
|
+
)
|
|
185
|
+
except NotFound as not_found_err:
|
|
186
|
+
self.log.info("The Composer environment %s does not exist.", self.environment_id)
|
|
187
|
+
raise AirflowException(not_found_err)
|
|
188
|
+
composer_airflow_uri = environment.config.airflow_uri
|
|
189
|
+
|
|
190
|
+
self.log.info(
|
|
191
|
+
"Pulling the DAG %s runs from the %s environment...",
|
|
192
|
+
self.composer_dag_id,
|
|
193
|
+
self.environment_id,
|
|
194
|
+
)
|
|
195
|
+
dag_runs_response = self.hook.get_dag_runs(
|
|
196
|
+
composer_airflow_uri=composer_airflow_uri,
|
|
197
|
+
composer_dag_id=self.composer_dag_id,
|
|
198
|
+
timeout=self.timeout,
|
|
199
|
+
)
|
|
200
|
+
dag_runs = dag_runs_response["dag_runs"]
|
|
201
|
+
else:
|
|
202
|
+
cmd_parameters = (
|
|
203
|
+
["-d", self.composer_dag_id, "-o", "json"]
|
|
204
|
+
if self._composer_airflow_version < 3
|
|
205
|
+
else [self.composer_dag_id, "-o", "json"]
|
|
206
|
+
)
|
|
207
|
+
dag_runs_cmd = self.hook.execute_airflow_command(
|
|
208
|
+
project_id=self.project_id,
|
|
209
|
+
region=self.region,
|
|
210
|
+
environment_id=self.environment_id,
|
|
211
|
+
command="dags",
|
|
212
|
+
subcommand="list-runs",
|
|
213
|
+
parameters=cmd_parameters,
|
|
214
|
+
)
|
|
215
|
+
cmd_result = self.hook.wait_command_execution_result(
|
|
216
|
+
project_id=self.project_id,
|
|
217
|
+
region=self.region,
|
|
218
|
+
environment_id=self.environment_id,
|
|
219
|
+
execution_cmd_info=ExecuteAirflowCommandResponse.to_dict(dag_runs_cmd),
|
|
220
|
+
)
|
|
221
|
+
dag_runs = json.loads(cmd_result["output"][0]["content"])
|
|
157
222
|
return dag_runs
|
|
158
223
|
|
|
159
224
|
def _check_dag_runs_states(
|
|
@@ -184,16 +249,27 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
184
249
|
image_version = environment_config["config"]["software_config"]["image_version"]
|
|
185
250
|
return int(image_version.split("airflow-")[1].split(".")[0])
|
|
186
251
|
|
|
252
|
+
def _check_composer_dag_run_id_states(self, dag_runs: list[dict]) -> bool:
|
|
253
|
+
for dag_run in dag_runs:
|
|
254
|
+
if (
|
|
255
|
+
dag_run["dag_run_id" if self.use_rest_api else "run_id"] == self.composer_dag_run_id
|
|
256
|
+
and dag_run["state"] in self.allowed_states
|
|
257
|
+
):
|
|
258
|
+
return True
|
|
259
|
+
return False
|
|
260
|
+
|
|
187
261
|
def execute(self, context: Context) -> None:
|
|
188
262
|
self._composer_airflow_version = self._get_composer_airflow_version()
|
|
189
263
|
if self.deferrable:
|
|
190
264
|
start_date, end_date = self._get_logical_dates(context)
|
|
191
265
|
self.defer(
|
|
266
|
+
timeout=timedelta(seconds=self.timeout) if self.timeout else None,
|
|
192
267
|
trigger=CloudComposerDAGRunTrigger(
|
|
193
268
|
project_id=self.project_id,
|
|
194
269
|
region=self.region,
|
|
195
270
|
environment_id=self.environment_id,
|
|
196
271
|
composer_dag_id=self.composer_dag_id,
|
|
272
|
+
composer_dag_run_id=self.composer_dag_run_id,
|
|
197
273
|
start_date=start_date,
|
|
198
274
|
end_date=end_date,
|
|
199
275
|
allowed_states=self.allowed_states,
|
|
@@ -201,6 +277,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
201
277
|
impersonation_chain=self.impersonation_chain,
|
|
202
278
|
poll_interval=self.poll_interval,
|
|
203
279
|
composer_airflow_version=self._composer_airflow_version,
|
|
280
|
+
use_rest_api=self.use_rest_api,
|
|
204
281
|
),
|
|
205
282
|
method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
|
|
206
283
|
)
|
|
@@ -217,3 +294,429 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
|
217
294
|
gcp_conn_id=self.gcp_conn_id,
|
|
218
295
|
impersonation_chain=self.impersonation_chain,
|
|
219
296
|
)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class CloudComposerExternalTaskSensor(BaseSensorOperator):
|
|
300
|
+
"""
|
|
301
|
+
Waits for a different DAG, task group, or task to complete for a specific composer environment.
|
|
302
|
+
|
|
303
|
+
If both `composer_external_task_group_id` and `composer_external_task_id` are ``None`` (default), the sensor
|
|
304
|
+
waits for the DAG.
|
|
305
|
+
Values for `composer_external_task_group_id` and `composer_external_task_id` can't be set at the same time.
|
|
306
|
+
|
|
307
|
+
By default, the CloudComposerExternalTaskSensor will wait for the external task to
|
|
308
|
+
succeed, at which point it will also succeed. However, by default it will
|
|
309
|
+
*not* fail if the external task fails, but will continue to check the status
|
|
310
|
+
until the sensor times out (thus giving you time to retry the external task
|
|
311
|
+
without also having to clear the sensor).
|
|
312
|
+
|
|
313
|
+
By default, the CloudComposerExternalTaskSensor will not skip if the external task skips.
|
|
314
|
+
To change this, simply set ``skipped_states=[TaskInstanceState.SKIPPED]``.
|
|
315
|
+
Note that if you are monitoring multiple tasks, and one enters error state
|
|
316
|
+
and the other enters a skipped state, then the external task will react to
|
|
317
|
+
whichever one it sees first. If both happen together, then the failed state
|
|
318
|
+
takes priority.
|
|
319
|
+
|
|
320
|
+
It is possible to alter the default behavior by setting states which
|
|
321
|
+
cause the sensor to fail, e.g. by setting ``allowed_states=[DagRunState.FAILED]``
|
|
322
|
+
and ``failed_states=[DagRunState.SUCCESS]`` you will flip the behaviour to
|
|
323
|
+
get a sensor which goes green when the external task *fails* and immediately
|
|
324
|
+
goes red if the external task *succeeds*!
|
|
325
|
+
|
|
326
|
+
Note that ``soft_fail`` is respected when examining the failed_states. Thus
|
|
327
|
+
if the external task enters a failed state and ``soft_fail == True`` the
|
|
328
|
+
sensor will _skip_ rather than fail. As a result, setting ``soft_fail=True``
|
|
329
|
+
and ``failed_states=[DagRunState.SKIPPED]`` will result in the sensor
|
|
330
|
+
skipping if the external task skips. However, this is a contrived
|
|
331
|
+
example---consider using ``skipped_states`` if you would like this
|
|
332
|
+
behaviour. Using ``skipped_states`` allows the sensor to skip if the target
|
|
333
|
+
fails, but still enter failed state on timeout. Using ``soft_fail == True``
|
|
334
|
+
as above will cause the sensor to skip if the target fails, but also if it
|
|
335
|
+
times out.
|
|
336
|
+
|
|
337
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
338
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
|
339
|
+
:param environment_id: The name of the Composer environment.
|
|
340
|
+
:param composer_external_dag_id: The dag_id that contains the task you want to
|
|
341
|
+
wait for. (templated)
|
|
342
|
+
:param composer_external_task_id: The task_id that contains the task you want to
|
|
343
|
+
wait for. (templated)
|
|
344
|
+
:param composer_external_task_ids: The list of task_ids that you want to wait for. (templated)
|
|
345
|
+
If ``None`` (default value) the sensor waits for the DAG. Either
|
|
346
|
+
composer_external_task_id or composer_external_task_ids can be passed to
|
|
347
|
+
CloudComposerExternalTaskSensor, but not both.
|
|
348
|
+
:param composer_external_task_group_id: The task_group_id that contains the task you want to
|
|
349
|
+
wait for. (templated)
|
|
350
|
+
:param allowed_states: Iterable of allowed states, default is ``['success']``
|
|
351
|
+
:param skipped_states: Iterable of states to make this task mark as skipped, default is ``None``
|
|
352
|
+
:param failed_states: Iterable of failed or dis-allowed states, default is ``None``
|
|
353
|
+
:param execution_range: execution DAGs time range. Sensor checks DAGs states only for DAGs which were
|
|
354
|
+
started in this time range. For yesterday, use [positive!] datetime.timedelta(days=1).
|
|
355
|
+
For future, use [negative!] datetime.timedelta(days=-1). For specific time, use list of
|
|
356
|
+
datetimes [datetime(2024,3,22,11,0,0), datetime(2024,3,22,12,0,0)].
|
|
357
|
+
Or [datetime(2024,3,22,0,0,0)] in this case sensor will check for states from specific time in the
|
|
358
|
+
past till current time execution.
|
|
359
|
+
Default value datetime.timedelta(days=1).
|
|
360
|
+
:param gcp_conn_id: The connection ID to use when fetching connection info.
|
|
361
|
+
:param impersonation_chain: Optional service account to impersonate using short-term
|
|
362
|
+
credentials, or chained list of accounts required to get the access_token
|
|
363
|
+
of the last account in the list, which will be impersonated in the request.
|
|
364
|
+
If set as a string, the account must grant the originating account
|
|
365
|
+
the Service Account Token Creator IAM role.
|
|
366
|
+
If set as a sequence, the identities from the list must grant
|
|
367
|
+
Service Account Token Creator IAM role to the directly preceding identity, with first
|
|
368
|
+
account from the list granting this role to the originating account (templated).
|
|
369
|
+
:param poll_interval: Optional: Control the rate of the poll for the result of deferrable run.
|
|
370
|
+
:param deferrable: Run sensor in deferrable mode.
|
|
371
|
+
"""
|
|
372
|
+
|
|
373
|
+
# Fields below are rendered through Airflow's Jinja templating before execution.
template_fields = (
    "project_id",
    "region",
    "environment_id",
    "composer_external_dag_id",
    "composer_external_task_id",
    "composer_external_task_ids",
    "composer_external_task_group_id",
    "impersonation_chain",
)
|
|
383
|
+
|
|
384
|
+
def __init__(
    self,
    *,
    project_id: str,
    region: str,
    environment_id: str,
    composer_external_dag_id: str,
    composer_external_task_id: str | None = None,
    composer_external_task_ids: Collection[str] | None = None,
    composer_external_task_group_id: str | None = None,
    allowed_states: Iterable[str] | None = None,
    skipped_states: Iterable[str] | None = None,
    failed_states: Iterable[str] | None = None,
    execution_range: timedelta | list[datetime] | None = None,
    gcp_conn_id: str = "google_cloud_default",
    impersonation_chain: str | Sequence[str] | None = None,
    deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
    poll_interval: int = 10,
    **kwargs,
) -> None:
    super().__init__(**kwargs)
    self.project_id = project_id
    self.region = region
    self.environment_id = environment_id

    # Normalize the three state lists to concrete lists; default behavior is
    # "wait for success only", with no skip/fail short-circuits.
    self.allowed_states = list(allowed_states) if allowed_states else [TaskInstanceState.SUCCESS.value]
    self.skipped_states = list(skipped_states) if skipped_states else []
    self.failed_states = list(failed_states) if failed_states else []

    total_states = set(self.allowed_states + self.skipped_states + self.failed_states)

    # A state may belong to at most one of the three lists: if the union is
    # smaller than the summed lengths, some state was listed more than once.
    if len(total_states) != len(self.allowed_states) + len(self.skipped_states) + len(self.failed_states):
        raise DuplicateStateError(
            "Duplicate values provided across allowed_states, skipped_states and failed_states."
        )

    # convert [] to None
    if not composer_external_task_ids:
        composer_external_task_ids = None

    # can't set both single task id and a list of task ids
    if composer_external_task_id is not None and composer_external_task_ids is not None:
        raise ValueError(
            "Only one of `composer_external_task_id` or `composer_external_task_ids` may "
            "be provided to CloudComposerExternalTaskSensor; "
            "use `composer_external_task_id` or `composer_external_task_ids` or `composer_external_task_group_id`."
        )

    # since both not set, convert the single id to a 1-elt list - from here on, we only consider the list
    if composer_external_task_id is not None:
        composer_external_task_ids = [composer_external_task_id]

    # task ids and a task group are also mutually exclusive targets
    if composer_external_task_group_id is not None and composer_external_task_ids is not None:
        raise ValueError(
            "Only one of `composer_external_task_group_id` or `composer_external_task_ids` may "
            "be provided to CloudComposerExternalTaskSensor; "
            "use `composer_external_task_id` or `composer_external_task_ids` or `composer_external_task_group_id`."
        )

    # check the requested states are all valid states for the target type, be it dag or task
    if composer_external_task_ids or composer_external_task_group_id:
        if not total_states <= set(State.task_states):
            raise ValueError(
                "Valid values for `allowed_states`, `skipped_states` and `failed_states` "
                "when `composer_external_task_id` or `composer_external_task_ids` or `composer_external_task_group_id` "
                f"is not `None`: {State.task_states}"
            )
    elif not total_states <= set(State.dag_states):
        raise ValueError(
            "Valid values for `allowed_states`, `skipped_states` and `failed_states` "
            f"when `composer_external_task_id` and `composer_external_task_group_id` is `None`: {State.dag_states}"
        )

    self.execution_range = execution_range
    self.composer_external_dag_id = composer_external_dag_id
    self.composer_external_task_id = composer_external_task_id
    self.composer_external_task_ids = composer_external_task_ids
    self.composer_external_task_group_id = composer_external_task_group_id
    self.gcp_conn_id = gcp_conn_id
    self.impersonation_chain = impersonation_chain
    self.deferrable = deferrable
    self.poll_interval = poll_interval
|
|
466
|
+
|
|
467
|
+
def _get_logical_dates(self, context) -> tuple[datetime, datetime]:
|
|
468
|
+
logical_date = context.get("logical_date", None)
|
|
469
|
+
if logical_date is None:
|
|
470
|
+
raise RuntimeError(
|
|
471
|
+
"logical_date is None. Please make sure the sensor is not used in an asset-triggered Dag. "
|
|
472
|
+
"CloudComposerDAGRunSensor was designed to be used in time-based scheduled Dags only, "
|
|
473
|
+
"and asset-triggered Dags do not have logical_date. "
|
|
474
|
+
)
|
|
475
|
+
if isinstance(self.execution_range, timedelta):
|
|
476
|
+
if self.execution_range < timedelta(0):
|
|
477
|
+
return logical_date, logical_date - self.execution_range
|
|
478
|
+
return logical_date - self.execution_range, logical_date
|
|
479
|
+
if isinstance(self.execution_range, list) and len(self.execution_range) > 0:
|
|
480
|
+
return self.execution_range[0], self.execution_range[1] if len(
|
|
481
|
+
self.execution_range
|
|
482
|
+
) > 1 else logical_date
|
|
483
|
+
return logical_date - timedelta(1), logical_date
|
|
484
|
+
|
|
485
|
+
def poke(self, context: Context) -> bool:
    """Check the external target once; True when everything is in an allowed state.

    Failed/skipped state checks run first and raise via the dedicated handlers,
    so a False return strictly means "keep waiting".
    """
    window_start, window_end = self._get_logical_dates(context)
    as_iso = "%Y-%m-%dT%H:%M:%SZ"
    task_instances = self._get_task_instances(
        start_date=window_start.strftime(as_iso),
        end_date=window_end.strftime(as_iso),
    )

    if not task_instances:
        self.log.info("Task Instances are empty. Sensor waits for task instances...")
        return False

    def _all_in(states) -> bool:
        # True when every in-window task instance is in one of *states*.
        return self._check_task_instances_states(
            task_instances=task_instances,
            start_date=window_start,
            end_date=window_end,
            states=states,
        )

    if self.failed_states:
        self._handle_failed_states(_all_in(self.failed_states))

    if self.skipped_states:
        self._handle_skipped_states(_all_in(self.skipped_states))

    self.log.info("Sensor waits for allowed states: %s", self.allowed_states)
    return _all_in(self.allowed_states)
|
|
523
|
+
|
|
524
|
+
def _get_task_instances(self, start_date: str, end_date: str) -> list[dict]:
    """Get the list of task instances."""
    try:
        environment = self.hook.get_environment(
            project_id=self.project_id,
            region=self.region,
            environment_id=self.environment_id,
            timeout=self.timeout,
        )
    except NotFound as not_found_err:
        self.log.info("The Composer environment %s does not exist.", self.environment_id)
        raise AirflowException(not_found_err)
    composer_airflow_uri = environment.config.airflow_uri

    self.log.info(
        "Pulling the DAG '%s' task instances from the '%s' environment...",
        self.composer_external_dag_id,
        self.environment_id,
    )
    # Airflow 3 renamed the REST API date filter from execution_date to logical_date.
    date_field = "execution_date" if self._composer_airflow_version < 3 else "logical_date"
    task_instances_response = self.hook.get_task_instances(
        composer_airflow_uri=composer_airflow_uri,
        composer_dag_id=self.composer_external_dag_id,
        query_parameters={
            f"{date_field}_gte": start_date,
            f"{date_field}_lte": end_date,
        },
        timeout=self.timeout,
    )
    candidates = task_instances_response["task_instances"]

    if self.composer_external_task_ids:
        # Keep only the explicitly requested task ids.
        candidates = [ti for ti in candidates if ti["task_id"] in self.composer_external_task_ids]
    elif self.composer_external_task_group_id:
        # Task-group membership is encoded in the dotted task_id path.
        group_id = self.composer_external_task_group_id
        candidates = [ti for ti in candidates if group_id in ti["task_id"].split(".")]

    return candidates
|
|
570
|
+
|
|
571
|
+
def _check_task_instances_states(
|
|
572
|
+
self,
|
|
573
|
+
task_instances: list[dict],
|
|
574
|
+
start_date: datetime,
|
|
575
|
+
end_date: datetime,
|
|
576
|
+
states: Iterable[str],
|
|
577
|
+
) -> bool:
|
|
578
|
+
for task_instance in task_instances:
|
|
579
|
+
if (
|
|
580
|
+
start_date.timestamp()
|
|
581
|
+
< parser.parse(
|
|
582
|
+
task_instance["execution_date" if self._composer_airflow_version < 3 else "logical_date"]
|
|
583
|
+
).timestamp()
|
|
584
|
+
< end_date.timestamp()
|
|
585
|
+
) and task_instance["state"] not in states:
|
|
586
|
+
return False
|
|
587
|
+
return True
|
|
588
|
+
|
|
589
|
+
def _get_composer_airflow_version(self) -> int:
    """Return Composer Airflow version."""
    env = self.hook.get_environment(
        project_id=self.project_id,
        region=self.region,
        environment_id=self.environment_id,
    )
    # The major Airflow version sits right after "airflow-" in the image version
    # string of the environment's software config.
    image_version = Environment.to_dict(env)["config"]["software_config"]["image_version"]
    return int(image_version.split("airflow-")[1].split(".")[0])
|
|
599
|
+
|
|
600
|
+
def _handle_failed_states(self, failed_status: bool) -> None:
|
|
601
|
+
"""Handle failed states and raise appropriate exceptions."""
|
|
602
|
+
if failed_status:
|
|
603
|
+
if self.composer_external_task_ids:
|
|
604
|
+
if self.soft_fail:
|
|
605
|
+
raise AirflowSkipException(
|
|
606
|
+
f"Some of the external tasks '{self.composer_external_task_ids}' "
|
|
607
|
+
f"in DAG '{self.composer_external_dag_id}' failed. Skipping due to soft_fail."
|
|
608
|
+
)
|
|
609
|
+
raise ExternalTaskFailedError(
|
|
610
|
+
f"Some of the external tasks '{self.composer_external_task_ids}' "
|
|
611
|
+
f"in DAG '{self.composer_external_dag_id}' failed."
|
|
612
|
+
)
|
|
613
|
+
if self.composer_external_task_group_id:
|
|
614
|
+
if self.soft_fail:
|
|
615
|
+
raise AirflowSkipException(
|
|
616
|
+
f"The external task_group '{self.composer_external_task_group_id}' "
|
|
617
|
+
f"in DAG '{self.composer_external_dag_id}' failed. Skipping due to soft_fail."
|
|
618
|
+
)
|
|
619
|
+
raise ExternalTaskGroupFailedError(
|
|
620
|
+
f"The external task_group '{self.composer_external_task_group_id}' "
|
|
621
|
+
f"in DAG '{self.composer_external_dag_id}' failed."
|
|
622
|
+
)
|
|
623
|
+
if self.soft_fail:
|
|
624
|
+
raise AirflowSkipException(
|
|
625
|
+
f"The external DAG '{self.composer_external_dag_id}' failed. Skipping due to soft_fail."
|
|
626
|
+
)
|
|
627
|
+
raise ExternalDagFailedError(f"The external DAG '{self.composer_external_dag_id}' failed.")
|
|
628
|
+
|
|
629
|
+
def _handle_skipped_states(self, skipped_status: bool) -> None:
|
|
630
|
+
"""Handle skipped states and raise appropriate exceptions."""
|
|
631
|
+
if skipped_status:
|
|
632
|
+
if self.composer_external_task_ids:
|
|
633
|
+
raise AirflowSkipException(
|
|
634
|
+
f"Some of the external tasks '{self.composer_external_task_ids}' "
|
|
635
|
+
f"in DAG '{self.composer_external_dag_id}' reached a state in our states-to-skip-on list. Skipping."
|
|
636
|
+
)
|
|
637
|
+
if self.composer_external_task_group_id:
|
|
638
|
+
raise AirflowSkipException(
|
|
639
|
+
f"The external task_group '{self.composer_external_task_group_id}' "
|
|
640
|
+
f"in DAG '{self.composer_external_dag_id}' reached a state in our states-to-skip-on list. Skipping."
|
|
641
|
+
)
|
|
642
|
+
raise AirflowSkipException(
|
|
643
|
+
f"The external DAG '{self.composer_external_dag_id}' reached a state in our states-to-skip-on list. "
|
|
644
|
+
"Skipping."
|
|
645
|
+
)
|
|
646
|
+
|
|
647
|
+
def execute(self, context: Context) -> None:
    """Validate the target spec, log what is being poked, then poll (or defer)."""
    # Resolve the target environment's major Airflow version first: it decides
    # which REST query-parameter names are used (execution_date vs logical_date).
    self._composer_airflow_version = self._get_composer_airflow_version()

    if self.composer_external_task_ids and len(self.composer_external_task_ids) > len(
        set(self.composer_external_task_ids)
    ):
        raise ValueError("Duplicate task_ids passed in composer_external_task_ids parameter")

    if self.composer_external_task_ids:
        self.log.info(
            "Poking for tasks '%s' in dag '%s' on Composer environment '%s' ... ",
            self.composer_external_task_ids,
            self.composer_external_dag_id,
            self.environment_id,
        )

    if self.composer_external_task_group_id:
        self.log.info(
            "Poking for task_group '%s' in dag '%s' on Composer environment '%s' ... ",
            self.composer_external_task_group_id,
            self.composer_external_dag_id,
            self.environment_id,
        )

    # Neither task ids nor a task group: the whole external DAG is the target.
    if (
        self.composer_external_dag_id
        and not self.composer_external_task_group_id
        and not self.composer_external_task_ids
    ):
        self.log.info(
            "Poking for DAG '%s' on Composer environment '%s' ... ",
            self.composer_external_dag_id,
            self.environment_id,
        )

    if self.deferrable:
        # Hand polling off to the triggerer. NOTE(review): self.defer() is
        # expected to suspend the task here, so the super().execute() below
        # should only run in non-deferrable mode — confirm against BaseSensorOperator.
        start_date, end_date = self._get_logical_dates(context)
        self.defer(
            timeout=timedelta(seconds=self.timeout) if self.timeout else None,
            trigger=CloudComposerExternalTaskTrigger(
                project_id=self.project_id,
                region=self.region,
                environment_id=self.environment_id,
                composer_external_dag_id=self.composer_external_dag_id,
                composer_external_task_ids=self.composer_external_task_ids,
                composer_external_task_group_id=self.composer_external_task_group_id,
                start_date=start_date,
                end_date=end_date,
                allowed_states=self.allowed_states,
                skipped_states=self.skipped_states,
                failed_states=self.failed_states,
                gcp_conn_id=self.gcp_conn_id,
                impersonation_chain=self.impersonation_chain,
                poll_interval=self.poll_interval,
                composer_airflow_version=self._composer_airflow_version,
            ),
            method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
        )
    super().execute(context)
|
|
706
|
+
|
|
707
|
+
def execute_complete(self, context: Context, event: dict):
    """Resume after the trigger fires; translate the event status into the outcome."""
    if event:
        status = event["status"]
        if status == "error":
            raise AirflowException(event["message"])
        if status == "failed":
            self._handle_failed_states(True)
        elif status == "skipped":
            self._handle_skipped_states(True)

    # Reached only when no handler above raised.
    self.log.info("External tasks for DAG '%s' has executed successfully.", self.composer_external_dag_id)
|
|
716
|
+
|
|
717
|
+
@cached_property
def hook(self) -> CloudComposerHook:
    # Lazily created and memoized CloudComposerHook built from the sensor's
    # connection settings.
    return CloudComposerHook(
        gcp_conn_id=self.gcp_conn_id,
        impersonation_chain=self.impersonation_chain,
    )
|
|
@@ -23,7 +23,7 @@ from collections.abc import Sequence
|
|
|
23
23
|
from typing import TYPE_CHECKING, Any
|
|
24
24
|
|
|
25
25
|
from airflow.configuration import conf
|
|
26
|
-
from airflow.exceptions import AirflowException
|
|
26
|
+
from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator
|
|
27
27
|
from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
|
|
28
28
|
COUNTERS,
|
|
29
29
|
METADATA,
|
|
@@ -35,10 +35,9 @@ from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service impo
|
|
|
35
35
|
CloudStorageTransferServiceCheckJobStatusTrigger,
|
|
36
36
|
)
|
|
37
37
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
38
|
-
from airflow.sensors.base import BaseSensorOperator
|
|
39
38
|
|
|
40
39
|
if TYPE_CHECKING:
|
|
41
|
-
from airflow.utils.context import Context
|
|
40
|
+
from airflow.providers.common.compat.sdk import Context
|
|
42
41
|
|
|
43
42
|
|
|
44
43
|
class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator):
|