apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py

@@ -30,6 +30,7 @@ from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts

 if TYPE_CHECKING:

@@ -104,7 +105,7 @@ class BigQueryToGCSOperator(BaseOperator):
         *,
         source_project_dataset_table: str,
         destination_cloud_storage_uris: list[str],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         compression: str = "NONE",
         export_format: str = "CSV",
         field_delimiter: str = ",",
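The practical effect of the PROVIDE_PROJECT_ID change above is that project_id no longer needs to be passed explicitly. A minimal usage sketch, with hypothetical task, table, and bucket names (not taken from the package):

from airflow.providers.google.cloud.transfers.bigquery_to_gcs import BigQueryToGCSOperator

# project_id is omitted; with the new default it is resolved from the GCP
# connection via the PROVIDE_PROJECT_ID sentinel instead of being required.
export_task = BigQueryToGCSOperator(
    task_id="bq_table_to_gcs",
    source_project_dataset_table="my-project.my_dataset.my_table",
    destination_cloud_storage_uris=["gs://my-bucket/exports/my_table-*.csv"],
    export_format="CSV",
)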
airflow/providers/google/cloud/transfers/bigquery_to_mssql.py

@@ -91,7 +91,7 @@ class BigQueryToMsSqlOperator(BigQueryToSqlBaseOperator):
         self.source_project_dataset_table = source_project_dataset_table

     def get_sql_hook(self) -> MsSqlHook:
-        return MsSqlHook(schema=self.database, mysql_conn_id=self.mssql_conn_id)
+        return MsSqlHook(schema=self.database, mssql_conn_id=self.mssql_conn_id)

     def persist_links(self, context: Context) -> None:
         project_id, dataset_id, table_id = self.source_project_dataset_table.split(".")
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py

@@ -42,6 +42,7 @@ from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts

 if TYPE_CHECKING:

@@ -229,7 +230,7 @@ class GCSToBigQueryOperator(BaseOperator):
         job_id: str | None = None,
         force_rerun: bool = True,
         reattach_states: set[str] | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)

@@ -749,7 +750,6 @@ class GCSToBigQueryOperator(BaseOperator):
         )
         from openlineage.client.run import Dataset

-        from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
         from airflow.providers.google.cloud.utils.openlineage import (
             get_facets_from_bq_table,
             get_identity_column_lineage_facet,

@@ -766,8 +766,7 @@ class GCSToBigQueryOperator(BaseOperator):
             "schema": output_dataset_facets["schema"],
         }
         input_datasets = []
-        for uri in sorted(self.source_uris):
-            bucket, blob = _parse_gcs_url(uri)
+        for blob in sorted(self.source_objects):
             additional_facets = {}

             if "*" in blob:

@@ -777,7 +776,7 @@ class GCSToBigQueryOperator(BaseOperator):
                     "symlink": SymlinksDatasetFacet(
                         identifiers=[
                             SymlinksDatasetFacetIdentifiers(
-                                namespace=f"gs://{bucket}", name=blob, type="file"
+                                namespace=f"gs://{self.bucket}", name=blob, type="file"
                             )
                         ]
                     ),

@@ -788,7 +787,7 @@ class GCSToBigQueryOperator(BaseOperator):
                 blob = "/"

             dataset = Dataset(
-                namespace=f"gs://{bucket}",
+                namespace=f"gs://{self.bucket}",
                 name=blob,
                 facets=merge_dicts(input_dataset_facets, additional_facets),
             )
airflow/providers/google/cloud/transfers/gcs_to_gcs.py

@@ -234,8 +234,6 @@ class GCSToGCSOperator(BaseOperator):
         self.source_object_required = source_object_required
         self.exact_match = exact_match
         self.match_glob = match_glob
-        self.resolved_source_objects: set[str] = set()
-        self.resolved_target_objects: set[str] = set()

     def execute(self, context: Context):
         hook = GCSHook(

@@ -540,13 +538,6 @@ class GCSToGCSOperator(BaseOperator):
             self.destination_bucket,
             destination_object,
         )
-
-        self.resolved_source_objects.add(source_object)
-        if not destination_object:
-            self.resolved_target_objects.add(source_object)
-        else:
-            self.resolved_target_objects.add(destination_object)
-
         hook.rewrite(self.source_bucket, source_object, self.destination_bucket, destination_object)

         if self.move_object:

@@ -559,17 +550,36 @@ class GCSToGCSOperator(BaseOperator):
         This means we won't have to normalize self.source_object and self.source_objects,
         destination bucket and so on.
         """
+        from pathlib import Path
+
         from openlineage.client.run import Dataset

         from airflow.providers.openlineage.extractors import OperatorLineage

+        def _process_prefix(pref):
+            if WILDCARD in pref:
+                pref = pref.split(WILDCARD)[0]
+            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
+            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
+                pref = Path(pref).parent.as_posix()
+            return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]  # Adjust root path
+
+        inputs = []
+        for prefix in self.source_objects:
+            result = _process_prefix(prefix)
+            inputs.extend(result)
+
+        if self.destination_object is None:
+            outputs = inputs.copy()
+        else:
+            outputs = _process_prefix(self.destination_object)
+
         return OperatorLineage(
             inputs=[
-                Dataset(namespace=f"gs://{self.source_bucket}", name=source)
-                for source in sorted(self.resolved_source_objects)
+                Dataset(namespace=f"gs://{self.source_bucket}", name=source) for source in sorted(set(inputs))
             ],
             outputs=[
                 Dataset(namespace=f"gs://{self.destination_bucket}", name=target)
-                for target in sorted(self.resolved_target_objects)
+                for target in sorted(set(outputs))
             ],
         )
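To make the new prefix normalization concrete, here is a standalone, illustrative copy of the helper from the hunk above with a few worked inputs; it assumes WILDCARD is "*" as in the provider's gcs_to_gcs module:

from pathlib import Path

WILDCARD = "*"  # assumed to match the module-level constant in gcs_to_gcs.py

def _process_prefix(pref: str) -> list[str]:
    # Strip everything from the first wildcard onward.
    if WILDCARD in pref:
        pref = pref.split(WILDCARD)[0]
    # Use the parent directory if the last path element is neither a file
    # (no dot in its name) nor an explicit directory (trailing slash).
    if "." not in pref.split("/")[-1] and not pref.endswith("/"):
        pref = Path(pref).parent.as_posix()
    return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]

assert _process_prefix("data/*.csv") == ["data"]              # wildcard stripped, directory kept
assert _process_prefix("data/file.csv") == ["data/file.csv"]  # concrete object kept as-is
assert _process_prefix("data/") == ["data"]                   # trailing slash removed
assert _process_prefix("") == ["/"]                           # empty prefix maps to bucket root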
airflow/providers/google/cloud/triggers/bigquery.py

@@ -17,13 +17,20 @@
 from __future__ import annotations

 import asyncio
-from typing import Any, AsyncIterator, Sequence, SupportsAbs
+from typing import TYPE_CHECKING, Any, AsyncIterator, Sequence, SupportsAbs

 from aiohttp import ClientSession
 from aiohttp.client_exceptions import ClientResponseError

+from airflow.exceptions import AirflowException
+from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook, BigQueryTableAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from airflow.utils.session import provide_session
+from airflow.utils.state import TaskInstanceState
+
+if TYPE_CHECKING:
+    from sqlalchemy.orm.session import Session


 class BigQueryInsertJobTrigger(BaseTrigger):

@@ -51,12 +58,13 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self,
         conn_id: str,
         job_id: str | None,
-        project_id: str | None,
+        project_id: str,
         location: str | None,
         dataset_id: str | None = None,
         table_id: str | None = None,
         poll_interval: float = 4.0,
         impersonation_chain: str | Sequence[str] | None = None,
+        cancel_on_kill: bool = True,
     ):
         super().__init__()
         self.log.info("Using the connection %s .", conn_id)

@@ -69,6 +77,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self.table_id = table_id
         self.poll_interval = poll_interval
         self.impersonation_chain = impersonation_chain
+        self.cancel_on_kill = cancel_on_kill

     def serialize(self) -> tuple[str, dict[str, Any]]:
         """Serialize BigQueryInsertJobTrigger arguments and classpath."""

@@ -83,9 +92,40 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 "table_id": self.table_id,
                 "poll_interval": self.poll_interval,
                 "impersonation_chain": self.impersonation_chain,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )

+    @provide_session
+    def get_task_instance(self, session: Session) -> TaskInstance:
+        query = session.query(TaskInstance).filter(
+            TaskInstance.dag_id == self.task_instance.dag_id,
+            TaskInstance.task_id == self.task_instance.task_id,
+            TaskInstance.run_id == self.task_instance.run_id,
+            TaskInstance.map_index == self.task_instance.map_index,
+        )
+        task_instance = query.one_or_none()
+        if task_instance is None:
+            raise AirflowException(
+                "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                self.task_instance.dag_id,
+                self.task_instance.task_id,
+                self.task_instance.run_id,
+                self.task_instance.map_index,
+            )
+        return task_instance
+
+    def safe_to_cancel(self) -> bool:
+        """
+        Whether it is safe to cancel the external job which is being executed by this trigger.
+
+        This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
+        Because in those cases, we should NOT cancel the external job.
+        """
+        # Database query is needed to get the latest state of the task instance.
+        task_instance = self.get_task_instance()  # type: ignore[call-arg]
+        return task_instance.state != TaskInstanceState.DEFERRED
+
     async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()

@@ -113,6 +153,28 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                         self.poll_interval,
                     )
                     await asyncio.sleep(self.poll_interval)
+        except asyncio.CancelledError:
+            if self.job_id and self.cancel_on_kill and self.safe_to_cancel():
+                self.log.info(
+                    "The job is safe to cancel the as airflow TaskInstance is not in deferred state."
+                )
+                self.log.info(
+                    "Cancelling job. Project ID: %s, Location: %s, Job ID: %s",
+                    self.project_id,
+                    self.location,
+                    self.job_id,
+                )
+                await hook.cancel_job(  # type: ignore[union-attr]
+                    job_id=self.job_id, project_id=self.project_id, location=self.location
+                )
+            else:
+                self.log.info(
+                    "Trigger may have shutdown. Skipping to cancel job because the airflow "
+                    "task is not cancelled yet: Project ID: %s, Location:%s, Job ID:%s",
+                    self.project_id,
+                    self.location,
+                    self.job_id,
+                )
         except Exception as e:
             self.log.exception("Exception occurred while checking for query completion")
             yield TriggerEvent({"status": "error", "message": str(e)})

@@ -137,6 +199,7 @@ class BigQueryCheckTrigger(BigQueryInsertJobTrigger):
                 "table_id": self.table_id,
                 "poll_interval": self.poll_interval,
                 "impersonation_chain": self.impersonation_chain,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )

@@ -194,9 +257,10 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
         (default: False).
     """

-    def __init__(self, as_dict: bool = False, **kwargs):
+    def __init__(self, as_dict: bool = False, selected_fields: str | None = None, **kwargs):
         super().__init__(**kwargs)
         self.as_dict = as_dict
+        self.selected_fields = selected_fields

     def serialize(self) -> tuple[str, dict[str, Any]]:
         """Serialize BigQueryInsertJobTrigger arguments and classpath."""

@@ -212,6 +276,7 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
                 "poll_interval": self.poll_interval,
                 "impersonation_chain": self.impersonation_chain,
                 "as_dict": self.as_dict,
+                "selected_fields": self.selected_fields,
             },
         )

@@ -224,7 +289,11 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
                 job_status = await hook.get_job_status(job_id=self.job_id, project_id=self.project_id)
                 if job_status["status"] == "success":
                     query_results = await hook.get_job_output(job_id=self.job_id, project_id=self.project_id)
-                    records = hook.get_records(query_results=query_results, as_dict=self.as_dict)
+                    records = hook.get_records(
+                        query_results=query_results,
+                        as_dict=self.as_dict,
+                        selected_fields=self.selected_fields,
+                    )
                     self.log.debug("Response from hook: %s", job_status["status"])
                     yield TriggerEvent(
                         {

@@ -282,7 +351,7 @@ class BigQueryIntervalCheckTrigger(BigQueryInsertJobTrigger):
         conn_id: str,
         first_job_id: str,
         second_job_id: str,
-        project_id: str | None,
+        project_id: str,
         table: str,
         metrics_thresholds: dict[str, int],
         location: str | None = None,

@@ -443,7 +512,7 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
         sql: str,
         pass_value: int | float | str,
         job_id: str | None,
-        project_id: str | None,
+        project_id: str,
         tolerance: Any = None,
         dataset_id: str | None = None,
         table_id: str | None = None,
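As a hedged illustration of the new trigger arguments above (cancel_on_kill on BigQueryInsertJobTrigger, selected_fields on BigQueryGetDataTrigger), a construction sketch; connection, project, and job identifiers are made-up placeholders:

from airflow.providers.google.cloud.triggers.bigquery import (
    BigQueryGetDataTrigger,
    BigQueryInsertJobTrigger,
)

insert_trigger = BigQueryInsertJobTrigger(
    conn_id="google_cloud_default",
    job_id="airflow_1234567890_abcdef",  # hypothetical BigQuery job id
    project_id="my-project",
    location="US",
    poll_interval=4.0,
    cancel_on_kill=True,  # new: cancel the BigQuery job if the deferred task is killed and it is safe to do so
)

get_data_trigger = BigQueryGetDataTrigger(
    conn_id="google_cloud_default",
    job_id="airflow_1234567890_abcdef",
    project_id="my-project",
    location="US",
    as_dict=True,
    selected_fields="name,total",  # new: restrict which columns are fetched from the job output
)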
airflow/providers/google/cloud/triggers/cloud_composer.py

@@ -78,3 +78,71 @@ class CloudComposerExecutionTrigger(BaseTrigger):
                     "operation_done": operation.done,
                 }
             )
+
+
+class CloudComposerAirflowCLICommandTrigger(BaseTrigger):
+    """The trigger wait for the Airflow CLI command result."""
+
+    def __init__(
+        self,
+        project_id: str,
+        region: str,
+        environment_id: str,
+        execution_cmd_info: dict,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        poll_interval: int = 10,
+    ):
+        super().__init__()
+        self.project_id = project_id
+        self.region = region
+        self.environment_id = environment_id
+        self.execution_cmd_info = execution_cmd_info
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.poll_interval = poll_interval
+
+        self.gcp_hook = CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerAirflowCLICommandTrigger",
+            {
+                "project_id": self.project_id,
+                "region": self.region,
+                "environment_id": self.environment_id,
+                "execution_cmd_info": self.execution_cmd_info,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "poll_interval": self.poll_interval,
+            },
+        )
+
+    async def run(self):
+        try:
+            result = await self.gcp_hook.wait_command_execution_result(
+                project_id=self.project_id,
+                region=self.region,
+                environment_id=self.environment_id,
+                execution_cmd_info=self.execution_cmd_info,
+                poll_interval=self.poll_interval,
+            )
+        except AirflowException as ex:
+            yield TriggerEvent(
+                {
+                    "status": "error",
+                    "message": str(ex),
+                }
+            )
+            return
+
+        yield TriggerEvent(
+            {
+                "status": "success",
+                "result": result,
+            }
+        )
+        return
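A hedged construction sketch for the new CLI-command trigger above; project, region, and environment values are placeholders, and execution_cmd_info is assumed to be the dict the operator received when it started the Airflow CLI command via the Composer API:

from airflow.providers.google.cloud.triggers.cloud_composer import (
    CloudComposerAirflowCLICommandTrigger,
)

# Normally populated from the ExecuteAirflowCommand response; left empty here as a placeholder.
execution_cmd_info: dict = {}

trigger = CloudComposerAirflowCLICommandTrigger(
    project_id="my-project",           # illustrative
    region="us-central1",              # illustrative
    environment_id="my-composer-env",  # illustrative
    execution_cmd_info=execution_cmd_info,
    poll_interval=10,
)

# serialize() returns the classpath plus the kwargs above, which the triggerer
# uses to re-create the trigger in its own process.
classpath, kwargs = trigger.serialize()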
airflow/providers/google/cloud/triggers/cloud_sql.py

@@ -23,6 +23,7 @@ import asyncio
 from typing import Sequence

 from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLAsyncHook, CloudSqlOperationStatus
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -36,7 +37,7 @@ class CloudSQLExportTrigger(BaseTrigger):
     def __init__(
         self,
         operation_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         poke_interval: int = 20,
airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py

@@ -27,6 +27,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     CloudDataTransferServiceAsyncHook,
 )
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -43,7 +44,7 @@ class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
     def __init__(
         self,
         job_names: list[str],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         poll_interval: int = 10,
         gcp_conn_id: str = "google_cloud_default",
     ) -> None: