apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0rc1__py3-none-any.whl
This diff compares the contents of two package versions that have been publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/cloud/hooks/automl.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +64 -33
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +13 -12
- airflow/providers/google/cloud/operators/bigquery.py +36 -22
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +19 -16
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +232 -12
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +10 -9
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +14 -3
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +110 -26
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +3 -2
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA +8 -8
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/RECORD +88 -89
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py

@@ -30,6 +30,7 @@ from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts

 if TYPE_CHECKING:
@@ -104,7 +105,7 @@ class BigQueryToGCSOperator(BaseOperator):
         *,
         source_project_dataset_table: str,
         destination_cloud_storage_uris: list[str],
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         compression: str = "NONE",
         export_format: str = "CSV",
         field_delimiter: str = ",",
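For context, a minimal usage sketch (task id, table, and bucket names below are made up, not taken from the diff): with project_id now defaulting to PROVIDE_PROJECT_ID, the operator can be instantiated without an explicit project and the hook falls back to the project configured on the GCP connection.

from airflow.providers.google.cloud.transfers.bigquery_to_gcs import BigQueryToGCSOperator

export_table = BigQueryToGCSOperator(
    task_id="export_table_to_gcs",                                   # hypothetical task id
    source_project_dataset_table="my_dataset.my_table",              # hypothetical table
    destination_cloud_storage_uris=["gs://my-bucket/export/*.csv"],  # hypothetical bucket
    # project_id omitted: PROVIDE_PROJECT_ID lets the hook fall back to the
    # default project of the configured Google Cloud connection
)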
airflow/providers/google/cloud/transfers/bigquery_to_mssql.py

@@ -91,7 +91,7 @@ class BigQueryToMsSqlOperator(BigQueryToSqlBaseOperator):
         self.source_project_dataset_table = source_project_dataset_table

     def get_sql_hook(self) -> MsSqlHook:
-        return MsSqlHook(schema=self.database,
+        return MsSqlHook(schema=self.database, mssql_conn_id=self.mssql_conn_id)

     def persist_links(self, context: Context) -> None:
         project_id, dataset_id, table_id = self.source_project_dataset_table.split(".")
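A minimal sketch of the fixed behaviour (connection id and table names are assumptions): the mssql_conn_id configured on the operator is now forwarded to MsSqlHook by get_sql_hook().

from airflow.providers.google.cloud.transfers.bigquery_to_mssql import BigQueryToMsSqlOperator

bq_to_mssql = BigQueryToMsSqlOperator(
    task_id="bq_to_mssql",                                           # hypothetical task id
    source_project_dataset_table="my-project.my_dataset.my_table",   # hypothetical source table
    target_table_name="my_mssql_table",                              # hypothetical target table
    mssql_conn_id="mssql_default",  # now passed through to MsSqlHook when the hook is built
)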
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py

@@ -42,6 +42,7 @@ from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQuery
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts

 if TYPE_CHECKING:
@@ -229,7 +230,7 @@ class GCSToBigQueryOperator(BaseOperator):
         job_id: str | None = None,
         force_rerun: bool = True,
         reattach_states: set[str] | None = None,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -749,7 +750,6 @@ class GCSToBigQueryOperator(BaseOperator):
         )
         from openlineage.client.run import Dataset

-        from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
         from airflow.providers.google.cloud.utils.openlineage import (
             get_facets_from_bq_table,
             get_identity_column_lineage_facet,
@@ -766,8 +766,7 @@ class GCSToBigQueryOperator(BaseOperator):
             "schema": output_dataset_facets["schema"],
         }
         input_datasets = []
-        for
-            bucket, blob = _parse_gcs_url(uri)
+        for blob in sorted(self.source_objects):
            additional_facets = {}

            if "*" in blob:
@@ -777,7 +776,7 @@ class GCSToBigQueryOperator(BaseOperator):
                    "symlink": SymlinksDatasetFacet(
                        identifiers=[
                            SymlinksDatasetFacetIdentifiers(
-                                namespace=f"gs://{bucket}", name=blob, type="file"
+                                namespace=f"gs://{self.bucket}", name=blob, type="file"
                            )
                        ]
                    ),
@@ -788,7 +787,7 @@ class GCSToBigQueryOperator(BaseOperator):
                blob = "/"

            dataset = Dataset(
-                namespace=f"gs://{bucket}",
+                namespace=f"gs://{self.bucket}",
                name=blob,
                facets=merge_dicts(input_dataset_facets, additional_facets),
            )
airflow/providers/google/cloud/transfers/gcs_to_gcs.py

@@ -234,8 +234,6 @@ class GCSToGCSOperator(BaseOperator):
         self.source_object_required = source_object_required
         self.exact_match = exact_match
         self.match_glob = match_glob
-        self.resolved_source_objects: set[str] = set()
-        self.resolved_target_objects: set[str] = set()

     def execute(self, context: Context):
         hook = GCSHook(
@@ -540,13 +538,6 @@ class GCSToGCSOperator(BaseOperator):
             self.destination_bucket,
             destination_object,
         )
-
-        self.resolved_source_objects.add(source_object)
-        if not destination_object:
-            self.resolved_target_objects.add(source_object)
-        else:
-            self.resolved_target_objects.add(destination_object)
-
         hook.rewrite(self.source_bucket, source_object, self.destination_bucket, destination_object)

         if self.move_object:
@@ -559,17 +550,36 @@ class GCSToGCSOperator(BaseOperator):
         This means we won't have to normalize self.source_object and self.source_objects,
         destination bucket and so on.
         """
+        from pathlib import Path
+
         from openlineage.client.run import Dataset

         from airflow.providers.openlineage.extractors import OperatorLineage

+        def _process_prefix(pref):
+            if WILDCARD in pref:
+                pref = pref.split(WILDCARD)[0]
+            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
+            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
+                pref = Path(pref).parent.as_posix()
+            return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]  # Adjust root path
+
+        inputs = []
+        for prefix in self.source_objects:
+            result = _process_prefix(prefix)
+            inputs.extend(result)
+
+        if self.destination_object is None:
+            outputs = inputs.copy()
+        else:
+            outputs = _process_prefix(self.destination_object)
+
         return OperatorLineage(
             inputs=[
-                Dataset(namespace=f"gs://{self.source_bucket}", name=source)
-                for source in sorted(self.resolved_source_objects)
+                Dataset(namespace=f"gs://{self.source_bucket}", name=source) for source in sorted(set(inputs))
             ],
             outputs=[
                 Dataset(namespace=f"gs://{self.destination_bucket}", name=target)
-                for target in sorted(
+                for target in sorted(set(outputs))
             ],
         )
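To make the new lineage prefix handling concrete, here is a standalone sketch of the helper added above, with a few assumed example prefixes and the values it should return (WILDCARD is assumed to be the module-level "*" constant used in gcs_to_gcs.py):

from pathlib import Path

WILDCARD = "*"  # assumption: mirrors the constant in gcs_to_gcs.py


def _process_prefix(pref: str) -> list[str]:
    # Cut a wildcard prefix at the first wildcard character.
    if WILDCARD in pref:
        pref = pref.split(WILDCARD)[0]
    # Use the parent folder when the last segment is neither a file (no dot) nor a dir (no trailing slash).
    if "." not in pref.split("/")[-1] and not pref.endswith("/"):
        pref = Path(pref).parent.as_posix()
    return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]


# Assumed example prefixes, shown only to illustrate the normalization:
assert _process_prefix("data/year=2024/*.parquet") == ["data/year=2024"]
assert _process_prefix("data/archive/") == ["data/archive"]
assert _process_prefix("data/archive/file.csv") == ["data/archive/file.csv"]
assert _process_prefix("*") == ["/"]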
airflow/providers/google/cloud/triggers/bigquery.py

@@ -51,12 +51,13 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self,
         conn_id: str,
         job_id: str | None,
-        project_id: str
+        project_id: str,
         location: str | None,
         dataset_id: str | None = None,
         table_id: str | None = None,
         poll_interval: float = 4.0,
         impersonation_chain: str | Sequence[str] | None = None,
+        cancel_on_kill: bool = True,
     ):
         super().__init__()
         self.log.info("Using the connection %s .", conn_id)
@@ -69,6 +70,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self.table_id = table_id
         self.poll_interval = poll_interval
         self.impersonation_chain = impersonation_chain
+        self.cancel_on_kill = cancel_on_kill

     def serialize(self) -> tuple[str, dict[str, Any]]:
         """Serialize BigQueryInsertJobTrigger arguments and classpath."""
@@ -83,6 +85,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 "table_id": self.table_id,
                 "poll_interval": self.poll_interval,
                 "impersonation_chain": self.impersonation_chain,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )

@@ -113,6 +116,14 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                         self.poll_interval,
                     )
                     await asyncio.sleep(self.poll_interval)
+        except asyncio.CancelledError:
+            self.log.info("Task was killed.")
+            if self.job_id and self.cancel_on_kill:
+                await hook.cancel_job(  # type: ignore[union-attr]
+                    job_id=self.job_id, project_id=self.project_id, location=self.location
+                )
+            else:
+                self.log.info("Skipping to cancel job: %s:%s.%s", self.project_id, self.location, self.job_id)
         except Exception as e:
             self.log.exception("Exception occurred while checking for query completion")
             yield TriggerEvent({"status": "error", "message": str(e)})
@@ -282,7 +293,7 @@ class BigQueryIntervalCheckTrigger(BigQueryInsertJobTrigger):
         conn_id: str,
         first_job_id: str,
         second_job_id: str,
-        project_id: str
+        project_id: str,
         table: str,
         metrics_thresholds: dict[str, int],
         location: str | None = None,
@@ -443,7 +454,7 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
         sql: str,
         pass_value: int | float | str,
         job_id: str | None,
-        project_id: str
+        project_id: str,
         tolerance: Any = None,
         dataset_id: str | None = None,
         table_id: str | None = None,
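For context, a hedged usage sketch (query and task id are made up, and the assumption here is that the deferrable BigQuery operators forward cancel_on_kill to this trigger): when the flag is set, killing a deferred task also cancels the in-flight BigQuery job instead of leaving it running; setting it to False keeps the old behaviour.

from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

run_query = BigQueryInsertJobOperator(
    task_id="run_query",  # hypothetical task id
    configuration={
        "query": {
            "query": "SELECT 1",  # hypothetical query
            "useLegacySql": False,
        }
    },
    deferrable=True,      # polling is delegated to BigQueryInsertJobTrigger
    cancel_on_kill=True,  # default; in this release the trigger also honours it while deferred
)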
airflow/providers/google/cloud/triggers/cloud_composer.py

@@ -78,3 +78,71 @@ class CloudComposerExecutionTrigger(BaseTrigger):
                 "operation_done": operation.done,
             }
         )
+
+
+class CloudComposerAirflowCLICommandTrigger(BaseTrigger):
+    """The trigger wait for the Airflow CLI command result."""
+
+    def __init__(
+        self,
+        project_id: str,
+        region: str,
+        environment_id: str,
+        execution_cmd_info: dict,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        poll_interval: int = 10,
+    ):
+        super().__init__()
+        self.project_id = project_id
+        self.region = region
+        self.environment_id = environment_id
+        self.execution_cmd_info = execution_cmd_info
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.poll_interval = poll_interval
+
+        self.gcp_hook = CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerAirflowCLICommandTrigger",
+            {
+                "project_id": self.project_id,
+                "region": self.region,
+                "environment_id": self.environment_id,
+                "execution_cmd_info": self.execution_cmd_info,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "poll_interval": self.poll_interval,
+            },
+        )
+
+    async def run(self):
+        try:
+            result = await self.gcp_hook.wait_command_execution_result(
+                project_id=self.project_id,
+                region=self.region,
+                environment_id=self.environment_id,
+                execution_cmd_info=self.execution_cmd_info,
+                poll_interval=self.poll_interval,
+            )
+        except AirflowException as ex:
+            yield TriggerEvent(
+                {
+                    "status": "error",
+                    "message": str(ex),
+                }
+            )
+            return
+
+        yield TriggerEvent(
+            {
+                "status": "success",
+                "result": result,
+            }
+        )
+        return
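As a hedged sketch only (the operator class, the field names inside execution_cmd_info, and all ids below are assumptions, not taken from the diff), a deferrable operator could hand polling off to the new trigger like this:

from __future__ import annotations

from airflow.models import BaseOperator
from airflow.providers.google.cloud.triggers.cloud_composer import (
    CloudComposerAirflowCLICommandTrigger,
)


class RunComposerCliCommandSketch(BaseOperator):
    """Hypothetical deferrable operator that waits on the new trigger."""

    def execute(self, context):
        # Assumption: execution_cmd_info came from a prior Airflow CLI command submission.
        execution_cmd_info = {"execution_id": "...", "pod": "...", "pod_namespace": "..."}
        self.defer(
            trigger=CloudComposerAirflowCLICommandTrigger(
                project_id="my-project",           # hypothetical project
                region="us-central1",              # hypothetical region
                environment_id="my-composer-env",  # hypothetical environment
                execution_cmd_info=execution_cmd_info,
                poll_interval=10,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # The trigger yields {"status": "success", "result": ...} or {"status": "error", "message": ...}.
        if event["status"] == "error":
            raise RuntimeError(event["message"])
        return event["result"]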
airflow/providers/google/cloud/triggers/cloud_sql.py

@@ -23,6 +23,7 @@ import asyncio
 from typing import Sequence

 from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLAsyncHook, CloudSqlOperationStatus
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -36,7 +37,7 @@ class CloudSQLExportTrigger(BaseTrigger):
     def __init__(
         self,
         operation_name: str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         poke_interval: int = 20,
airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py

@@ -27,6 +27,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     CloudDataTransferServiceAsyncHook,
 )
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -43,7 +44,7 @@ class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
     def __init__(
         self,
         job_names: list[str],
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         poll_interval: int = 10,
         gcp_conn_id: str = "google_cloud_default",
     ) -> None: