apache-airflow-providers-google 12.0.0rc2__py3-none-any.whl → 13.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/LICENSE +0 -52
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +27 -13
- airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
- airflow/providers/google/assets/bigquery.py +17 -0
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
- airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
- airflow/providers/google/cloud/hooks/automl.py +10 -4
- airflow/providers/google/cloud/hooks/bigquery.py +125 -22
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +2 -3
- airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
- airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -3
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
- airflow/providers/google/cloud/hooks/dataflow.py +12 -12
- airflow/providers/google/cloud/hooks/dataform.py +2 -3
- airflow/providers/google/cloud/hooks/datafusion.py +2 -2
- airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
- airflow/providers/google/cloud/hooks/dataproc.py +4 -5
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
- airflow/providers/google/cloud/hooks/dlp.py +3 -4
- airflow/providers/google/cloud/hooks/gcs.py +7 -6
- airflow/providers/google/cloud/hooks/kms.py +2 -3
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
- airflow/providers/google/cloud/hooks/natural_language.py +2 -3
- airflow/providers/google/cloud/hooks/os_login.py +2 -3
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
- airflow/providers/google/cloud/hooks/spanner.py +2 -2
- airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
- airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
- airflow/providers/google/cloud/hooks/tasks.py +3 -4
- airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
- airflow/providers/google/cloud/hooks/translate.py +236 -5
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
- airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
- airflow/providers/google/cloud/hooks/vision.py +3 -4
- airflow/providers/google/cloud/hooks/workflows.py +2 -3
- airflow/providers/google/cloud/links/alloy_db.py +46 -0
- airflow/providers/google/cloud/links/bigquery.py +25 -0
- airflow/providers/google/cloud/links/dataplex.py +172 -2
- airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
- airflow/providers/google/cloud/links/managed_kafka.py +104 -0
- airflow/providers/google/cloud/links/translate.py +28 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
- airflow/providers/google/cloud/openlineage/facets.py +67 -0
- airflow/providers/google/cloud/openlineage/mixins.py +438 -173
- airflow/providers/google/cloud/openlineage/utils.py +394 -61
- airflow/providers/google/cloud/operators/alloy_db.py +980 -69
- airflow/providers/google/cloud/operators/automl.py +83 -245
- airflow/providers/google/cloud/operators/bigquery.py +377 -74
- airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
- airflow/providers/google/cloud/operators/bigtable.py +1 -3
- airflow/providers/google/cloud/operators/cloud_base.py +1 -2
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
- airflow/providers/google/cloud/operators/cloud_build.py +3 -5
- airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
- airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
- airflow/providers/google/cloud/operators/cloud_run.py +6 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
- airflow/providers/google/cloud/operators/compute.py +3 -4
- airflow/providers/google/cloud/operators/datacatalog.py +9 -11
- airflow/providers/google/cloud/operators/dataflow.py +1 -112
- airflow/providers/google/cloud/operators/dataform.py +3 -5
- airflow/providers/google/cloud/operators/datafusion.py +1 -1
- airflow/providers/google/cloud/operators/dataplex.py +2046 -7
- airflow/providers/google/cloud/operators/dataproc.py +102 -17
- airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
- airflow/providers/google/cloud/operators/dlp.py +17 -19
- airflow/providers/google/cloud/operators/gcs.py +14 -17
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
- airflow/providers/google/cloud/operators/natural_language.py +3 -5
- airflow/providers/google/cloud/operators/pubsub.py +39 -7
- airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
- airflow/providers/google/cloud/operators/stackdriver.py +3 -5
- airflow/providers/google/cloud/operators/tasks.py +4 -6
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
- airflow/providers/google/cloud/operators/translate.py +414 -5
- airflow/providers/google/cloud/operators/translate_speech.py +2 -4
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
- airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
- airflow/providers/google/cloud/operators/vision.py +4 -6
- airflow/providers/google/cloud/operators/workflows.py +5 -7
- airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/sensors/bigtable.py +2 -3
- airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
- airflow/providers/google/cloud/sensors/dataplex.py +4 -6
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
- airflow/providers/google/cloud/sensors/gcs.py +2 -4
- airflow/providers/google/cloud/sensors/pubsub.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +3 -5
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
- airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
- airflow/providers/google/cloud/triggers/dataflow.py +2 -3
- airflow/providers/google/cloud/triggers/dataplex.py +1 -2
- airflow/providers/google/cloud/triggers/dataproc.py +2 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +1 -2
- airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
- airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
- airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/consts.py +1 -2
- airflow/providers/google/common/hooks/base_google.py +8 -7
- airflow/providers/google/get_provider_info.py +186 -134
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
- airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
- airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
- airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
--- a/airflow/providers/google/cloud/transfers/postgres_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/postgres_to_gcs.py
@@ -24,6 +24,8 @@ import json
 import time
 import uuid
 from decimal import Decimal
+from functools import cached_property
+from typing import TYPE_CHECKING

 import pendulum
 from slugify import slugify
@@ -31,6 +33,9 @@ from slugify import slugify
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.postgres.hooks.postgres import PostgresHook

+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+

 class _PostgresServerSideCursorDecorator:
     """
@@ -132,10 +137,13 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         )
         return None

+    @cached_property
+    def db_hook(self) -> PostgresHook:
+        return PostgresHook(postgres_conn_id=self.postgres_conn_id)
+
     def query(self):
         """Query Postgres and returns a cursor to the results."""
-        hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
-        conn = hook.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor(name=self._unique_name())
         cursor.execute(self.sql, self.parameters)
         if self.use_server_side_cursor:
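Moving the hook into a functools.cached_property builds it once per operator instance and lets query() and the OpenLineage method added below share it. A minimal sketch of the pattern with a stand-in hook class (illustrative names, not provider code):

from functools import cached_property


class FakeHook:
    """Stand-in for PostgresHook; just records its connection id."""

    def __init__(self, conn_id: str) -> None:
        print(f"building hook for {conn_id}")  # runs only on first property access
        self.conn_id = conn_id


class Op:
    def __init__(self, conn_id: str) -> None:
        self.conn_id = conn_id

    @cached_property
    def db_hook(self) -> FakeHook:
        # Computed on first access, then cached on the instance.
        return FakeHook(self.conn_id)


op = Op("postgres_default")
assert op.db_hook is op.db_hook  # one hook instance, reused on every access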
@@ -180,3 +188,20 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         if isinstance(value, Decimal):
             return float(value)
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.postgres_conn_id,
+            database=self.db_hook.database,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
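With the OpenLineage provider installed, the listener calls get_openlineage_facets_on_start() automatically at task start. A hedged sketch of inspecting the result by hand; the connection id, SQL, and bucket are illustrative, and the database lookup assumes a reachable postgres_default connection:

from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator

op = PostgresToGCSOperator(
    task_id="users_to_gcs",
    postgres_conn_id="postgres_default",  # assumed connection
    sql="SELECT id, name FROM users",     # illustrative query
    bucket="example-bucket",              # illustrative bucket
    filename="exports/users_{}.csv",
)

lineage = op.get_openlineage_facets_on_start()
# Outputs always come from _get_openlineage_output_datasets() ("gs://example-bucket");
# if SQL parsing yields nothing, job_facets falls back to a plain SQLJobFacet.
print(lineage.outputs, lineage.job_facets)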
--- a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
@@ -21,6 +21,7 @@ from __future__ import annotations

 import os
 from collections.abc import Sequence
+from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
@@ -109,6 +110,10 @@ class SFTPToGCSOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.sftp_prefetch = sftp_prefetch

+    @cached_property
+    def sftp_hook(self):
+        return SFTPHook(self.sftp_conn_id)
+
     def execute(self, context: Context):
         self.destination_path = self._set_destination_path(self.destination_path)
         self.destination_bucket = self._set_bucket_name(self.destination_bucket)
@@ -117,8 +122,6 @@ class SFTPToGCSOperator(BaseOperator):
             impersonation_chain=self.impersonation_chain,
         )

-        sftp_hook = SFTPHook(self.sftp_conn_id)
-
         if WILDCARD in self.source_path:
             total_wildcards = self.source_path.count(WILDCARD)
             if total_wildcards > 1:
@@ -130,7 +133,7 @@ class SFTPToGCSOperator(BaseOperator):
             prefix, delimiter = self.source_path.split(WILDCARD, 1)
             base_path = os.path.dirname(prefix)

-            files, _, _ = sftp_hook.get_tree_map(base_path, prefix=prefix, delimiter=delimiter)
+            files, _, _ = self.sftp_hook.get_tree_map(base_path, prefix=prefix, delimiter=delimiter)

             for file in files:
                 destination_path = file.replace(base_path, self.destination_path, 1)
@@ -140,13 +143,13 @@ class SFTPToGCSOperator(BaseOperator):
                 # retain the "/" prefix, if it has.
                 if not self.destination_path:
                     destination_path = destination_path.lstrip("/")
-                self._copy_single_object(gcs_hook, sftp_hook, file, destination_path)
+                self._copy_single_object(gcs_hook, self.sftp_hook, file, destination_path)

         else:
             destination_object = (
                 self.destination_path if self.destination_path else self.source_path.rsplit("/", 1)[1]
             )
-            self._copy_single_object(gcs_hook, sftp_hook, self.source_path, destination_object)
+            self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)

     def _copy_single_object(
         self,
@@ -188,3 +191,29 @@ class SFTPToGCSOperator(BaseOperator):
     def _set_bucket_name(name: str) -> str:
         bucket = name if not name.startswith("gs://") else name[5:]
         return bucket.strip("/")
+
+    def get_openlineage_facets_on_start(self):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        source_name = extract_ds_name_from_gcs_path(self.source_path.split(WILDCARD, 1)[0])
+        if self.source_path.startswith("/") and source_name != "/":
+            source_name = "/" + source_name
+
+        if WILDCARD not in self.source_path and not self.destination_path:
+            dest_name = self.source_path.rsplit("/", 1)[1]
+        else:
+            dest_name = extract_ds_name_from_gcs_path(f"{self.destination_path}")
+
+        return OperatorLineage(
+            inputs=[
+                Dataset(
+                    namespace=f"file://{self.sftp_hook.remote_host}:{self.sftp_hook.port}",
+                    name=source_name,
+                )
+            ],
+            outputs=[
+                Dataset(namespace="gs://" + self._set_bucket_name(self.destination_bucket), name=dest_name)
+            ],
+        )
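A similar hedged sketch for the SFTP side: the input namespace is built from the SFTP connection's host and port, the output namespace from the destination bucket. All values are illustrative and an sftp_default connection is assumed to exist:

from airflow.providers.google.cloud.transfers.sftp_to_gcs import SFTPToGCSOperator

op = SFTPToGCSOperator(
    task_id="incoming_to_gcs",
    sftp_conn_id="sftp_default",          # assumed connection; host/port feed the namespace
    source_path="/data/incoming/*.csv",   # input name: the directory part before the wildcard
    destination_bucket="example-bucket",  # output namespace: "gs://example-bucket"
    destination_path="landing/",
)

lineage = op.get_openlineage_facets_on_start()
print([(d.namespace, d.name) for d in lineage.inputs])   # e.g. [("file://<host>:<port>", "/data/incoming")]
print([(d.namespace, d.name) for d in lineage.outputs])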
--- a/airflow/providers/google/cloud/transfers/sql_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/sql_to_gcs.py
@@ -34,6 +34,7 @@ from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook

 if TYPE_CHECKING:
+    from airflow.providers.common.compat.openlineage.facet import OutputDataset
     from airflow.utils.context import Context
@@ -151,6 +152,7 @@ class BaseSQLToGCSOperator(BaseOperator):
         self.partition_columns = partition_columns
         self.write_on_empty = write_on_empty
         self.parquet_row_group_size = parquet_row_group_size
+        self._uploaded_file_names: list[str] = []

     def execute(self, context: Context):
         if self.partition_columns:
@@ -501,3 +503,16 @@ class BaseSQLToGCSOperator(BaseOperator):
             gzip=self.gzip if is_data_file else False,
             metadata=metadata,
         )
+        self._uploaded_file_names.append(object_name)
+
+    def _get_openlineage_output_datasets(self) -> list[OutputDataset]:
+        """Retrieve OpenLineage output datasets."""
+        from airflow.providers.common.compat.openlineage.facet import OutputDataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
+
+        return [
+            OutputDataset(
+                namespace=f"gs://{self.bucket}",
+                name=extract_ds_name_from_gcs_path(self.filename.split("{}", maxsplit=1)[0]),
+            )
+        ]
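The output dataset name is derived from the filename template: everything before the first "{}" chunk placeholder, then normalized by extract_ds_name_from_gcs_path. A minimal illustration of the string handling involved:

filename = "exports/users_{}.csv"  # template; "{}" is replaced with the chunk number
prefix = filename.split("{}", maxsplit=1)[0]
print(prefix)  # "exports/users_" -- then normalized to a directory-like dataset name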
--- a/airflow/providers/google/cloud/transfers/trino_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/trino_to_gcs.py
@@ -17,6 +17,7 @@
 # under the License.
 from __future__ import annotations

+from functools import cached_property
 from typing import TYPE_CHECKING, Any

 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
@@ -26,6 +27,8 @@ if TYPE_CHECKING:
     from trino.client import TrinoResult
     from trino.dbapi import Cursor as TrinoCursor

+    from airflow.providers.openlineage.extractors import OperatorLineage
+

 class _TrinoToGCSTrinoCursorAdapter:
     """
@@ -181,10 +184,13 @@ class TrinoToGCSOperator(BaseSQLToGCSOperator):
         super().__init__(**kwargs)
         self.trino_conn_id = trino_conn_id

+    @cached_property
+    def db_hook(self) -> TrinoHook:
+        return TrinoHook(trino_conn_id=self.trino_conn_id)
+
     def query(self):
         """Query trino and returns a cursor to the results."""
-        trino = TrinoHook(trino_conn_id=self.trino_conn_id)
-        conn = trino.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         self.log.info("Executing: %s", self.sql)
         cursor.execute(self.sql)
@@ -207,3 +213,20 @@ class TrinoToGCSOperator(BaseSQLToGCSOperator):
         :param schema_type: BigQuery data type
         """
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.trino_conn_id,
+            database=None,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
--- a/airflow/providers/google/cloud/triggers/bigquery_dts.py
+++ b/airflow/providers/google/cloud/triggers/bigquery_dts.py
@@ -21,10 +21,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.bigquery_datatransfer_v1 import TransferRun, TransferState
-
 from airflow.providers.google.cloud.hooks.bigquery_dts import AsyncBiqQueryDataTransferServiceHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.bigquery_datatransfer_v1 import TransferRun, TransferState


 class BigQueryDataTransferRunTrigger(BaseTrigger):
--- a/airflow/providers/google/cloud/triggers/cloud_batch.py
+++ b/airflow/providers/google/cloud/triggers/cloud_batch.py
@@ -20,10 +20,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.batch_v1 import Job, JobStatus
-
 from airflow.providers.google.cloud.hooks.cloud_batch import CloudBatchAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.batch_v1 import Job, JobStatus

 DEFAULT_BATCH_LOCATION = "us-central1"
--- a/airflow/providers/google/cloud/triggers/cloud_build.py
+++ b/airflow/providers/google/cloud/triggers/cloud_build.py
@@ -20,10 +20,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.devtools.cloudbuild_v1.types import Build
-
 from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.devtools.cloudbuild_v1.types import Build


 class CloudBuildCreateBuildTrigger(BaseTrigger):
--- a/airflow/providers/google/cloud/triggers/cloud_composer.py
+++ b/airflow/providers/google/cloud/triggers/cloud_composer.py
@@ -25,11 +25,11 @@ from datetime import datetime
 from typing import Any

 from dateutil import parser
-from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse

 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse


 class CloudComposerExecutionTrigger(BaseTrigger):
@@ -169,6 +169,7 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         poll_interval: int = 10,
+        composer_airflow_version: int = 2,
     ):
         super().__init__()
         self.project_id = project_id
@@ -181,6 +182,7 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.poll_interval = poll_interval
+        self.composer_airflow_version = composer_airflow_version

         self.gcp_hook = CloudComposerAsyncHook(
             gcp_conn_id=self.gcp_conn_id,
@@ -201,18 +203,24 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "poll_interval": self.poll_interval,
+                "composer_airflow_version": self.composer_airflow_version,
             },
         )

     async def _pull_dag_runs(self) -> list[dict]:
         """Pull the list of dag runs."""
+        cmd_parameters = (
+            ["-d", self.composer_dag_id, "-o", "json"]
+            if self.composer_airflow_version < 3
+            else [self.composer_dag_id, "-o", "json"]
+        )
         dag_runs_cmd = await self.gcp_hook.execute_airflow_command(
             project_id=self.project_id,
             region=self.region,
             environment_id=self.environment_id,
             command="dags",
             subcommand="list-runs",
-            parameters=["-d", self.composer_dag_id, "-o", "json"],
+            parameters=cmd_parameters,
         )
         cmd_result = await self.gcp_hook.wait_command_execution_result(
             project_id=self.project_id,
@@ -232,7 +240,9 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         for dag_run in dag_runs:
             if (
                 start_date.timestamp()
-                < parser.parse(dag_run["execution_date"]).timestamp()
+                < parser.parse(
+                    dag_run["execution_date" if self.composer_airflow_version < 3 else "logical_date"]
+                ).timestamp()
                 < end_date.timestamp()
             ) and dag_run["state"] not in self.allowed_states:
                 return False
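The new composer_airflow_version switch selects the Airflow 3 CLI form shown above (dag id passed positionally rather than via -d) and reads logical_date, Airflow 3's rename of execution_date. A hedged construction sketch; all values are hypothetical, and the leading constructor arguments are inferred from the fields the trigger serializes and uses in the hunks above:

from datetime import datetime, timedelta, timezone

from airflow.providers.google.cloud.triggers.cloud_composer import CloudComposerDAGRunTrigger

end = datetime.now(timezone.utc)
trigger = CloudComposerDAGRunTrigger(
    project_id="my-project",             # hypothetical project
    region="us-central1",
    environment_id="my-composer-env",    # hypothetical Composer environment
    composer_dag_id="daily_etl",         # DAG running inside that environment
    start_date=end - timedelta(days=1),  # window checked against each run's date field
    end_date=end,
    allowed_states=["success"],
    poll_interval=30,
    composer_airflow_version=3,          # use positional dag id and "logical_date"
)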
--- a/airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py
+++ b/airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py
@@ -18,18 +18,18 @@
 from __future__ import annotations

 import asyncio
-from collections.abc import AsyncIterator, Iterable
+from collections.abc import AsyncIterator, Iterable, Sequence
 from typing import Any

-from google.api_core.exceptions import GoogleAPIError
-from google.cloud.storage_transfer_v1.types import TransferOperation
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     CloudDataTransferServiceAsyncHook,
+    GcpTransferOperationStatus,
 )
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.api_core.exceptions import GoogleAPIError
+from google.cloud.storage_transfer_v1.types import TransferOperation


 class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
@@ -132,3 +132,101 @@ class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
             project_id=self.project_id,
             gcp_conn_id=self.gcp_conn_id,
         )
+
+
+class CloudStorageTransferServiceCheckJobStatusTrigger(BaseTrigger):
+    """
+    CloudStorageTransferServiceCheckJobStatusTrigger run on the trigger worker to check Cloud Storage Transfer job.
+
+    :param job_name: The name of the transfer job
+    :param expected_statuses: The expected state of the operation.
+        See:
+        https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status
+    :param project_id: The ID of the project that owns the Transfer Job.
+    :param poke_interval: Polling period in seconds to check for the status
+    :param gcp_conn_id: The connection ID used to connect to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    def __init__(
+        self,
+        job_name: str,
+        expected_statuses: set[str] | str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
+        poke_interval: float = 10.0,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+    ):
+        super().__init__()
+        self.job_name = job_name
+        self.expected_statuses = expected_statuses
+        self.project_id = project_id
+        self.poke_interval = poke_interval
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize CloudStorageTransferServiceCheckJobStatusTrigger arguments and classpath."""
+        return (
+            f"{self.__class__.__module__}.{self.__class__.__qualname__}",
+            {
+                "job_name": self.job_name,
+                "expected_statuses": self.expected_statuses,
+                "project_id": self.project_id,
+                "poke_interval": self.poke_interval,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+            },
+        )
+
+    def _get_async_hook(self) -> CloudDataTransferServiceAsyncHook:
+        return CloudDataTransferServiceAsyncHook(
+            project_id=self.project_id,
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        """Check the status of the transfer job and yield a TriggerEvent."""
+        hook = self._get_async_hook()
+        expected_statuses = (
+            {GcpTransferOperationStatus.SUCCESS} if not self.expected_statuses else self.expected_statuses
+        )
+
+        try:
+            while True:
+                operations = await hook.list_transfer_operations(
+                    request_filter={
+                        "project_id": self.project_id or hook.project_id,
+                        "job_names": [self.job_name],
+                    }
+                )
+                check = await CloudDataTransferServiceAsyncHook.operations_contain_expected_statuses(
+                    operations=operations,
+                    expected_statuses=expected_statuses,
+                )
+                if check:
+                    yield TriggerEvent(
+                        {
+                            "status": "success",
+                            "message": "Transfer operation completed successfully",
+                            "operations": operations,
+                        }
+                    )
+                    return
+
+                self.log.info(
+                    "Sleeping for %s seconds.",
+                    self.poke_interval,
+                )
+                await asyncio.sleep(self.poke_interval)
+        except Exception as e:
+            self.log.exception("Exception occurred while checking for query completion")
+            yield TriggerEvent({"status": "error", "message": str(e)})
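A hedged sketch of handing off to the new trigger from a deferrable operator; the wrapper sensor class and its method names are illustrative, only the trigger's signature and event payload come from the hunk above:

from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
    CloudStorageTransferServiceCheckJobStatusTrigger,
)
from airflow.sensors.base import BaseSensorOperator


class WaitForTransferJob(BaseSensorOperator):  # illustrative wrapper, not provider code
    def execute(self, context):
        # Hand the polling loop to the triggerer instead of blocking a worker slot.
        self.defer(
            trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
                job_name="transferJobs/123456789",  # hypothetical job name
                project_id="my-project",            # hypothetical project
                poke_interval=30.0,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # run() yields {"status", "message", "operations"} on success, per the hunk above.
        if event["status"] == "error":
            raise RuntimeError(event["message"])
        return event["operations"]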
--- a/airflow/providers/google/cloud/triggers/dataflow.py
+++ b/airflow/providers/google/cloud/triggers/dataflow.py
@@ -22,6 +22,8 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

+from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook, DataflowJobStatus
+from airflow.triggers.base import BaseTrigger, TriggerEvent
 from google.cloud.dataflow_v1beta3 import JobState
 from google.cloud.dataflow_v1beta3.types import (
     AutoscalingEvent,
@@ -32,9 +34,6 @@ from google.cloud.dataflow_v1beta3.types import (
     MetricUpdate,
 )

-from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook, DataflowJobStatus
-from airflow.triggers.base import BaseTrigger, TriggerEvent
-
 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.messages_v1_beta3.pagers import ListJobMessagesAsyncPager
--- a/airflow/providers/google/cloud/triggers/dataplex.py
+++ b/airflow/providers/google/cloud/triggers/dataplex.py
@@ -22,10 +22,9 @@ from __future__ import annotations
 import asyncio
 from collections.abc import AsyncIterator, Sequence

-from google.cloud.dataplex_v1.types import DataScanJob
-
 from airflow.providers.google.cloud.hooks.dataplex import DataplexAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.dataplex_v1.types import DataScanJob


 class DataplexDataQualityJobTrigger(BaseTrigger):
--- a/airflow/providers/google/cloud/triggers/dataproc.py
+++ b/airflow/providers/google/cloud/triggers/dataproc.py
@@ -25,9 +25,6 @@ import time
 from collections.abc import AsyncIterator, Sequence
 from typing import TYPE_CHECKING, Any

-from google.api_core.exceptions import NotFound
-from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
-
 from airflow.exceptions import AirflowException
 from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook, DataprocHook
@@ -36,6 +33,8 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState
+from google.api_core.exceptions import NotFound
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus

 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
--- a/airflow/providers/google/cloud/triggers/kubernetes_engine.py
+++ b/airflow/providers/google/cloud/triggers/kubernetes_engine.py
@@ -23,7 +23,6 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from google.cloud.container_v1.types import Operation
 from packaging.version import parse as parse_version

 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -37,6 +36,7 @@ from airflow.providers.google.cloud.hooks.kubernetes_engine import (
 )
 from airflow.providers_manager import ProvidersManager
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.container_v1.types import Operation

 if TYPE_CHECKING:
     from datetime import datetime
--- a/airflow/providers/google/cloud/triggers/pubsub.py
+++ b/airflow/providers/google/cloud/triggers/pubsub.py
@@ -23,10 +23,9 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import Any

-from google.cloud.pubsub_v1.types import ReceivedMessage
-
 from airflow.providers.google.cloud.hooks.pubsub import PubSubAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.pubsub_v1.types import ReceivedMessage


 class PubsubPullTrigger(BaseTrigger):
--- a/airflow/providers/google/cloud/triggers/vertex_ai.py
+++ b/airflow/providers/google/cloud/triggers/vertex_ai.py
@@ -20,14 +20,6 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from google.cloud.aiplatform_v1 import (
-    BatchPredictionJob,
-    HyperparameterTuningJob,
-    JobState,
-    PipelineState,
-    types,
-)
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.vertex_ai.batch_prediction_job import BatchPredictionJobAsyncHook
 from airflow.providers.google.cloud.hooks.vertex_ai.custom_job import CustomJobAsyncHook
@@ -36,6 +28,13 @@ from airflow.providers.google.cloud.hooks.vertex_ai.hyperparameter_tuning_job im
 )
 from airflow.providers.google.cloud.hooks.vertex_ai.pipeline_job import PipelineJobAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.aiplatform_v1 import (
+    BatchPredictionJob,
+    HyperparameterTuningJob,
+    JobState,
+    PipelineState,
+    types,
+)

 if TYPE_CHECKING:
     from proto import Message
--- a/airflow/providers/google/cloud/utils/credentials_provider.py
+++ b/airflow/providers/google/cloud/utils/credentials_provider.py
@@ -29,10 +29,6 @@ from urllib.parse import urlencode

 import google.auth
 import google.oauth2.service_account
-from google.auth import impersonated_credentials  # type: ignore[attr-defined]
-from google.auth.credentials import AnonymousCredentials, Credentials
-from google.auth.environment_vars import CREDENTIALS, LEGACY_PROJECT, PROJECT
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud._internal_client.secret_manager_client import _SecretManagerClient
 from airflow.providers.google.cloud.utils.external_token_supplier import (
@@ -40,6 +36,9 @@ from airflow.providers.google.cloud.utils.external_token_supplier import (
 )
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.process_utils import patch_environ
+from google.auth import impersonated_credentials  # type: ignore[attr-defined]
+from google.auth.credentials import AnonymousCredentials, Credentials
+from google.auth.environment_vars import CREDENTIALS, LEGACY_PROJECT, PROJECT

 log = logging.getLogger(__name__)
@@ -219,11 +218,19 @@ class _CredentialProvider(LoggingMixin):
         idp_extra_params_dict: dict[str, str] | None = None,
     ) -> None:
         super().__init__()
-        key_options = [key_path, keyfile_dict, credential_config_file, key_secret_name, is_anonymous]
-        if len([x for x in key_options if x]) > 1:
+        key_options_map = {
+            "key_path": key_path,
+            "keyfile_dict": keyfile_dict,
+            "credential_config_file": credential_config_file,
+            "key_secret_name": key_secret_name,
+            "is_anonymous": is_anonymous,
+        }
+        key_options_label_provided = [label for label, credential in key_options_map.items() if credential]
+        if len(key_options_label_provided) > 1:
             raise AirflowException(
-                "The `keyfile_dict`, `key_path`, `credential_config_file`, `is_anonymous` and"
-                " `key_secret_name` fields are all mutually exclusive."
+                f"The `keyfile_dict`, `key_path`, `credential_config_file`, `is_anonymous` and"
+                f" `key_secret_name` fields are all mutually exclusive. "
+                f"Received options: {key_options_label_provided}. Please provide only one value."
             )
         self.key_path = key_path
         self.keyfile_dict = keyfile_dict
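The rewritten check collects the labels of every option that was set, so the exception now names the conflicting arguments. A hedged sketch of the new behavior with dummy values (_CredentialProvider is a private helper):

from airflow.exceptions import AirflowException
from airflow.providers.google.cloud.utils.credentials_provider import _CredentialProvider

try:
    # Two mutually exclusive options passed at once (dummy values):
    _CredentialProvider(key_path="/tmp/key.json", is_anonymous=True)
except AirflowException as err:
    print(err)  # now ends with: Received options: ['key_path', 'is_anonymous']. ...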
--- a/airflow/providers/google/common/auth_backend/google_openid.py
+++ b/airflow/providers/google/common/auth_backend/google_openid.py
@@ -23,17 +23,17 @@ import logging
 from functools import wraps
 from typing import Callable, TypeVar, cast

+from flask import Response, current_app, request as flask_request  # type: ignore
+
 import google
 import google.auth.transport.requests
 import google.oauth2.id_token
-from flask import Response, current_app, request as flask_request  # type: ignore
+from airflow.configuration import conf
+from airflow.providers.google.common.utils.id_token_credentials import get_default_id_token_credentials
 from google.auth import exceptions
 from google.auth.transport.requests import AuthorizedSession
 from google.oauth2 import service_account

-from airflow.configuration import conf
-from airflow.providers.google.common.utils.id_token_credentials import get_default_id_token_credentials
-
 log = logging.getLogger(__name__)

 _GOOGLE_ISSUERS = ("accounts.google.com", "https://accounts.google.com")
--- a/airflow/providers/google/common/consts.py
+++ b/airflow/providers/google/common/consts.py
@@ -16,9 +16,8 @@
 # under the License.
 from __future__ import annotations

-from google.api_core.gapic_v1.client_info import ClientInfo
-
 from airflow import version
+from google.api_core.gapic_v1.client_info import ClientInfo

 GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME = "execute_complete"