apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -5
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +166 -281
- airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
- airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
- airflow/providers/google/cloud/hooks/dataflow.py +71 -94
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +72 -71
- airflow/providers/google/cloud/hooks/gcs.py +111 -14
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/mlengine.py +3 -2
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +73 -8
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +2 -2
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +70 -55
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/operators/bigtable.py +36 -7
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
- airflow/providers/google/cloud/operators/cloud_build.py +75 -32
- airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +23 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
- airflow/providers/google/cloud/operators/compute.py +8 -40
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +41 -20
- airflow/providers/google/cloud/operators/dataplex.py +193 -109
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +78 -35
- airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +16 -7
- airflow/providers/google/cloud/operators/gcs.py +10 -8
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +60 -14
- airflow/providers/google/cloud/operators/spanner.py +25 -12
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +40 -16
- airflow/providers/google/cloud/operators/translate_speech.py +1 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/sensors/dataflow.py +26 -9
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +2 -2
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +4 -4
- airflow/providers/google/cloud/sensors/looker.py +2 -2
- airflow/providers/google/cloud/sensors/pubsub.py +4 -4
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +2 -2
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +122 -52
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -8
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -25,8 +25,9 @@ This module contains Google PubSub operators.
|
|
|
25
25
|
|
|
26
26
|
from __future__ import annotations
|
|
27
27
|
|
|
28
|
-
from collections.abc import Sequence
|
|
29
|
-
from
|
|
28
|
+
from collections.abc import Callable, Sequence
|
|
29
|
+
from functools import cached_property
|
|
30
|
+
from typing import TYPE_CHECKING, Any
|
|
30
31
|
|
|
31
32
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
32
33
|
from google.cloud.pubsub_v1.types import (
|
|
@@ -40,6 +41,7 @@ from google.cloud.pubsub_v1.types import (
|
|
|
40
41
|
SchemaSettings,
|
|
41
42
|
)
|
|
42
43
|
|
|
44
|
+
from airflow.configuration import conf
|
|
43
45
|
from airflow.exceptions import AirflowException
|
|
44
46
|
from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
|
|
45
47
|
from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink, PubSubTopicLink
|
|
@@ -51,7 +53,8 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
|
51
53
|
if TYPE_CHECKING:
|
|
52
54
|
from google.api_core.retry import Retry
|
|
53
55
|
|
|
54
|
-
from airflow.
|
|
56
|
+
from airflow.providers.common.compat.sdk import Context
|
|
57
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
55
58
|
|
|
56
59
|
|
|
57
60
|
class PubSubCreateTopicOperator(GoogleCloudBaseOperator):
|
|
@@ -182,7 +185,6 @@ class PubSubCreateTopicOperator(GoogleCloudBaseOperator):
|
|
|
182
185
|
self.log.info("Created topic %s", self.topic)
|
|
183
186
|
PubSubTopicLink.persist(
|
|
184
187
|
context=context,
|
|
185
|
-
task_instance=self,
|
|
186
188
|
topic_id=self.topic,
|
|
187
189
|
project_id=self.project_id or hook.project_id,
|
|
188
190
|
)
|
|
@@ -359,15 +361,18 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
|
|
|
359
361
|
self.timeout = timeout
|
|
360
362
|
self.metadata = metadata
|
|
361
363
|
self.impersonation_chain = impersonation_chain
|
|
364
|
+
self._resolved_subscription_name: str | None = None
|
|
362
365
|
|
|
363
|
-
|
|
364
|
-
|
|
366
|
+
@cached_property
|
|
367
|
+
def pubsub_hook(self):
|
|
368
|
+
return PubSubHook(
|
|
365
369
|
gcp_conn_id=self.gcp_conn_id,
|
|
366
370
|
impersonation_chain=self.impersonation_chain,
|
|
367
371
|
)
|
|
368
372
|
|
|
373
|
+
def execute(self, context: Context) -> str:
|
|
369
374
|
self.log.info("Creating subscription for topic %s", self.topic)
|
|
370
|
-
result =
|
|
375
|
+
result = self.pubsub_hook.create_subscription(
|
|
371
376
|
project_id=self.project_id,
|
|
372
377
|
topic=self.topic,
|
|
373
378
|
subscription=self.subscription,
|
|
@@ -389,14 +394,34 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
|
|
|
389
394
|
)
|
|
390
395
|
|
|
391
396
|
self.log.info("Created subscription for topic %s", self.topic)
|
|
397
|
+
|
|
398
|
+
# Store resolved subscription for Open Lineage
|
|
399
|
+
self._resolved_subscription_name = self.subscription or result
|
|
400
|
+
|
|
392
401
|
PubSubSubscriptionLink.persist(
|
|
393
402
|
context=context,
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
project_id=self.project_id or hook.project_id,
|
|
403
|
+
subscription_id=self._resolved_subscription_name, # result returns subscription name
|
|
404
|
+
project_id=self.project_id or self.pubsub_hook.project_id,
|
|
397
405
|
)
|
|
398
406
|
return result
|
|
399
407
|
|
|
408
|
+
def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
|
|
409
|
+
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
410
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
411
|
+
|
|
412
|
+
topic_project_id = self.project_id or self.pubsub_hook.project_id
|
|
413
|
+
subscription_project_id = self.subscription_project_id or topic_project_id
|
|
414
|
+
|
|
415
|
+
return OperatorLineage(
|
|
416
|
+
inputs=[Dataset(namespace="pubsub", name=f"topic:{topic_project_id}:{self.topic}")],
|
|
417
|
+
outputs=[
|
|
418
|
+
Dataset(
|
|
419
|
+
namespace="pubsub",
|
|
420
|
+
name=f"subscription:{subscription_project_id}:{self._resolved_subscription_name}",
|
|
421
|
+
)
|
|
422
|
+
],
|
|
423
|
+
)
|
|
424
|
+
|
|
400
425
|
|
|
401
426
|
class PubSubDeleteTopicOperator(GoogleCloudBaseOperator):
|
|
402
427
|
"""
|
|
@@ -693,17 +718,28 @@ class PubSubPublishMessageOperator(GoogleCloudBaseOperator):
|
|
|
693
718
|
self.enable_message_ordering = enable_message_ordering
|
|
694
719
|
self.impersonation_chain = impersonation_chain
|
|
695
720
|
|
|
696
|
-
|
|
697
|
-
|
|
721
|
+
@cached_property
|
|
722
|
+
def pubsub_hook(self):
|
|
723
|
+
return PubSubHook(
|
|
698
724
|
gcp_conn_id=self.gcp_conn_id,
|
|
699
725
|
impersonation_chain=self.impersonation_chain,
|
|
700
726
|
enable_message_ordering=self.enable_message_ordering,
|
|
701
727
|
)
|
|
702
728
|
|
|
729
|
+
def execute(self, context: Context) -> None:
|
|
703
730
|
self.log.info("Publishing to topic %s", self.topic)
|
|
704
|
-
|
|
731
|
+
self.pubsub_hook.publish(project_id=self.project_id, topic=self.topic, messages=self.messages)
|
|
705
732
|
self.log.info("Published to topic %s", self.topic)
|
|
706
733
|
|
|
734
|
+
def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
|
|
735
|
+
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
736
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
737
|
+
|
|
738
|
+
project_id = self.project_id or self.pubsub_hook.project_id
|
|
739
|
+
output_dataset = [Dataset(namespace="pubsub", name=f"topic:{project_id}:{self.topic}")]
|
|
740
|
+
|
|
741
|
+
return OperatorLineage(outputs=output_dataset)
|
|
742
|
+
|
|
707
743
|
|
|
708
744
|
class PubSubPullOperator(GoogleCloudBaseOperator):
|
|
709
745
|
"""
|
|
@@ -770,7 +806,7 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
|
|
|
770
806
|
messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
|
|
771
807
|
gcp_conn_id: str = "google_cloud_default",
|
|
772
808
|
impersonation_chain: str | Sequence[str] | None = None,
|
|
773
|
-
deferrable: bool = False,
|
|
809
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
|
774
810
|
poll_interval: int = 300,
|
|
775
811
|
**kwargs,
|
|
776
812
|
) -> None:
|
|
@@ -854,3 +890,13 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
|
|
|
854
890
|
messages_json = [ReceivedMessage.to_dict(m) for m in pulled_messages]
|
|
855
891
|
|
|
856
892
|
return messages_json
|
|
893
|
+
|
|
894
|
+
def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
|
|
895
|
+
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
896
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
897
|
+
|
|
898
|
+
output_dataset = [
|
|
899
|
+
Dataset(namespace="pubsub", name=f"subscription:{self.project_id}:{self.subscription}")
|
|
900
|
+
]
|
|
901
|
+
|
|
902
|
+
return OperatorLineage(outputs=output_dataset)
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
from collections.abc import Sequence
|
|
23
|
+
from functools import cached_property
|
|
23
24
|
from typing import TYPE_CHECKING
|
|
24
25
|
|
|
25
26
|
from airflow.exceptions import AirflowException
|
|
@@ -29,7 +30,8 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
|
|
|
29
30
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
30
31
|
|
|
31
32
|
if TYPE_CHECKING:
|
|
32
|
-
from airflow.
|
|
33
|
+
from airflow.providers.common.compat.sdk import Context
|
|
34
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
class SpannerDeployInstanceOperator(GoogleCloudBaseOperator):
|
|
@@ -122,7 +124,6 @@ class SpannerDeployInstanceOperator(GoogleCloudBaseOperator):
|
|
|
122
124
|
)
|
|
123
125
|
SpannerInstanceLink.persist(
|
|
124
126
|
context=context,
|
|
125
|
-
task_instance=self,
|
|
126
127
|
instance_id=self.instance_id,
|
|
127
128
|
project_id=self.project_id or hook.project_id,
|
|
128
129
|
)
|
|
@@ -255,6 +256,13 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
255
256
|
self.impersonation_chain = impersonation_chain
|
|
256
257
|
super().__init__(**kwargs)
|
|
257
258
|
|
|
259
|
+
@cached_property
|
|
260
|
+
def hook(self) -> SpannerHook:
|
|
261
|
+
return SpannerHook(
|
|
262
|
+
gcp_conn_id=self.gcp_conn_id,
|
|
263
|
+
impersonation_chain=self.impersonation_chain,
|
|
264
|
+
)
|
|
265
|
+
|
|
258
266
|
def _validate_inputs(self) -> None:
|
|
259
267
|
if self.project_id == "":
|
|
260
268
|
raise AirflowException("The required parameter 'project_id' is empty")
|
|
@@ -266,10 +274,6 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
266
274
|
raise AirflowException("The required parameter 'query' is empty")
|
|
267
275
|
|
|
268
276
|
def execute(self, context: Context):
|
|
269
|
-
hook = SpannerHook(
|
|
270
|
-
gcp_conn_id=self.gcp_conn_id,
|
|
271
|
-
impersonation_chain=self.impersonation_chain,
|
|
272
|
-
)
|
|
273
277
|
if isinstance(self.query, str):
|
|
274
278
|
queries = [x.strip() for x in self.query.split(";")]
|
|
275
279
|
self.sanitize_queries(queries)
|
|
@@ -281,8 +285,8 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
281
285
|
self.instance_id,
|
|
282
286
|
self.database_id,
|
|
283
287
|
)
|
|
284
|
-
self.log.info(queries)
|
|
285
|
-
hook.execute_dml(
|
|
288
|
+
self.log.info("Executing queries: %s", queries)
|
|
289
|
+
result_rows_count_per_query = self.hook.execute_dml(
|
|
286
290
|
project_id=self.project_id,
|
|
287
291
|
instance_id=self.instance_id,
|
|
288
292
|
database_id=self.database_id,
|
|
@@ -290,11 +294,11 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
290
294
|
)
|
|
291
295
|
SpannerDatabaseLink.persist(
|
|
292
296
|
context=context,
|
|
293
|
-
task_instance=self,
|
|
294
297
|
instance_id=self.instance_id,
|
|
295
298
|
database_id=self.database_id,
|
|
296
|
-
project_id=self.project_id or hook.project_id,
|
|
299
|
+
project_id=self.project_id or self.hook.project_id,
|
|
297
300
|
)
|
|
301
|
+
return result_rows_count_per_query
|
|
298
302
|
|
|
299
303
|
@staticmethod
|
|
300
304
|
def sanitize_queries(queries: list[str]) -> None:
|
|
@@ -306,6 +310,17 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
306
310
|
if queries and queries[-1] == "":
|
|
307
311
|
queries.pop()
|
|
308
312
|
|
|
313
|
+
def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
314
|
+
"""Build a generic OpenLineage facet, aligned with SQL-based operators."""
|
|
315
|
+
from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
|
|
316
|
+
|
|
317
|
+
return get_openlineage_facets_with_sql(
|
|
318
|
+
hook=self.hook,
|
|
319
|
+
sql=self.query,
|
|
320
|
+
conn_id=self.gcp_conn_id,
|
|
321
|
+
database=self.database_id,
|
|
322
|
+
)
|
|
323
|
+
|
|
309
324
|
|
|
310
325
|
class SpannerDeployDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
311
326
|
"""
|
|
@@ -380,7 +395,6 @@ class SpannerDeployDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
380
395
|
)
|
|
381
396
|
SpannerDatabaseLink.persist(
|
|
382
397
|
context=context,
|
|
383
|
-
task_instance=self,
|
|
384
398
|
instance_id=self.instance_id,
|
|
385
399
|
database_id=self.database_id,
|
|
386
400
|
project_id=self.project_id or hook.project_id,
|
|
@@ -496,7 +510,6 @@ class SpannerUpdateDatabaseInstanceOperator(GoogleCloudBaseOperator):
|
|
|
496
510
|
)
|
|
497
511
|
SpannerDatabaseLink.persist(
|
|
498
512
|
context=context,
|
|
499
|
-
task_instance=self,
|
|
500
513
|
instance_id=self.instance_id,
|
|
501
514
|
database_id=self.database_id,
|
|
502
515
|
project_id=self.project_id or hook.project_id,
|
|
@@ -35,7 +35,7 @@ if TYPE_CHECKING:
|
|
|
35
35
|
from google.api_core.retry import Retry
|
|
36
36
|
from google.cloud.speech_v1.types import RecognitionConfig
|
|
37
37
|
|
|
38
|
-
from airflow.
|
|
38
|
+
from airflow.providers.common.compat.sdk import Context
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class CloudSpeechToTextRecognizeSpeechOperator(GoogleCloudBaseOperator):
|
|
@@ -117,7 +117,6 @@ class CloudSpeechToTextRecognizeSpeechOperator(GoogleCloudBaseOperator):
|
|
|
117
117
|
if self.audio.uri:
|
|
118
118
|
FileDetailsLink.persist(
|
|
119
119
|
context=context,
|
|
120
|
-
task_instance=self,
|
|
121
120
|
# Slice from: "gs://{BUCKET_NAME}/{FILE_NAME}" to: "{BUCKET_NAME}/{FILE_NAME}"
|
|
122
121
|
uri=self.audio.uri[5:],
|
|
123
122
|
project_id=self.project_id or hook.project_id,
|
|
@@ -34,7 +34,7 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from google.api_core.retry import Retry
|
|
36
36
|
|
|
37
|
-
from airflow.
|
|
37
|
+
from airflow.providers.common.compat.sdk import Context
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class StackdriverListAlertPoliciesOperator(GoogleCloudBaseOperator):
|
|
@@ -145,7 +145,6 @@ class StackdriverListAlertPoliciesOperator(GoogleCloudBaseOperator):
|
|
|
145
145
|
)
|
|
146
146
|
StackdriverPoliciesLink.persist(
|
|
147
147
|
context=context,
|
|
148
|
-
operator_instance=self,
|
|
149
148
|
project_id=self.project_id or self.hook.project_id,
|
|
150
149
|
)
|
|
151
150
|
return [AlertPolicy.to_dict(policy) for policy in result]
|
|
@@ -228,7 +227,6 @@ class StackdriverEnableAlertPoliciesOperator(GoogleCloudBaseOperator):
|
|
|
228
227
|
)
|
|
229
228
|
StackdriverPoliciesLink.persist(
|
|
230
229
|
context=context,
|
|
231
|
-
operator_instance=self,
|
|
232
230
|
project_id=self.project_id or self.hook.project_id,
|
|
233
231
|
)
|
|
234
232
|
|
|
@@ -311,7 +309,6 @@ class StackdriverDisableAlertPoliciesOperator(GoogleCloudBaseOperator):
|
|
|
311
309
|
)
|
|
312
310
|
StackdriverPoliciesLink.persist(
|
|
313
311
|
context=context,
|
|
314
|
-
operator_instance=self,
|
|
315
312
|
project_id=self.project_id or self.hook.project_id,
|
|
316
313
|
)
|
|
317
314
|
|
|
@@ -394,7 +391,6 @@ class StackdriverUpsertAlertOperator(GoogleCloudBaseOperator):
|
|
|
394
391
|
)
|
|
395
392
|
StackdriverPoliciesLink.persist(
|
|
396
393
|
context=context,
|
|
397
|
-
operator_instance=self,
|
|
398
394
|
project_id=self.project_id or self.hook.project_id,
|
|
399
395
|
)
|
|
400
396
|
|
|
@@ -580,7 +576,6 @@ class StackdriverListNotificationChannelsOperator(GoogleCloudBaseOperator):
|
|
|
580
576
|
)
|
|
581
577
|
StackdriverNotificationsLink.persist(
|
|
582
578
|
context=context,
|
|
583
|
-
operator_instance=self,
|
|
584
579
|
project_id=self.project_id or self.hook.project_id,
|
|
585
580
|
)
|
|
586
581
|
return [NotificationChannel.to_dict(channel) for channel in channels]
|
|
@@ -666,7 +661,6 @@ class StackdriverEnableNotificationChannelsOperator(GoogleCloudBaseOperator):
|
|
|
666
661
|
)
|
|
667
662
|
StackdriverNotificationsLink.persist(
|
|
668
663
|
context=context,
|
|
669
|
-
operator_instance=self,
|
|
670
664
|
project_id=self.project_id or self.hook.project_id,
|
|
671
665
|
)
|
|
672
666
|
|
|
@@ -751,7 +745,6 @@ class StackdriverDisableNotificationChannelsOperator(GoogleCloudBaseOperator):
|
|
|
751
745
|
)
|
|
752
746
|
StackdriverNotificationsLink.persist(
|
|
753
747
|
context=context,
|
|
754
|
-
operator_instance=self,
|
|
755
748
|
project_id=self.project_id or self.hook.project_id,
|
|
756
749
|
)
|
|
757
750
|
|
|
@@ -838,7 +831,6 @@ class StackdriverUpsertNotificationChannelOperator(GoogleCloudBaseOperator):
|
|
|
838
831
|
)
|
|
839
832
|
StackdriverNotificationsLink.persist(
|
|
840
833
|
context=context,
|
|
841
|
-
operator_instance=self,
|
|
842
834
|
project_id=self.project_id or self.hook.project_id,
|
|
843
835
|
)
|
|
844
836
|
|
|
@@ -35,7 +35,7 @@ if TYPE_CHECKING:
|
|
|
35
35
|
from google.api_core.retry import Retry
|
|
36
36
|
from google.protobuf.field_mask_pb2 import FieldMask
|
|
37
37
|
|
|
38
|
-
from airflow.
|
|
38
|
+
from airflow.providers.common.compat.sdk import Context
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
MetaData = Sequence[tuple[str, str]]
|
|
@@ -137,7 +137,6 @@ class CloudTasksQueueCreateOperator(GoogleCloudBaseOperator):
|
|
|
137
137
|
metadata=self.metadata,
|
|
138
138
|
)
|
|
139
139
|
CloudTasksQueueLink.persist(
|
|
140
|
-
operator_instance=self,
|
|
141
140
|
context=context,
|
|
142
141
|
queue_name=queue.name,
|
|
143
142
|
)
|
|
@@ -236,7 +235,6 @@ class CloudTasksQueueUpdateOperator(GoogleCloudBaseOperator):
|
|
|
236
235
|
metadata=self.metadata,
|
|
237
236
|
)
|
|
238
237
|
CloudTasksQueueLink.persist(
|
|
239
|
-
operator_instance=self,
|
|
240
238
|
context=context,
|
|
241
239
|
queue_name=queue.name,
|
|
242
240
|
)
|
|
@@ -319,7 +317,6 @@ class CloudTasksQueueGetOperator(GoogleCloudBaseOperator):
|
|
|
319
317
|
metadata=self.metadata,
|
|
320
318
|
)
|
|
321
319
|
CloudTasksQueueLink.persist(
|
|
322
|
-
operator_instance=self,
|
|
323
320
|
context=context,
|
|
324
321
|
queue_name=queue.name,
|
|
325
322
|
)
|
|
@@ -406,7 +403,6 @@ class CloudTasksQueuesListOperator(GoogleCloudBaseOperator):
|
|
|
406
403
|
metadata=self.metadata,
|
|
407
404
|
)
|
|
408
405
|
CloudTasksLink.persist(
|
|
409
|
-
operator_instance=self,
|
|
410
406
|
context=context,
|
|
411
407
|
project_id=self.project_id or hook.project_id,
|
|
412
408
|
)
|
|
@@ -564,7 +560,6 @@ class CloudTasksQueuePurgeOperator(GoogleCloudBaseOperator):
|
|
|
564
560
|
metadata=self.metadata,
|
|
565
561
|
)
|
|
566
562
|
CloudTasksQueueLink.persist(
|
|
567
|
-
operator_instance=self,
|
|
568
563
|
context=context,
|
|
569
564
|
queue_name=queue.name,
|
|
570
565
|
)
|
|
@@ -647,7 +642,6 @@ class CloudTasksQueuePauseOperator(GoogleCloudBaseOperator):
|
|
|
647
642
|
metadata=self.metadata,
|
|
648
643
|
)
|
|
649
644
|
CloudTasksQueueLink.persist(
|
|
650
|
-
operator_instance=self,
|
|
651
645
|
context=context,
|
|
652
646
|
queue_name=queue.name,
|
|
653
647
|
)
|
|
@@ -730,7 +724,6 @@ class CloudTasksQueueResumeOperator(GoogleCloudBaseOperator):
|
|
|
730
724
|
metadata=self.metadata,
|
|
731
725
|
)
|
|
732
726
|
CloudTasksQueueLink.persist(
|
|
733
|
-
operator_instance=self,
|
|
734
727
|
context=context,
|
|
735
728
|
queue_name=queue.name,
|
|
736
729
|
)
|
|
@@ -830,7 +823,6 @@ class CloudTasksTaskCreateOperator(GoogleCloudBaseOperator):
|
|
|
830
823
|
metadata=self.metadata,
|
|
831
824
|
)
|
|
832
825
|
CloudTasksQueueLink.persist(
|
|
833
|
-
operator_instance=self,
|
|
834
826
|
context=context,
|
|
835
827
|
queue_name=task.name,
|
|
836
828
|
)
|
|
@@ -923,7 +915,6 @@ class CloudTasksTaskGetOperator(GoogleCloudBaseOperator):
|
|
|
923
915
|
metadata=self.metadata,
|
|
924
916
|
)
|
|
925
917
|
CloudTasksQueueLink.persist(
|
|
926
|
-
operator_instance=self,
|
|
927
918
|
context=context,
|
|
928
919
|
queue_name=task.name,
|
|
929
920
|
)
|
|
@@ -1016,7 +1007,6 @@ class CloudTasksTasksListOperator(GoogleCloudBaseOperator):
|
|
|
1016
1007
|
metadata=self.metadata,
|
|
1017
1008
|
)
|
|
1018
1009
|
CloudTasksQueueLink.persist(
|
|
1019
|
-
operator_instance=self,
|
|
1020
1010
|
context=context,
|
|
1021
1011
|
queue_name=f"projects/{self.project_id or hook.project_id}/"
|
|
1022
1012
|
f"locations/{self.location}/queues/{self.queue_name}",
|
|
@@ -1190,7 +1180,6 @@ class CloudTasksTaskRunOperator(GoogleCloudBaseOperator):
|
|
|
1190
1180
|
metadata=self.metadata,
|
|
1191
1181
|
)
|
|
1192
1182
|
CloudTasksQueueLink.persist(
|
|
1193
|
-
operator_instance=self,
|
|
1194
1183
|
context=context,
|
|
1195
1184
|
queue_name=task.name,
|
|
1196
1185
|
)
|
|
@@ -36,7 +36,7 @@ if TYPE_CHECKING:
|
|
|
36
36
|
from google.api_core.retry import Retry
|
|
37
37
|
from google.cloud.texttospeech_v1.types import AudioConfig, SynthesisInput, VoiceSelectionParams
|
|
38
38
|
|
|
39
|
-
from airflow.
|
|
39
|
+
from airflow.providers.common.compat.sdk import Context
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
class CloudTextToSpeechSynthesizeOperator(GoogleCloudBaseOperator):
|
|
@@ -150,7 +150,6 @@ class CloudTextToSpeechSynthesizeOperator(GoogleCloudBaseOperator):
|
|
|
150
150
|
)
|
|
151
151
|
FileDetailsLink.persist(
|
|
152
152
|
context=context,
|
|
153
|
-
task_instance=self,
|
|
154
153
|
uri=f"{self.target_bucket_name}/{self.target_filename}",
|
|
155
154
|
project_id=cloud_storage_hook.project_id,
|
|
156
155
|
)
|
|
@@ -37,6 +37,7 @@ from airflow.providers.google.cloud.links.translate import (
|
|
|
37
37
|
TranslationNativeDatasetLink,
|
|
38
38
|
)
|
|
39
39
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
|
40
|
+
from airflow.providers.google.cloud.operators.vertex_ai.dataset import DatasetImportDataResultsCheckHelper
|
|
40
41
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
41
42
|
|
|
42
43
|
if TYPE_CHECKING:
|
|
@@ -55,7 +56,7 @@ if TYPE_CHECKING:
|
|
|
55
56
|
)
|
|
56
57
|
from google.cloud.translate_v3.types.translation_service import Glossary, GlossaryInputConfig
|
|
57
58
|
|
|
58
|
-
from airflow.
|
|
59
|
+
from airflow.providers.common.compat.sdk import Context
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
class CloudTranslateTextOperator(GoogleCloudBaseOperator):
|
|
@@ -394,7 +395,6 @@ class TranslateTextBatchOperator(GoogleCloudBaseOperator):
|
|
|
394
395
|
self.log.info("Translate text batch job started.")
|
|
395
396
|
TranslateTextBatchLink.persist(
|
|
396
397
|
context=context,
|
|
397
|
-
task_instance=self,
|
|
398
398
|
project_id=self.project_id or hook.project_id,
|
|
399
399
|
output_config=self.output_config,
|
|
400
400
|
)
|
|
@@ -480,15 +480,15 @@ class TranslateCreateDatasetOperator(GoogleCloudBaseOperator):
|
|
|
480
480
|
result = hook.wait_for_operation_result(result_operation)
|
|
481
481
|
result = type(result).to_dict(result)
|
|
482
482
|
dataset_id = hook.extract_object_id(result)
|
|
483
|
-
|
|
483
|
+
context["ti"].xcom_push(key="dataset_id", value=dataset_id)
|
|
484
484
|
self.log.info("Dataset creation complete. The dataset_id: %s.", dataset_id)
|
|
485
485
|
|
|
486
486
|
project_id = self.project_id or hook.project_id
|
|
487
487
|
TranslationNativeDatasetLink.persist(
|
|
488
488
|
context=context,
|
|
489
|
-
task_instance=self,
|
|
490
489
|
dataset_id=dataset_id,
|
|
491
490
|
project_id=project_id,
|
|
491
|
+
location=self.location,
|
|
492
492
|
)
|
|
493
493
|
return result
|
|
494
494
|
|
|
@@ -556,7 +556,6 @@ class TranslateDatasetsListOperator(GoogleCloudBaseOperator):
|
|
|
556
556
|
project_id = self.project_id or hook.project_id
|
|
557
557
|
TranslationDatasetsListLink.persist(
|
|
558
558
|
context=context,
|
|
559
|
-
task_instance=self,
|
|
560
559
|
project_id=project_id,
|
|
561
560
|
)
|
|
562
561
|
self.log.info("Requesting datasets list")
|
|
@@ -577,7 +576,7 @@ class TranslateDatasetsListOperator(GoogleCloudBaseOperator):
|
|
|
577
576
|
return result_ids
|
|
578
577
|
|
|
579
578
|
|
|
580
|
-
class TranslateImportDataOperator(GoogleCloudBaseOperator):
|
|
579
|
+
class TranslateImportDataOperator(GoogleCloudBaseOperator, DatasetImportDataResultsCheckHelper):
|
|
581
580
|
"""
|
|
582
581
|
Import data to the translation dataset.
|
|
583
582
|
|
|
@@ -604,6 +603,7 @@ class TranslateImportDataOperator(GoogleCloudBaseOperator):
|
|
|
604
603
|
If set as a sequence, the identities from the list must grant
|
|
605
604
|
Service Account Token Creator IAM role to the directly preceding identity, with first
|
|
606
605
|
account from the list granting this role to the originating account (templated).
|
|
606
|
+
:param raise_for_empty_result: Raise an error if no additional data has been populated after the import.
|
|
607
607
|
"""
|
|
608
608
|
|
|
609
609
|
template_fields: Sequence[str] = (
|
|
@@ -629,6 +629,7 @@ class TranslateImportDataOperator(GoogleCloudBaseOperator):
|
|
|
629
629
|
retry: Retry | _MethodDefault = DEFAULT,
|
|
630
630
|
gcp_conn_id: str = "google_cloud_default",
|
|
631
631
|
impersonation_chain: str | Sequence[str] | None = None,
|
|
632
|
+
raise_for_empty_result: bool = False,
|
|
632
633
|
**kwargs,
|
|
633
634
|
) -> None:
|
|
634
635
|
super().__init__(**kwargs)
|
|
@@ -641,9 +642,21 @@ class TranslateImportDataOperator(GoogleCloudBaseOperator):
|
|
|
641
642
|
self.retry = retry
|
|
642
643
|
self.gcp_conn_id = gcp_conn_id
|
|
643
644
|
self.impersonation_chain = impersonation_chain
|
|
645
|
+
self.raise_for_empty_result = raise_for_empty_result
|
|
644
646
|
|
|
645
647
|
def execute(self, context: Context):
|
|
646
648
|
hook = TranslateHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
|
649
|
+
initial_dataset_size = self._get_number_of_ds_items(
|
|
650
|
+
dataset=hook.get_dataset(
|
|
651
|
+
dataset_id=self.dataset_id,
|
|
652
|
+
project_id=self.project_id,
|
|
653
|
+
location=self.location,
|
|
654
|
+
retry=self.retry,
|
|
655
|
+
timeout=self.timeout,
|
|
656
|
+
metadata=self.metadata,
|
|
657
|
+
),
|
|
658
|
+
total_key_name="example_count",
|
|
659
|
+
)
|
|
647
660
|
self.log.info("Importing data to dataset...")
|
|
648
661
|
operation = hook.import_dataset_data(
|
|
649
662
|
dataset_id=self.dataset_id,
|
|
@@ -657,12 +670,27 @@ class TranslateImportDataOperator(GoogleCloudBaseOperator):
|
|
|
657
670
|
project_id = self.project_id or hook.project_id
|
|
658
671
|
TranslationNativeDatasetLink.persist(
|
|
659
672
|
context=context,
|
|
660
|
-
task_instance=self,
|
|
661
673
|
dataset_id=self.dataset_id,
|
|
662
674
|
project_id=project_id,
|
|
675
|
+
location=self.location,
|
|
663
676
|
)
|
|
664
677
|
hook.wait_for_operation_done(operation=operation, timeout=self.timeout)
|
|
678
|
+
|
|
679
|
+
result_dataset_size = self._get_number_of_ds_items(
|
|
680
|
+
dataset=hook.get_dataset(
|
|
681
|
+
dataset_id=self.dataset_id,
|
|
682
|
+
project_id=self.project_id,
|
|
683
|
+
location=self.location,
|
|
684
|
+
retry=self.retry,
|
|
685
|
+
timeout=self.timeout,
|
|
686
|
+
metadata=self.metadata,
|
|
687
|
+
),
|
|
688
|
+
total_key_name="example_count",
|
|
689
|
+
)
|
|
690
|
+
if self.raise_for_empty_result:
|
|
691
|
+
self._raise_for_empty_import_result(self.dataset_id, initial_dataset_size, result_dataset_size)
|
|
665
692
|
self.log.info("Importing data finished!")
|
|
693
|
+
return {"total_imported": int(result_dataset_size) - int(initial_dataset_size)}
|
|
666
694
|
|
|
667
695
|
|
|
668
696
|
class TranslateDeleteDatasetOperator(GoogleCloudBaseOperator):
|
|
@@ -821,16 +849,16 @@ class TranslateCreateModelOperator(GoogleCloudBaseOperator):
|
|
|
821
849
|
result = hook.wait_for_operation_result(operation=result_operation)
|
|
822
850
|
result = type(result).to_dict(result)
|
|
823
851
|
model_id = hook.extract_object_id(result)
|
|
824
|
-
|
|
852
|
+
context["ti"].xcom_push(key="model_id", value=model_id)
|
|
825
853
|
self.log.info("Model creation complete. The model_id: %s.", model_id)
|
|
826
854
|
|
|
827
855
|
project_id = self.project_id or hook.project_id
|
|
828
856
|
TranslationModelLink.persist(
|
|
829
857
|
context=context,
|
|
830
|
-
task_instance=self,
|
|
831
858
|
dataset_id=self.dataset_id,
|
|
832
859
|
model_id=model_id,
|
|
833
860
|
project_id=project_id,
|
|
861
|
+
location=self.location,
|
|
834
862
|
)
|
|
835
863
|
return result
|
|
836
864
|
|
|
@@ -898,7 +926,6 @@ class TranslateModelsListOperator(GoogleCloudBaseOperator):
|
|
|
898
926
|
project_id = self.project_id or hook.project_id
|
|
899
927
|
TranslationModelsListLink.persist(
|
|
900
928
|
context=context,
|
|
901
|
-
task_instance=self,
|
|
902
929
|
project_id=project_id,
|
|
903
930
|
)
|
|
904
931
|
self.log.info("Requesting models list")
|
|
@@ -1141,7 +1168,6 @@ class TranslateDocumentOperator(GoogleCloudBaseOperator):
|
|
|
1141
1168
|
if self.document_output_config:
|
|
1142
1169
|
TranslateResultByOutputConfigLink.persist(
|
|
1143
1170
|
context=context,
|
|
1144
|
-
task_instance=self,
|
|
1145
1171
|
project_id=self.project_id or hook.project_id,
|
|
1146
1172
|
output_config=self.document_output_config,
|
|
1147
1173
|
)
|
|
@@ -1304,7 +1330,6 @@ class TranslateDocumentBatchOperator(GoogleCloudBaseOperator):
|
|
|
1304
1330
|
self.log.info("Batch document translation job started.")
|
|
1305
1331
|
TranslateResultByOutputConfigLink.persist(
|
|
1306
1332
|
context=context,
|
|
1307
|
-
task_instance=self,
|
|
1308
1333
|
project_id=self.project_id or hook.project_id,
|
|
1309
1334
|
output_config=self.output_config,
|
|
1310
1335
|
)
|
|
@@ -1411,7 +1436,7 @@ class TranslateCreateGlossaryOperator(GoogleCloudBaseOperator):
|
|
|
1411
1436
|
result = type(result).to_dict(result)
|
|
1412
1437
|
|
|
1413
1438
|
glossary_id = hook.extract_object_id(result)
|
|
1414
|
-
|
|
1439
|
+
context["ti"].xcom_push(key="glossary_id", value=glossary_id)
|
|
1415
1440
|
self.log.info("Glossary creation complete. The glossary_id: %s.", glossary_id)
|
|
1416
1441
|
return result
|
|
1417
1442
|
|
|
@@ -1610,7 +1635,6 @@ class TranslateListGlossariesOperator(GoogleCloudBaseOperator):
|
|
|
1610
1635
|
project_id = self.project_id or hook.project_id
|
|
1611
1636
|
TranslationGlossariesListLink.persist(
|
|
1612
1637
|
context=context,
|
|
1613
|
-
task_instance=self,
|
|
1614
1638
|
project_id=project_id,
|
|
1615
1639
|
)
|
|
1616
1640
|
self.log.info("Requesting glossaries list")
|
|
@@ -1630,8 +1654,8 @@ class TranslateListGlossariesOperator(GoogleCloudBaseOperator):
|
|
|
1630
1654
|
raise AirflowException(e)
|
|
1631
1655
|
|
|
1632
1656
|
result_ids = []
|
|
1633
|
-
for
|
|
1634
|
-
glossary_item = type(
|
|
1657
|
+
for glossary_item_raw in results_pager:
|
|
1658
|
+
glossary_item = type(glossary_item_raw).to_dict(glossary_item_raw)
|
|
1635
1659
|
glossary_id = hook.extract_object_id(glossary_item)
|
|
1636
1660
|
result_ids.append(glossary_id)
|
|
1637
1661
|
self.log.info("Fetching the glossaries list complete. Glossary id-s: %s", result_ids)
|
|
@@ -34,7 +34,7 @@ from airflow.providers.google.common.links.storage import FileDetailsLink
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from google.cloud.speech_v1.types import RecognitionAudio, RecognitionConfig
|
|
36
36
|
|
|
37
|
-
from airflow.
|
|
37
|
+
from airflow.providers.common.compat.sdk import Context
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class CloudTranslateSpeechOperator(GoogleCloudBaseOperator):
|
|
@@ -173,7 +173,6 @@ class CloudTranslateSpeechOperator(GoogleCloudBaseOperator):
|
|
|
173
173
|
if self.audio.uri:
|
|
174
174
|
FileDetailsLink.persist(
|
|
175
175
|
context=context,
|
|
176
|
-
task_instance=self,
|
|
177
176
|
# Slice from: "gs://{BUCKET_NAME}/{FILE_NAME}" to: "{BUCKET_NAME}/{FILE_NAME}"
|
|
178
177
|
uri=self.audio.uri[5:],
|
|
179
178
|
project_id=self.project_id or translate_hook.project_id,
|