apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
28
28
|
from googleapiclient.errors import HttpError
|
|
29
29
|
|
|
30
30
|
from airflow.configuration import conf
|
|
31
|
-
from airflow.
|
|
31
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
32
32
|
from airflow.providers.google.cloud.hooks.dataflow import (
|
|
33
33
|
DEFAULT_DATAFLOW_LOCATION,
|
|
34
34
|
DataflowHook,
|
|
@@ -43,7 +43,7 @@ from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_MET
|
|
|
43
43
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
44
44
|
|
|
45
45
|
if TYPE_CHECKING:
|
|
46
|
-
from airflow.
|
|
46
|
+
from airflow.providers.common.compat.sdk import Context
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class CheckJobRunning(Enum):
|
|
@@ -383,7 +383,12 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
|
|
|
383
383
|
def execute(self, context: Context):
|
|
384
384
|
def set_current_job(current_job):
|
|
385
385
|
self.job = current_job
|
|
386
|
-
DataflowJobLink.persist(
|
|
386
|
+
DataflowJobLink.persist(
|
|
387
|
+
context=context,
|
|
388
|
+
project_id=self.project_id,
|
|
389
|
+
region=self.location,
|
|
390
|
+
job_id=self.job.get("id"),
|
|
391
|
+
)
|
|
387
392
|
|
|
388
393
|
options = self.dataflow_default_options
|
|
389
394
|
options.update(self.options)
|
|
@@ -404,7 +409,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
|
|
|
404
409
|
append_job_name=self.append_job_name,
|
|
405
410
|
)
|
|
406
411
|
job_id = self.hook.extract_job_id(self.job)
|
|
407
|
-
|
|
412
|
+
context["task_instance"].xcom_push(key="job_id", value=job_id)
|
|
408
413
|
return job_id
|
|
409
414
|
|
|
410
415
|
self.job = self.hook.launch_job_with_template(
|
|
@@ -418,7 +423,9 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
|
|
|
418
423
|
environment=self.environment,
|
|
419
424
|
)
|
|
420
425
|
job_id = self.hook.extract_job_id(self.job)
|
|
421
|
-
DataflowJobLink.persist(
|
|
426
|
+
DataflowJobLink.persist(
|
|
427
|
+
context=context, project_id=self.project_id, region=self.location, job_id=job_id
|
|
428
|
+
)
|
|
422
429
|
self.defer(
|
|
423
430
|
trigger=TemplateJobStartTrigger(
|
|
424
431
|
project_id=self.project_id,
|
|
@@ -439,7 +446,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
|
|
|
439
446
|
raise AirflowException(event["message"])
|
|
440
447
|
|
|
441
448
|
job_id = event["job_id"]
|
|
442
|
-
|
|
449
|
+
context["task_instance"].xcom_push(key="job_id", value=job_id)
|
|
443
450
|
self.log.info("Task %s completed with response %s", self.task_id, event["message"])
|
|
444
451
|
return job_id
|
|
445
452
|
|
|
@@ -576,6 +583,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
|
|
|
576
583
|
def hook(self) -> DataflowHook:
|
|
577
584
|
hook = DataflowHook(
|
|
578
585
|
gcp_conn_id=self.gcp_conn_id,
|
|
586
|
+
poll_sleep=self.poll_sleep,
|
|
579
587
|
drain_pipeline=self.drain_pipeline,
|
|
580
588
|
cancel_timeout=self.cancel_timeout,
|
|
581
589
|
wait_until_finished=self.wait_until_finished,
|
|
@@ -590,7 +598,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
|
|
|
590
598
|
|
|
591
599
|
def set_current_job(current_job):
|
|
592
600
|
self.job = current_job
|
|
593
|
-
DataflowJobLink.persist(
|
|
601
|
+
DataflowJobLink.persist(
|
|
602
|
+
context=context, project_id=self.project_id, region=self.location, job_id=self.job.get("id")
|
|
603
|
+
)
|
|
594
604
|
|
|
595
605
|
if not self.deferrable:
|
|
596
606
|
self.job = self.hook.start_flex_template(
|
|
@@ -600,7 +610,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
|
|
|
600
610
|
on_new_job_callback=set_current_job,
|
|
601
611
|
)
|
|
602
612
|
job_id = self.hook.extract_job_id(self.job)
|
|
603
|
-
|
|
613
|
+
context["task_instance"].xcom_push(key="job_id", value=job_id)
|
|
604
614
|
return self.job
|
|
605
615
|
|
|
606
616
|
self.job = self.hook.launch_job_with_flex_template(
|
|
@@ -609,7 +619,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
|
|
|
609
619
|
project_id=self.project_id,
|
|
610
620
|
)
|
|
611
621
|
job_id = self.hook.extract_job_id(self.job)
|
|
612
|
-
DataflowJobLink.persist(
|
|
622
|
+
DataflowJobLink.persist(
|
|
623
|
+
context=context, project_id=self.project_id, region=self.location, job_id=job_id
|
|
624
|
+
)
|
|
613
625
|
self.defer(
|
|
614
626
|
trigger=TemplateJobStartTrigger(
|
|
615
627
|
project_id=self.project_id,
|
|
@@ -639,7 +651,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
|
|
|
639
651
|
|
|
640
652
|
job_id = event["job_id"]
|
|
641
653
|
self.log.info("Task %s completed with response %s", job_id, event["message"])
|
|
642
|
-
|
|
654
|
+
context["task_instance"].xcom_push(key="job_id", value=job_id)
|
|
643
655
|
job = self.hook.get_job(job_id=job_id, project_id=self.project_id, location=self.location)
|
|
644
656
|
return job
|
|
645
657
|
|
|
@@ -764,7 +776,9 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
|
|
|
764
776
|
location=self.region,
|
|
765
777
|
)
|
|
766
778
|
|
|
767
|
-
DataflowJobLink.persist(
|
|
779
|
+
DataflowJobLink.persist(
|
|
780
|
+
context=context, project_id=self.project_id, region=self.region, job_id=self.job_id
|
|
781
|
+
)
|
|
768
782
|
|
|
769
783
|
if self.deferrable:
|
|
770
784
|
self.defer(
|
|
@@ -794,7 +808,7 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
|
|
|
794
808
|
raise AirflowException(event["message"])
|
|
795
809
|
job = event["job"]
|
|
796
810
|
self.log.info("Job %s completed with response %s", job["id"], event["message"])
|
|
797
|
-
|
|
811
|
+
context["task_instance"].xcom_push(key="job_id", value=job["id"])
|
|
798
812
|
|
|
799
813
|
return job
|
|
800
814
|
|
|
@@ -971,6 +985,14 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
|
|
|
971
985
|
|
|
972
986
|
self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None
|
|
973
987
|
|
|
988
|
+
@property
|
|
989
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
990
|
+
return {
|
|
991
|
+
"project_id": self.project_id,
|
|
992
|
+
"location": self.location,
|
|
993
|
+
"pipeline_name": self.pipeline_name,
|
|
994
|
+
}
|
|
995
|
+
|
|
974
996
|
def execute(self, context: Context):
|
|
975
997
|
if self.body is None:
|
|
976
998
|
raise AirflowException(
|
|
@@ -1003,8 +1025,8 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
|
|
|
1003
1025
|
pipeline_name=self.pipeline_name,
|
|
1004
1026
|
location=self.location,
|
|
1005
1027
|
)
|
|
1006
|
-
DataflowPipelineLink.persist(
|
|
1007
|
-
|
|
1028
|
+
DataflowPipelineLink.persist(context=context)
|
|
1029
|
+
context["task_instance"].xcom_push(key="pipeline_name", value=self.pipeline_name)
|
|
1008
1030
|
if self.pipeline:
|
|
1009
1031
|
if "error" in self.pipeline:
|
|
1010
1032
|
raise AirflowException(self.pipeline.get("error").get("message"))
|
|
@@ -1075,8 +1097,10 @@ class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
|
|
|
1075
1097
|
location=self.location,
|
|
1076
1098
|
)["job"]
|
|
1077
1099
|
job_id = self.dataflow_hook.extract_job_id(self.job)
|
|
1078
|
-
|
|
1079
|
-
DataflowJobLink.persist(
|
|
1100
|
+
context["task_instance"].xcom_push(key="job_id", value=job_id)
|
|
1101
|
+
DataflowJobLink.persist(
|
|
1102
|
+
context=context, project_id=self.project_id, region=self.location, job_id=job_id
|
|
1103
|
+
)
|
|
1080
1104
|
except HttpError as e:
|
|
1081
1105
|
if e.resp.status == 404:
|
|
1082
1106
|
raise AirflowException("Pipeline with given name was not found.")
|
|
@@ -28,7 +28,7 @@ from airflow.providers.google.cloud.links.dataform import (
|
|
|
28
28
|
if TYPE_CHECKING:
|
|
29
29
|
from google.api_core.retry import Retry
|
|
30
30
|
|
|
31
|
-
from airflow.
|
|
31
|
+
from airflow.providers.common.compat.sdk import Context
|
|
32
32
|
|
|
33
33
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
34
34
|
from google.cloud.dataform_v1beta1.types import (
|
|
@@ -258,7 +258,6 @@ class DataformCreateWorkflowInvocationOperator(GoogleCloudBaseOperator):
|
|
|
258
258
|
)
|
|
259
259
|
workflow_invocation_id = result.name.split("/")[-1]
|
|
260
260
|
DataformWorkflowInvocationLink.persist(
|
|
261
|
-
operator_instance=self,
|
|
262
261
|
context=context,
|
|
263
262
|
project_id=self.project_id,
|
|
264
263
|
region=self.region,
|
|
@@ -347,6 +346,13 @@ class DataformGetWorkflowInvocationOperator(GoogleCloudBaseOperator):
|
|
|
347
346
|
timeout=self.timeout,
|
|
348
347
|
metadata=self.metadata,
|
|
349
348
|
)
|
|
349
|
+
DataformWorkflowInvocationLink.persist(
|
|
350
|
+
context=context,
|
|
351
|
+
project_id=self.project_id,
|
|
352
|
+
region=self.region,
|
|
353
|
+
repository_id=self.repository_id,
|
|
354
|
+
workflow_invocation_id=self.workflow_invocation_id,
|
|
355
|
+
)
|
|
350
356
|
return WorkflowInvocation.to_dict(result)
|
|
351
357
|
|
|
352
358
|
|
|
@@ -412,7 +418,6 @@ class DataformQueryWorkflowInvocationActionsOperator(GoogleCloudBaseOperator):
|
|
|
412
418
|
impersonation_chain=self.impersonation_chain,
|
|
413
419
|
)
|
|
414
420
|
DataformWorkflowInvocationLink.persist(
|
|
415
|
-
operator_instance=self,
|
|
416
421
|
context=context,
|
|
417
422
|
project_id=self.project_id,
|
|
418
423
|
region=self.region,
|
|
@@ -494,6 +499,13 @@ class DataformCancelWorkflowInvocationOperator(GoogleCloudBaseOperator):
|
|
|
494
499
|
gcp_conn_id=self.gcp_conn_id,
|
|
495
500
|
impersonation_chain=self.impersonation_chain,
|
|
496
501
|
)
|
|
502
|
+
DataformWorkflowInvocationLink.persist(
|
|
503
|
+
context=context,
|
|
504
|
+
project_id=self.project_id,
|
|
505
|
+
region=self.region,
|
|
506
|
+
repository_id=self.repository_id,
|
|
507
|
+
workflow_invocation_id=self.workflow_invocation_id,
|
|
508
|
+
)
|
|
497
509
|
hook.cancel_workflow_invocation(
|
|
498
510
|
project_id=self.project_id,
|
|
499
511
|
region=self.region,
|
|
@@ -576,7 +588,6 @@ class DataformCreateRepositoryOperator(GoogleCloudBaseOperator):
|
|
|
576
588
|
)
|
|
577
589
|
|
|
578
590
|
DataformRepositoryLink.persist(
|
|
579
|
-
operator_instance=self,
|
|
580
591
|
context=context,
|
|
581
592
|
project_id=self.project_id,
|
|
582
593
|
region=self.region,
|
|
@@ -735,7 +746,6 @@ class DataformCreateWorkspaceOperator(GoogleCloudBaseOperator):
|
|
|
735
746
|
)
|
|
736
747
|
|
|
737
748
|
DataformWorkspaceLink.persist(
|
|
738
|
-
operator_instance=self,
|
|
739
749
|
context=context,
|
|
740
750
|
project_id=self.project_id,
|
|
741
751
|
region=self.region,
|
|
@@ -26,7 +26,7 @@ from google.api_core.retry import exponential_sleep_generator
|
|
|
26
26
|
from googleapiclient.errors import HttpError
|
|
27
27
|
|
|
28
28
|
from airflow.configuration import conf
|
|
29
|
-
from airflow.
|
|
29
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
30
30
|
from airflow.providers.google.cloud.hooks.datafusion import SUCCESS_STATES, DataFusionHook, PipelineStates
|
|
31
31
|
from airflow.providers.google.cloud.links.datafusion import (
|
|
32
32
|
DataFusionInstanceLink,
|
|
@@ -40,7 +40,8 @@ from airflow.providers.google.cloud.utils.helpers import resource_path_to_dict
|
|
|
40
40
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
41
41
|
|
|
42
42
|
if TYPE_CHECKING:
|
|
43
|
-
from airflow.
|
|
43
|
+
from airflow.providers.common.compat.sdk import Context
|
|
44
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
44
45
|
|
|
45
46
|
|
|
46
47
|
class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
|
|
@@ -111,10 +112,9 @@ class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
|
|
|
111
112
|
project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
|
|
112
113
|
DataFusionInstanceLink.persist(
|
|
113
114
|
context=context,
|
|
114
|
-
task_instance=self,
|
|
115
115
|
project_id=project_id,
|
|
116
116
|
instance_name=self.instance_name,
|
|
117
|
-
|
|
117
|
+
region=self.location,
|
|
118
118
|
)
|
|
119
119
|
|
|
120
120
|
|
|
@@ -269,10 +269,9 @@ class CloudDataFusionCreateInstanceOperator(GoogleCloudBaseOperator):
|
|
|
269
269
|
project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
|
|
270
270
|
DataFusionInstanceLink.persist(
|
|
271
271
|
context=context,
|
|
272
|
-
task_instance=self,
|
|
273
272
|
project_id=project_id,
|
|
274
273
|
instance_name=self.instance_name,
|
|
275
|
-
|
|
274
|
+
region=self.location,
|
|
276
275
|
)
|
|
277
276
|
return instance
|
|
278
277
|
|
|
@@ -358,10 +357,9 @@ class CloudDataFusionUpdateInstanceOperator(GoogleCloudBaseOperator):
|
|
|
358
357
|
project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
|
|
359
358
|
DataFusionInstanceLink.persist(
|
|
360
359
|
context=context,
|
|
361
|
-
task_instance=self,
|
|
362
360
|
project_id=project_id,
|
|
363
361
|
instance_name=self.instance_name,
|
|
364
|
-
|
|
362
|
+
region=self.location,
|
|
365
363
|
)
|
|
366
364
|
|
|
367
365
|
|
|
@@ -429,10 +427,9 @@ class CloudDataFusionGetInstanceOperator(GoogleCloudBaseOperator):
|
|
|
429
427
|
project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
|
|
430
428
|
DataFusionInstanceLink.persist(
|
|
431
429
|
context=context,
|
|
432
|
-
task_instance=self,
|
|
433
430
|
project_id=project_id,
|
|
434
431
|
instance_name=self.instance_name,
|
|
435
|
-
|
|
432
|
+
region=self.location,
|
|
436
433
|
)
|
|
437
434
|
return instance
|
|
438
435
|
|
|
@@ -519,7 +516,6 @@ class CloudDataFusionCreatePipelineOperator(GoogleCloudBaseOperator):
|
|
|
519
516
|
)
|
|
520
517
|
DataFusionPipelineLink.persist(
|
|
521
518
|
context=context,
|
|
522
|
-
task_instance=self,
|
|
523
519
|
uri=instance["serviceEndpoint"],
|
|
524
520
|
pipeline_name=self.pipeline_name,
|
|
525
521
|
namespace=self.namespace,
|
|
@@ -693,7 +689,6 @@ class CloudDataFusionListPipelinesOperator(GoogleCloudBaseOperator):
|
|
|
693
689
|
|
|
694
690
|
DataFusionPipelinesLink.persist(
|
|
695
691
|
context=context,
|
|
696
|
-
task_instance=self,
|
|
697
692
|
uri=service_endpoint,
|
|
698
693
|
namespace=self.namespace,
|
|
699
694
|
)
|
|
@@ -783,6 +778,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
|
|
|
783
778
|
self.pipeline_timeout = pipeline_timeout
|
|
784
779
|
self.deferrable = deferrable
|
|
785
780
|
self.poll_interval = poll_interval
|
|
781
|
+
self.pipeline_id: str | None = None
|
|
786
782
|
|
|
787
783
|
if success_states:
|
|
788
784
|
self.success_states = success_states
|
|
@@ -802,18 +798,17 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
|
|
|
802
798
|
project_id=self.project_id,
|
|
803
799
|
)
|
|
804
800
|
api_url = instance["apiEndpoint"]
|
|
805
|
-
pipeline_id = hook.start_pipeline(
|
|
801
|
+
self.pipeline_id = hook.start_pipeline(
|
|
806
802
|
pipeline_name=self.pipeline_name,
|
|
807
803
|
pipeline_type=self.pipeline_type,
|
|
808
804
|
instance_url=api_url,
|
|
809
805
|
namespace=self.namespace,
|
|
810
806
|
runtime_args=self.runtime_args,
|
|
811
807
|
)
|
|
812
|
-
self.log.info("Pipeline %s submitted successfully.", pipeline_id)
|
|
808
|
+
self.log.info("Pipeline %s submitted successfully.", self.pipeline_id)
|
|
813
809
|
|
|
814
810
|
DataFusionPipelineLink.persist(
|
|
815
811
|
context=context,
|
|
816
|
-
task_instance=self,
|
|
817
812
|
uri=instance["serviceEndpoint"],
|
|
818
813
|
pipeline_name=self.pipeline_name,
|
|
819
814
|
namespace=self.namespace,
|
|
@@ -831,7 +826,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
|
|
|
831
826
|
namespace=self.namespace,
|
|
832
827
|
pipeline_name=self.pipeline_name,
|
|
833
828
|
pipeline_type=self.pipeline_type.value,
|
|
834
|
-
pipeline_id=pipeline_id,
|
|
829
|
+
pipeline_id=self.pipeline_id,
|
|
835
830
|
poll_interval=self.poll_interval,
|
|
836
831
|
gcp_conn_id=self.gcp_conn_id,
|
|
837
832
|
impersonation_chain=self.impersonation_chain,
|
|
@@ -841,19 +836,21 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
|
|
|
841
836
|
else:
|
|
842
837
|
if not self.asynchronous:
|
|
843
838
|
# when NOT using asynchronous mode it will just wait for pipeline to finish and print message
|
|
844
|
-
self.log.info(
|
|
839
|
+
self.log.info(
|
|
840
|
+
"Waiting when pipeline %s will be in one of the success states", self.pipeline_id
|
|
841
|
+
)
|
|
845
842
|
hook.wait_for_pipeline_state(
|
|
846
843
|
success_states=self.success_states,
|
|
847
|
-
pipeline_id=pipeline_id,
|
|
844
|
+
pipeline_id=self.pipeline_id,
|
|
848
845
|
pipeline_name=self.pipeline_name,
|
|
849
846
|
pipeline_type=self.pipeline_type,
|
|
850
847
|
namespace=self.namespace,
|
|
851
848
|
instance_url=api_url,
|
|
852
849
|
timeout=self.pipeline_timeout,
|
|
853
850
|
)
|
|
854
|
-
self.log.info("Pipeline %s discovered success state.", pipeline_id)
|
|
851
|
+
self.log.info("Pipeline %s discovered success state.", self.pipeline_id)
|
|
855
852
|
# otherwise, return pipeline_id so that sensor can use it later to check the pipeline state
|
|
856
|
-
return pipeline_id
|
|
853
|
+
return self.pipeline_id
|
|
857
854
|
|
|
858
855
|
def execute_complete(self, context: Context, event: dict[str, Any]):
|
|
859
856
|
"""
|
|
@@ -870,6 +867,31 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
|
|
|
870
867
|
)
|
|
871
868
|
return event["pipeline_id"]
|
|
872
869
|
|
|
870
|
+
def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
871
|
+
"""Build and return OpenLineage facets and datasets for the completed pipeline start."""
|
|
872
|
+
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
873
|
+
from airflow.providers.google.cloud.openlineage.facets import DataFusionRunFacet
|
|
874
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
875
|
+
|
|
876
|
+
pipeline_resource = f"{self.project_id}:{self.location}:{self.instance_name}:{self.pipeline_name}"
|
|
877
|
+
|
|
878
|
+
inputs = [Dataset(namespace="datafusion", name=pipeline_resource)]
|
|
879
|
+
|
|
880
|
+
if self.pipeline_id:
|
|
881
|
+
output_name = f"{pipeline_resource}:{self.pipeline_id}"
|
|
882
|
+
else:
|
|
883
|
+
output_name = f"{pipeline_resource}:unknown"
|
|
884
|
+
outputs = [Dataset(namespace="datafusion", name=output_name)]
|
|
885
|
+
|
|
886
|
+
run_facets = {
|
|
887
|
+
"dataFusionRun": DataFusionRunFacet(
|
|
888
|
+
runId=self.pipeline_id,
|
|
889
|
+
runtimeArgs=self.runtime_args,
|
|
890
|
+
)
|
|
891
|
+
}
|
|
892
|
+
|
|
893
|
+
return OperatorLineage(inputs=inputs, outputs=outputs, run_facets=run_facets, job_facets={})
|
|
894
|
+
|
|
873
895
|
|
|
874
896
|
class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
|
|
875
897
|
"""
|
|
@@ -943,7 +965,6 @@ class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
|
|
|
943
965
|
|
|
944
966
|
DataFusionPipelineLink.persist(
|
|
945
967
|
context=context,
|
|
946
|
-
task_instance=self,
|
|
947
968
|
uri=instance["serviceEndpoint"],
|
|
948
969
|
pipeline_name=self.pipeline_name,
|
|
949
970
|
namespace=self.namespace,
|