apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -5
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +166 -281
- airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
- airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
- airflow/providers/google/cloud/hooks/dataflow.py +71 -94
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +72 -71
- airflow/providers/google/cloud/hooks/gcs.py +111 -14
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/mlengine.py +3 -2
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +73 -8
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +2 -2
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +70 -55
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/operators/bigtable.py +36 -7
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
- airflow/providers/google/cloud/operators/cloud_build.py +75 -32
- airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +23 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
- airflow/providers/google/cloud/operators/compute.py +8 -40
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +41 -20
- airflow/providers/google/cloud/operators/dataplex.py +193 -109
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +78 -35
- airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +16 -7
- airflow/providers/google/cloud/operators/gcs.py +10 -8
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +60 -14
- airflow/providers/google/cloud/operators/spanner.py +25 -12
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +40 -16
- airflow/providers/google/cloud/operators/translate_speech.py +1 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/sensors/dataflow.py +26 -9
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +2 -2
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +4 -4
- airflow/providers/google/cloud/sensors/looker.py +2 -2
- airflow/providers/google/cloud/sensors/pubsub.py +4 -4
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +2 -2
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +122 -52
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -8
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
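The listing above adds new modules (for example `cloud/bundles/gcs.py`, `cloud/hooks/gen_ai.py`, `cloud/transfers/http_to_gcs.py`, `cloud/operators/vertex_ai/ray.py`) and removes the deprecated AutoML, Life Sciences, and MLEngine ones, so it is worth confirming which provider build an environment actually runs before relying on either side of this diff. A minimal, standard-library-only check (the distribution name matches the wheel name above):

```python
# Minimal sketch: report the installed apache-airflow-providers-google version, if any.
from importlib.metadata import PackageNotFoundError, version

try:
    installed = version("apache-airflow-providers-google")
except PackageNotFoundError:
    installed = None

print(f"apache-airflow-providers-google: {installed or 'not installed'}")

# The provider also exposes its version string in code
# (defined in airflow/providers/google/__init__.py, changed above):
#   from airflow.providers.google import __version__
```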
airflow/providers/google/cloud/operators/dataflow.py

@@ -43,7 +43,7 @@ from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_MET
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
-    from airflow.
+    from airflow.providers.common.compat.sdk import Context


 class CheckJobRunning(Enum):
@@ -383,7 +383,12 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
     def execute(self, context: Context):
         def set_current_job(current_job):
             self.job = current_job
-            DataflowJobLink.persist(
+            DataflowJobLink.persist(
+                context=context,
+                project_id=self.project_id,
+                region=self.location,
+                job_id=self.job.get("id"),
+            )

         options = self.dataflow_default_options
         options.update(self.options)
@@ -404,7 +409,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
                 append_job_name=self.append_job_name,
             )
             job_id = self.hook.extract_job_id(self.job)
-
+            context["task_instance"].xcom_push(key="job_id", value=job_id)
             return job_id

         self.job = self.hook.launch_job_with_template(
@@ -418,7 +423,9 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
             environment=self.environment,
         )
         job_id = self.hook.extract_job_id(self.job)
-        DataflowJobLink.persist(
+        DataflowJobLink.persist(
+            context=context, project_id=self.project_id, region=self.location, job_id=job_id
+        )
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
@@ -439,7 +446,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
             raise AirflowException(event["message"])

         job_id = event["job_id"]
-
+        context["task_instance"].xcom_push(key="job_id", value=job_id)
         self.log.info("Task %s completed with response %s", self.task_id, event["message"])
         return job_id

@@ -590,7 +597,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):

         def set_current_job(current_job):
             self.job = current_job
-            DataflowJobLink.persist(
+            DataflowJobLink.persist(
+                context=context, project_id=self.project_id, region=self.location, job_id=self.job.get("id")
+            )

         if not self.deferrable:
             self.job = self.hook.start_flex_template(
@@ -600,7 +609,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
                 on_new_job_callback=set_current_job,
             )
             job_id = self.hook.extract_job_id(self.job)
-
+            context["task_instance"].xcom_push(key="job_id", value=job_id)
             return self.job

         self.job = self.hook.launch_job_with_flex_template(
@@ -609,7 +618,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
             project_id=self.project_id,
         )
         job_id = self.hook.extract_job_id(self.job)
-        DataflowJobLink.persist(
+        DataflowJobLink.persist(
+            context=context, project_id=self.project_id, region=self.location, job_id=job_id
+        )
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
@@ -639,7 +650,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):

         job_id = event["job_id"]
         self.log.info("Task %s completed with response %s", job_id, event["message"])
-
+        context["task_instance"].xcom_push(key="job_id", value=job_id)
         job = self.hook.get_job(job_id=job_id, project_id=self.project_id, location=self.location)
         return job

@@ -764,7 +775,9 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
             location=self.region,
         )

-        DataflowJobLink.persist(
+        DataflowJobLink.persist(
+            context=context, project_id=self.project_id, region=self.region, job_id=self.job_id
+        )

         if self.deferrable:
             self.defer(
@@ -794,7 +807,7 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
             raise AirflowException(event["message"])
         job = event["job"]
         self.log.info("Job %s completed with response %s", job["id"], event["message"])
-
+        context["task_instance"].xcom_push(key="job_id", value=job["id"])

         return job

@@ -971,6 +984,14 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):

         self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None

+    @property
+    def extra_links_params(self) -> dict[str, Any]:
+        return {
+            "project_id": self.project_id,
+            "location": self.location,
+            "pipeline_name": self.pipeline_name,
+        }
+
     def execute(self, context: Context):
         if self.body is None:
             raise AirflowException(
@@ -1003,8 +1024,8 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
             pipeline_name=self.pipeline_name,
             location=self.location,
         )
-        DataflowPipelineLink.persist(
-
+        DataflowPipelineLink.persist(context=context)
+        context["task_instance"].xcom_push(key="pipeline_name", value=self.pipeline_name)
         if self.pipeline:
             if "error" in self.pipeline:
                 raise AirflowException(self.pipeline.get("error").get("message"))
@@ -1075,8 +1096,10 @@ class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
                 location=self.location,
             )["job"]
             job_id = self.dataflow_hook.extract_job_id(self.job)
-
-            DataflowJobLink.persist(
+            context["task_instance"].xcom_push(key="job_id", value=job_id)
+            DataflowJobLink.persist(
+                context=context, project_id=self.project_id, region=self.location, job_id=job_id
+            )
         except HttpError as e:
             if e.resp.status == 404:
                 raise AirflowException("Pipeline with given name was not found.")
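A pattern repeated across the Dataflow hunks above: the launched job id is now pushed to XCom explicitly via `context["task_instance"].xcom_push(key="job_id", ...)`, and `DataflowJobLink.persist` receives `context` plus keyword arguments instead of the operator instance. A hedged sketch of reading that XCom from a downstream task; only the key `"job_id"` comes from the diff, while the task id and DAG wiring are hypothetical:

```python
# Hedged sketch (not part of the provider): pull the Dataflow job id that the
# operators above push to XCom under the key "job_id".
from airflow.decorators import task


@task
def report_dataflow_job(ti=None):
    # "start_dataflow_template" is an assumed upstream task id running
    # DataflowTemplatedJobStartOperator; only the XCom key comes from the diff.
    job_id = ti.xcom_pull(task_ids="start_dataflow_template", key="job_id")
    print(f"Dataflow job launched: {job_id}")


# Jinja templating works the same way in any templated operator field:
#   "{{ ti.xcom_pull(task_ids='start_dataflow_template', key='job_id') }}"
```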
airflow/providers/google/cloud/operators/dataform.py

@@ -28,7 +28,7 @@ from airflow.providers.google.cloud.links.dataform import (
 if TYPE_CHECKING:
     from google.api_core.retry import Retry

-    from airflow.
+    from airflow.providers.common.compat.sdk import Context

 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.dataform_v1beta1.types import (
@@ -258,7 +258,6 @@ class DataformCreateWorkflowInvocationOperator(GoogleCloudBaseOperator):
         )
         workflow_invocation_id = result.name.split("/")[-1]
         DataformWorkflowInvocationLink.persist(
-            operator_instance=self,
             context=context,
             project_id=self.project_id,
             region=self.region,
@@ -347,6 +346,13 @@ class DataformGetWorkflowInvocationOperator(GoogleCloudBaseOperator):
             timeout=self.timeout,
             metadata=self.metadata,
         )
+        DataformWorkflowInvocationLink.persist(
+            context=context,
+            project_id=self.project_id,
+            region=self.region,
+            repository_id=self.repository_id,
+            workflow_invocation_id=self.workflow_invocation_id,
+        )
         return WorkflowInvocation.to_dict(result)


@@ -412,7 +418,6 @@ class DataformQueryWorkflowInvocationActionsOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
         DataformWorkflowInvocationLink.persist(
-            operator_instance=self,
             context=context,
             project_id=self.project_id,
             region=self.region,
@@ -494,6 +499,13 @@ class DataformCancelWorkflowInvocationOperator(GoogleCloudBaseOperator):
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
+        DataformWorkflowInvocationLink.persist(
+            context=context,
+            project_id=self.project_id,
+            region=self.region,
+            repository_id=self.repository_id,
+            workflow_invocation_id=self.workflow_invocation_id,
+        )
         hook.cancel_workflow_invocation(
             project_id=self.project_id,
             region=self.region,
@@ -576,7 +588,6 @@ class DataformCreateRepositoryOperator(GoogleCloudBaseOperator):
         )

         DataformRepositoryLink.persist(
-            operator_instance=self,
             context=context,
             project_id=self.project_id,
             region=self.region,
@@ -735,7 +746,6 @@ class DataformCreateWorkspaceOperator(GoogleCloudBaseOperator):
         )

         DataformWorkspaceLink.persist(
-            operator_instance=self,
             context=context,
             project_id=self.project_id,
             region=self.region,
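The Dataform hunks all make the same change: `operator_instance=self` disappears from the `*Link.persist(...)` calls, which now take only `context` plus the explicit keyword parameters needed to rebuild the console URL (and the Get/Cancel operators now persist the link as well); the Dataflow hunk above suggests such defaults can also come from a new `extra_links_params` property. The sketch below only illustrates that calling convention with an invented link class, it is not the provider's `BaseGoogleLink`; `BaseOperatorLink`, `XCom.get_value`, and the `ti_key`-based `get_link` signature are core Airflow APIs, everything else is assumed:

```python
# Illustrative only: a custom operator link following the same persist(context, **params)
# shape as the calls in the hunks above. Class name, XCom key, and URL are invented.
from airflow.models import BaseOperatorLink, XCom


class ExampleWorkflowInvocationLink(BaseOperatorLink):
    name = "Example Workflow Invocation"
    key = "example_workflow_invocation"  # XCom key shared by persist() and get_link()

    @classmethod
    def persist(cls, context, **params):
        # Everything needed to rebuild the URL is passed explicitly and stored in XCom;
        # no reference to the operator instance is kept.
        context["ti"].xcom_push(key=cls.key, value=params)

    def get_link(self, operator, *, ti_key):
        params = XCom.get_value(ti_key=ti_key, key=self.key) or {}
        return (
            "https://console.cloud.google.com/bigquery/dataform"  # illustrative base URL
            f"?project={params.get('project_id', '')}&region={params.get('region', '')}"
        )
```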
airflow/providers/google/cloud/operators/datafusion.py

@@ -40,7 +40,8 @@ from airflow.providers.google.cloud.utils.helpers import resource_path_to_dict
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
-    from airflow.
+    from airflow.providers.common.compat.sdk import Context
+    from airflow.providers.openlineage.extractors import OperatorLineage


 class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
@@ -111,10 +112,9 @@ class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
         project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
         DataFusionInstanceLink.persist(
             context=context,
-            task_instance=self,
             project_id=project_id,
             instance_name=self.instance_name,
-
+            region=self.location,
         )


@@ -269,10 +269,9 @@ class CloudDataFusionCreateInstanceOperator(GoogleCloudBaseOperator):
         project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
         DataFusionInstanceLink.persist(
             context=context,
-            task_instance=self,
             project_id=project_id,
             instance_name=self.instance_name,
-
+            region=self.location,
         )
         return instance

@@ -358,10 +357,9 @@ class CloudDataFusionUpdateInstanceOperator(GoogleCloudBaseOperator):
         project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
         DataFusionInstanceLink.persist(
             context=context,
-            task_instance=self,
             project_id=project_id,
             instance_name=self.instance_name,
-
+            region=self.location,
         )


@@ -429,10 +427,9 @@ class CloudDataFusionGetInstanceOperator(GoogleCloudBaseOperator):
         project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
         DataFusionInstanceLink.persist(
             context=context,
-            task_instance=self,
             project_id=project_id,
             instance_name=self.instance_name,
-
+            region=self.location,
         )
         return instance

@@ -519,7 +516,6 @@ class CloudDataFusionCreatePipelineOperator(GoogleCloudBaseOperator):
         )
         DataFusionPipelineLink.persist(
             context=context,
-            task_instance=self,
             uri=instance["serviceEndpoint"],
             pipeline_name=self.pipeline_name,
             namespace=self.namespace,
@@ -693,7 +689,6 @@ class CloudDataFusionListPipelinesOperator(GoogleCloudBaseOperator):

         DataFusionPipelinesLink.persist(
             context=context,
-            task_instance=self,
             uri=service_endpoint,
             namespace=self.namespace,
         )
@@ -783,6 +778,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
         self.pipeline_timeout = pipeline_timeout
         self.deferrable = deferrable
         self.poll_interval = poll_interval
+        self.pipeline_id: str | None = None

         if success_states:
             self.success_states = success_states
@@ -802,18 +798,17 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
             project_id=self.project_id,
         )
         api_url = instance["apiEndpoint"]
-        pipeline_id = hook.start_pipeline(
+        self.pipeline_id = hook.start_pipeline(
             pipeline_name=self.pipeline_name,
             pipeline_type=self.pipeline_type,
             instance_url=api_url,
             namespace=self.namespace,
             runtime_args=self.runtime_args,
         )
-        self.log.info("Pipeline %s submitted successfully.", pipeline_id)
+        self.log.info("Pipeline %s submitted successfully.", self.pipeline_id)

         DataFusionPipelineLink.persist(
             context=context,
-            task_instance=self,
             uri=instance["serviceEndpoint"],
             pipeline_name=self.pipeline_name,
             namespace=self.namespace,
@@ -831,7 +826,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
                     namespace=self.namespace,
                     pipeline_name=self.pipeline_name,
                     pipeline_type=self.pipeline_type.value,
-                    pipeline_id=pipeline_id,
+                    pipeline_id=self.pipeline_id,
                     poll_interval=self.poll_interval,
                     gcp_conn_id=self.gcp_conn_id,
                     impersonation_chain=self.impersonation_chain,
@@ -841,19 +836,21 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
         else:
             if not self.asynchronous:
                 # when NOT using asynchronous mode it will just wait for pipeline to finish and print message
-                self.log.info(
+                self.log.info(
+                    "Waiting when pipeline %s will be in one of the success states", self.pipeline_id
+                )
                 hook.wait_for_pipeline_state(
                     success_states=self.success_states,
-                    pipeline_id=pipeline_id,
+                    pipeline_id=self.pipeline_id,
                     pipeline_name=self.pipeline_name,
                     pipeline_type=self.pipeline_type,
                     namespace=self.namespace,
                     instance_url=api_url,
                     timeout=self.pipeline_timeout,
                 )
-                self.log.info("Pipeline %s discovered success state.", pipeline_id)
+                self.log.info("Pipeline %s discovered success state.", self.pipeline_id)
             # otherwise, return pipeline_id so that sensor can use it later to check the pipeline state
-            return pipeline_id
+            return self.pipeline_id

     def execute_complete(self, context: Context, event: dict[str, Any]):
         """
@@ -870,6 +867,31 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
             )
         return event["pipeline_id"]

+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
+        """Build and return OpenLineage facets and datasets for the completed pipeline start."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.facets import DataFusionRunFacet
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        pipeline_resource = f"{self.project_id}:{self.location}:{self.instance_name}:{self.pipeline_name}"
+
+        inputs = [Dataset(namespace="datafusion", name=pipeline_resource)]
+
+        if self.pipeline_id:
+            output_name = f"{pipeline_resource}:{self.pipeline_id}"
+        else:
+            output_name = f"{pipeline_resource}:unknown"
+        outputs = [Dataset(namespace="datafusion", name=output_name)]
+
+        run_facets = {
+            "dataFusionRun": DataFusionRunFacet(
+                runId=self.pipeline_id,
+                runtimeArgs=self.runtime_args,
+            )
+        }
+
+        return OperatorLineage(inputs=inputs, outputs=outputs, run_facets=run_facets, job_facets={})
+

 class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
     """
@@ -943,7 +965,6 @@ class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):

         DataFusionPipelineLink.persist(
             context=context,
-            task_instance=self,
             uri=instance["serviceEndpoint"],
             pipeline_name=self.pipeline_name,
             namespace=self.namespace,
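In the Data Fusion hunks, the started pipeline id is now kept on `self.pipeline_id`, returned from `execute` (and therefore available via XCom), and reused by the new `get_openlineage_facets_on_complete` method. As the in-code comment notes, the returned id is intended for a sensor in asynchronous mode. A hedged wiring sketch follows: the operator and sensor classes exist in this provider (see `operators/datafusion.py` and `sensors/datafusion.py` above), but the keyword arguments and values used below are assumptions to verify against the 19.1.0 signatures:

```python
# Hedged sketch: start a Data Fusion pipeline asynchronously and let a sensor poll it,
# feeding the returned pipeline id through XCom. Names and parameters are illustrative.
import pendulum

from airflow import DAG
from airflow.providers.google.cloud.operators.datafusion import CloudDataFusionStartPipelineOperator
from airflow.providers.google.cloud.sensors.datafusion import CloudDataFusionPipelineStateSensor

with DAG(
    dag_id="datafusion_start_and_wait",  # hypothetical DAG
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
):
    start_pipeline = CloudDataFusionStartPipelineOperator(
        task_id="start_pipeline",
        pipeline_name="example_pipeline",  # hypothetical pipeline
        instance_name="example-instance",  # hypothetical instance
        location="us-central1",
        asynchronous=True,  # return immediately; the sensor waits instead
    )

    wait_for_pipeline = CloudDataFusionPipelineStateSensor(
        task_id="wait_for_pipeline",
        pipeline_name="example_pipeline",
        pipeline_id=start_pipeline.output,  # the id returned by execute(), via XCom
        expected_statuses=["COMPLETED"],  # assumed status literal
        instance_name="example-instance",
        location="us-central1",
    )

    start_pipeline >> wait_for_pipeline
```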
|