apache-airflow-providers-google 12.0.0rc2 → 14.0.0rc1 (py3-none-any.whl)

This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/METADATA +40 -57
  155. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery_dts.py

@@ -24,14 +24,6 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING

-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.bigquery_datatransfer_v1 import (
-    StartManualTransferRunsResponse,
-    TransferConfig,
-    TransferRun,
-    TransferState,
-)
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.bigquery_dts import BiqQueryDataTransferServiceHook, get_object_id
@@ -39,11 +31,17 @@ from airflow.providers.google.cloud.links.bigquery_dts import BigQueryDataTransf
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery_dts import BigQueryDataTransferRunTrigger
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.bigquery_datatransfer_v1 import (
+    StartManualTransferRunsResponse,
+    TransferConfig,
+    TransferRun,
+    TransferState,
+)

 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry


 def _get_transfer_config_details(config_transfer_name: str):
@@ -299,6 +297,7 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(GoogleCloudBaseOperat
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.deferrable = deferrable
+        self._transfer_run: dict = {}

     @cached_property
     def hook(self) -> BiqQueryDataTransferServiceHook:
@@ -339,12 +338,13 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(GoogleCloudBaseOperat
         self.xcom_push(context, key="run_id", value=run_id)

         if not self.deferrable:
-            result = self._wait_for_transfer_to_be_done(
+            # Save as attribute for further use by OpenLineage
+            self._transfer_run = self._wait_for_transfer_to_be_done(
                 run_id=run_id,
                 transfer_config_id=transfer_config["config_id"],
             )
             self.log.info("Transfer run %s submitted successfully.", run_id)
-            return result
+            return self._transfer_run

         self.defer(
             trigger=BigQueryDataTransferRunTrigger(
@@ -412,4 +412,117 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(GoogleCloudBaseOperat
             event["message"],
         )

-        return TransferRun.to_dict(transfer_run)
+        # Save as attribute for further use by OpenLineage
+        self._transfer_run = TransferRun.to_dict(transfer_run)
+        return self._transfer_run
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we need a run config to extract information."""
+        from urllib.parse import urlsplit
+
+        from airflow.providers.common.compat.openlineage.facet import Dataset, ErrorMessageRunFacet
+        from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            extract_ds_name_from_gcs_path,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo, SQLParser
+
+        if not self._transfer_run:
+            self.log.debug("No BigQuery Data Transfer configuration was found by OpenLineage.")
+            return OperatorLineage()
+
+        data_source_id = self._transfer_run["data_source_id"]
+        dest_dataset_id = self._transfer_run["destination_dataset_id"]
+        params = self._transfer_run["params"]
+
+        input_datasets, output_datasets = [], []
+        run_facets, job_facets = {}, {}
+        if data_source_id in ("google_cloud_storage", "amazon_s3", "azure_blob_storage"):
+            if data_source_id == "google_cloud_storage":
+                bucket, path = _parse_gcs_url(params["data_path_template"])  # gs://bucket...
+                namespace = f"gs://{bucket}"
+                name = extract_ds_name_from_gcs_path(path)
+            elif data_source_id == "amazon_s3":
+                parsed_url = urlsplit(params["data_path"])  # s3://bucket...
+                namespace = f"s3://{parsed_url.netloc}"
+                name = extract_ds_name_from_gcs_path(parsed_url.path)
+            else:  # azure_blob_storage
+                storage_account = params["storage_account"]
+                container = params["container"]
+                namespace = f"abfss://{container}@{storage_account}.dfs.core.windows.net"
+                name = extract_ds_name_from_gcs_path(params["data_path"])
+
+            input_datasets.append(Dataset(namespace=namespace, name=name))
+            dest_table_name = params["destination_table_name_template"]
+            output_datasets.append(
+                Dataset(
+                    namespace=BIGQUERY_NAMESPACE,
+                    name=f"{self.project_id}.{dest_dataset_id}.{dest_table_name}",
+                )
+            )
+        elif data_source_id in ("postgresql", "oracle", "mysql"):
+            scheme = data_source_id if data_source_id != "postgresql" else "postgres"
+            host = params["connector.endpoint.host"]
+            port = params["connector.endpoint.port"]
+
+            for asset in params["assets"]:
+                # MySQL: db/table; Other: db/schema/table;
+                table_name = asset.split("/")[-1]
+
+                input_datasets.append(
+                    Dataset(namespace=f"{scheme}://{host}:{int(port)}", name=asset.replace("/", "."))
+                )
+                output_datasets.append(
+                    Dataset(
+                        namespace=BIGQUERY_NAMESPACE, name=f"{self.project_id}.{dest_dataset_id}.{table_name}"
+                    )
+                )
+        elif data_source_id == "scheduled_query":
+            bq_db_info = DatabaseInfo(
+                scheme="bigquery",
+                authority=None,
+                database=self.project_id,
+            )
+            parser_result = SQLParser("bigquery").generate_openlineage_metadata_from_sql(
+                sql=params["query"],
+                database_info=bq_db_info,
+                database=self.project_id,
+                use_connection=False,
+                hook=None,  # Hook is not used when use_connection=False
+                sqlalchemy_engine=None,
+            )
+            if parser_result.inputs:
+                input_datasets.extend(parser_result.inputs)
+            if parser_result.outputs:
+                output_datasets.extend(parser_result.outputs)
+            if parser_result.job_facets:
+                job_facets = {**job_facets, **parser_result.job_facets}
+            if parser_result.run_facets:
+                run_facets = {**run_facets, **parser_result.run_facets}
+            dest_table_name = params.get("destination_table_name_template")
+            if dest_table_name:
+                output_datasets.append(
+                    Dataset(
+                        namespace=BIGQUERY_NAMESPACE,
+                        name=f"{self.project_id}.{dest_dataset_id}.{dest_table_name}",
+                    )
+                )
+        else:
+            self.log.debug(
+                "BigQuery Data Transfer data_source_id `%s` is not supported by OpenLineage.", data_source_id
+            )
+            return OperatorLineage()
+
+        error_status = self._transfer_run.get("error_status")
+        if error_status and str(error_status["code"]) != "0":
+            run_facets["errorMessage"] = ErrorMessageRunFacet(
+                message=error_status["message"],
+                programmingLanguage="python",
+                stackTrace=str(error_status["details"]),
+            )
+
+        return OperatorLineage(
+            inputs=input_datasets, outputs=output_datasets, job_facets=job_facets, run_facets=run_facets
+        )
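
The two bigquery_dts.py changes above make the operator cache the finished transfer run in `_transfer_run` and derive lineage from it; the OpenLineage listener calls `get_openlineage_facets_on_complete` after the task finishes. A minimal usage sketch, assuming the OpenLineage provider is installed (the config and project IDs below are placeholders, not values from this diff):

# Hypothetical usage sketch; IDs are placeholders.
from airflow.providers.google.cloud.operators.bigquery_dts import (
    BigQueryDataTransferServiceStartTransferRunsOperator,
)

start_runs = BigQueryDataTransferServiceStartTransferRunsOperator(
    task_id="start_transfer_runs",
    transfer_config_id="my-transfer-config",  # placeholder
    project_id="my-gcp-project",  # placeholder
)
# After execute() populates self._transfer_run, the OpenLineage listener
# invokes get_openlineage_facets_on_complete() and receives input/output
# Datasets built from the run's data_source_id and params.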
airflow/providers/google/cloud/operators/bigtable.py

@@ -23,7 +23,6 @@ from collections.abc import Iterable, Sequence
 from typing import TYPE_CHECKING

 import google.api_core.exceptions
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.bigtable import BigtableHook
 from airflow.providers.google.cloud.links.bigtable import (
@@ -37,11 +36,10 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 if TYPE_CHECKING:
     import enum

+    from airflow.utils.context import Context
     from google.cloud.bigtable import enums
     from google.cloud.bigtable.column_family import GarbageCollectionRule

-    from airflow.utils.context import Context
-

 class BigtableValidationMixin:
     """Common class for Cloud Bigtable operators for validating required fields."""
airflow/providers/google/cloud/operators/cloud_base.py

@@ -19,9 +19,8 @@

 from __future__ import annotations

-from google.api_core.gapic_v1.method import DEFAULT
-
 from airflow.models import BaseOperator
+from google.api_core.gapic_v1.method import DEFAULT


 class GoogleCloudBaseOperator(BaseOperator):
airflow/providers/google/cloud/operators/cloud_batch.py

@@ -20,18 +20,16 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from google.cloud.batch_v1 import Job, Task
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_batch import CloudBatchHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.cloud_batch import CloudBatchJobFinishedTrigger
+from google.cloud.batch_v1 import Job, Task

 if TYPE_CHECKING:
-    from google.api_core import operation
-
     from airflow.utils.context import Context
+    from google.api_core import operation


 class CloudBatchSubmitJobOperator(GoogleCloudBaseOperator):
airflow/providers/google/cloud/operators/cloud_build.py

@@ -26,9 +26,6 @@ from copy import deepcopy
 from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote, urlsplit

-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.devtools.cloudbuild_v1.types import Build, BuildTrigger, RepoSource
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildHook
@@ -44,11 +41,12 @@ from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_MET
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils import yaml
 from airflow.utils.helpers import exactly_one
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.devtools.cloudbuild_v1.types import Build, BuildTrigger, RepoSource

 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry


 REGEX_REPO_PATH = re.compile(r"^/(?P<project_id>[^/]+)/(?P<repo_name>[^/]+)[\+/]*(?P<branch_name>[^:]+)?")
airflow/providers/google/cloud/operators/cloud_composer.py

@@ -21,11 +21,6 @@ import shlex
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from google.api_core.exceptions import AlreadyExists
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.orchestration.airflow.service_v1 import ImageVersion
-from google.cloud.orchestration.airflow.service_v1.types import Environment, ExecuteAirflowCommandResponse
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerHook
@@ -36,13 +31,16 @@ from airflow.providers.google.cloud.triggers.cloud_composer import (
     CloudComposerExecutionTrigger,
 )
 from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
+from google.api_core.exceptions import AlreadyExists
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.orchestration.airflow.service_v1 import ImageVersion
+from google.cloud.orchestration.airflow.service_v1.types import Environment, ExecuteAirflowCommandResponse

 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask

-    from airflow.utils.context import Context
-
 CLOUD_COMPOSER_BASE_LINK = "https://console.cloud.google.com/composer/environments"
 CLOUD_COMPOSER_DETAILS_LINK = (
     CLOUD_COMPOSER_BASE_LINK + "/detail/{region}/{environment_id}/monitoring?project={project_id}"
airflow/providers/google/cloud/operators/cloud_memorystore.py

@@ -29,10 +29,6 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.memcache_v1beta2.types import cloud_memcache
-from google.cloud.redis_v1 import FailoverInstanceRequest, InputConfig, Instance, OutputConfig
-
 from airflow.providers.google.cloud.hooks.cloud_memorystore import (
     CloudMemorystoreHook,
     CloudMemorystoreMemcachedHook,
@@ -45,13 +41,15 @@ from airflow.providers.google.cloud.links.cloud_memorystore import (
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.memcache_v1beta2.types import cloud_memcache
+from google.cloud.redis_v1 import FailoverInstanceRequest, InputConfig, Instance, OutputConfig

 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask

-    from airflow.utils.context import Context
-

 class CloudMemorystoreCreateInstanceOperator(GoogleCloudBaseOperator):
     """
airflow/providers/google/cloud/operators/cloud_run.py

@@ -21,21 +21,19 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any

 import google.cloud.exceptions
-from google.api_core.exceptions import AlreadyExists
-from google.cloud.run_v2 import Job, Service
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_run import CloudRunHook, CloudRunServiceHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.cloud_run import CloudRunJobFinishedTrigger, RunJobStatus
+from google.api_core.exceptions import AlreadyExists
+from google.cloud.run_v2 import Job, Service

 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core import operation
     from google.cloud.run_v2.types import Execution

-    from airflow.utils.context import Context
-

 class CloudRunCreateJobOperator(GoogleCloudBaseOperator):
     """
@@ -300,6 +298,9 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
             region=self.region, project_id=self.project_id, job_name=self.job_name, overrides=self.overrides
         )

+        if self.operation is None:
+            raise AirflowException("Operation is None")
+
         if not self.deferrable:
             result: Execution = self._wait_for_operation(self.operation)
             self._fail_if_execution_failed(result)
airflow/providers/google/cloud/operators/cloud_sql.py

@@ -20,6 +20,7 @@
 from __future__ import annotations

 from collections.abc import Iterable, Mapping, Sequence
+from contextlib import contextmanager
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

@@ -38,8 +39,7 @@ from airflow.providers.google.common.links.storage import FileDetailsLink

 if TYPE_CHECKING:
     from airflow.models import Connection
-    from airflow.providers.mysql.hooks.mysql import MySqlHook
-    from airflow.providers.postgres.hooks.postgres import PostgresHook
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context


@@ -1256,7 +1256,8 @@ class CloudSQLExecuteQueryOperator(GoogleCloudBaseOperator):
         self.ssl_client_key = ssl_client_key
         self.ssl_secret_id = ssl_secret_id

-    def _execute_query(self, hook: CloudSQLDatabaseHook, database_hook: PostgresHook | MySqlHook) -> None:
+    @contextmanager
+    def cloud_sql_proxy_context(self, hook: CloudSQLDatabaseHook):
         cloud_sql_proxy_runner = None
         try:
             if hook.use_proxy:
@@ -1266,27 +1267,27 @@ class CloudSQLExecuteQueryOperator(GoogleCloudBaseOperator):
                 # be taken over here by another bind(0).
                 # It's quite unlikely to happen though!
                 cloud_sql_proxy_runner.start_proxy()
-            self.log.info('Executing: "%s"', self.sql)
-            database_hook.run(self.sql, self.autocommit, parameters=self.parameters)
+            yield
         finally:
             if cloud_sql_proxy_runner:
                 cloud_sql_proxy_runner.stop_proxy()

     def execute(self, context: Context):
-        self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
-
         hook = self.hook
         hook.validate_ssl_certs()
         connection = hook.create_connection()
         hook.validate_socket_path_length()
         database_hook = hook.get_database_hook(connection=connection)
         try:
-            self._execute_query(hook, database_hook)
+            with self.cloud_sql_proxy_context(hook):
+                self.log.info('Executing: "%s"', self.sql)
+                database_hook.run(self.sql, self.autocommit, parameters=self.parameters)
         finally:
             hook.cleanup_database_hook()

     @cached_property
     def hook(self):
+        self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
         return CloudSQLDatabaseHook(
             gcp_cloudsql_conn_id=self.gcp_cloudsql_conn_id,
             gcp_conn_id=self.gcp_conn_id,
@@ -1297,3 +1298,14 @@ class CloudSQLExecuteQueryOperator(GoogleCloudBaseOperator):
             ssl_key=self.ssl_client_key,
             ssl_secret_id=self.ssl_secret_id,
         )
+
+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+
+        with self.cloud_sql_proxy_context(self.hook):
+            return get_openlineage_facets_with_sql(
+                hook=self.hook.db_hook,
+                sql=self.sql,  # type:ignore[arg-type]  # Iterable[str] instead of list[str]
+                conn_id=self.gcp_cloudsql_conn_id,
+                database=self.hook.database,
+            )
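
The cloud_sql.py refactor above is worth pausing on: proxy setup and teardown used to be fused with query execution inside `_execute_query`, and turning it into the `cloud_sql_proxy_context` context manager lets both `execute()` and the new `get_openlineage_facets_on_complete()` run their own code while the Cloud SQL proxy is alive. The shape of the pattern, reduced to a self-contained sketch (`ProxyRunner` is a hypothetical stand-in for the hook's proxy runner, not a class from this diff):

from contextlib import contextmanager

class ProxyRunner:  # hypothetical stand-in for the hook's proxy runner
    def start_proxy(self) -> None: ...
    def stop_proxy(self) -> None: ...

@contextmanager
def proxy_context(use_proxy: bool):
    runner = ProxyRunner() if use_proxy else None
    try:
        if runner:
            runner.start_proxy()
        yield  # the caller's code runs here with the proxy guaranteed up
    finally:
        if runner:
            runner.stop_proxy()

Both call sites then read as `with self.cloud_sql_proxy_context(hook): ...`, leaving exactly one setup/teardown path to maintain.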
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py

@@ -21,9 +21,10 @@ from __future__ import annotations

 from collections.abc import Sequence
 from copy import deepcopy
-from datetime import date, time
-from typing import TYPE_CHECKING
+from datetime import date, time, timedelta
+from typing import TYPE_CHECKING, Any

+from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
@@ -63,6 +64,9 @@ from airflow.providers.google.cloud.links.cloud_storage_transfer import (
     CloudStorageTransferListLink,
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
+    CloudStorageTransferServiceCheckJobStatusTrigger,
+)
 from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

@@ -908,6 +912,7 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
     :param aws_role_arn: Optional AWS role ARN for workload identity federation. This will
         override the `aws_conn_id` for authentication between GCP and AWS; see
         https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#AwsS3Data
+    :param deferrable: Run operator in the deferrable mode.
     """

     template_fields: Sequence[str] = (
@@ -942,6 +947,7 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
         google_impersonation_chain: str | Sequence[str] | None = None,
         delete_job_after_completion: bool = False,
         aws_role_arn: str | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -961,6 +967,7 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
         self.google_impersonation_chain = google_impersonation_chain
         self.delete_job_after_completion = delete_job_after_completion
         self.aws_role_arn = aws_role_arn
+        self.deferrable = deferrable
         self._validate_inputs()

     def _validate_inputs(self) -> None:
@@ -979,9 +986,31 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
         job = hook.create_transfer_job(body=body)

         if self.wait:
-            hook.wait_for_transfer_job(job, timeout=self.timeout)
-            if self.delete_job_after_completion:
-                hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+            if not self.deferrable:
+                hook.wait_for_transfer_job(job, timeout=self.timeout)
+                if self.delete_job_after_completion:
+                    hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+            else:
+                self.defer(
+                    timeout=timedelta(seconds=self.timeout or 60),
+                    trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
+                        job_name=job[NAME],
+                        project_id=job[PROJECT_ID],
+                        gcp_conn_id=self.gcp_conn_id,
+                        impersonation_chain=self.google_impersonation_chain,
+                    ),
+                    method_name="execute_complete",
+                )
+
+    def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
+        """
+        Act as a callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
+        """
+        if event["status"] == "error":
+            raise AirflowException(event["message"])

     def _create_body(self) -> dict:
         body = {
@@ -1079,6 +1108,7 @@ class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     :param delete_job_after_completion: If True, delete the job after complete.
         If set to True, 'wait' must be set to True.
+    :param deferrable: Run operator in the deferrable mode.
     """

     # [START gcp_transfer_gcs_to_gcs_template_fields]
@@ -1113,6 +1143,7 @@ class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
         timeout: float | None = None,
         google_impersonation_chain: str | Sequence[str] | None = None,
         delete_job_after_completion: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -1130,6 +1161,7 @@ class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
         self.timeout = timeout
         self.google_impersonation_chain = google_impersonation_chain
         self.delete_job_after_completion = delete_job_after_completion
+        self.deferrable = deferrable
         self._validate_inputs()

     def _validate_inputs(self) -> None:
@@ -1149,9 +1181,31 @@ class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
         job = hook.create_transfer_job(body=body)

         if self.wait:
-            hook.wait_for_transfer_job(job, timeout=self.timeout)
-            if self.delete_job_after_completion:
-                hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+            if not self.deferrable:
+                hook.wait_for_transfer_job(job, timeout=self.timeout)
+                if self.delete_job_after_completion:
+                    hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+            else:
+                self.defer(
+                    timeout=timedelta(seconds=self.timeout or 60),
+                    trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
+                        job_name=job[NAME],
+                        project_id=job[PROJECT_ID],
+                        gcp_conn_id=self.gcp_conn_id,
+                        impersonation_chain=self.google_impersonation_chain,
+                    ),
+                    method_name="execute_complete",
+                )
+
+    def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
+        """
+        Act as a callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
+        """
+        if event["status"] == "error":
+            raise AirflowException(event["message"])

     def _create_body(self) -> dict:
         body = {
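
Both transfer operators above gain the same deferrable branch: the job is still created synchronously, then `self.defer()` suspends the task and hands polling to `CloudStorageTransferServiceCheckJobStatusTrigger`, which calls back into `execute_complete` when the job finishes or fails. A minimal opt-in sketch (bucket names are placeholders; note the deferred branch only runs when `wait=True`, and `delete_job_after_completion` is only honored on the synchronous path):

# Hypothetical usage sketch; bucket names are placeholders.
from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
    CloudDataTransferServiceS3ToGCSOperator,
)

s3_to_gcs = CloudDataTransferServiceS3ToGCSOperator(
    task_id="s3_to_gcs_deferred",
    s3_bucket="source-bucket",  # placeholder
    gcs_bucket="destination-bucket",  # placeholder
    wait=True,  # required: the defer branch sits behind `if self.wait`
    deferrable=True,  # poll via trigger instead of blocking a worker slot
)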
airflow/providers/google/cloud/operators/compute.py

@@ -23,8 +23,6 @@ from collections.abc import Sequence
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any

-from google.api_core import exceptions
-from google.cloud.compute_v1.types import Instance, InstanceGroupManager, InstanceTemplate
 from json_merge_patch import merge

 from airflow.exceptions import AirflowException
@@ -38,11 +36,12 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.cloud.utils.field_sanitizer import GcpBodyFieldSanitizer
 from airflow.providers.google.cloud.utils.field_validator import GcpBodyFieldValidator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from google.api_core import exceptions
+from google.cloud.compute_v1.types import Instance, InstanceGroupManager, InstanceTemplate

 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry


 class ComputeEngineBaseOperator(GoogleCloudBaseOperator):
airflow/providers/google/cloud/operators/datacatalog.py

@@ -19,6 +19,14 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

+from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook
+from airflow.providers.google.cloud.links.datacatalog import (
+    DataCatalogEntryGroupLink,
+    DataCatalogEntryLink,
+    DataCatalogTagTemplateLink,
+)
+from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from google.api_core.exceptions import AlreadyExists, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.datacatalog import (
@@ -32,21 +40,11 @@ from google.cloud.datacatalog import (
     TagTemplateField,
 )

-from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook
-from airflow.providers.google.cloud.links.datacatalog import (
-    DataCatalogEntryGroupLink,
-    DataCatalogEntryLink,
-    DataCatalogTagTemplateLink,
-)
-from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
-from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
-
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask

-    from airflow.utils.context import Context
-

 class CloudDataCatalogCreateEntryOperator(GoogleCloudBaseOperator):
     """