apache-airflow-providers-google 10.19.0rc1__py3-none-any.whl → 10.20.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. airflow/providers/google/LICENSE +4 -4
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +4 -4
  4. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +26 -0
  5. airflow/providers/google/cloud/hooks/dataflow.py +132 -1
  6. airflow/providers/google/cloud/hooks/datapipeline.py +22 -73
  7. airflow/providers/google/cloud/hooks/gcs.py +21 -0
  8. airflow/providers/google/cloud/hooks/pubsub.py +10 -1
  9. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -0
  10. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +15 -3
  11. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
  12. airflow/providers/google/cloud/links/dataflow.py +25 -0
  13. airflow/providers/google/cloud/openlineage/mixins.py +271 -0
  14. airflow/providers/google/cloud/openlineage/utils.py +5 -218
  15. airflow/providers/google/cloud/operators/bigquery.py +74 -20
  16. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +76 -0
  17. airflow/providers/google/cloud/operators/dataflow.py +235 -1
  18. airflow/providers/google/cloud/operators/datapipeline.py +29 -121
  19. airflow/providers/google/cloud/operators/dataplex.py +1 -1
  20. airflow/providers/google/cloud/operators/dataproc_metastore.py +17 -6
  21. airflow/providers/google/cloud/operators/kubernetes_engine.py +9 -6
  22. airflow/providers/google/cloud/operators/pubsub.py +18 -0
  23. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +6 -0
  24. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +16 -0
  25. airflow/providers/google/cloud/sensors/cloud_composer.py +171 -2
  26. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +13 -0
  27. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +56 -1
  28. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +6 -12
  29. airflow/providers/google/cloud/triggers/cloud_composer.py +115 -0
  30. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -0
  31. airflow/providers/google/cloud/utils/credentials_provider.py +81 -6
  32. airflow/providers/google/cloud/utils/external_token_supplier.py +175 -0
  33. airflow/providers/google/common/hooks/base_google.py +35 -1
  34. airflow/providers/google/common/utils/id_token_credentials.py +1 -1
  35. airflow/providers/google/get_provider_info.py +19 -14
  36. {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/METADATA +41 -35
  37. {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/RECORD +39 -37
  38. {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/WHEEL +0 -0
  39. {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/entry_points.txt +0 -0
--- a/airflow/providers/google/cloud/operators/bigquery.py
+++ b/airflow/providers/google/cloud/operators/bigquery.py
@@ -47,7 +47,7 @@ from airflow.providers.common.sql.operators.sql import (
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
 from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
-from airflow.providers.google.cloud.openlineage.utils import _BigQueryOpenLineageMixin
+from airflow.providers.google.cloud.openlineage.mixins import _BigQueryOpenLineageMixin
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryCheckTrigger,
@@ -67,6 +67,7 @@ if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context
 
+
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"
 
 LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")
@@ -149,7 +150,12 @@ class _BigQueryOperatorsEncryptionConfigurationMixin:
     # annotation of the `self`. Then you can inherit this class in the target operator.
     # e.g: BigQueryCheckOperator, BigQueryTableCheckOperator
     def include_encryption_configuration(  # type:ignore[misc]
-        self: BigQueryCheckOperator | BigQueryTableCheckOperator,
+        self: BigQueryCheckOperator
+        | BigQueryTableCheckOperator
+        | BigQueryValueCheckOperator
+        | BigQueryColumnCheckOperator
+        | BigQueryGetDataOperator
+        | BigQueryIntervalCheckOperator,
         configuration: dict,
         config_key: str,
     ) -> None:
@@ -205,7 +211,7 @@ class BigQueryCheckOperator(
         Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account. (templated)
     :param labels: a dictionary containing labels for the table, passed to BigQuery.
-    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
 
         .. code-block:: python
 
@@ -326,7 +332,9 @@ class BigQueryCheckOperator(
         self.log.info("Success.")
 
 
-class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
+class BigQueryValueCheckOperator(
+    _BigQueryDbHookMixin, SQLValueCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """Perform a simple value check using sql code.
 
     .. seealso::
@@ -336,6 +344,13 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
     :param sql: SQL to execute.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -370,6 +385,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         sql: str,
         pass_value: Any,
         tolerance: Any = None,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -383,6 +399,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.deferrable = deferrable
@@ -401,6 +418,8 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
             },
         }
 
+        self.include_encryption_configuration(configuration, "query")
+
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -460,7 +479,9 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         )
 
 
-class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
+class BigQueryIntervalCheckOperator(
+    _BigQueryDbHookMixin, SQLIntervalCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Check that the values of metrics given as SQL expressions are within a tolerance of the older ones.
 
@@ -481,6 +502,13 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
         between the current day, and the prior days_back.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -520,6 +548,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
@@ -538,6 +567,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
         self.location = location
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.project_id = project_id
@@ -552,6 +582,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Triggerer."""
         configuration = {"query": {"query": sql, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=self.project_id or hook.project_id,
@@ -608,7 +639,9 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
         )
 
 
-class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
+class BigQueryColumnCheckOperator(
+    _BigQueryDbHookMixin, SQLColumnCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Subclasses the SQLColumnCheckOperator in order to provide a job id for OpenLineage to parse.
 
@@ -623,6 +656,13 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     :param partition_clause: a string SQL statement added to a WHERE clause
         to partition data
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
     :param location: The geographic location of the job. See details at:
@@ -650,6 +690,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         partition_clause: str | None = None,
         database: str | None = None,
         accept_none: bool = True,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -671,6 +712,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         self.database = database
         self.accept_none = accept_none
         self.gcp_conn_id = gcp_conn_id
+        self.encryption_configuration = encryption_configuration
         self.use_legacy_sql = use_legacy_sql
         self.location = location
         self.impersonation_chain = impersonation_chain
@@ -683,7 +725,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Trigger."""
         configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}
-
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -765,7 +807,7 @@ class BigQueryTableCheckOperator(
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
 
         .. code-block:: python
 
@@ -851,7 +893,7 @@ class BigQueryTableCheckOperator(
         self.log.info("All tests have passed")
 
 
-class BigQueryGetDataOperator(GoogleCloudBaseOperator):
+class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncryptionConfigurationMixin):
     """
     Fetch data and return it, either from a BigQuery table, or results of a query job.
 
@@ -920,6 +962,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         from the table. (templated)
     :param selected_fields: List of fields to return (comma-separated). If
         unspecified, all fields are returned.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The location used for the operation.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -964,6 +1013,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         selected_fields: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
@@ -983,6 +1033,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self.encryption_configuration = encryption_configuration
         self.project_id = project_id
         self.deferrable = deferrable
         self.poll_interval = poll_interval
@@ -996,6 +1047,8 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     ) -> BigQueryJob:
         get_query = self.generate_query(hook=hook)
         configuration = {"query": {"query": get_query, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
+
         """Submit a new job and get the job id for polling the status using Triggerer."""
         return hook.insert_job(
             configuration=configuration,
@@ -1198,7 +1251,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
     :param location: The geographic location of the job. Required except for
         US and EU. See details at
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
 
         .. code-block:: python
 
@@ -1392,9 +1445,9 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         .. seealso::
             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-    :param gcp_conn_id: [Optional] The connection ID used to connect to Google Cloud and
+    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
         interact with the Bigquery service.
-    :param google_cloud_storage_conn_id: [Optional] The connection ID used to connect to Google Cloud.
+    :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud.
         and interact with the Google Cloud Storage service.
     :param labels: a dictionary containing labels for the table, passed to BigQuery
 
@@ -1432,13 +1485,13 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
             google_cloud_storage_conn_id="airflow-conn-id",
         )
 
-    :param view: [Optional] A dictionary containing definition for the view.
+    :param view: (Optional) A dictionary containing definition for the view.
         If set, it will create a view instead of a table:
 
         .. seealso::
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-    :param materialized_view: [Optional] The materialized view definition.
-    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+    :param materialized_view: (Optional) The materialized view definition.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
 
         .. code-block:: python
 
@@ -1446,7 +1499,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param location: The location used for the operation.
-    :param cluster_fields: [Optional] The fields used for clustering.
+    :param cluster_fields: (Optional) The fields used for clustering.
         BigQuery supports clustering for both partitioned and
         non-partitioned tables.
 
@@ -1644,7 +1697,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     :param autodetect: Try to detect schema and format options automatically.
         The schema_fields and schema_object options will be honored when specified explicitly.
         https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
-    :param compression: [Optional] The compression type of the data source.
+    :param compression: (Optional) The compression type of the data source.
         Possible values include GZIP and NONE.
         The default value is NONE.
         This setting is ignored for Google Cloud Bigtable,
@@ -1666,7 +1719,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         and interact with the Google Cloud Storage service.
     :param src_fmt_configs: configure optional fields specific to the source format
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
 
         .. code-block:: python
 
@@ -2666,6 +2719,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        location: str | None = None,
         **kwargs,
     ) -> None:
         self.schema_fields_updates = schema_fields_updates
@@ -2675,12 +2729,12 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         self.project_id = project_id
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
+        self.location = location
         super().__init__(**kwargs)
 
     def execute(self, context: Context):
         bq_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, location=self.location
         )
 
         table = bq_hook.update_table_schema(
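With this release, encryption_configuration is also accepted by BigQueryValueCheckOperator, BigQueryIntervalCheckOperator, BigQueryColumnCheckOperator and BigQueryGetDataOperator (previously only BigQueryCheckOperator and BigQueryTableCheckOperator inherited the encryption mixin), and BigQueryUpdateTableSchemaOperator gains a location argument that is forwarded to BigQueryHook. A minimal usage sketch, with placeholder project, dataset and KMS key names:

.. code-block:: python

    from airflow.providers.google.cloud.operators.bigquery import BigQueryValueCheckOperator

    # Placeholder resource names; the task_id comes from the Airflow base operator.
    check_row_count = BigQueryValueCheckOperator(
        task_id="check_row_count",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
        pass_value=100,
        use_legacy_sql=False,
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )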
--- a/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py
+++ b/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py
@@ -443,6 +443,82 @@ class CloudDataTransferServiceDeleteJobOperator(GoogleCloudBaseOperator):
         hook.delete_transfer_job(job_name=self.job_name, project_id=self.project_id)
 
 
+class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
+    """
+    Runs a transfer job.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:CloudDataTransferServiceRunJobOperator`
+
+    :param job_name: (Required) Name of the job to be run
+    :param project_id: (Optional) the ID of the project that owns the Transfer
+        Job. If set to None or missing, the default project_id from the Google Cloud
+        connection is used.
+    :param gcp_conn_id: The connection ID used to connect to Google Cloud.
+    :param api_version: API version used (e.g. v1).
+    :param google_impersonation_chain: Optional Google service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    # [START gcp_transfer_job_run_template_fields]
+    template_fields: Sequence[str] = (
+        "job_name",
+        "project_id",
+        "gcp_conn_id",
+        "api_version",
+        "google_impersonation_chain",
+    )
+    # [END gcp_transfer_job_run_template_fields]
+    operator_extra_links = (CloudStorageTransferJobLink(),)
+
+    def __init__(
+        self,
+        *,
+        job_name: str,
+        gcp_conn_id: str = "google_cloud_default",
+        api_version: str = "v1",
+        project_id: str = PROVIDE_PROJECT_ID,
+        google_impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.job_name = job_name
+        self.project_id = project_id
+        self.gcp_conn_id = gcp_conn_id
+        self.api_version = api_version
+        self.google_impersonation_chain = google_impersonation_chain
+
+    def _validate_inputs(self) -> None:
+        if not self.job_name:
+            raise AirflowException("The required parameter 'job_name' is empty or None")
+
+    def execute(self, context: Context) -> dict:
+        self._validate_inputs()
+        hook = CloudDataTransferServiceHook(
+            api_version=self.api_version,
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.google_impersonation_chain,
+        )
+
+        project_id = self.project_id or hook.project_id
+        if project_id:
+            CloudStorageTransferJobLink.persist(
+                context=context,
+                task_instance=self,
+                project_id=project_id,
+                job_name=self.job_name,
+            )
+
+        return hook.run_transfer_job(job_name=self.job_name, project_id=project_id)
+
+
 class CloudDataTransferServiceGetOperationOperator(GoogleCloudBaseOperator):
     """
     Gets the latest state of a long-running operation in Google Storage Transfer Service.
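The new CloudDataTransferServiceRunJobOperator triggers an existing Storage Transfer Service job on demand via the hook's run_transfer_job call shown above. A minimal usage sketch, with placeholder job and project names:

.. code-block:: python

    from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
        CloudDataTransferServiceRunJobOperator,
    )

    run_transfer_job = CloudDataTransferServiceRunJobOperator(
        task_id="run_transfer_job",
        job_name="transferJobs/123456789",  # placeholder; must be non-empty
        project_id="my-project",  # optional, falls back to the connection's default project
    )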
--- a/airflow/providers/google/cloud/operators/dataflow.py
+++ b/airflow/providers/google/cloud/operators/dataflow.py
@@ -28,6 +28,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence
 
 from deprecated import deprecated
+from googleapiclient.errors import HttpError
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -38,7 +39,7 @@ from airflow.providers.google.cloud.hooks.dataflow import (
     process_line_and_extract_dataflow_job_id_callback,
 )
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
-from airflow.providers.google.cloud.links.dataflow import DataflowJobLink
+from airflow.providers.google.cloud.links.dataflow import DataflowJobLink, DataflowPipelineLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataflow import TemplateJobStartTrigger
 from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
@@ -1358,3 +1359,236 @@ class DataflowStopJobOperator(GoogleCloudBaseOperator):
             self.log.info("No jobs to stop")
 
         return None
+
+
+class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Creates a new Dataflow Data Pipeline instance.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowCreatePipelineOperator`
+
+    :param body: The request body (contains instance of Pipeline). See:
+        https://cloud.google.com/dataflow/docs/reference/data-pipelines/rest/v1/projects.locations.pipelines/create#request-body
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud
+        Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    .. warning::
+        This option requires Apache Beam 2.39.0 or newer.
+
+    Returns the created Dataflow Data Pipeline instance in JSON representation.
+    """
+
+    operator_extra_links = (DataflowPipelineLink(),)
+
+    def __init__(
+        self,
+        *,
+        body: dict,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.body = body
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+        self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None
+
+    def execute(self, context: Context):
+        if self.body is None:
+            raise AirflowException(
+                "Request Body not given; cannot create a Data Pipeline without the Request Body."
+            )
+        if self.project_id is None:
+            raise AirflowException(
+                "Project ID not given; cannot create a Data Pipeline without the Project ID."
+            )
+        if self.location is None:
+            raise AirflowException("location not given; cannot create a Data Pipeline without the location.")
+
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        self.body["pipelineSources"] = {"airflow": "airflow"}
+        try:
+            self.pipeline = self.dataflow_hook.create_data_pipeline(
+                project_id=self.project_id,
+                body=self.body,
+                location=self.location,
+            )
+        except HttpError as e:
+            if e.resp.status == 409:
+                # If the pipeline already exists, retrieve it
+                self.log.info("Pipeline with given name already exists.")
+                self.pipeline = self.dataflow_hook.get_data_pipeline(
+                    project_id=self.project_id,
+                    pipeline_name=self.pipeline_name,
+                    location=self.location,
+                )
+        DataflowPipelineLink.persist(self, context, self.project_id, self.location, self.pipeline_name)
+        self.xcom_push(context, key="pipeline_name", value=self.pipeline_name)
+        if self.pipeline:
+            if "error" in self.pipeline:
+                raise AirflowException(self.pipeline.get("error").get("message"))
+
+        return self.pipeline
+
+
+class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
+    """
+    Runs a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowRunPipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    Returns the created Job in JSON representation.
+    """
+
+    operator_extra_links = (DataflowJobLink(),)
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+        try:
+            self.job = self.dataflow_hook.run_data_pipeline(
+                pipeline_name=self.pipeline_name,
+                project_id=self.project_id,
+                location=self.location,
+            )["job"]
+            job_id = self.dataflow_hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
+        except HttpError as e:
+            if e.resp.status == 404:
+                raise AirflowException("Pipeline with given name was not found.")
+        except Exception as exc:
+            raise AirflowException("Error occurred when running Pipeline: %s", exc)
+
+        return self.job
+
+
+class DataflowDeletePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Deletes a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowDeletePipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+        self.response: dict | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+
+        self.response = self.dataflow_hook.delete_data_pipeline(
+            pipeline_name=self.pipeline_name,
+            project_id=self.project_id,
+            location=self.location,
+        )
+
+        if self.response:
+            raise AirflowException(self.response)
+
+        return None
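The three Data Pipeline operators added above can be chained in a DAG to create, run, and clean up a pipeline. A minimal sketch with placeholder project, location and pipeline names; only the name field of the body is shown here, the remaining Pipeline fields follow the Data Pipelines REST resource linked in the DataflowCreatePipelineOperator docstring:

.. code-block:: python

    from airflow.providers.google.cloud.operators.dataflow import (
        DataflowCreatePipelineOperator,
        DataflowDeletePipelineOperator,
        DataflowRunPipelineOperator,
    )

    create_pipeline = DataflowCreatePipelineOperator(
        task_id="create_pipeline",
        project_id="my-project",
        location="us-central1",
        body={
            # Other Pipeline fields (type, workload, ...) go here per the REST reference.
            "name": "projects/my-project/locations/us-central1/pipelines/my-pipeline",
        },
    )

    run_pipeline = DataflowRunPipelineOperator(
        task_id="run_pipeline",
        pipeline_name="my-pipeline",
        project_id="my-project",
        location="us-central1",
    )

    delete_pipeline = DataflowDeletePipelineOperator(
        task_id="delete_pipeline",
        pipeline_name="my-pipeline",
        project_id="my-project",
        location="us-central1",
    )

    create_pipeline >> run_pipeline >> delete_pipeline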