apache-airflow-providers-google 10.19.0rc1__py3-none-any.whl → 10.20.0rc1__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- airflow/providers/google/LICENSE +4 -4
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +4 -4
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +26 -0
- airflow/providers/google/cloud/hooks/dataflow.py +132 -1
- airflow/providers/google/cloud/hooks/datapipeline.py +22 -73
- airflow/providers/google/cloud/hooks/gcs.py +21 -0
- airflow/providers/google/cloud/hooks/pubsub.py +10 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +15 -3
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/links/dataflow.py +25 -0
- airflow/providers/google/cloud/openlineage/mixins.py +271 -0
- airflow/providers/google/cloud/openlineage/utils.py +5 -218
- airflow/providers/google/cloud/operators/bigquery.py +74 -20
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +76 -0
- airflow/providers/google/cloud/operators/dataflow.py +235 -1
- airflow/providers/google/cloud/operators/datapipeline.py +29 -121
- airflow/providers/google/cloud/operators/dataplex.py +1 -1
- airflow/providers/google/cloud/operators/dataproc_metastore.py +17 -6
- airflow/providers/google/cloud/operators/kubernetes_engine.py +9 -6
- airflow/providers/google/cloud/operators/pubsub.py +18 -0
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +6 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +16 -0
- airflow/providers/google/cloud/sensors/cloud_composer.py +171 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +13 -0
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +56 -1
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +6 -12
- airflow/providers/google/cloud/triggers/cloud_composer.py +115 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -0
- airflow/providers/google/cloud/utils/credentials_provider.py +81 -6
- airflow/providers/google/cloud/utils/external_token_supplier.py +175 -0
- airflow/providers/google/common/hooks/base_google.py +35 -1
- airflow/providers/google/common/utils/id_token_credentials.py +1 -1
- airflow/providers/google/get_provider_info.py +19 -14
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/METADATA +41 -35
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/RECORD +39 -37
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -47,7 +47,7 @@ from airflow.providers.common.sql.operators.sql import (
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
 from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
-from airflow.providers.google.cloud.openlineage.utils import _BigQueryOpenLineageMixin
+from airflow.providers.google.cloud.openlineage.mixins import _BigQueryOpenLineageMixin
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryCheckTrigger,
@@ -67,6 +67,7 @@ if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context

+
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"

 LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")
@@ -149,7 +150,12 @@ class _BigQueryOperatorsEncryptionConfigurationMixin:
     # annotation of the `self`. Then you can inherit this class in the target operator.
     # e.g: BigQueryCheckOperator, BigQueryTableCheckOperator
     def include_encryption_configuration(  # type:ignore[misc]
-        self: BigQueryCheckOperator
+        self: BigQueryCheckOperator
+        | BigQueryTableCheckOperator
+        | BigQueryValueCheckOperator
+        | BigQueryColumnCheckOperator
+        | BigQueryGetDataOperator
+        | BigQueryIntervalCheckOperator,
         configuration: dict,
         config_key: str,
     ) -> None:
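The body of include_encryption_configuration is unchanged and not shown in this diff. As a rough sketch only (an assumption, not the released code), the helper presumably does no more than copy the operator's encryption settings into the job configuration under the given key:

    # Illustrative sketch, not part of the diff. Assumes the mixin injects the
    # operator's encryption_configuration into the BigQuery job configuration
    # (e.g. configuration["query"]["destinationEncryptionConfiguration"]) when set.
    def include_encryption_configuration(self, configuration: dict, config_key: str) -> None:
        if self.encryption_configuration:
            configuration[config_key]["destinationEncryptionConfiguration"] = self.encryption_configuration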
@@ -205,7 +211,7 @@ class BigQueryCheckOperator(
         Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account. (templated)
     :param labels: a dictionary containing labels for the table, passed to BigQuery.
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -326,7 +332,9 @@ class BigQueryCheckOperator(
         self.log.info("Success.")


-class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
+class BigQueryValueCheckOperator(
+    _BigQueryDbHookMixin, SQLValueCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """Perform a simple value check using sql code.

     .. seealso::
@@ -336,6 +344,13 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
     :param sql: SQL to execute.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -370,6 +385,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         sql: str,
         pass_value: Any,
         tolerance: Any = None,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -383,6 +399,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.deferrable = deferrable
@@ -401,6 +418,8 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
             },
         }

+        self.include_encryption_configuration(configuration, "query")
+
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
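For context (not part of the diff), a usage sketch of the new argument on BigQueryValueCheckOperator; the table, pass value, and KMS key below are placeholders, and only encryption_configuration is new in 10.20.0:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryValueCheckOperator

    # Illustrative only; table, pass_value, and the KMS key name are placeholder values.
    check_count = BigQueryValueCheckOperator(
        task_id="check_count",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
        pass_value=1000,
        tolerance=0.1,
        use_legacy_sql=False,
        # New in 10.20.0: the check job itself now carries the CMEK settings.
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )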
@@ -460,7 +479,9 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         )


-class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
+class BigQueryIntervalCheckOperator(
+    _BigQueryDbHookMixin, SQLIntervalCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Check that the values of metrics given as SQL expressions are within a tolerance of the older ones.

@@ -481,6 +502,13 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         between the current day, and the prior days_back.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -520,6 +548,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
@@ -538,6 +567,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
         self.location = location
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.project_id = project_id
@@ -552,6 +582,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Triggerer."""
         configuration = {"query": {"query": sql, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=self.project_id or hook.project_id,
@@ -608,7 +639,9 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         )


-class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
+class BigQueryColumnCheckOperator(
+    _BigQueryDbHookMixin, SQLColumnCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Subclasses the SQLColumnCheckOperator in order to provide a job id for OpenLineage to parse.

@@ -623,6 +656,13 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     :param partition_clause: a string SQL statement added to a WHERE clause
         to partition data
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
     :param location: The geographic location of the job. See details at:
@@ -650,6 +690,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         partition_clause: str | None = None,
         database: str | None = None,
         accept_none: bool = True,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -671,6 +712,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         self.database = database
         self.accept_none = accept_none
         self.gcp_conn_id = gcp_conn_id
+        self.encryption_configuration = encryption_configuration
         self.use_legacy_sql = use_legacy_sql
         self.location = location
         self.impersonation_chain = impersonation_chain
@@ -683,7 +725,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Trigger."""
         configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}
-
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -765,7 +807,7 @@ class BigQueryTableCheckOperator(
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -851,7 +893,7 @@ class BigQueryTableCheckOperator(
         self.log.info("All tests have passed")


-class BigQueryGetDataOperator(GoogleCloudBaseOperator):
+class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncryptionConfigurationMixin):
     """
     Fetch data and return it, either from a BigQuery table, or results of a query job.

@@ -920,6 +962,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         from the table. (templated)
     :param selected_fields: List of fields to return (comma-separated). If
         unspecified, all fields are returned.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The location used for the operation.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -964,6 +1013,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         selected_fields: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
@@ -983,6 +1033,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self.encryption_configuration = encryption_configuration
         self.project_id = project_id
         self.deferrable = deferrable
         self.poll_interval = poll_interval
@@ -996,6 +1047,8 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     ) -> BigQueryJob:
         get_query = self.generate_query(hook=hook)
         configuration = {"query": {"query": get_query, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
+
         """Submit a new job and get the job id for polling the status using Triggerer."""
         return hook.insert_job(
             configuration=configuration,
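A usage sketch of the deferrable path touched above, where the fetch runs as a query job and now carries the encryption settings; this is not from the diff, and the dataset, table, fields, and key are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDataOperator

    get_rows = BigQueryGetDataOperator(
        task_id="get_rows",
        dataset_id="my_dataset",   # placeholder
        table_id="my_table",       # placeholder
        max_results=10,
        selected_fields="name,value",
        use_legacy_sql=False,
        deferrable=True,  # submits the fetch as a query job, the path shown in the hunk above
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )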
@@ -1198,7 +1251,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
     :param location: The geographic location of the job. Required except for
         US and EU. See details at
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -1392,9 +1445,9 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):

         .. seealso::
             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-    :param gcp_conn_id:
+    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
         interact with the Bigquery service.
-    :param google_cloud_storage_conn_id:
+    :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud.
         and interact with the Google Cloud Storage service.
     :param labels: a dictionary containing labels for the table, passed to BigQuery

@@ -1432,13 +1485,13 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 google_cloud_storage_conn_id="airflow-conn-id",
             )

-    :param view:
+    :param view: (Optional) A dictionary containing definition for the view.
         If set, it will create a view instead of a table:

         .. seealso::
             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-    :param materialized_view:
-    :param encryption_configuration:
+    :param materialized_view: (Optional) The materialized view definition.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

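To illustrate the now-documented view parameter, a sketch that is not taken from the diff; the dataset, view name, and query are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateEmptyTableOperator

    # Creates a logical view instead of a table, following the ViewDefinition shape
    # linked in the docstring above.
    create_view = BigQueryCreateEmptyTableOperator(
        task_id="create_view",
        dataset_id="my_dataset",
        table_id="my_view",
        view={
            "query": "SELECT name, value FROM `my-project.my_dataset.my_table`",
            "useLegacySql": False,
        },
    )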
@@ -1446,7 +1499,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param location: The location used for the operation.
-    :param cluster_fields:
+    :param cluster_fields: (Optional) The fields used for clustering.
         BigQuery supports clustering for both partitioned and
         non-partitioned tables.

@@ -1644,7 +1697,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     :param autodetect: Try to detect schema and format options automatically.
         The schema_fields and schema_object options will be honored when specified explicitly.
         https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
-    :param compression:
+    :param compression: (Optional) The compression type of the data source.
         Possible values include GZIP and NONE.
         The default value is NONE.
         This setting is ignored for Google Cloud Bigtable,
@@ -1666,7 +1719,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         and interact with the Google Cloud Storage service.
     :param src_fmt_configs: configure optional fields specific to the source format
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -2666,6 +2719,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        location: str | None = None,
         **kwargs,
     ) -> None:
         self.schema_fields_updates = schema_fields_updates
@@ -2675,12 +2729,12 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         self.project_id = project_id
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
+        self.location = location
         super().__init__(**kwargs)

     def execute(self, context: Context):
         bq_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, location=self.location
         )

         table = bq_hook.update_table_schema(
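A usage sketch of the new location argument (not from the diff; dataset, table, region, and field names are placeholders). The value is simply forwarded to BigQueryHook, as the hunk above shows:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryUpdateTableSchemaOperator

    update_description = BigQueryUpdateTableSchemaOperator(
        task_id="update_description",
        dataset_id="my_dataset",
        table_id="my_table",
        location="europe-west3",  # new in 10.20.0; passed through to BigQueryHook
        schema_fields_updates=[
            {"name": "emp_name", "description": "Name of the employee"},
        ],
    )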
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py

@@ -443,6 +443,82 @@ class CloudDataTransferServiceDeleteJobOperator(GoogleCloudBaseOperator):
         hook.delete_transfer_job(job_name=self.job_name, project_id=self.project_id)


+class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
+    """
+    Runs a transfer job.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:CloudDataTransferServiceRunJobOperator`
+
+    :param job_name: (Required) Name of the job to be run
+    :param project_id: (Optional) the ID of the project that owns the Transfer
+        Job. If set to None or missing, the default project_id from the Google Cloud
+        connection is used.
+    :param gcp_conn_id: The connection ID used to connect to Google Cloud.
+    :param api_version: API version used (e.g. v1).
+    :param google_impersonation_chain: Optional Google service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    # [START gcp_transfer_job_run_template_fields]
+    template_fields: Sequence[str] = (
+        "job_name",
+        "project_id",
+        "gcp_conn_id",
+        "api_version",
+        "google_impersonation_chain",
+    )
+    # [END gcp_transfer_job_run_template_fields]
+    operator_extra_links = (CloudStorageTransferJobLink(),)
+
+    def __init__(
+        self,
+        *,
+        job_name: str,
+        gcp_conn_id: str = "google_cloud_default",
+        api_version: str = "v1",
+        project_id: str = PROVIDE_PROJECT_ID,
+        google_impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.job_name = job_name
+        self.project_id = project_id
+        self.gcp_conn_id = gcp_conn_id
+        self.api_version = api_version
+        self.google_impersonation_chain = google_impersonation_chain
+
+    def _validate_inputs(self) -> None:
+        if not self.job_name:
+            raise AirflowException("The required parameter 'job_name' is empty or None")
+
+    def execute(self, context: Context) -> dict:
+        self._validate_inputs()
+        hook = CloudDataTransferServiceHook(
+            api_version=self.api_version,
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.google_impersonation_chain,
+        )
+
+        project_id = self.project_id or hook.project_id
+        if project_id:
+            CloudStorageTransferJobLink.persist(
+                context=context,
+                task_instance=self,
+                project_id=project_id,
+                job_name=self.job_name,
+            )
+
+        return hook.run_transfer_job(job_name=self.job_name, project_id=project_id)
+
+
 class CloudDataTransferServiceGetOperationOperator(GoogleCloudBaseOperator):
     """
     Gets the latest state of a long-running operation in Google Storage Transfer Service.
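A minimal usage sketch of the new operator (not from the diff; the transfer job name and project are placeholders):

    from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
        CloudDataTransferServiceRunJobOperator,
    )

    run_transfer = CloudDataTransferServiceRunJobOperator(
        task_id="run_transfer",
        job_name="transferJobs/123456789012345678",  # placeholder job name
        project_id="my-project",  # falls back to the connection's project if omitted
    )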
airflow/providers/google/cloud/operators/dataflow.py

@@ -28,6 +28,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

 from deprecated import deprecated
+from googleapiclient.errors import HttpError

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -38,7 +39,7 @@ from airflow.providers.google.cloud.hooks.dataflow import (
     process_line_and_extract_dataflow_job_id_callback,
 )
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
-from airflow.providers.google.cloud.links.dataflow import DataflowJobLink
+from airflow.providers.google.cloud.links.dataflow import DataflowJobLink, DataflowPipelineLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataflow import TemplateJobStartTrigger
 from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
@@ -1358,3 +1359,236 @@ class DataflowStopJobOperator(GoogleCloudBaseOperator):
             self.log.info("No jobs to stop")

         return None
+
+
+class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Creates a new Dataflow Data Pipeline instance.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowCreatePipelineOperator`
+
+    :param body: The request body (contains instance of Pipeline). See:
+        https://cloud.google.com/dataflow/docs/reference/data-pipelines/rest/v1/projects.locations.pipelines/create#request-body
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud
+        Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    .. warning::
+        This option requires Apache Beam 2.39.0 or newer.
+
+    Returns the created Dataflow Data Pipeline instance in JSON representation.
+    """
+
+    operator_extra_links = (DataflowPipelineLink(),)
+
+    def __init__(
+        self,
+        *,
+        body: dict,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.body = body
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+        self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None
+
+    def execute(self, context: Context):
+        if self.body is None:
+            raise AirflowException(
+                "Request Body not given; cannot create a Data Pipeline without the Request Body."
+            )
+        if self.project_id is None:
+            raise AirflowException(
+                "Project ID not given; cannot create a Data Pipeline without the Project ID."
+            )
+        if self.location is None:
+            raise AirflowException("location not given; cannot create a Data Pipeline without the location.")
+
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        self.body["pipelineSources"] = {"airflow": "airflow"}
+        try:
+            self.pipeline = self.dataflow_hook.create_data_pipeline(
+                project_id=self.project_id,
+                body=self.body,
+                location=self.location,
+            )
+        except HttpError as e:
+            if e.resp.status == 409:
+                # If the pipeline already exists, retrieve it
+                self.log.info("Pipeline with given name already exists.")
+                self.pipeline = self.dataflow_hook.get_data_pipeline(
+                    project_id=self.project_id,
+                    pipeline_name=self.pipeline_name,
+                    location=self.location,
+                )
+        DataflowPipelineLink.persist(self, context, self.project_id, self.location, self.pipeline_name)
+        self.xcom_push(context, key="pipeline_name", value=self.pipeline_name)
+        if self.pipeline:
+            if "error" in self.pipeline:
+                raise AirflowException(self.pipeline.get("error").get("message"))
+
+        return self.pipeline
+
+
+class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
+    """
+    Runs a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowRunPipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    Returns the created Job in JSON representation.
+    """
+
+    operator_extra_links = (DataflowJobLink(),)
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+        try:
+            self.job = self.dataflow_hook.run_data_pipeline(
+                pipeline_name=self.pipeline_name,
+                project_id=self.project_id,
+                location=self.location,
+            )["job"]
+            job_id = self.dataflow_hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
+        except HttpError as e:
+            if e.resp.status == 404:
+                raise AirflowException("Pipeline with given name was not found.")
+        except Exception as exc:
+            raise AirflowException("Error occurred when running Pipeline: %s", exc)
+
+        return self.job
+
+
+class DataflowDeletePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Deletes a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowDeletePipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+        self.response: dict | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+
+        self.response = self.dataflow_hook.delete_data_pipeline(
+            pipeline_name=self.pipeline_name,
+            project_id=self.project_id,
+            location=self.location,
+        )
+
+        if self.response:
+            raise AirflowException(self.response)
+
+        return None