PyPI - apache-airflow-providers-google - Versions diffs - 10.20.0rc1__py3-none-any.whl → 10.21.0__py3-none-any.whl - Mend

apache-airflow-providers-google 10.20.0rc1__py3-none-any.whl → 10.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

airflow/providers/google/cloud/operators/vertex_ai/generative_model.py CHANGED Viewed

@@ -21,6 +21,9 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence
+from deprecated import deprecated
+from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.vertex_ai.generative_model import GenerativeModelHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
@@ -28,6 +31,10 @@ if TYPE_CHECKING:
     from airflow.utils.context import Context
+@deprecated(
+    reason="This operator is deprecated and will be removed after 01.01.2025, please use `TextGenerationModelPredictOperator`.",
+    category=AirflowProviderDeprecationWarning,
+)
 class PromptLanguageModelOperator(GoogleCloudBaseOperator):
     """
     Uses the Vertex AI PaLM API to generate natural language text.
@@ -113,6 +120,10 @@ class PromptLanguageModelOperator(GoogleCloudBaseOperator):
         return response
+@deprecated(
+    reason="This operator is deprecated and will be removed after 01.01.2025, please use `TextEmbeddingModelGetEmbeddingsOperator`.",
+    category=AirflowProviderDeprecationWarning,
+)
 class GenerateTextEmbeddingsOperator(GoogleCloudBaseOperator):
     """
     Uses the Vertex AI PaLM API to generate natural language text.
@@ -177,6 +188,10 @@ class GenerateTextEmbeddingsOperator(GoogleCloudBaseOperator):
         return response
+@deprecated(
+    reason="This operator is deprecated and will be removed after 01.01.2025, please use `GenerativeModelGenerateContentOperator`.",
+    category=AirflowProviderDeprecationWarning,
+)
 class PromptMultimodalModelOperator(GoogleCloudBaseOperator):
     """
     Use the Vertex AI Gemini Pro foundation model to generate natural language text.
@@ -249,6 +264,10 @@ class PromptMultimodalModelOperator(GoogleCloudBaseOperator):
         return response
+@deprecated(
+    reason="This operator is deprecated and will be removed after 01.01.2025, please use `GenerativeModelGenerateContentOperator`.",
+    category=AirflowProviderDeprecationWarning,
+)
 class PromptMultimodalModelWithMediaOperator(GoogleCloudBaseOperator):
     """
     Use the Vertex AI Gemini Pro foundation model to generate natural language text.
@@ -328,3 +347,227 @@ class PromptMultimodalModelWithMediaOperator(GoogleCloudBaseOperator):
         self.xcom_push(context, key="prompt_response", value=response)
         return response
+class TextGenerationModelPredictOperator(GoogleCloudBaseOperator):
+    """
+    Uses the Vertex AI PaLM API to generate natural language text.
+    :param project_id: Required. The ID of the Google Cloud project that the
+        service belongs to (templated).
+    :param location: Required. The ID of the Google Cloud location that the
+        service belongs to (templated).
+    :param prompt: Required. Inputs or queries that a user or a program gives
+        to the Vertex AI PaLM API, in order to elicit a specific response (templated).
+    :param pretrained_model: By default uses the pre-trained model `text-bison`,
+        optimized for performing natural language tasks such as classification,
+        summarization, extraction, content creation, and ideation.
+    :param temperature: Temperature controls the degree of randomness in token
+        selection. Defaults to 0.0.
+    :param max_output_tokens: Token limit determines the maximum amount of text
+        output. Defaults to 256.
+    :param top_p: Tokens are selected from most probable to least until the sum
+        of their probabilities equals the top_p value. Defaults to 0.8.
+    :param top_k: A top_k of 1 means the selected token is the most probable
+        among all tokens. Defaults to 40.
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+    template_fields = ("location", "project_id", "impersonation_chain", "prompt")
+    def __init__(
+        self,
+        *,
+        project_id: str,
+        location: str,
+        prompt: str,
+        pretrained_model: str = "text-bison",
+        temperature: float = 0.0,
+        max_output_tokens: int = 256,
+        top_p: float = 0.8,
+        top_k: int = 40,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.location = location
+        self.prompt = prompt
+        self.pretrained_model = pretrained_model
+        self.temperature = temperature
+        self.max_output_tokens = max_output_tokens
+        self.top_p = top_p
+        self.top_k = top_k
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+    def execute(self, context: Context):
+        # The hook is built here, at execution time, from the connection settings
+        # stored by __init__.
+        self.hook = GenerativeModelHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        self.log.info("Submitting prompt")
+        # All sampling parameters are forwarded to the hook unchanged.
+        response = self.hook.text_generation_model_predict(
+            project_id=self.project_id,
+            location=self.location,
+            prompt=self.prompt,
+            pretrained_model=self.pretrained_model,
+            temperature=self.temperature,
+            max_output_tokens=self.max_output_tokens,
+            top_p=self.top_p,
+            top_k=self.top_k,
+        )
+        self.log.info("Model response: %s", response)
+        # The response is pushed to XCom under the "model_response" key and is
+        # also the return value of execute.
+        self.xcom_push(context, key="model_response", value=response)
+        return response
+class TextEmbeddingModelGetEmbeddingsOperator(GoogleCloudBaseOperator):
+    """
+    Uses the Vertex AI Embeddings API to generate embeddings based on prompt.
+    :param project_id: Required. The ID of the Google Cloud project that the
+        service belongs to (templated).
+    :param location: Required. The ID of the Google Cloud location that the
+        service belongs to (templated).
+    :param prompt: Required. Inputs or queries that a user or a program gives
+        to the Vertex AI PaLM API, in order to elicit a specific response (templated).
+    :param pretrained_model: By default uses the pre-trained model `textembedding-gecko`,
+        optimized for performing text embeddings.
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+    template_fields = ("location", "project_id", "impersonation_chain", "prompt")
+    def __init__(
+        self,
+        *,
+        project_id: str,
+        location: str,
+        prompt: str,
+        pretrained_model: str = "textembedding-gecko",
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.location = location
+        self.prompt = prompt
+        self.pretrained_model = pretrained_model
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+    def execute(self, context: Context):
+        # The hook is built here, at execution time, from the connection settings
+        # stored by __init__.
+        self.hook = GenerativeModelHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        self.log.info("Generating text embeddings")
+        response = self.hook.text_embedding_model_get_embeddings(
+            project_id=self.project_id,
+            location=self.location,
+            prompt=self.prompt,
+            pretrained_model=self.pretrained_model,
+        )
+        self.log.info("Model response: %s", response)
+        # The response is pushed to XCom under the "model_response" key and is
+        # also the return value of execute.
+        self.xcom_push(context, key="model_response", value=response)
+        return response
+class GenerativeModelGenerateContentOperator(GoogleCloudBaseOperator):
+    """
+    Use the Vertex AI Gemini Pro foundation model to generate content.
+    :param project_id: Required. The ID of the Google Cloud project that the
+        service belongs to (templated).
+    :param contents: Required. The multi-part content of a message that a user or a program
+        gives to the generative model, in order to elicit a specific response (templated).
+    :param location: Required. The ID of the Google Cloud location that the
+        service belongs to (templated).
+    :param tools: Optional. Forwarded unchanged to the hook's
+        ``generative_model_generate_content`` call. Presumably the list of tools the
+        model may use when generating the response; confirm against the hook. Defaults to None.
+    :param generation_config: Optional. Generation configuration settings.
+    :param safety_settings: Optional. Per request settings for blocking unsafe content.
+    :param pretrained_model: By default uses the pre-trained model `gemini-pro`,
+        supporting prompts with text-only input, including natural language
+        tasks, multi-turn text and code chat, and code generation. It can
+        output text and code.
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+    template_fields = ("location", "project_id", "impersonation_chain", "contents")
+    def __init__(
+        self,
+        *,
+        project_id: str,
+        contents: list,
+        location: str,
+        tools: list | None = None,
+        generation_config: dict | None = None,
+        safety_settings: dict | None = None,
+        pretrained_model: str = "gemini-pro",
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.location = location
+        self.contents = contents
+        self.tools = tools
+        self.generation_config = generation_config
+        self.safety_settings = safety_settings
+        self.pretrained_model = pretrained_model
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+    def execute(self, context: Context):
+        # The hook is built here, at execution time, from the connection settings
+        # stored by __init__.
+        self.hook = GenerativeModelHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        # Optional arguments left as None are passed to the hook as None; no
+        # defaults are substituted in this operator.
+        response = self.hook.generative_model_generate_content(
+            project_id=self.project_id,
+            location=self.location,
+            contents=self.contents,
+            tools=self.tools,
+            generation_config=self.generation_config,
+            safety_settings=self.safety_settings,
+            pretrained_model=self.pretrained_model,
+        )
+        self.log.info("Model response: %s", response)
+        # The response is pushed to XCom under the "model_response" key and is
+        # also the return value of execute.
+        self.xcom_push(context, key="model_response", value=response)
+        return response

airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py CHANGED Viewed

@@ -301,7 +301,8 @@ class GetPipelineJobOperator(GoogleCloudBaseOperator):
 class ListPipelineJobOperator(GoogleCloudBaseOperator):
-    """Lists PipelineJob in a Location.
+    """
+    Lists PipelineJob in a Location.
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.

airflow/providers/google/cloud/operators/vision.py CHANGED Viewed

@@ -47,7 +47,8 @@ MetaData = Sequence[Tuple[str, str]]
 class CloudVisionCreateProductSetOperator(GoogleCloudBaseOperator):
-    """Create a new ProductSet resource.
+    """
+    Create a new ProductSet resource.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -139,7 +140,8 @@ class CloudVisionCreateProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionGetProductSetOperator(GoogleCloudBaseOperator):
-    """Get information associated with a ProductSet.
+    """
+    Get information associated with a ProductSet.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -216,7 +218,8 @@ class CloudVisionGetProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionUpdateProductSetOperator(GoogleCloudBaseOperator):
-    """Make changes to a `ProductSet` resource.
+    """
+    Make changes to a `ProductSet` resource.
     Only ``display_name`` can be updated currently.
@@ -322,7 +325,8 @@ class CloudVisionUpdateProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionDeleteProductSetOperator(GoogleCloudBaseOperator):
-    """Permanently deletes a ``ProductSet``.
+    """
+    Permanently deletes a ``ProductSet``.
     ``Products`` and ``ReferenceImages`` in the ``ProductSet`` are not deleted.
     The actual image files are not deleted from Google Cloud Storage.
@@ -402,7 +406,8 @@ class CloudVisionDeleteProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionCreateProductOperator(GoogleCloudBaseOperator):
-    """Create and return a new product resource.
+    """
+    Create and return a new product resource.
     Possible errors regarding the ``Product`` object provided:
@@ -499,7 +504,8 @@ class CloudVisionCreateProductOperator(GoogleCloudBaseOperator):
 class CloudVisionGetProductOperator(GoogleCloudBaseOperator):
-    """Get information associated with a ``Product``.
+    """
+    Get information associated with a ``Product``.
     Possible errors:
@@ -580,7 +586,8 @@ class CloudVisionGetProductOperator(GoogleCloudBaseOperator):
 class CloudVisionUpdateProductOperator(GoogleCloudBaseOperator):
-    """Make changes to a Product resource.
+    """
+    Make changes to a Product resource.
     Only the display_name, description, and labels fields can be updated right now.
@@ -693,7 +700,8 @@ class CloudVisionUpdateProductOperator(GoogleCloudBaseOperator):
 class CloudVisionDeleteProductOperator(GoogleCloudBaseOperator):
-    """Permanently delete a product and its reference images.
+    """
+    Permanently delete a product and its reference images.
     Metadata of the product and all its images will be deleted right away, but
     search queries against ProductSets containing the product may still work
@@ -778,7 +786,8 @@ class CloudVisionDeleteProductOperator(GoogleCloudBaseOperator):
 class CloudVisionImageAnnotateOperator(GoogleCloudBaseOperator):
-    """Run image detection and annotation for an image or a batch of images.
+    """
+    Run image detection and annotation for an image or a batch of images.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -845,7 +854,8 @@ class CloudVisionImageAnnotateOperator(GoogleCloudBaseOperator):
 class CloudVisionCreateReferenceImageOperator(GoogleCloudBaseOperator):
-    """Create and return a new ReferenceImage ID resource.
+    """
+    Create and return a new ReferenceImage ID resource.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -948,7 +958,8 @@ class CloudVisionCreateReferenceImageOperator(GoogleCloudBaseOperator):
 class CloudVisionDeleteReferenceImageOperator(GoogleCloudBaseOperator):
-    """Delete a ReferenceImage ID resource.
+    """
+    Delete a ReferenceImage ID resource.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1033,7 +1044,8 @@ class CloudVisionDeleteReferenceImageOperator(GoogleCloudBaseOperator):
 class CloudVisionAddProductToProductSetOperator(GoogleCloudBaseOperator):
-    """Add a Product to the specified ProductSet.
+    """
+    Add a Product to the specified ProductSet.
     If the Product is already present, no change is made. One Product can be
     added to at most 100 ProductSets.
@@ -1122,7 +1134,8 @@ class CloudVisionAddProductToProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionRemoveProductFromProductSetOperator(GoogleCloudBaseOperator):
-    """Remove a Product from the specified ProductSet.
+    """
+    Remove a Product from the specified ProductSet.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1204,7 +1217,8 @@ class CloudVisionRemoveProductFromProductSetOperator(GoogleCloudBaseOperator):
 class CloudVisionDetectTextOperator(GoogleCloudBaseOperator):
-    """Detect Text in the image.
+    """
+    Detect Text in the image.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1285,7 +1299,8 @@ class CloudVisionDetectTextOperator(GoogleCloudBaseOperator):
 class CloudVisionTextDetectOperator(GoogleCloudBaseOperator):
-    """Detect Document Text in the image.
+    """
+    Detect Document Text in the image.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1365,7 +1380,8 @@ class CloudVisionTextDetectOperator(GoogleCloudBaseOperator):
 class CloudVisionDetectImageLabelsOperator(GoogleCloudBaseOperator):
-    """Detect Document Text in the image.
+    """
+    Detect Document Text in the image.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1435,7 +1451,8 @@ class CloudVisionDetectImageLabelsOperator(GoogleCloudBaseOperator):
 class CloudVisionDetectImageSafeSearchOperator(GoogleCloudBaseOperator):
-    """Detect Document Text in the image.
+    """
+    Detect Document Text in the image.
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1507,7 +1524,8 @@ class CloudVisionDetectImageSafeSearchOperator(GoogleCloudBaseOperator):
 def prepare_additional_parameters(
     additional_properties: dict | None, language_hints: Any, web_detection_params: Any
 ) -> dict | None:
-    """Create a value for the ``additional_properties`` parameter.
+    """
+    Create a value for the ``additional_properties`` parameter.
     The new value is based on ``language_hints``, ``web_detection_params``, and
     ``additional_properties`` parameters specified by the user.

airflow/providers/google/cloud/sensors/gcs.py CHANGED Viewed

@@ -188,7 +188,15 @@ def ts_function(context):
     try:
         return context["data_interval_end"]
     except KeyError:
-        return context["dag"].following_schedule(context["execution_date"])
+        from airflow.utils import timezone
+        data_interval = context["dag"].infer_automated_data_interval(
+            timezone.coerce_datetime(context["execution_date"])
+        )
+        next_info = context["dag"].next_dagrun_info(data_interval, restricted=False)
+        if next_info is None:
+            return None
+        return next_info.data_interval.start
 class GCSObjectUpdateSensor(BaseSensorOperator):
@@ -575,7 +583,8 @@ class GCSUploadSessionCompleteSensor(BaseSensorOperator):
             )
     def execute_complete(self, context: dict[str, Any], event: dict[str, str] | None = None) -> str:
-        """Rely on trigger to throw an exception, otherwise it assumes execution was successful.
+        """
+        Rely on trigger to throw an exception, otherwise it assumes execution was successful.
         Callback for when the trigger fires - returns immediately.

airflow/providers/google/cloud/sensors/pubsub.py CHANGED Viewed

@@ -185,7 +185,8 @@ class PubSubPullSensor(BaseSensorOperator):
         pulled_messages: list[ReceivedMessage],
         context: Context,
     ):
-        """Convert `ReceivedMessage` objects into JSON-serializable dicts.
+        """
+        Convert `ReceivedMessage` objects into JSON-serializable dicts.
         This method can be overridden by subclasses or by `messages_callback` constructor argument.

airflow/providers/google/cloud/transfers/bigquery_to_gcs.py CHANGED Viewed

@@ -142,8 +142,6 @@ class BigQueryToGCSOperator(BaseOperator):
         self.hook: BigQueryHook | None = None
         self.deferrable = deferrable
-        self._job_id: str = ""
     @staticmethod
     def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
         if job.error_result:
@@ -212,7 +210,7 @@ class BigQueryToGCSOperator(BaseOperator):
         self.hook = hook
         configuration = self._prepare_configuration()
-        job_id = hook.generate_job_id(
+        self.job_id = hook.generate_job_id(
             job_id=self.job_id,
             dag_id=self.dag_id,
             task_id=self.task_id,
@@ -224,14 +222,14 @@ class BigQueryToGCSOperator(BaseOperator):
         try:
             self.log.info("Executing: %s", configuration)
             job: BigQueryJob | UnknownJob = self._submit_job(
-                hook=hook, job_id=job_id, configuration=configuration
+                hook=hook, job_id=self.job_id, configuration=configuration
             )
         except Conflict:
             # If the job already exists retrieve it
             job = hook.get_job(
                 project_id=self.project_id,
                 location=self.location,
-                job_id=job_id,
+                job_id=self.job_id,
             )
             if job.state in self.reattach_states:
                 # We are reattaching to a job
@@ -240,12 +238,12 @@ class BigQueryToGCSOperator(BaseOperator):
             else:
                 # Same job configuration so we need force_rerun
                 raise AirflowException(
-                    f"Job with id: {job_id} already exists and is in {job.state} state. If you "
+                    f"Job with id: {self.job_id} already exists and is in {job.state} state. If you "
                     f"want to force rerun it consider setting `force_rerun=True`."
                     f"Or, if you want to reattach in this scenario add {job.state} to `reattach_states`"
                 )
-        self._job_id = job.job_id
+        self.job_id = job.job_id
         conf = job.to_api_repr()["configuration"]["extract"]["sourceTable"]
         dataset_id, project_id, table_id = conf["datasetId"], conf["projectId"], conf["tableId"]
         BigQueryTableLink.persist(
@@ -261,7 +259,7 @@ class BigQueryToGCSOperator(BaseOperator):
                 timeout=self.execution_timeout,
                 trigger=BigQueryInsertJobTrigger(
                     conn_id=self.gcp_conn_id,
-                    job_id=self._job_id,
+                    job_id=self.job_id,
                     project_id=self.project_id or self.hook.project_id,
                     location=self.location or self.hook.location,
                     impersonation_chain=self.impersonation_chain,
@@ -272,7 +270,8 @@ class BigQueryToGCSOperator(BaseOperator):
             job.result(timeout=self.result_timeout, retry=self.result_retry)
     def execute_complete(self, context: Context, event: dict[str, Any]):
-        """Return immediately and relies on trigger to throw a success event. Callback for the trigger.
+        """
+        Return immediately and relies on trigger to throw a success event. Callback for the trigger.
         Relies on trigger to throw an exception, otherwise it assumes execution was successful.
         """
@@ -283,6 +282,8 @@ class BigQueryToGCSOperator(BaseOperator):
             self.task_id,
             event["message"],
         )
+        # Save job_id as an attribute to be later used by listeners
+        self.job_id = event.get("job_id")
     def get_openlineage_facets_on_complete(self, task_instance):
         """Implement on_complete as we will include final BQ job id."""
@@ -302,7 +303,15 @@ class BigQueryToGCSOperator(BaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
-        table_object = self.hook.get_client(self.hook.project_id).get_table(self.source_project_dataset_table)
+        if not self.hook:
+            self.hook = BigQueryHook(
+                gcp_conn_id=self.gcp_conn_id,
+                location=self.location,
+                impersonation_chain=self.impersonation_chain,
+            )
+        project_id = self.project_id or self.hook.project_id
+        table_object = self.hook.get_client(project_id).get_table(self.source_project_dataset_table)
         input_dataset = Dataset(
             namespace="bigquery",
@@ -346,9 +355,9 @@ class BigQueryToGCSOperator(BaseOperator):
             output_datasets.append(dataset)
         run_facets = {}
-        if self._job_id:
+        if self.job_id:
             run_facets = {
-                "externalQuery": ExternalQueryRunFacet(externalQueryId=self._job_id, source="bigquery"),
+                "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery"),
             }
         return OperatorLineage(inputs=[input_dataset], outputs=output_datasets, run_facets=run_facets)

airflow/providers/google/cloud/transfers/bigquery_to_postgres.py CHANGED Viewed

@@ -76,7 +76,7 @@ class BigQueryToPostgresOperator(BigQueryToSqlBaseOperator):
         self.replace_index = replace_index
     def get_sql_hook(self) -> PostgresHook:
-        return PostgresHook(schema=self.database, postgres_conn_id=self.postgres_conn_id)
+        return PostgresHook(database=self.database, postgres_conn_id=self.postgres_conn_id)
     def execute(self, context: Context) -> None:
         big_query_hook = BigQueryHook(

airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py CHANGED Viewed

@@ -43,7 +43,8 @@ class FlushAction(Enum):
 class FacebookAdsReportToGcsOperator(BaseOperator):
-    """Fetch from Facebook Ads API.
+    """
+    Fetch from Facebook Ads API.
     This converts and saves the data as a temporary JSON file, and uploads the
     JSON to Google Cloud Storage.

airflow/providers/google/cloud/transfers/gcs_to_bigquery.py CHANGED Viewed

@@ -449,7 +449,8 @@ class GCSToBigQueryOperator(BaseOperator):
                     return self._find_max_value_in_column()
     def execute_complete(self, context: Context, event: dict[str, Any]):
-        """Return immediately and relies on trigger to throw a success event. Callback for the trigger.
+        """
+        Return immediately and relies on trigger to throw a success event. Callback for the trigger.
         Relies on trigger to throw an exception, otherwise it assumes execution was successful.
         """
@@ -460,6 +461,8 @@ class GCSToBigQueryOperator(BaseOperator):
             self.task_id,
             event["message"],
         )
+        # Save job_id as an attribute to be later used by listeners
+        self.job_id = event.get("job_id")
         return self._find_max_value_in_column()
     def _find_max_value_in_column(self):
@@ -756,17 +759,26 @@ class GCSToBigQueryOperator(BaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
-        table_object = self.hook.get_client(self.hook.project_id).get_table(
-            self.destination_project_dataset_table
-        )
+        if not self.hook:
+            self.hook = BigQueryHook(
+                gcp_conn_id=self.gcp_conn_id,
+                location=self.location,
+                impersonation_chain=self.impersonation_chain,
+            )
+        project_id = self.project_id or self.hook.project_id
+        table_object = self.hook.get_client(project_id).get_table(self.destination_project_dataset_table)
         output_dataset_facets = get_facets_from_bq_table(table_object)
+        source_objects = (
+            self.source_objects if isinstance(self.source_objects, list) else [self.source_objects]
+        )
         input_dataset_facets = {
             "schema": output_dataset_facets["schema"],
         }
         input_datasets = []
-        for blob in sorted(self.source_objects):
+        for blob in sorted(source_objects):
             additional_facets = {}
             if "*" in blob:

apache-airflow-providers-google 10.20.0rc1__py3-none-any.whl → 10.21.0__py3-none-any.whl

apache-airflow-providers-google 10.20.0rc1__py3-none-any.whl → 10.21.0__py3-none-any.whl