apache-airflow-providers-google 16.0.0a1__py3-none-any.whl → 16.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +34 -0
  3. airflow/providers/google/cloud/hooks/bigquery.py +63 -76
  4. airflow/providers/google/cloud/hooks/gcs.py +3 -3
  5. airflow/providers/google/cloud/hooks/looker.py +5 -0
  6. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -36
  7. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -66
  8. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +1 -1
  9. airflow/providers/google/cloud/links/cloud_run.py +59 -0
  10. airflow/providers/google/cloud/log/gcs_task_handler.py +4 -4
  11. airflow/providers/google/cloud/operators/bigquery.py +49 -10
  12. airflow/providers/google/cloud/operators/cloud_run.py +10 -1
  13. airflow/providers/google/cloud/operators/gcs.py +1 -0
  14. airflow/providers/google/cloud/operators/kubernetes_engine.py +3 -85
  15. airflow/providers/google/cloud/operators/pubsub.py +2 -1
  16. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -92
  17. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -0
  18. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +9 -5
  19. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +1 -1
  20. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  21. airflow/providers/google/cloud/transfers/s3_to_gcs.py +11 -5
  22. airflow/providers/google/cloud/triggers/bigquery.py +32 -5
  23. airflow/providers/google/cloud/triggers/dataproc.py +62 -10
  24. airflow/providers/google/get_provider_info.py +14 -5
  25. airflow/providers/google/leveldb/hooks/leveldb.py +25 -0
  26. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/METADATA +23 -22
  27. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/RECORD +29 -28
  28. airflow/providers/google/cloud/links/automl.py +0 -193
  29. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/WHEEL +0 -0
  30. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/entry_points.txt +0 -0
@@ -93,16 +93,32 @@ class IfExistAction(enum.Enum):
  SKIP = "skip"


+ class _BigQueryHookWithFlexibleProjectId(BigQueryHook):
+     @property
+     def project_id(self) -> str:
+         _, project_id = self.get_credentials_and_project_id()
+         return project_id or PROVIDE_PROJECT_ID
+
+     @project_id.setter
+     def project_id(self, value: str) -> None:
+         cached_creds, _ = self.get_credentials_and_project_id()
+         self._cached_project_id = value or PROVIDE_PROJECT_ID
+         self._cached_credentials = cached_creds
+
+
  class _BigQueryDbHookMixin:
-     def get_db_hook(self: BigQueryCheckOperator) -> BigQueryHook:  # type:ignore[misc]
+     def get_db_hook(self: BigQueryCheckOperator) -> _BigQueryHookWithFlexibleProjectId:  # type:ignore[misc]
          """Get BigQuery DB Hook."""
-         return BigQueryHook(
+         hook = _BigQueryHookWithFlexibleProjectId(
              gcp_conn_id=self.gcp_conn_id,
              use_legacy_sql=self.use_legacy_sql,
              location=self.location,
              impersonation_chain=self.impersonation_chain,
              labels=self.labels,
          )
+         if self.project_id:
+             hook.project_id = self.project_id
+         return hook


  class _BigQueryOperatorsEncryptionConfigurationMixin:
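
The new helper relies on a property override: the subclass shadows the base hook's connection-derived project_id with a property whose setter records a per-instance override. A simplified, runnable sketch of that pattern, with hypothetical class names and none of the provider's real credential handling:

    class BaseHook:
        def get_credentials_and_project_id(self):
            # Stand-in for the real connection/credential lookup.
            if getattr(self, "_cached_project_id", None):
                return "creds", self._cached_project_id
            return "creds", "project-from-connection"


    class FlexibleHook(BaseHook):
        @property
        def project_id(self):
            _, project_id = self.get_credentials_and_project_id()
            return project_id

        @project_id.setter
        def project_id(self, value):
            self._cached_project_id = value  # later lookups see the override


    hook = FlexibleHook()
    assert hook.project_id == "project-from-connection"
    hook.project_id = "explicit-project"
    assert hook.project_id == "explicit-project"
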
@@ -190,6 +206,7 @@ class BigQueryCheckOperator(
      https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs.
      For example, [{ 'name': 'corpus', 'parameterType': { 'type': 'STRING' },
      'parameterValue': { 'value': 'romeoandjuliet' } }]. (templated)
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = (
@@ -208,6 +225,7 @@ class BigQueryCheckOperator(
          *,
          sql: str,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -228,6 +246,7 @@ class BigQueryCheckOperator(
          self.deferrable = deferrable
          self.poll_interval = poll_interval
          self.query_params = query_params
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -243,7 +262,7 @@ class BigQueryCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=True,
@@ -257,6 +276,8 @@ class BigQueryCheckOperator(
              gcp_conn_id=self.gcp_conn_id,
              impersonation_chain=self.impersonation_chain,
          )
+         if self.project_id is None:
+             self.project_id = hook.project_id
          job = self._submit_job(hook, job_id="")
          context["ti"].xcom_push(key="job_id", value=job.job_id)
          if job.running():
@@ -265,7 +286,7 @@ class BigQueryCheckOperator(
              trigger=BigQueryCheckTrigger(
                  conn_id=self.gcp_conn_id,
                  job_id=job.job_id,
-                 project_id=hook.project_id,
+                 project_id=self.project_id,
                  location=self.location or hook.location,
                  poll_interval=self.poll_interval,
                  impersonation_chain=self.impersonation_chain,
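
Net effect of the hunks above: BigQueryCheckOperator now accepts an explicit project_id, uses it for job submission and the deferral trigger, and falls back to the hook's connection-derived project when it is left unset. A usage sketch (project and table names hypothetical):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    check_rows = BigQueryCheckOperator(
        task_id="check_rows",
        sql="SELECT COUNT(*) FROM my_dataset.my_table",  # hypothetical table
        use_legacy_sql=False,
        project_id="my-billing-project",  # new in this release; omit to use the connection's project
    )
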
@@ -342,6 +363,7 @@ class BigQueryValueCheckOperator(
      :param deferrable: Run operator in the deferrable mode.
      :param poll_interval: (Deferrable mode only) polling period in seconds to
          check for the status of job.
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = (
@@ -363,6 +385,7 @@ class BigQueryValueCheckOperator(
          tolerance: Any = None,
          encryption_configuration: dict | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -380,6 +403,7 @@ class BigQueryValueCheckOperator(
          self.labels = labels
          self.deferrable = deferrable
          self.poll_interval = poll_interval
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -398,7 +422,7 @@ class BigQueryValueCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=True,
@@ -409,7 +433,8 @@ class BigQueryValueCheckOperator(
              super().execute(context=context)
          else:
              hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-
+             if self.project_id is None:
+                 self.project_id = hook.project_id
              job = self._submit_job(hook, job_id="")
              context["ti"].xcom_push(key="job_id", value=job.job_id)
              if job.running():
@@ -418,7 +443,7 @@ class BigQueryValueCheckOperator(
                  trigger=BigQueryValueCheckTrigger(
                      conn_id=self.gcp_conn_id,
                      job_id=job.job_id,
-                     project_id=hook.project_id,
+                     project_id=self.project_id,
                      location=self.location or hook.location,
                      sql=self.sql,
                      pass_value=self.pass_value,
@@ -575,6 +600,9 @@ class BigQueryIntervalCheckOperator(
          hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
          self.log.info("Using ratio formula: %s", self.ratio_formula)

+         if self.project_id is None:
+             self.project_id = hook.project_id
+
          self.log.info("Executing SQL check: %s", self.sql1)
          job_1 = self._submit_job(hook, sql=self.sql1, job_id="")
          context["ti"].xcom_push(key="job_id", value=job_1.job_id)
@@ -587,7 +615,7 @@ class BigQueryIntervalCheckOperator(
                  conn_id=self.gcp_conn_id,
                  first_job_id=job_1.job_id,
                  second_job_id=job_2.job_id,
-                 project_id=hook.project_id,
+                 project_id=self.project_id,
                  table=self.table,
                  location=self.location or hook.location,
                  metrics_thresholds=self.metrics_thresholds,
@@ -654,6 +682,7 @@ class BigQueryColumnCheckOperator(
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
      :param labels: a dictionary containing labels for the table, passed to BigQuery
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = tuple(set(SQLColumnCheckOperator.template_fields) | {"gcp_conn_id"})
@@ -670,6 +699,7 @@ class BigQueryColumnCheckOperator(
          accept_none: bool = True,
          encryption_configuration: dict | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -695,6 +725,7 @@ class BigQueryColumnCheckOperator(
          self.location = location
          self.impersonation_chain = impersonation_chain
          self.labels = labels
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -706,7 +737,7 @@ class BigQueryColumnCheckOperator(
          self.include_encryption_configuration(configuration, "query")
          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=False,
@@ -715,6 +746,9 @@ class BigQueryColumnCheckOperator(
      def execute(self, context=None):
          """Perform checks on the given columns."""
          hook = self.get_db_hook()
+
+         if self.project_id is None:
+             self.project_id = hook.project_id
          failed_tests = []

          job = self._submit_job(hook, job_id="")
@@ -786,6 +820,7 @@ class BigQueryTableCheckOperator(
          account from the list granting this role to the originating account (templated).
      :param labels: a dictionary containing labels for the table, passed to BigQuery
      :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+     :param project_id: Google Cloud Project where the job is running

      .. code-block:: python

@@ -805,6 +840,7 @@ class BigQueryTableCheckOperator(
          checks: dict,
          partition_clause: str | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -819,6 +855,7 @@ class BigQueryTableCheckOperator(
          self.impersonation_chain = impersonation_chain
          self.labels = labels
          self.encryption_configuration = encryption_configuration
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -832,7 +869,7 @@ class BigQueryTableCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=False,
@@ -841,6 +878,8 @@ class BigQueryTableCheckOperator(
      def execute(self, context=None):
          """Execute the given checks on the table."""
          hook = self.get_db_hook()
+         if self.project_id is None:
+             self.project_id = hook.project_id
          job = self._submit_job(hook, job_id="")
          context["ti"].xcom_push(key="job_id", value=job.job_id)
          records = job.result().to_dataframe()
@@ -27,6 +27,7 @@ from google.cloud.run_v2 import Job, Service
  from airflow.configuration import conf
  from airflow.exceptions import AirflowException
  from airflow.providers.google.cloud.hooks.cloud_run import CloudRunHook, CloudRunServiceHook
+ from airflow.providers.google.cloud.links.cloud_run import CloudRunJobLoggingLink
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
  from airflow.providers.google.cloud.triggers.cloud_run import CloudRunJobFinishedTrigger, RunJobStatus

@@ -248,7 +249,7 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):

      :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
      :param region: Required. The ID of the Google Cloud region that the service belongs to.
-     :param job_name: Required. The name of the job to update.
+     :param job_name: Required. The name of the job to execute.
      :param overrides: Optional map of override values.
      :param gcp_conn_id: The connection ID used to connect to Google Cloud.
      :param polling_period_seconds: Optional. Control the rate of the poll for the result of deferrable run.
@@ -265,6 +266,7 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
      :param deferrable: Run the operator in deferrable mode.
      """

+     operator_extra_links = (CloudRunJobLoggingLink(),)
      template_fields = (
          "project_id",
          "region",
@@ -312,6 +314,13 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
          if self.operation is None:
              raise AirflowException("Operation is None")

+         if self.operation.metadata.log_uri:
+             CloudRunJobLoggingLink.persist(
+                 context=context,
+                 task_instance=self,
+                 log_uri=self.operation.metadata.log_uri,
+             )
+
          if not self.deferrable:
              result: Execution = self._wait_for_operation(self.operation)
              self._fail_if_execution_failed(result)
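
For context, a minimal invocation of the operator gaining the new extra link (identifiers hypothetical); when the returned operation's metadata carries a log_uri, the link to the job's logs is persisted and surfaces on the task instance in the UI:

    from airflow.providers.google.cloud.operators.cloud_run import CloudRunExecuteJobOperator

    execute_job = CloudRunExecuteJobOperator(
        task_id="execute_job",
        project_id="my-project",  # hypothetical
        region="us-central1",
        job_name="my-job",  # hypothetical
    )
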
@@ -204,6 +204,7 @@ class GCSListObjectsOperator(GoogleCloudBaseOperator):
          "bucket",
          "prefix",
          "delimiter",
+         "match_glob",
          "impersonation_chain",
      )

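
With match_glob now a template field, the pattern can be rendered from runtime context, e.g. to list only the current logical date's files (bucket name hypothetical):

    from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator

    list_files = GCSListObjectsOperator(
        task_id="list_files",
        bucket="my-bucket",  # hypothetical
        match_glob="exports/{{ ds }}/**/*.parquet",  # rendered per run now that it is templated
    )
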
@@ -57,7 +57,6 @@ from airflow.providers.google.cloud.triggers.kubernetes_engine import (
      GKEOperationTrigger,
      GKEStartPodTrigger,
  )
- from airflow.providers.google.common.deprecated import deprecated
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
  from airflow.providers_manager import ProvidersManager
  from airflow.utils.timezone import utcnow
@@ -222,7 +221,6 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
          If set as a sequence, the identities from the list must grant
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
-     :param name: (Deprecated) The name of the resource to delete, in this case cluster name
      :param api_version: The api version to use
      :param deferrable: Run operator in the deferrable mode.
      :param poll_interval: Interval size which defines how often operation status is checked.
@@ -241,7 +239,6 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          cluster_name: str | None = None,
-         name: str | None = None,
          api_version: str = "v2",
          deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
          poll_interval: int = 10,
@@ -251,36 +248,17 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
          super().__init__(*args, **kwargs)

          self.location = location
-         self.cluster_name = cluster_name or name
+         self.cluster_name = cluster_name
          self.use_internal_ip = use_internal_ip
          self.use_dns_endpoint = use_dns_endpoint
          self.project_id = project_id
          self.gcp_conn_id = gcp_conn_id
          self.impersonation_chain = impersonation_chain
-         self._name = name
          self.api_version = api_version
          self.deferrable = deferrable
          self.poll_interval = poll_interval
          self._check_input()

-     @property
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         use_instead="cluster_name",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def name(self) -> str | None:
-         return self._name
-
-     @name.setter
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         use_instead="cluster_name",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def name(self, name: str) -> None:
-         self._name = name
-
      def _check_input(self) -> None:
          if not all([self.project_id, self.cluster_name, self.location]):
              self.log.error("One of (project_id, cluster_name, location) is missing or incorrect")
@@ -622,16 +600,10 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
          If set as a sequence, the identities from the list must grant
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
-     :param regional: (Deprecated) The location param is region name.
      :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted.
          If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod",
          only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod.
          Current default is `keep_pod`, but this will be changed in the next major release of this provider.
-     :param is_delete_operator_pod: (Deprecated) What to do when the pod reaches its final
-         state, or the execution is interrupted. If True, delete the
-         pod; if False, leave the pod. Current default is False, but this will be
-         changed in the next major release of this provider.
-         Deprecated - use `on_finish_action` instead.
      :param deferrable: Run operator in the deferrable mode.
      """

@@ -651,30 +623,15 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
          project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
-         regional: bool | None = None,
          on_finish_action: str | None = None,
-         is_delete_operator_pod: bool | None = None,
          deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
          *args,
          **kwargs,
      ) -> None:
-         if is_delete_operator_pod is not None:
-             kwargs["on_finish_action"] = (
-                 OnFinishAction.DELETE_POD if is_delete_operator_pod else OnFinishAction.KEEP_POD
-             )
-         elif on_finish_action is not None:
+         if on_finish_action is not None:
              kwargs["on_finish_action"] = OnFinishAction(on_finish_action)
          else:
-             warnings.warn(
-                 f"You have not set parameter `on_finish_action` in class {self.__class__.__name__}. "
-                 "Currently the default for this parameter is `keep_pod` but in a future release"
-                 " the default will be changed to `delete_pod`. To ensure pods are not deleted in"
-                 " the future you will need to set `on_finish_action=keep_pod` explicitly.",
-                 AirflowProviderDeprecationWarning,
-                 stacklevel=2,
-             )
-             kwargs["on_finish_action"] = OnFinishAction.KEEP_POD
-
+             kwargs["on_finish_action"] = OnFinishAction.DELETE_POD
          super().__init__(*args, **kwargs)
          self.project_id = project_id
          self.location = location
@@ -683,9 +640,6 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
          self.use_internal_ip = use_internal_ip
          self.use_dns_endpoint = use_dns_endpoint
          self.impersonation_chain = impersonation_chain
-         self._regional = regional
-         if is_delete_operator_pod is not None:
-             self.is_delete_operator_pod = is_delete_operator_pod
          self.deferrable = deferrable

          # There is no need to manage the kube_config file, as it will be generated automatically.
@@ -693,42 +647,6 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
          if self.config_file:
              raise AirflowException("config_file is not an allowed parameter for the GKEStartPodOperator.")

-     @property
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         use_instead="on_finish_action",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def is_delete_operator_pod(self) -> bool | None:
-         return self._is_delete_operator_pod
-
-     @is_delete_operator_pod.setter
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         use_instead="on_finish_action",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def is_delete_operator_pod(self, is_delete_operator_pod) -> None:
-         self._is_delete_operator_pod = is_delete_operator_pod
-
-     @property
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         reason="The parameter is not in actual use.",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def regional(self) -> bool | None:
-         return self._regional
-
-     @regional.setter
-     @deprecated(
-         planned_removal_date="May 01, 2025",
-         reason="The parameter is not in actual use.",
-         category=AirflowProviderDeprecationWarning,
-     )
-     def regional(self, regional) -> None:
-         self._regional = regional
-
      def invoke_defer_method(self, last_log_time: DateTime | None = None):
          """Redefine triggers which are being used in child classes."""
          trigger_start_time = utcnow()
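
Note the behavior change buried in these removals: with the is_delete_operator_pod shim gone, an unset on_finish_action now defaults to delete_pod instead of warning and keeping the pod. DAGs that inspect completed pods should pin the old behavior explicitly (cluster and image names hypothetical):

    from airflow.providers.google.cloud.operators.kubernetes_engine import GKEStartPodOperator

    start_pod = GKEStartPodOperator(
        task_id="start_pod",
        project_id="my-project",  # hypothetical
        location="us-central1-a",
        cluster_name="my-cluster",  # hypothetical
        name="echo-pod",
        namespace="default",
        image="busybox",
        cmds=["echo", "hello"],
        on_finish_action="keep_pod",  # the unset default is now delete_pod
    )
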
@@ -40,6 +40,7 @@ from google.cloud.pubsub_v1.types import (
      SchemaSettings,
  )

+ from airflow.configuration import conf
  from airflow.exceptions import AirflowException
  from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
  from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink, PubSubTopicLink
@@ -770,7 +771,7 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
          messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
-         deferrable: bool = False,
+         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
          poll_interval: int = 300,
          **kwargs,
      ) -> None:
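
This aligns PubSubPullOperator with the other operators in this diff: its default now follows the global [operators] default_deferrable setting rather than a hard-coded False. The lookup it performs is exactly:

    from airflow.configuration import conf

    default_deferrable = conf.getboolean("operators", "default_deferrable", fallback=False)
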
@@ -22,105 +22,13 @@ from __future__ import annotations
  from collections.abc import Sequence
  from typing import TYPE_CHECKING

- from airflow.exceptions import AirflowProviderDeprecationWarning
  from airflow.providers.google.cloud.hooks.vertex_ai.generative_model import GenerativeModelHook
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
- from airflow.providers.google.common.deprecated import deprecated

  if TYPE_CHECKING:
      from airflow.utils.context import Context


- @deprecated(
-     planned_removal_date="April 09, 2025",
-     use_instead="GenerativeModelGenerateContentOperator",
-     category=AirflowProviderDeprecationWarning,
- )
- class TextGenerationModelPredictOperator(GoogleCloudBaseOperator):
-     """
-     Uses the Vertex AI PaLM API to generate natural language text.
-
-     :param project_id: Required. The ID of the Google Cloud project that the
-         service belongs to (templated).
-     :param location: Required. The ID of the Google Cloud location that the
-         service belongs to (templated).
-     :param prompt: Required. Inputs or queries that a user or a program gives
-         to the Vertex AI PaLM API, in order to elicit a specific response (templated).
-     :param pretrained_model: By default uses the pre-trained model `text-bison`,
-         optimized for performing natural language tasks such as classification,
-         summarization, extraction, content creation, and ideation.
-     :param temperature: Temperature controls the degree of randomness in token
-         selection. Defaults to 0.0.
-     :param max_output_tokens: Token limit determines the maximum amount of text
-         output. Defaults to 256.
-     :param top_p: Tokens are selected from most probable to least until the sum
-         of their probabilities equals the top_p value. Defaults to 0.8.
-     :param top_k: A top_k of 1 means the selected token is the most probable
-         among all tokens. Defaults to 0.4.
-     :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
-     :param impersonation_chain: Optional service account to impersonate using short-term
-         credentials, or chained list of accounts required to get the access_token
-         of the last account in the list, which will be impersonated in the request.
-         If set as a string, the account must grant the originating account
-         the Service Account Token Creator IAM role.
-         If set as a sequence, the identities from the list must grant
-         Service Account Token Creator IAM role to the directly preceding identity, with first
-         account from the list granting this role to the originating account (templated).
-     """
-
-     template_fields = ("location", "project_id", "impersonation_chain", "prompt")
-
-     def __init__(
-         self,
-         *,
-         project_id: str,
-         location: str,
-         prompt: str,
-         pretrained_model: str = "text-bison",
-         temperature: float = 0.0,
-         max_output_tokens: int = 256,
-         top_p: float = 0.8,
-         top_k: int = 40,
-         gcp_conn_id: str = "google_cloud_default",
-         impersonation_chain: str | Sequence[str] | None = None,
-         **kwargs,
-     ) -> None:
-         super().__init__(**kwargs)
-         self.project_id = project_id
-         self.location = location
-         self.prompt = prompt
-         self.pretrained_model = pretrained_model
-         self.temperature = temperature
-         self.max_output_tokens = max_output_tokens
-         self.top_p = top_p
-         self.top_k = top_k
-         self.gcp_conn_id = gcp_conn_id
-         self.impersonation_chain = impersonation_chain
-
-     def execute(self, context: Context):
-         self.hook = GenerativeModelHook(
-             gcp_conn_id=self.gcp_conn_id,
-             impersonation_chain=self.impersonation_chain,
-         )
-
-         self.log.info("Submitting prompt")
-         response = self.hook.text_generation_model_predict(
-             project_id=self.project_id,
-             location=self.location,
-             prompt=self.prompt,
-             pretrained_model=self.pretrained_model,
-             temperature=self.temperature,
-             max_output_tokens=self.max_output_tokens,
-             top_p=self.top_p,
-             top_k=self.top_k,
-         )
-
-         self.log.info("Model response: %s", response)
-         self.xcom_push(context, key="model_response", value=response)
-
-         return response
-
-
  class TextEmbeddingModelGetEmbeddingsOperator(GoogleCloudBaseOperator):
      """
      Uses the Vertex AI Embeddings API to generate embeddings based on prompt.
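
The removed operator's deprecation notice pointed to GenerativeModelGenerateContentOperator as its replacement. A hedged migration sketch; the contents and pretrained_model parameter names mirror the Vertex AI generate_content API, but verify them against the operator's current signature:

    from airflow.providers.google.cloud.operators.vertex_ai.generative_model import (
        GenerativeModelGenerateContentOperator,
    )

    generate = GenerativeModelGenerateContentOperator(
        task_id="generate_text",
        project_id="my-project",  # hypothetical
        location="us-central1",
        contents=["Summarize Romeo and Juliet in one sentence."],  # the prompt moves into contents
        pretrained_model="gemini-pro",  # replaces the retired PaLM text-bison model
    )
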
@@ -112,6 +112,10 @@ class RunPipelineJobOperator(GoogleCloudBaseOperator):
      "project_id",
      "input_artifacts",
      "impersonation_chain",
+     "template_path",
+     "pipeline_root",
+     "parameter_values",
+     "service_account",
  ]
  operator_extra_links = (VertexAIPipelineJobLink(),)

@@ -25,6 +25,7 @@ from typing import TYPE_CHECKING
  from airflow.models import BaseOperator
  from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
  from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

  if TYPE_CHECKING:
      from airflow.utils.context import Context
@@ -73,6 +74,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
          If set as a sequence, the identities from the list must grant
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = (
@@ -93,6 +95,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
          write_disposition: str = "WRITE_EMPTY",
          create_disposition: str = "CREATE_IF_NEEDED",
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          labels: dict | None = None,
          encryption_configuration: dict | None = None,
          location: str | None = None,
@@ -112,6 +115,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
          self.impersonation_chain = impersonation_chain
          self.hook: BigQueryHook | None = None
          self._job_conf: dict = {}
+         self.project_id = project_id

      def _prepare_job_configuration(self):
          self.source_project_dataset_tables = (
@@ -124,7 +128,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
          for source_project_dataset_table in self.source_project_dataset_tables:
              source_project, source_dataset, source_table = self.hook.split_tablename(
                  table_input=source_project_dataset_table,
-                 default_project_id=self.hook.project_id,
+                 default_project_id=self.project_id,
                  var_name="source_project_dataset_table",
              )
              source_project_dataset_tables_fixup.append(
@@ -133,7 +137,7 @@ class BigQueryToBigQueryOperator(BaseOperator):

          destination_project, destination_dataset, destination_table = self.hook.split_tablename(
              table_input=self.destination_project_dataset_table,
-             default_project_id=self.hook.project_id,
+             default_project_id=self.project_id,
          )
          configuration = {
              "copy": {
@@ -168,12 +172,12 @@ class BigQueryToBigQueryOperator(BaseOperator):
              impersonation_chain=self.impersonation_chain,
          )

-         if not self.hook.project_id:
-             raise ValueError("The project_id should be set")
+         if not self.project_id:
+             self.project_id = self.hook.project_id

          configuration = self._prepare_job_configuration()
          self._job_conf = self.hook.insert_job(
-             configuration=configuration, project_id=self.hook.project_id
+             configuration=configuration, project_id=self.project_id
          ).to_api_repr()

          dest_table_info = self._job_conf["configuration"]["copy"]["destinationTable"]
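
The copy operator follows the same pattern as the BigQuery check operators: an explicit project_id wins, and otherwise the hook's connection-derived project is used instead of raising ValueError as before. A usage sketch (identifiers hypothetical):

    from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import BigQueryToBigQueryOperator

    copy_table = BigQueryToBigQueryOperator(
        task_id="copy_table",
        source_project_dataset_tables="my_dataset.src_table",  # hypothetical
        destination_project_dataset_table="my_dataset.dst_table",  # hypothetical
        project_id="my-billing-project",  # new: where the copy job runs
    )
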
@@ -208,7 +208,7 @@ class FacebookAdsReportToGcsOperator(BaseOperator):

      def _flush_rows(self, converted_rows: list[Any] | None, object_name: str):
          if converted_rows:
-             headers = converted_rows[0].keys()
+             headers = self.fields
              with tempfile.NamedTemporaryFile("w", suffix=".csv") as csvfile:
                  writer = csv.DictWriter(csvfile, fieldnames=headers)
                  writer.writeheader()
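
Why this one-line fix matters: csv.DictWriter takes its header from fieldnames, so seeding it from the first row silently drops any requested field that row happens to lack, and then raises ValueError on a later row that does contain it. A runnable sketch of the corrected behavior:

    import csv
    import io

    rows = [{"campaign_name": "A"}, {"campaign_name": "B", "clicks": 10}]
    fields = ["campaign_name", "clicks"]  # what self.fields would hold

    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=fields)  # header built from the requested fields
    writer.writeheader()
    writer.writerows(rows)  # missing keys are filled with the empty-string restval
    print(buf.getvalue())
    # campaign_name,clicks
    # A,
    # B,10
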