apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl

This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
Files changed (91)
  1. airflow/providers/google/__init__.py +5 -8
  2. airflow/providers/google/cloud/hooks/automl.py +35 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +126 -41
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
  19. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  20. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  21. airflow/providers/google/cloud/operators/automl.py +243 -37
  22. airflow/providers/google/cloud/operators/bigquery.py +164 -62
  23. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  24. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  25. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  26. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  27. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  28. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  29. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  30. airflow/providers/google/cloud/operators/compute.py +12 -11
  31. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  32. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  33. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  34. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  35. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  36. airflow/providers/google/cloud/operators/dataproc.py +20 -17
  37. airflow/providers/google/cloud/operators/datastore.py +8 -7
  38. airflow/providers/google/cloud/operators/dlp.py +31 -30
  39. airflow/providers/google/cloud/operators/functions.py +4 -3
  40. airflow/providers/google/cloud/operators/gcs.py +66 -41
  41. airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
  42. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  43. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  44. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  45. airflow/providers/google/cloud/operators/spanner.py +7 -6
  46. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  47. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  48. airflow/providers/google/cloud/operators/tasks.py +14 -13
  49. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  51. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  52. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  53. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  54. airflow/providers/google/cloud/operators/vision.py +13 -12
  55. airflow/providers/google/cloud/operators/workflows.py +12 -14
  56. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  58. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  59. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  60. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  61. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  62. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  63. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  64. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  65. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  66. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  67. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  71. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  72. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  73. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  74. airflow/providers/google/cloud/triggers/bigquery.py +75 -6
  75. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  76. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  77. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  78. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  79. airflow/providers/google/cloud/triggers/dataproc.py +190 -27
  80. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
  81. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  82. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  83. airflow/providers/google/common/hooks/base_google.py +45 -7
  84. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  85. airflow/providers/google/firebase/operators/firestore.py +2 -1
  86. airflow/providers/google/get_provider_info.py +5 -3
  87. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
  89. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  90. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
  91. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0

airflow/providers/google/cloud/operators/dataflow.py
@@ -41,6 +41,8 @@ from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.dataflow import DataflowJobLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataflow import TemplateJobStartTrigger
+from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.version import version

 if TYPE_CHECKING:
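
The two new imports drive the recurring themes of this release: `PROVIDE_PROJECT_ID` replaces the `project_id: str | None = None` defaults across the provider, and `GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME` standardizes the deferral callback name. A minimal sketch of the sentinel pattern, assuming a hypothetical `MyGcsOperator` (only the import is from the provider):

```python
# Minimal sketch of the PROVIDE_PROJECT_ID sentinel pattern.
# MyGcsOperator is hypothetical, not part of the provider.
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID


class MyGcsOperator:
    def __init__(self, project_id: str = PROVIDE_PROJECT_ID) -> None:
        # The sentinel is annotated as `str`, so type checkers stop flagging
        # `project_id` as possibly-None, while at runtime it still acts as a
        # "not provided" marker that the hook replaces with the project ID
        # configured on the GCP connection.
        self.project_id = project_id
```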
@@ -142,7 +144,7 @@ class DataflowConfiguration:
         *,
         job_name: str = "{{task.task_id}}",
         append_job_name: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -348,7 +350,7 @@ class DataflowCreateJavaJobOperator(GoogleCloudBaseOperator):
         job_name: str = "{{task.task_id}}",
         dataflow_default_options: dict | None = None,
         options: dict | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -459,7 +461,7 @@ class DataflowCreateJavaJobOperator(GoogleCloudBaseOperator):

 class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
     """
-    Start a Templated Cloud Dataflow job; the parameters of the operation will be passed to the job.
+    Start a Dataflow job with a classic template; the parameters of the operation will be passed to the job.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -606,7 +608,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         self,
         *,
         template: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_name: str = "{{task.task_id}}",
         options: dict[str, Any] | None = None,
         dataflow_default_options: dict[str, Any] | None = None,
@@ -642,7 +644,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         self.deferrable = deferrable
         self.expected_terminal_state = expected_terminal_state

-        self.job: dict | None = None
+        self.job: dict[str, str] | None = None

         self._validate_deferrable_params()

@@ -680,29 +682,34 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         if not self.location:
             self.location = DEFAULT_DATAFLOW_LOCATION

-        self.job = self.hook.start_template_dataflow(
+        if not self.deferrable:
+            self.job = self.hook.start_template_dataflow(
+                job_name=self.job_name,
+                variables=options,
+                parameters=self.parameters,
+                dataflow_template=self.template,
+                on_new_job_callback=set_current_job,
+                project_id=self.project_id,
+                location=self.location,
+                environment=self.environment,
+                append_job_name=self.append_job_name,
+            )
+            job_id = self.hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            return job_id
+
+        self.job = self.hook.launch_job_with_template(
             job_name=self.job_name,
             variables=options,
             parameters=self.parameters,
             dataflow_template=self.template,
-            on_new_job_callback=set_current_job,
             project_id=self.project_id,
+            append_job_name=self.append_job_name,
             location=self.location,
             environment=self.environment,
-            append_job_name=self.append_job_name,
         )
-        job_id = self.job.get("id")
-
-        if job_id is None:
-            raise AirflowException(
-                "While reading job object after template execution error occurred. Job object has no id."
-            )
-
-        if not self.deferrable:
-            return job_id
-
-        context["ti"].xcom_push(key="job_id", value=job_id)
-
+        job_id = self.hook.extract_job_id(self.job)
+        DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
@@ -713,16 +720,17 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
                 impersonation_chain=self.impersonation_chain,
                 cancel_timeout=self.cancel_timeout,
             ),
-            method_name="execute_complete",
+            method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
         )

-    def execute_complete(self, context: Context, event: dict[str, Any]):
+    def execute_complete(self, context: Context, event: dict[str, Any]) -> str:
         """Execute after trigger finishes its work."""
         if event["status"] in ("error", "stopped"):
             self.log.info("status: %s, msg: %s", event["status"], event["message"])
             raise AirflowException(event["message"])

         job_id = event["job_id"]
+        self.xcom_push(context, key="job_id", value=job_id)
         self.log.info("Task %s completed with response %s", self.task_id, event["message"])
         return job_id

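After this rewrite the deferrable path launches the template without blocking, persists the job link, and hands polling to `TemplateJobStartTrigger`; `execute_complete` now pushes `job_id` to XCom, so downstream tasks see it in both modes. A usage sketch with placeholder template, project, region, and bucket values:

```python
# Hedged usage sketch; template path, project, region, and bucket are placeholders.
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator

start_template_job = DataflowTemplatedJobStartOperator(
    task_id="start_template_job",
    template="gs://dataflow-templates/latest/Word_Count",
    project_id="my-project",
    location="us-central1",
    parameters={
        "inputFile": "gs://my-bucket/input.txt",
        "output": "gs://my-bucket/output",
    },
    deferrable=True,  # launch, then let the triggerer poll for the terminal state
)
# Downstream tasks can read the pushed job id:
#   {{ ti.xcom_pull(task_ids="start_template_job", key="job_id") }}
```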
@@ -740,7 +748,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):

 class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
     """
-    Starts flex templates with the Dataflow pipeline.
+    Starts a Dataflow Job with a Flex Template.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -802,6 +810,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
     :param expected_terminal_state: The expected final status of the operator on which the corresponding
         Airflow task succeeds. When not specified, it will be determined by the hook.
     :param append_job_name: True if unique suffix has to be appended to job name.
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
     """

     template_fields: Sequence[str] = ("body", "location", "project_id", "gcp_conn_id")
@@ -811,7 +822,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         self,
         body: dict,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         drain_pipeline: bool = False,
         cancel_timeout: int | None = 10 * 60,
@@ -820,6 +831,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         append_job_name: bool = True,
         expected_terminal_state: str | None = None,
+        poll_sleep: int = 10,
         *args,
         **kwargs,
     ) -> None:
@@ -831,11 +843,12 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         self.drain_pipeline = drain_pipeline
         self.cancel_timeout = cancel_timeout
         self.wait_until_finished = wait_until_finished
-        self.job: dict | None = None
+        self.job: dict[str, str] | None = None
         self.impersonation_chain = impersonation_chain
         self.deferrable = deferrable
         self.expected_terminal_state = expected_terminal_state
         self.append_job_name = append_job_name
+        self.poll_sleep = poll_sleep

         self._validate_deferrable_params()

@@ -870,32 +883,35 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
             self.job = current_job
             DataflowJobLink.persist(self, context, self.project_id, self.location, self.job.get("id"))

-        self.job = self.hook.start_flex_template(
+        if not self.deferrable:
+            self.job = self.hook.start_flex_template(
+                body=self.body,
+                location=self.location,
+                project_id=self.project_id,
+                on_new_job_callback=set_current_job,
+            )
+            job_id = self.hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            return self.job
+
+        self.job = self.hook.launch_job_with_flex_template(
             body=self.body,
             location=self.location,
             project_id=self.project_id,
-            on_new_job_callback=set_current_job,
         )
-
-        job_id = self.job.get("id")
-        if job_id is None:
-            raise AirflowException(
-                "While reading job object after template execution error occurred. Job object has no id."
-            )
-
-        if not self.deferrable:
-            return self.job
-
+        job_id = self.hook.extract_job_id(self.job)
+        DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
                 job_id=job_id,
                 location=self.location,
                 gcp_conn_id=self.gcp_conn_id,
+                poll_sleep=self.poll_sleep,
                 impersonation_chain=self.impersonation_chain,
                 cancel_timeout=self.cancel_timeout,
             ),
-            method_name="execute_complete",
+            method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
         )

     def _append_uuid_to_job_name(self):
@@ -906,7 +922,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         job_body["jobName"] = job_name
         self.log.info("Job name was changed to %s", job_name)

-    def execute_complete(self, context: Context, event: dict):
+    def execute_complete(self, context: Context, event: dict) -> dict[str, str]:
         """Execute after trigger finishes its work."""
         if event["status"] in ("error", "stopped"):
             self.log.info("status: %s, msg: %s", event["status"], event["message"])
@@ -914,6 +930,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):

         job_id = event["job_id"]
         self.log.info("Task %s completed with response %s", job_id, event["message"])
+        self.xcom_push(context, key="job_id", value=job_id)
         job = self.hook.get_job(job_id=job_id, project_id=self.project_id, location=self.location)
         return job

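`DataflowStartFlexTemplateOperator` gets the same split: the synchronous branch keeps `start_flex_template`, while the deferrable branch calls `launch_job_with_flex_template` and defers, forwarding the new `poll_sleep` to the trigger. A sketch with a placeholder Flex Template spec:

```python
# Hedged sketch; the bucket, template spec path, and project are placeholders.
from airflow.providers.google.cloud.operators.dataflow import DataflowStartFlexTemplateOperator

start_flex_job = DataflowStartFlexTemplateOperator(
    task_id="start_flex_job",
    project_id="my-project",
    location="us-central1",
    body={
        "launchParameter": {
            "jobName": "flex-example",
            "containerSpecGcsPath": "gs://my-bucket/templates/spec.json",
            "parameters": {"output": "gs://my-bucket/output"},
        }
    },
    deferrable=True,
    poll_sleep=30,  # new in 10.18.0: seconds between status polls while deferred
)
```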
@@ -982,7 +999,7 @@ class DataflowStartSqlJobOperator(GoogleCloudBaseOperator):
         query: str,
         options: dict[str, Any],
         location: str = DEFAULT_DATAFLOW_LOCATION,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         drain_pipeline: bool = False,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -1150,7 +1167,7 @@ class DataflowCreatePythonJobOperator(GoogleCloudBaseOperator):
         py_options: list[str] | None = None,
         py_requirements: list[str] | None = None,
         py_system_site_packages: bool = False,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -1297,7 +1314,7 @@ class DataflowStopJobOperator(GoogleCloudBaseOperator):
         self,
         job_name_prefix: str | None = None,
         job_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,

airflow/providers/google/cloud/operators/datafusion.py
@@ -37,6 +37,7 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.cloud.triggers.datafusion import DataFusionStartPipelineTrigger
 from airflow.providers.google.cloud.utils.datafusion import DataFusionPipelineType
 from airflow.providers.google.cloud.utils.helpers import resource_path_to_dict
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -97,7 +98,7 @@ class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -169,7 +170,7 @@ class CloudDataFusionDeleteInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -237,7 +238,7 @@ class CloudDataFusionCreateInstanceOperator(GoogleCloudBaseOperator):
         instance_name: str,
         instance: dict[str, Any],
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -340,7 +341,7 @@ class CloudDataFusionUpdateInstanceOperator(GoogleCloudBaseOperator):
         instance: dict[str, Any],
         update_mask: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -417,7 +418,7 @@ class CloudDataFusionGetInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -499,7 +500,7 @@ class CloudDataFusionCreatePipelineOperator(GoogleCloudBaseOperator):
         instance_name: str,
         location: str,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -587,7 +588,7 @@ class CloudDataFusionDeletePipelineOperator(GoogleCloudBaseOperator):
         location: str,
         version_id: str | None = None,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -670,7 +671,7 @@ class CloudDataFusionListPipelinesOperator(GoogleCloudBaseOperator):
         artifact_name: str | None = None,
         artifact_version: str | None = None,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -777,7 +778,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
         success_states: list[str] | None = None,
         namespace: str = "default",
         pipeline_timeout: int = 5 * 60,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -929,7 +930,7 @@ class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
         instance_name: str,
         location: str,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,

airflow/providers/google/cloud/operators/datapipeline.py
@@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.datapipeline import DEFAULT_DATAPIPELINE_LOCATION, DataPipelineHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -58,7 +59,7 @@ class CreateDataPipelineOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAPIPELINE_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -120,7 +121,7 @@ class RunDataPipelineOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         data_pipeline_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAPIPELINE_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         **kwargs,

airflow/providers/google/cloud/operators/dataprep.py
@@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.providers.google.cloud.hooks.dataprep import GoogleDataprepHook
 from airflow.providers.google.cloud.links.dataprep import DataprepFlowLink, DataprepJobGroupLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -92,7 +93,7 @@ class DataprepGetJobGroupOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataprep_conn_id: str = "dataprep_default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_group_id: int | str,
         embed: str,
         include_deleted: bool,
@@ -149,7 +150,7 @@ class DataprepRunJobGroupOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataprep_conn_id: str = "dataprep_default",
         body_request: dict,
         **kwargs,
@@ -198,7 +199,7 @@ class DataprepCopyFlowOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataprep_conn_id: str = "dataprep_default",
         flow_id: int | str,
         name: str = "",
@@ -280,7 +281,7 @@ class DataprepRunFlowOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         flow_id: int | str,
         body_request: dict,
         dataprep_conn_id: str = "dataprep_default",

airflow/providers/google/cloud/operators/dataproc.py
@@ -63,6 +63,7 @@ from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocSubmitTrigger,
 )
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils import timezone

 if TYPE_CHECKING:
@@ -627,7 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         *,
         cluster_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_config: dict | Cluster | None = None,
         virtual_cluster_config: dict | None = None,
         labels: dict | None = None,
@@ -815,6 +816,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
                 gcp_conn_id=self.gcp_conn_id,
                 impersonation_chain=self.impersonation_chain,
                 polling_interval_seconds=self.polling_interval_seconds,
+                delete_on_error=self.delete_on_error,
             ),
             method_name="execute_complete",
         )
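
Passing `delete_on_error` through to the trigger means a deferrable cluster create that ends in the `ERROR` state can now be cleaned up from the trigger side as well. A sketch with placeholder project and cluster values:

```python
# Hedged sketch; project, region, and machine types are placeholders.
from airflow.providers.google.cloud.operators.dataproc import DataprocCreateClusterOperator

create_cluster = DataprocCreateClusterOperator(
    task_id="create_cluster",
    project_id="my-project",
    region="us-central1",
    cluster_name="example-cluster",
    cluster_config={
        "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
        "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
    },
    deferrable=True,
    delete_on_error=True,  # now forwarded to the trigger on the deferred path
)
```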
@@ -928,7 +930,7 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
         self,
         *,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         region: str = "global",
         num_workers: int = 2,
         num_preemptible_workers: int = 0,
@@ -1047,7 +1049,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         *,
         region: str,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
@@ -1173,7 +1175,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
         *,
         cluster_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
@@ -1372,7 +1374,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         region: str,
         job_name: str = "{{task.task_id}}_{{ds_nodash}}",
         cluster_name: str = "cluster-1",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataproc_properties: dict | None = None,
         dataproc_jars: list[str] | None = None,
         gcp_conn_id: str = "google_cloud_default",
@@ -2021,7 +2023,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):

     @staticmethod
     def _generate_temp_filename(filename):
-        return f"{time:%Y%m%d%H%M%S}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"
+        return f"{time.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"

     def _upload_file_temp(self, bucket, local_file):
         """Upload a local file to a Google Cloud Storage bucket."""
@@ -2135,7 +2137,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         *,
         template: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -2229,7 +2231,7 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         version: int | None = None,
         request_id: str | None = None,
         parameters: dict[str, str] | None = None,
@@ -2376,7 +2378,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         *,
         template: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
@@ -2513,7 +2515,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         *,
         job: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         request_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
@@ -2590,6 +2592,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
                 gcp_conn_id=self.gcp_conn_id,
                 impersonation_chain=self.impersonation_chain,
                 polling_interval_seconds=self.polling_interval_seconds,
+                cancel_on_kill=self.cancel_on_kill,
             ),
             method_name="execute_complete",
         )
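
Symmetrically, `DataprocSubmitJobOperator` now forwards `cancel_on_kill` to its trigger, so killing the Airflow task while deferred can also cancel the Dataproc job. A sketch with placeholder values:

```python
# Hedged sketch; project, region, and the job payload are placeholders.
from airflow.providers.google.cloud.operators.dataproc import DataprocSubmitJobOperator

submit_job = DataprocSubmitJobOperator(
    task_id="submit_job",
    project_id="my-project",
    region="us-central1",
    job={
        "placement": {"cluster_name": "example-cluster"},
        "pyspark_job": {"main_python_file_uri": "gs://my-bucket/jobs/job.py"},
    },
    deferrable=True,
    cancel_on_kill=True,  # now forwarded to the trigger on the deferred path
)
```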
@@ -2682,7 +2685,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
         graceful_decommission_timeout: dict | Duration,
         region: str,
         request_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -2816,7 +2819,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
         *,
         region: str,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         tarball_gcs_dir: str | None = None,
         diagnosis_interval: dict | Interval | None = None,
         jobs: MutableSequence[str] | None = None,
@@ -2954,7 +2957,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         self,
         *,
         region: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         batch: dict | Batch,
         batch_id: str,
         request_id: str | None = None,
@@ -3146,7 +3149,7 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -3210,7 +3213,7 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -3285,7 +3288,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
         self,
         *,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         page_token: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -3358,7 +3361,7 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
         *,
         operation_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

airflow/providers/google/cloud/operators/datastore.py
@@ -29,6 +29,7 @@ from airflow.providers.google.cloud.links.datastore import (
     CloudDatastoreImportExportLink,
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.google.common.links.storage import StorageLink

 if TYPE_CHECKING:
@@ -90,7 +91,7 @@ class CloudDatastoreExportEntitiesOperator(GoogleCloudBaseOperator):
         labels: dict | None = None,
         polling_interval_in_seconds: int = 10,
         overwrite_existing: bool = False,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
@@ -194,7 +195,7 @@ class CloudDatastoreImportEntitiesOperator(GoogleCloudBaseOperator):
         labels: dict | None = None,
         datastore_conn_id: str = "google_cloud_default",
         polling_interval_in_seconds: float = 10,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
@@ -268,7 +269,7 @@ class CloudDatastoreAllocateIdsOperator(GoogleCloudBaseOperator):
         self,
         *,
         partial_keys: list,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -326,7 +327,7 @@ class CloudDatastoreBeginTransactionOperator(GoogleCloudBaseOperator):
         self,
         *,
         transaction_options: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -384,7 +385,7 @@ class CloudDatastoreCommitOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -442,7 +443,7 @@ class CloudDatastoreRollbackOperator(GoogleCloudBaseOperator):
         self,
         *,
         transaction: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -498,7 +499,7 @@ class CloudDatastoreRunQueryOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,