apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
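The change that recurs across nearly every operator file below is the replacement of optional project_id parameters with the PROVIDE_PROJECT_ID sentinel imported from airflow.providers.google.common.hooks.base_google. A minimal sketch of the pattern, with an illustrative helper and project name standing in for the real hook machinery (the provider defines the sentinel roughly as shown):

    from typing import cast

    # The sentinel is None at runtime but typed as str, which lets operator
    # signatures declare project_id as plain str while still allowing omission.
    PROVIDE_PROJECT_ID: str = cast(str, None)

    def start_job(project_id: str = PROVIDE_PROJECT_ID) -> str:
        # Hook methods perform roughly this substitution, falling back to the
        # default project configured on the Google Cloud connection.
        resolved = project_id or "default-project-from-connection"
        return f"starting job in {resolved}"

    print(start_job())                    # -> starting job in default-project-from-connection
    print(start_job("explicit-project"))  # -> starting job in explicit-project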
airflow/providers/google/cloud/operators/dataflow.py

@@ -41,6 +41,8 @@ from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.dataflow import DataflowJobLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataflow import TemplateJobStartTrigger
+from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.version import version
 
 if TYPE_CHECKING:
@@ -142,7 +144,7 @@ class DataflowConfiguration:
         *,
         job_name: str = "{{task.task_id}}",
         append_job_name: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -348,7 +350,7 @@ class DataflowCreateJavaJobOperator(GoogleCloudBaseOperator):
         job_name: str = "{{task.task_id}}",
         dataflow_default_options: dict | None = None,
         options: dict | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -459,7 +461,7 @@ class DataflowCreateJavaJobOperator(GoogleCloudBaseOperator):
 
 class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
     """
-    Start a Templated Cloud Dataflow job; the parameters of the operation will be passed to the job.
+    Start a Dataflow job with a classic template; the parameters of the operation will be passed to the job.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -606,7 +608,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         self,
         *,
         template: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_name: str = "{{task.task_id}}",
         options: dict[str, Any] | None = None,
         dataflow_default_options: dict[str, Any] | None = None,
@@ -642,7 +644,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         self.deferrable = deferrable
         self.expected_terminal_state = expected_terminal_state
 
-        self.job: dict | None = None
+        self.job: dict[str, str] | None = None
 
         self._validate_deferrable_params()
 
@@ -680,29 +682,34 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
         if not self.location:
            self.location = DEFAULT_DATAFLOW_LOCATION
 
-        self.job = self.hook.start_template_dataflow(
+        if not self.deferrable:
+            self.job = self.hook.start_template_dataflow(
+                job_name=self.job_name,
+                variables=options,
+                parameters=self.parameters,
+                dataflow_template=self.template,
+                on_new_job_callback=set_current_job,
+                project_id=self.project_id,
+                location=self.location,
+                environment=self.environment,
+                append_job_name=self.append_job_name,
+            )
+            job_id = self.hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            return job_id
+
+        self.job = self.hook.launch_job_with_template(
             job_name=self.job_name,
             variables=options,
             parameters=self.parameters,
             dataflow_template=self.template,
-            on_new_job_callback=set_current_job,
             project_id=self.project_id,
+            append_job_name=self.append_job_name,
             location=self.location,
             environment=self.environment,
-            append_job_name=self.append_job_name,
         )
-        job_id = self.job.get("id")
-
-        if job_id is None:
-            raise AirflowException(
-                "While reading job object after template execution error occurred. Job object has no id."
-            )
-
-        if not self.deferrable:
-            return job_id
-
-        context["ti"].xcom_push(key="job_id", value=job_id)
-
+        job_id = self.hook.extract_job_id(self.job)
+        DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
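The hunk above splits execute into an early-returning synchronous path (start_template_dataflow) and a deferrable path (launch_job_with_template) that hands polling to TemplateJobStartTrigger, with both paths now pushing the job id to XCom via extract_job_id. A usage sketch under assumed names, inside a DAG body; the Word_Count template path, bucket names, and parameters are illustrative:

    from airflow.providers.google.cloud.operators.dataflow import (
        DataflowTemplatedJobStartOperator,
    )

    start_template_job = DataflowTemplatedJobStartOperator(
        task_id="start_template_job",
        template="gs://dataflow-templates/latest/Word_Count",
        location="europe-west3",
        parameters={
            "inputFile": "gs://my-bucket/input.txt",
            "output": "gs://my-bucket/output",
        },
        # With deferrable=True the task now launches via launch_job_with_template
        # and frees its worker slot; TemplateJobStartTrigger polls until done.
        deferrable=True,
    )

Downstream tasks can read the id with ti.xcom_pull(task_ids="start_template_job", key="job_id"), whether or not the task deferred.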
@@ -713,16 +720,17 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
                 impersonation_chain=self.impersonation_chain,
                 cancel_timeout=self.cancel_timeout,
             ),
-            method_name="execute_complete",
+            method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
         )
 
-    def execute_complete(self, context: Context, event: dict[str, Any]):
+    def execute_complete(self, context: Context, event: dict[str, Any]) -> str:
         """Execute after trigger finishes its work."""
         if event["status"] in ("error", "stopped"):
             self.log.info("status: %s, msg: %s", event["status"], event["message"])
             raise AirflowException(event["message"])
 
         job_id = event["job_id"]
+        self.xcom_push(context, key="job_id", value=job_id)
         self.log.info("Task %s completed with response %s", self.task_id, event["message"])
         return job_id
 
@@ -740,7 +748,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
 
 class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
     """
-    Starts flex templates with the Dataflow pipeline.
+    Starts a Dataflow Job with a Flex Template.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -802,6 +810,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
     :param expected_terminal_state: The expected final status of the operator on which the corresponding
         Airflow task succeeds. When not specified, it will be determined by the hook.
     :param append_job_name: True if unique suffix has to be appended to job name.
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
     """
 
     template_fields: Sequence[str] = ("body", "location", "project_id", "gcp_conn_id")
@@ -811,7 +822,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         self,
         body: dict,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         drain_pipeline: bool = False,
         cancel_timeout: int | None = 10 * 60,
@@ -820,6 +831,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         append_job_name: bool = True,
         expected_terminal_state: str | None = None,
+        poll_sleep: int = 10,
         *args,
         **kwargs,
     ) -> None:
@@ -831,11 +843,12 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         self.drain_pipeline = drain_pipeline
         self.cancel_timeout = cancel_timeout
         self.wait_until_finished = wait_until_finished
-        self.job: dict | None = None
+        self.job: dict[str, str] | None = None
         self.impersonation_chain = impersonation_chain
         self.deferrable = deferrable
         self.expected_terminal_state = expected_terminal_state
         self.append_job_name = append_job_name
+        self.poll_sleep = poll_sleep
 
         self._validate_deferrable_params()
 
@@ -870,32 +883,35 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
             self.job = current_job
             DataflowJobLink.persist(self, context, self.project_id, self.location, self.job.get("id"))
 
-        self.job = self.hook.start_flex_template(
+        if not self.deferrable:
+            self.job = self.hook.start_flex_template(
+                body=self.body,
+                location=self.location,
+                project_id=self.project_id,
+                on_new_job_callback=set_current_job,
+            )
+            job_id = self.hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            return self.job
+
+        self.job = self.hook.launch_job_with_flex_template(
             body=self.body,
             location=self.location,
             project_id=self.project_id,
-            on_new_job_callback=set_current_job,
         )
-
-        job_id = self.job.get("id")
-        if job_id is None:
-            raise AirflowException(
-                "While reading job object after template execution error occurred. Job object has no id."
-            )
-
-        if not self.deferrable:
-            return self.job
-
+        job_id = self.hook.extract_job_id(self.job)
+        DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
         self.defer(
             trigger=TemplateJobStartTrigger(
                 project_id=self.project_id,
                 job_id=job_id,
                 location=self.location,
                 gcp_conn_id=self.gcp_conn_id,
+                poll_sleep=self.poll_sleep,
                 impersonation_chain=self.impersonation_chain,
                 cancel_timeout=self.cancel_timeout,
             ),
-            method_name="execute_complete",
+            method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
         )
 
     def _append_uuid_to_job_name(self):
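DataflowStartFlexTemplateOperator.execute gets the same non-deferrable/deferrable split, and the new poll_sleep option is forwarded to the trigger. An illustrative launch inside a DAG body, with a placeholder container spec path and parameters:

    from airflow.providers.google.cloud.operators.dataflow import (
        DataflowStartFlexTemplateOperator,
    )

    start_flex_job = DataflowStartFlexTemplateOperator(
        task_id="start_flex_job",
        location="us-central1",
        body={
            "launchParameter": {
                "jobName": "example-flex-job",
                "containerSpecGcsPath": "gs://my-bucket/templates/streaming_beam.json",
                "parameters": {"output": "gs://my-bucket/output"},
            }
        },
        deferrable=True,
        poll_sleep=30,  # new in 10.18.0: forwarded to TemplateJobStartTrigger
    )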
@@ -906,7 +922,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
         job_body["jobName"] = job_name
         self.log.info("Job name was changed to %s", job_name)
 
-    def execute_complete(self, context: Context, event: dict):
+    def execute_complete(self, context: Context, event: dict) -> dict[str, str]:
         """Execute after trigger finishes its work."""
         if event["status"] in ("error", "stopped"):
             self.log.info("status: %s, msg: %s", event["status"], event["message"])
@@ -914,6 +930,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
 
         job_id = event["job_id"]
         self.log.info("Task %s completed with response %s", job_id, event["message"])
+        self.xcom_push(context, key="job_id", value=job_id)
         job = self.hook.get_job(job_id=job_id, project_id=self.project_id, location=self.location)
         return job
 
@@ -982,7 +999,7 @@ class DataflowStartSqlJobOperator(GoogleCloudBaseOperator):
         query: str,
         options: dict[str, Any],
         location: str = DEFAULT_DATAFLOW_LOCATION,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         drain_pipeline: bool = False,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -1150,7 +1167,7 @@ class DataflowCreatePythonJobOperator(GoogleCloudBaseOperator):
         py_options: list[str] | None = None,
         py_requirements: list[str] | None = None,
         py_system_site_packages: bool = False,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
@@ -1297,7 +1314,7 @@ class DataflowStopJobOperator(GoogleCloudBaseOperator):
         self,
         job_name_prefix: str | None = None,
         job_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         poll_sleep: int = 10,
airflow/providers/google/cloud/operators/datafusion.py

@@ -37,6 +37,7 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.datafusion import DataFusionStartPipelineTrigger
 from airflow.providers.google.cloud.utils.datafusion import DataFusionPipelineType
 from airflow.providers.google.cloud.utils.helpers import resource_path_to_dict
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -97,7 +98,7 @@ class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -169,7 +170,7 @@ class CloudDataFusionDeleteInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -237,7 +238,7 @@ class CloudDataFusionCreateInstanceOperator(GoogleCloudBaseOperator):
         instance_name: str,
         instance: dict[str, Any],
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -340,7 +341,7 @@ class CloudDataFusionUpdateInstanceOperator(GoogleCloudBaseOperator):
         instance: dict[str, Any],
         update_mask: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -417,7 +418,7 @@ class CloudDataFusionGetInstanceOperator(GoogleCloudBaseOperator):
         *,
         instance_name: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -499,7 +500,7 @@ class CloudDataFusionCreatePipelineOperator(GoogleCloudBaseOperator):
         instance_name: str,
         location: str,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -587,7 +588,7 @@ class CloudDataFusionDeletePipelineOperator(GoogleCloudBaseOperator):
         location: str,
         version_id: str | None = None,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -670,7 +671,7 @@ class CloudDataFusionListPipelinesOperator(GoogleCloudBaseOperator):
         artifact_name: str | None = None,
         artifact_version: str | None = None,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -777,7 +778,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
         success_states: list[str] | None = None,
         namespace: str = "default",
         pipeline_timeout: int = 5 * 60,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -929,7 +930,7 @@ class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
         instance_name: str,
         location: str,
         namespace: str = "default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         api_version: str = "v1beta1",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
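For all of the Data Fusion operators above the effect is the same: project_id no longer has to be passed explicitly, since the PROVIDE_PROJECT_ID default lets the hook resolve it from the Google Cloud connection. A sketch with placeholder instance and pipeline names:

    from airflow.providers.google.cloud.operators.datafusion import (
        CloudDataFusionStartPipelineOperator,
    )

    start_pipeline = CloudDataFusionStartPipelineOperator(
        task_id="start_pipeline",
        pipeline_name="my_pipeline",
        instance_name="my-datafusion-instance",
        location="europe-west1",
        # project_id omitted: falls back to the connection's default project
    )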
airflow/providers/google/cloud/operators/datapipeline.py

@@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.datapipeline import DEFAULT_DATAPIPELINE_LOCATION, DataPipelineHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -58,7 +59,7 @@ class CreateDataPipelineOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAPIPELINE_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -120,7 +121,7 @@ class RunDataPipelineOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         data_pipeline_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAPIPELINE_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         **kwargs,
airflow/providers/google/cloud/operators/dataprep.py

@@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.providers.google.cloud.hooks.dataprep import GoogleDataprepHook
 from airflow.providers.google.cloud.links.dataprep import DataprepFlowLink, DataprepJobGroupLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -92,7 +93,7 @@ class DataprepGetJobGroupOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataprep_conn_id: str = "dataprep_default",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_group_id: int | str,
         embed: str,
         include_deleted: bool,
@@ -149,7 +150,7 @@ class DataprepRunJobGroupOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataprep_conn_id: str = "dataprep_default",
         body_request: dict,
         **kwargs,
@@ -198,7 +199,7 @@ class DataprepCopyFlowOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataprep_conn_id: str = "dataprep_default",
         flow_id: int | str,
         name: str = "",
@@ -280,7 +281,7 @@ class DataprepRunFlowOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         flow_id: int | str,
         body_request: dict,
         dataprep_conn_id: str = "dataprep_default",
airflow/providers/google/cloud/operators/dataproc.py

@@ -63,6 +63,7 @@ from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocSubmitTrigger,
 )
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils import timezone
 
 if TYPE_CHECKING:
@@ -627,7 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         *,
         cluster_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_config: dict | Cluster | None = None,
         virtual_cluster_config: dict | None = None,
         labels: dict | None = None,
@@ -815,6 +816,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
                 gcp_conn_id=self.gcp_conn_id,
                 impersonation_chain=self.impersonation_chain,
                 polling_interval_seconds=self.polling_interval_seconds,
+                delete_on_error=self.delete_on_error,
             ),
             method_name="execute_complete",
         )
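With delete_on_error now forwarded to the trigger, the deferred path of DataprocCreateClusterOperator can tear down a cluster that lands in ERROR state instead of leaving it behind, matching the synchronous behavior. A sketch with a minimal placeholder cluster config:

    from airflow.providers.google.cloud.operators.dataproc import (
        DataprocCreateClusterOperator,
    )

    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        cluster_name="example-cluster",
        region="us-central1",
        cluster_config={
            "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-4"},
            "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
        },
        deferrable=True,
        delete_on_error=True,  # now honored on the deferred path as well
    )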
@@ -928,7 +930,7 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
         self,
         *,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         region: str = "global",
         num_workers: int = 2,
         num_preemptible_workers: int = 0,
@@ -1047,7 +1049,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         *,
         region: str,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
@@ -1173,7 +1175,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
         *,
         cluster_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
@@ -1372,7 +1374,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         region: str,
         job_name: str = "{{task.task_id}}_{{ds_nodash}}",
         cluster_name: str = "cluster-1",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataproc_properties: dict | None = None,
         dataproc_jars: list[str] | None = None,
         gcp_conn_id: str = "google_cloud_default",
@@ -2021,7 +2023,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
 
     @staticmethod
     def _generate_temp_filename(filename):
-        return f"{time:%Y%m%d%H%M%S}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"
+        return f"{time.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"
 
     def _upload_file_temp(self, bucket, local_file):
         """Upload a local file to a Google Cloud Storage bucket."""
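The fixed helper builds the temp filename from an explicit time.strftime call, an 8-character UUID prefix (the "!s:.8" conversion stringifies the UUID and keeps its first 8 characters), and the file's base name. A standalone rendering, runnable outside Airflow:

    import ntpath
    import time
    import uuid

    def generate_temp_filename(filename: str) -> str:
        # e.g. "20240501123045_1a2b3c4d_wordcount.py"
        return f"{time.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"

    print(generate_temp_filename("/dags/scripts/wordcount.py"))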
@@ -2135,7 +2137,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         *,
         template: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -2229,7 +2231,7 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         version: int | None = None,
         request_id: str | None = None,
         parameters: dict[str, str] | None = None,
@@ -2376,7 +2378,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator):
         *,
         template: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         request_id: str | None = None,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
@@ -2513,7 +2515,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         *,
         job: dict,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         request_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
@@ -2590,6 +2592,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
                 gcp_conn_id=self.gcp_conn_id,
                 impersonation_chain=self.impersonation_chain,
                 polling_interval_seconds=self.polling_interval_seconds,
+                cancel_on_kill=self.cancel_on_kill,
             ),
             method_name="execute_complete",
         )
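Analogously, cancel_on_kill is now passed to DataprocSubmitTrigger, so killing or clearing a deferred task can cancel the Dataproc job rather than orphaning it. An illustrative deferrable submission; the cluster name and PySpark URI are placeholders:

    from airflow.providers.google.cloud.operators.dataproc import (
        DataprocSubmitJobOperator,
    )

    submit_job = DataprocSubmitJobOperator(
        task_id="submit_job",
        region="us-central1",
        job={
            "placement": {"cluster_name": "example-cluster"},
            "pyspark_job": {"main_python_file_uri": "gs://my-bucket/jobs/job.py"},
        },
        deferrable=True,
        cancel_on_kill=True,  # now forwarded to DataprocSubmitTrigger as well
    )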
@@ -2682,7 +2685,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
         graceful_decommission_timeout: dict | Duration,
         region: str,
         request_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -2816,7 +2819,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
         *,
         region: str,
         cluster_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         tarball_gcs_dir: str | None = None,
         diagnosis_interval: dict | Interval | None = None,
         jobs: MutableSequence[str] | None = None,
@@ -2954,7 +2957,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         self,
         *,
         region: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         batch: dict | Batch,
         batch_id: str,
         request_id: str | None = None,
@@ -3146,7 +3149,7 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -3210,7 +3213,7 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -3285,7 +3288,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
         self,
         *,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         page_token: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -3358,7 +3361,7 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
         *,
         operation_name: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
airflow/providers/google/cloud/operators/datastore.py

@@ -29,6 +29,7 @@ from airflow.providers.google.cloud.links.datastore import (
     CloudDatastoreImportExportLink,
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.google.common.links.storage import StorageLink
 
 if TYPE_CHECKING:
@@ -90,7 +91,7 @@ class CloudDatastoreExportEntitiesOperator(GoogleCloudBaseOperator):
         labels: dict | None = None,
         polling_interval_in_seconds: int = 10,
         overwrite_existing: bool = False,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
@@ -194,7 +195,7 @@ class CloudDatastoreImportEntitiesOperator(GoogleCloudBaseOperator):
         labels: dict | None = None,
         datastore_conn_id: str = "google_cloud_default",
         polling_interval_in_seconds: float = 10,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
@@ -268,7 +269,7 @@ class CloudDatastoreAllocateIdsOperator(GoogleCloudBaseOperator):
         self,
         *,
         partial_keys: list,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -326,7 +327,7 @@ class CloudDatastoreBeginTransactionOperator(GoogleCloudBaseOperator):
         self,
         *,
         transaction_options: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -384,7 +385,7 @@ class CloudDatastoreCommitOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -442,7 +443,7 @@ class CloudDatastoreRollbackOperator(GoogleCloudBaseOperator):
         self,
         *,
         transaction: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -498,7 +499,7 @@ class CloudDatastoreRunQueryOperator(GoogleCloudBaseOperator):
         self,
         *,
         body: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,