apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/dataflow.py

@@ -31,9 +31,23 @@ from copy import deepcopy
 from typing import TYPE_CHECKING, Any, Callable, Generator, Sequence, TypeVar, cast
 
 from deprecated import deprecated
-from google.cloud.dataflow_v1beta3 import GetJobRequest, Job, JobState, JobsV1Beta3AsyncClient, JobView
+from google.cloud.dataflow_v1beta3 import (
+    GetJobRequest,
+    Job,
+    JobState,
+    JobsV1Beta3AsyncClient,
+    JobView,
+    ListJobMessagesRequest,
+    MessagesV1Beta3AsyncClient,
+    MetricsV1Beta3AsyncClient,
+)
+from google.cloud.dataflow_v1beta3.types import (
+    GetJobMetricsRequest,
+    JobMessageImportance,
+    JobMetrics,
+)
 from google.cloud.dataflow_v1beta3.types.jobs import ListJobsRequest
-from googleapiclient.discovery import build
+from googleapiclient.discovery import Resource, build
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
@@ -47,6 +61,8 @@ from airflow.utils.timeout import timeout
 
 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
+    from google.cloud.dataflow_v1beta3.services.messages_v1_beta3.pagers import ListJobMessagesAsyncPager
+    from google.protobuf.timestamp_pb2 import Timestamp
 
 
 # This is the default location
@@ -561,7 +577,7 @@ class DataflowHook(GoogleBaseHook):
             impersonation_chain=impersonation_chain,
         )
 
-    def get_conn(self) -> build:
+    def get_conn(self) -> Resource:
         """Return a Google Cloud Dataflow service object."""
         http_authorized = self._authorize()
         return build("dataflow", "v1b3", http=http_authorized, cache_discovery=False)
@@ -641,9 +657,9 @@ class DataflowHook(GoogleBaseHook):
         on_new_job_callback: Callable[[dict], None] | None = None,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         environment: dict | None = None,
-    ) -> dict:
+    ) -> dict[str, str]:
         """
-        Start Dataflow template job.
+        Launch a Dataflow job with a Classic Template and wait for its completion.
 
         :param job_name: The name of the job.
         :param variables: Map of job runtime environment options.
@@ -676,26 +692,14 @@ class DataflowHook(GoogleBaseHook):
             environment=environment,
         )
 
-        service = self.get_conn()
-
-        request = (
-            service.projects()
-            .locations()
-            .templates()
-            .launch(
-                projectId=project_id,
-                location=location,
-                gcsPath=dataflow_template,
-                body={
-                    "jobName": name,
-                    "parameters": parameters,
-                    "environment": environment,
-                },
-            )
+        job: dict[str, str] = self.send_launch_template_request(
+            project_id=project_id,
+            location=location,
+            gcs_path=dataflow_template,
+            job_name=name,
+            parameters=parameters,
+            environment=environment,
         )
-        response = request.execute(num_retries=self.num_retries)
-
-        job = response["job"]
 
         if on_new_job_id_callback:
             warnings.warn(
@@ -703,7 +707,7 @@ class DataflowHook(GoogleBaseHook):
                 AirflowProviderDeprecationWarning,
                 stacklevel=3,
             )
-            on_new_job_id_callback(job.get("id"))
+            on_new_job_id_callback(job["id"])
 
         if on_new_job_callback:
             on_new_job_callback(job)
@@ -722,7 +726,62 @@ class DataflowHook(GoogleBaseHook):
             expected_terminal_state=self.expected_terminal_state,
         )
         jobs_controller.wait_for_done()
-        return response["job"]
+        return job
+
+    @_fallback_to_location_from_variables
+    @_fallback_to_project_id_from_variables
+    @GoogleBaseHook.fallback_to_default_project_id
+    def launch_job_with_template(
+        self,
+        *,
+        job_name: str,
+        variables: dict,
+        parameters: dict,
+        dataflow_template: str,
+        project_id: str,
+        append_job_name: bool = True,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        environment: dict | None = None,
+    ) -> dict[str, str]:
+        """
+        Launch a Dataflow job with a Classic Template and exit without waiting for its completion.
+
+        :param job_name: The name of the job.
+        :param variables: Map of job runtime environment options.
+            It will update environment argument if passed.
+
+            .. seealso::
+                For more information on possible configurations, look at the API documentation
+                `https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+                <https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment>`__
+
+        :param parameters: Parameters for the template
+        :param dataflow_template: GCS path to the template.
+        :param project_id: Optional, the Google Cloud project ID in which to start a job.
+            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param append_job_name: True if unique suffix has to be appended to job name.
+        :param location: Job location.
+
+            .. seealso::
+                For more information on possible configurations, look at the API documentation
+                `https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+                <https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment>`__
+        :return: the Dataflow job response
+        """
+        name = self.build_dataflow_job_name(job_name, append_job_name)
+        environment = self._update_environment(
+            variables=variables,
+            environment=environment,
+        )
+        job: dict[str, str] = self.send_launch_template_request(
+            project_id=project_id,
+            location=location,
+            gcs_path=dataflow_template,
+            job_name=name,
+            parameters=parameters,
+            environment=environment,
+        )
+        return job
 
     def _update_environment(self, variables: dict, environment: dict | None = None) -> dict:
         environment = environment or {}
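The new `launch_job_with_template` method builds the same launch request as `start_template_dataflow` (via `send_launch_template_request`, added further down in this diff) but returns as soon as the job is submitted, without the `wait_for_done()` polling. A minimal usage sketch; the connection ID, project, bucket, and template path are illustrative placeholders, not values from this diff:

    from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

    hook = DataflowHook(gcp_conn_id="google_cloud_default")  # placeholder connection
    job = hook.launch_job_with_template(
        job_name="example-wordcount",
        variables={},  # runtime environment options, merged via _update_environment()
        parameters={"inputFile": "gs://example-bucket/input.txt"},
        dataflow_template="gs://dataflow-templates/latest/Word_Count",
        project_id="example-project",
        location="europe-west1",
    )
    print(job["id"])  # the submitted job's ID; completion is not awaited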
@@ -758,6 +817,35 @@ class DataflowHook(GoogleBaseHook):
 
         return environment
 
+    def send_launch_template_request(
+        self,
+        *,
+        project_id: str,
+        location: str,
+        gcs_path: str,
+        job_name: str,
+        parameters: dict,
+        environment: dict,
+    ) -> dict[str, str]:
+        service: Resource = self.get_conn()
+        request = (
+            service.projects()
+            .locations()
+            .templates()
+            .launch(
+                projectId=project_id,
+                location=location,
+                gcsPath=gcs_path,
+                body={
+                    "jobName": job_name,
+                    "parameters": parameters,
+                    "environment": environment,
+                },
+            )
+        )
+        response: dict = request.execute(num_retries=self.num_retries)
+        return response["job"]
+
     @GoogleBaseHook.fallback_to_default_project_id
     def start_flex_template(
         self,
@@ -766,9 +854,9 @@ class DataflowHook(GoogleBaseHook):
         project_id: str,
         on_new_job_id_callback: Callable[[str], None] | None = None,
         on_new_job_callback: Callable[[dict], None] | None = None,
-    ) -> dict:
+    ) -> dict[str, str]:
         """
-        Start flex templates with the Dataflow pipeline.
+        Launch a Dataflow job with a Flex Template and wait for its completion.
 
         :param body: The request body. See:
             https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.locations.flexTemplates/launch#request-body
@@ -779,15 +867,16 @@ class DataflowHook(GoogleBaseHook):
         :param on_new_job_callback: A callback that is called when a Job is detected.
         :return: the Job
         """
-        service = self.get_conn()
+        service: Resource = self.get_conn()
         request = (
             service.projects()
             .locations()
             .flexTemplates()
             .launch(projectId=project_id, body=body, location=location)
         )
-        response = request.execute(num_retries=self.num_retries)
+        response: dict = request.execute(num_retries=self.num_retries)
         job = response["job"]
+        job_id: str = job["id"]
 
         if on_new_job_id_callback:
             warnings.warn(
@@ -795,7 +884,7 @@ class DataflowHook(GoogleBaseHook):
                 AirflowProviderDeprecationWarning,
                 stacklevel=3,
             )
-            on_new_job_id_callback(job.get("id"))
+            on_new_job_id_callback(job_id)
 
         if on_new_job_callback:
             on_new_job_callback(job)
@@ -803,7 +892,7 @@ class DataflowHook(GoogleBaseHook):
         jobs_controller = _DataflowJobsController(
             dataflow=self.get_conn(),
             project_number=project_id,
-            job_id=job.get("id"),
+            job_id=job_id,
             location=location,
             poll_sleep=self.poll_sleep,
             num_retries=self.num_retries,
@@ -814,6 +903,42 @@ class DataflowHook(GoogleBaseHook):
 
         return jobs_controller.get_jobs(refresh=True)[0]
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    def launch_job_with_flex_template(
+        self,
+        body: dict,
+        location: str,
+        project_id: str,
+    ) -> dict[str, str]:
+        """
+        Launch a Dataflow Job with a Flex Template and exit without waiting for the job completion.
+
+        :param body: The request body. See:
+            https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.locations.flexTemplates/launch#request-body
+        :param location: The location of the Dataflow job (for example europe-west1)
+        :param project_id: The ID of the GCP project that owns the job.
+            If set to ``None`` or missing, the default project_id from the GCP connection is used.
+        :return: a Dataflow job response
+        """
+        service: Resource = self.get_conn()
+        request = (
+            service.projects()
+            .locations()
+            .flexTemplates()
+            .launch(projectId=project_id, body=body, location=location)
+        )
+        response: dict = request.execute(num_retries=self.num_retries)
+        return response["job"]
+
+    @staticmethod
+    def extract_job_id(job: dict) -> str:
+        try:
+            return job["id"]
+        except KeyError:
+            raise AirflowException(
+                "While reading job object after template execution error occurred. Job object has no id."
+            )
+
     @_fallback_to_location_from_variables
     @_fallback_to_project_id_from_variables
     @GoogleBaseHook.fallback_to_default_project_id
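`launch_job_with_flex_template` is the Flex Template counterpart: it issues the same `flexTemplates().launch` request as `start_flex_template` but skips the `_DataflowJobsController` wait, and the new `extract_job_id` staticmethod turns a missing `id` key into an `AirflowException`. A usage sketch with placeholder values (see the request-body link in the docstring for the full `body` schema):

    from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

    hook = DataflowHook()
    job = hook.launch_job_with_flex_template(
        body={
            "launchParameter": {
                "jobName": "example-flex-job",
                "containerSpecGcsPath": "gs://example-bucket/templates/spec.json",
                "parameters": {"output": "gs://example-bucket/output"},
            }
        },
        location="us-central1",
        project_id="example-project",
    )
    job_id = DataflowHook.extract_job_id(job)  # raises AirflowException if "id" is absent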
@@ -1353,3 +1478,92 @@ class AsyncDataflowHook(GoogleBaseAsyncHook):
         )
         page_result: ListJobsAsyncPager = await client.list_jobs(request=request)
         return page_result
+
+    async def list_job_messages(
+        self,
+        job_id: str,
+        project_id: str | None = PROVIDE_PROJECT_ID,
+        minimum_importance: int = JobMessageImportance.JOB_MESSAGE_BASIC,
+        page_size: int | None = None,
+        page_token: str | None = None,
+        start_time: Timestamp | None = None,
+        end_time: Timestamp | None = None,
+        location: str | None = DEFAULT_DATAFLOW_LOCATION,
+    ) -> ListJobMessagesAsyncPager:
+        """
+        Return ListJobMessagesAsyncPager object from MessagesV1Beta3AsyncClient.
+
+        This method wraps around a similar method of MessagesV1Beta3AsyncClient. ListJobMessagesAsyncPager can be iterated
+        over to extract messages associated with a specific Job ID.
+
+        For more details see the MessagesV1Beta3AsyncClient method description at:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.services.messages_v1_beta3.MessagesV1Beta3AsyncClient
+
+        :param job_id: ID of the Dataflow job to get messages about.
+        :param project_id: Optional. The Google Cloud project ID in which to start a job.
+            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param minimum_importance: Optional. Filter to only get messages with importance >= level.
+            For more details see the description at:
+            https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.JobMessageImportance
+        :param page_size: Optional. If specified, determines the maximum number of messages to return.
+            If unspecified, the service may choose an appropriate default, or may return an arbitrarily large number of results.
+        :param page_token: Optional. If supplied, this should be the value of next_page_token returned by an earlier call.
+            This will cause the next page of results to be returned.
+        :param start_time: Optional. If specified, return only messages with timestamps >= start_time.
+            The default is the job creation time (i.e. beginning of messages).
+        :param end_time: Optional. If specified, return only messages with timestamps < end_time. The default is the current time.
+        :param location: Optional. The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains
+            the job specified by job_id.
+        """
+        project_id = project_id or (await self.get_project_id())
+        client = await self.initialize_client(MessagesV1Beta3AsyncClient)
+        request = ListJobMessagesRequest(
+            {
+                "project_id": project_id,
+                "job_id": job_id,
+                "minimum_importance": minimum_importance,
+                "page_size": page_size,
+                "page_token": page_token,
+                "start_time": start_time,
+                "end_time": end_time,
+                "location": location,
+            }
+        )
+        page_results: ListJobMessagesAsyncPager = await client.list_job_messages(request=request)
+        return page_results
+
+    async def get_job_metrics(
+        self,
+        job_id: str,
+        project_id: str | None = PROVIDE_PROJECT_ID,
+        start_time: Timestamp | None = None,
+        location: str | None = DEFAULT_DATAFLOW_LOCATION,
+    ) -> JobMetrics:
+        """
+        Return JobMetrics object from MetricsV1Beta3AsyncClient.
+
+        This method wraps around a similar method of MetricsV1Beta3AsyncClient.
+
+        For more details see the MetricsV1Beta3AsyncClient method description at:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.services.metrics_v1_beta3.MetricsV1Beta3AsyncClient
+
+        :param job_id: ID of the Dataflow job to get metrics for.
+        :param project_id: Optional. The Google Cloud project ID in which to start a job.
+            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param start_time: Optional. Return only metric data that has changed since this time.
+            Default is to return all information about all metrics for the job.
+        :param location: Optional. The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains
+            the job specified by job_id.
+        """
+        project_id = project_id or (await self.get_project_id())
+        client: MetricsV1Beta3AsyncClient = await self.initialize_client(MetricsV1Beta3AsyncClient)
+        request = GetJobMetricsRequest(
+            {
+                "project_id": project_id,
+                "job_id": job_id,
+                "start_time": start_time,
+                "location": location,
+            }
+        )
+        job_metrics: JobMetrics = await client.get_job_metrics(request=request)
+        return job_metrics
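These two new `AsyncDataflowHook` methods expose a job's log messages and metrics through the `dataflow_v1beta3` async clients, which is likely what the expanded dataflow sensors and triggers in this release build on. A sketch of direct use, with placeholder connection and IDs; `ListJobMessagesAsyncPager` supports `async for` across pages:

    import asyncio

    from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook

    async def dump_job_diagnostics(job_id: str, project_id: str) -> None:
        hook = AsyncDataflowHook(gcp_conn_id="google_cloud_default")  # placeholder connection
        pager = await hook.list_job_messages(job_id=job_id, project_id=project_id)
        async for message in pager:  # the pager yields JobMessage items across pages
            print(message.time, message.message_text)
        metrics = await hook.get_job_metrics(job_id=job_id, project_id=project_id)
        for update in metrics.metrics:  # JobMetrics.metrics holds MetricUpdate entries
            print(update.name.name, update.scalar)

    asyncio.run(dump_job_diagnostics("example-job-id", "example-project"))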
airflow/providers/google/cloud/hooks/dataplex.py

@@ -36,7 +36,11 @@ from google.protobuf.field_mask_pb2 import FieldMask
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import (
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+)
 
 if TYPE_CHECKING:
     from google.api_core.operation import Operation
@@ -665,7 +669,7 @@ class DataplexHook(GoogleBaseHook):
         self,
         data_scan_id: str,
         job_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         region: str | None = None,
         wait_time: int = 10,
         result_timeout: float | None = None,
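This `PROVIDE_PROJECT_ID` substitution recurs through the dlp, gcs, gdm, kubernetes_engine, mlengine, and pubsub hunks that follow: the default changes from `None` to a sentinel that keeps the parameter annotated as a plain `str` for type checkers while still being falsy at runtime, so `@GoogleBaseHook.fallback_to_default_project_id` can inject the connection's project. In `base_google.py` the sentinel is simply `None` cast to `str`. The following is a simplified sketch of the pattern, not the provider's actual decorator:

    from functools import wraps
    from typing import Callable, cast

    PROVIDE_PROJECT_ID: str = cast(str, None)  # annotated as str, actually None at runtime

    def fallback_to_default_project_id(func: Callable) -> Callable:
        # Simplified stand-in for GoogleBaseHook.fallback_to_default_project_id.
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            if kwargs.get("project_id") is None:  # sentinel left at its default
                kwargs["project_id"] = self.default_project_id
            return func(self, *args, **kwargs)
        return wrapper

    class DemoHook:
        default_project_id = "example-default-project"  # placeholder fallback value

        @fallback_to_default_project_id
        def get_data(self, *, project_id: str = PROVIDE_PROJECT_ID) -> str:
            return project_id

    print(DemoHook().get_data())  # -> "example-default-project"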
airflow/providers/google/cloud/hooks/dlp.py

@@ -162,7 +162,7 @@ class CloudDLPHook(GoogleBaseHook):
     def create_deidentify_template(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deidentify_template: dict | DeidentifyTemplate | None = None,
         template_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -287,7 +287,7 @@ class CloudDLPHook(GoogleBaseHook):
     def create_inspect_template(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_template: InspectTemplate | None = None,
         template_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -376,7 +376,7 @@ class CloudDLPHook(GoogleBaseHook):
     def create_stored_info_type(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         config: dict | StoredInfoTypeConfig | None = None,
         stored_info_type_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -565,7 +565,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

@@ -652,7 +652,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         stored_info_type_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

@@ -701,7 +701,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

@@ -788,7 +788,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

@@ -875,7 +875,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         stored_info_type_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),

@@ -967,7 +967,7 @@ class CloudDLPHook(GoogleBaseHook):
     def list_deidentify_templates(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -1103,7 +1103,7 @@ class CloudDLPHook(GoogleBaseHook):
     def list_inspect_templates(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -1201,7 +1201,7 @@ class CloudDLPHook(GoogleBaseHook):
     def list_stored_info_types(
         self,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -1356,7 +1356,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deidentify_template: dict | DeidentifyTemplate | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -1411,7 +1411,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_template: dict | InspectTemplate | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,

@@ -1513,7 +1513,7 @@ class CloudDLPHook(GoogleBaseHook):
         self,
         stored_info_type_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         config: dict | StoredInfoTypeConfig | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
airflow/providers/google/cloud/hooks/gcs.py

@@ -45,7 +45,11 @@ from requests import Session
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
 from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import (
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+)
 from airflow.typing_compat import ParamSpec
 from airflow.utils import timezone
 from airflow.version import version
@@ -1013,7 +1017,7 @@ class GCSHook(GoogleBaseHook):
         resource: dict | None = None,
         storage_class: str = "MULTI_REGIONAL",
         location: str = "US",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         labels: dict | None = None,
     ) -> str:
         """
airflow/providers/google/cloud/hooks/gdm.py

@@ -22,7 +22,7 @@ from typing import Any, Sequence
 from googleapiclient.discovery import Resource, build
 
 from airflow.exceptions import AirflowException
-from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 
 
 class GoogleDeploymentManagerHook(GoogleBaseHook):

@@ -56,7 +56,7 @@ class GoogleDeploymentManagerHook(GoogleBaseHook):
     @GoogleBaseHook.fallback_to_default_project_id
     def list_deployments(
         self,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deployment_filter: str | None = None,
         order_by: str | None = None,
     ) -> list[dict[str, Any]]:
airflow/providers/google/cloud/hooks/kubernetes_engine.py

@@ -156,7 +156,7 @@ class GKEHook(GoogleBaseHook):
     def get_client(self) -> ClusterManagerClient:
         return self.get_conn()
 
-    def wait_for_operation(self, operation: Operation, project_id: str | None = None) -> Operation:
+    def wait_for_operation(self, operation: Operation, project_id: str = PROVIDE_PROJECT_ID) -> Operation:
         """Continuously fetch the status from Google Cloud.
 
         This is done until the given operation completes, or raises an error.

@@ -176,7 +176,7 @@ class GKEHook(GoogleBaseHook):
         operation = self.get_operation(operation.name, project_id=project_id or self.project_id)
         return operation
 
-    def get_operation(self, operation_name: str, project_id: str | None = None) -> Operation:
+    def get_operation(self, operation_name: str, project_id: str = PROVIDE_PROJECT_ID) -> Operation:
         """Get an operation from Google Cloud.
 
         :param operation_name: Name of operation to fetch
airflow/providers/google/cloud/hooks/mlengine.py

@@ -31,7 +31,11 @@ from googleapiclient.discovery import Resource, build
 from googleapiclient.errors import HttpError
 
 from airflow.exceptions import AirflowException
-from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import (
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+)
 from airflow.version import version as airflow_version
 
 if TYPE_CHECKING:
@@ -550,7 +554,7 @@ class MLEngineAsyncHook(GoogleBaseAsyncHook):
     def _check_fileds(
         self,
         job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ):
         if not project_id:
             raise AirflowException("Google Cloud project id is required.")

@@ -569,7 +573,7 @@ class MLEngineAsyncHook(GoogleBaseAsyncHook):
 
         return job
 
-    async def get_job(self, job_id: str, session: Session, project_id: str | None = None):
+    async def get_job(self, job_id: str, session: Session, project_id: str = PROVIDE_PROJECT_ID):
         """Get the specified job resource by job ID and project ID."""
         self._check_fileds(project_id=project_id, job_id=job_id)
 

@@ -579,7 +583,7 @@ class MLEngineAsyncHook(GoogleBaseAsyncHook):
     async def get_job_status(
         self,
         job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> str | None:
         """
         Poll for job status asynchronously using gcloud-aio.
airflow/providers/google/cloud/hooks/pubsub.py

@@ -590,7 +590,7 @@ class PubSubAsyncHook(GoogleBaseAsyncHook):
 
     sync_hook_class = PubSubHook
 
-    def __init__(self, project_id: str | None = None, **kwargs: Any):
+    def __init__(self, project_id: str = PROVIDE_PROJECT_ID, **kwargs: Any):
         super().__init__(**kwargs)
         self.project_id = project_id
         self._client: SubscriberAsyncClient | None = None
|