apache-airflow-providers-google 11.0.0rc1__py3-none-any.whl → 12.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/assets/gcs.py +1 -7
  3. airflow/providers/google/cloud/hooks/alloy_db.py +289 -0
  4. airflow/providers/google/cloud/hooks/cloud_batch.py +13 -5
  5. airflow/providers/google/cloud/hooks/dataproc.py +7 -3
  6. airflow/providers/google/cloud/hooks/dataproc_metastore.py +41 -22
  7. airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -38
  8. airflow/providers/google/cloud/hooks/translate.py +355 -0
  9. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +147 -0
  10. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +10 -0
  11. airflow/providers/google/cloud/links/alloy_db.py +55 -0
  12. airflow/providers/google/cloud/links/translate.py +98 -0
  13. airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -5
  14. airflow/providers/google/cloud/openlineage/mixins.py +4 -12
  15. airflow/providers/google/cloud/openlineage/utils.py +200 -22
  16. airflow/providers/google/cloud/operators/alloy_db.py +459 -0
  17. airflow/providers/google/cloud/operators/automl.py +55 -44
  18. airflow/providers/google/cloud/operators/bigquery.py +60 -15
  19. airflow/providers/google/cloud/operators/dataproc.py +12 -0
  20. airflow/providers/google/cloud/operators/gcs.py +5 -14
  21. airflow/providers/google/cloud/operators/kubernetes_engine.py +377 -705
  22. airflow/providers/google/cloud/operators/mlengine.py +41 -31
  23. airflow/providers/google/cloud/operators/translate.py +586 -1
  24. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +163 -0
  25. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +5 -0
  26. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  27. airflow/providers/google/cloud/sensors/vertex_ai/__init__.py +16 -0
  28. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +112 -0
  29. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +6 -11
  30. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +3 -0
  31. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +3 -0
  32. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -10
  33. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +3 -15
  34. airflow/providers/google/cloud/transfers/gcs_to_local.py +9 -0
  35. airflow/providers/google/cloud/transfers/local_to_gcs.py +41 -6
  36. airflow/providers/google/cloud/transfers/s3_to_gcs.py +15 -0
  37. airflow/providers/google/get_provider_info.py +30 -18
  38. airflow/providers/google/version_compat.py +36 -0
  39. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/METADATA +16 -18
  40. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/RECORD +42 -37
  41. airflow/providers/google/cloud/hooks/datapipeline.py +0 -71
  42. airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +0 -30
  43. airflow/providers/google/cloud/operators/datapipeline.py +0 -63
  44. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/WHEEL +0 -0
  45. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/entry_points.txt +0 -0
@@ -20,7 +20,7 @@
20
20
  from __future__ import annotations
21
21
 
22
22
  from collections.abc import MutableMapping, MutableSequence, Sequence
23
- from typing import TYPE_CHECKING
23
+ from typing import TYPE_CHECKING, cast
24
24
 
25
25
  from google.api_core.exceptions import GoogleAPICallError
26
26
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -28,8 +28,11 @@ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
28
28
  from airflow.exceptions import AirflowException
29
29
  from airflow.providers.google.cloud.hooks.translate import CloudTranslateHook, TranslateHook
30
30
  from airflow.providers.google.cloud.links.translate import (
31
+ TranslateResultByOutputConfigLink,
31
32
  TranslateTextBatchLink,
32
33
  TranslationDatasetsListLink,
34
+ TranslationModelLink,
35
+ TranslationModelsListLink,
33
36
  TranslationNativeDatasetLink,
34
37
  )
35
38
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
@@ -38,7 +41,11 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
38
41
  if TYPE_CHECKING:
39
42
  from google.api_core.retry import Retry
40
43
  from google.cloud.translate_v3.types import (
44
+ BatchDocumentInputConfig,
45
+ BatchDocumentOutputConfig,
41
46
  DatasetInputConfig,
47
+ DocumentInputConfig,
48
+ DocumentOutputConfig,
42
49
  InputConfig,
43
50
  OutputConfig,
44
51
  TranslateTextGlossaryConfig,
@@ -723,3 +730,581 @@ class TranslateDeleteDatasetOperator(GoogleCloudBaseOperator):
723
730
  )
724
731
  hook.wait_for_operation_done(operation=operation, timeout=self.timeout)
725
732
  self.log.info("Dataset deletion complete!")
733
+
734
+
class TranslateCreateModelOperator(GoogleCloudBaseOperator):
    """
    Creates a Google Cloud Translate model.

    Creates a `native` translation model, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateCreateModelOperator`.

    :param dataset_id: The dataset id used for model training.
    :param display_name: The display name forwarded to the create-model request.
    :param project_id: ID of the Google Cloud project where dataset is located.
        If not provided default project_id is used.
    :param location: The location of the project.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "dataset_id",
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    operator_extra_links = (TranslationModelLink(),)

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        location: str,
        dataset_id: str,
        display_name: str,
        timeout: float | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        metadata: Sequence[tuple[str, str]] = (),
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.location = location
        self.dataset_id = dataset_id
        self.display_name = display_name
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Submit the model creation, wait for it to finish and return the model data.

        The id of the created model is pushed to XCom under the ``model_id`` key and
        a link to the model is persisted for the task instance.

        :param context: The Airflow task execution context.
        :return: The created model converted to a dict.
        :raises AirflowException: If submitting the create_model request fails
            with a ``GoogleAPICallError``.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Model creation started, dataset_id %s...", self.dataset_id)
        try:
            result_operation = hook.create_model(
                dataset_id=self.dataset_id,
                display_name=self.display_name,
                location=self.location,
                project_id=self.project_id,
                retry=self.retry,
                timeout=self.timeout,
                metadata=self.metadata,
            )
        except GoogleAPICallError as e:
            self.log.error("Error submitting create_model operation ")
            # Chain explicitly so the original API error is kept as the cause.
            raise AirflowException(e) from e

        self.log.info("Training has started")
        hook.wait_for_operation_done(operation=result_operation)
        model = hook.wait_for_operation_result(operation=result_operation)
        # Convert the returned message to a plain dict before extracting the id
        # and returning it from the task.
        model_data = type(model).to_dict(model)
        model_id = hook.extract_object_id(model_data)
        self.xcom_push(context, key="model_id", value=model_id)
        self.log.info("Model creation complete. The model_id: %s.", model_id)

        project_id = self.project_id or hook.project_id
        TranslationModelLink.persist(
            context=context,
            task_instance=self,
            dataset_id=self.dataset_id,
            model_id=model_id,
            project_id=project_id,
        )
        return model_data
833
+
834
+
class TranslateModelsListOperator(GoogleCloudBaseOperator):
    """
    Get a list of native Google Cloud Translation models in a project.

    Lists the ids of the project's `native` translation models, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateModelsListOperator`.

    :param project_id: ID of the Google Cloud project whose models are listed.
        If not provided default project_id is used.
    :param location: The location of the project.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    operator_extra_links = (TranslationModelsListLink(),)

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        location: str,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Connection settings.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        # Request target.
        self.project_id = project_id
        self.location = location
        # Per-request call options.
        self.retry = retry
        self.timeout = timeout
        self.metadata = metadata

    def execute(self, context: Context):
        """
        Persist the models-list link and return the ids of the listed models.

        :param context: The Airflow task execution context.
        :return: List of model ids extracted from every item of the results pager.
        """
        translate_hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        TranslationModelsListLink.persist(
            context=context,
            task_instance=self,
            project_id=self.project_id or translate_hook.project_id,
        )
        self.log.info("Requesting models list")
        models_pager = translate_hook.list_models(
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        # Each pager item is converted to a dict so the hook can extract its id.
        result_ids = [
            translate_hook.extract_object_id(type(model).to_dict(model)) for model in models_pager
        ]
        self.log.info("Fetching the models list complete. Model id-s: %s", result_ids)
        return result_ids
916
+
917
+
class TranslateDeleteModelOperator(GoogleCloudBaseOperator):
    """
    Delete translation model and all of its contents.

    Deletes the translation model and its data, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateDeleteModelOperator`.

    :param model_id: The model_id of target native model to be deleted.
    :param location: The location of the project.
    :param project_id: ID of the Google Cloud project forwarded to the delete request.
        If not provided default project_id is used.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request; it is also used when waiting
        for the delete operation to complete.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "model_id",
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        model_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Connection settings.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        # Deletion target.
        self.project_id = project_id
        self.location = location
        self.model_id = model_id
        # Per-request call options.
        self.retry = retry
        self.timeout = timeout
        self.metadata = metadata

    def execute(self, context: Context):
        """
        Request the model deletion and block until the operation is done.

        :param context: The Airflow task execution context.
        """
        translate_hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Deleting the model %s...", self.model_id)
        delete_operation = translate_hook.delete_model(
            model_id=self.model_id,
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        translate_hook.wait_for_operation_done(operation=delete_operation, timeout=self.timeout)
        self.log.info("Model deletion complete!")
986
+
987
+
class TranslateDocumentOperator(GoogleCloudBaseOperator):
    """
    Translate document provided.

    Wraps the Google cloud Translate Text (Advanced) functionality.
    Supports wide range of input/output file types, please visit the
    https://cloud.google.com/translate/docs/advanced/translate-documents for more details.

    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateDocumentOperator`.

    :param project_id: Optional. The ID of the Google Cloud project that the
        service belongs to. If not specified the hook project_id will be used.
    :param source_language_code: Optional. The ISO-639 language code of the
        input document text if known. If the source language isn't specified,
        the API attempts to identify the source language automatically and returns
        the source language within the response.
    :param target_language_code: Required. The ISO-639 language code to use
        for translation of the input document text.
    :param location: Optional. Project or location to make a call. Must refer to a caller's project.
        If not specified, 'global' is used.
        Non-global location is required for requests using AutoML models or custom glossaries.
        Models and glossaries must be within the same region (have the same location-id).
    :param document_input_config: A document translation request input config.
    :param document_output_config: Optional. A document translation request output config.
        If not provided the translated file will only be returned through a byte-stream
        and its output mime type will be the same as the input file's mime type.
    :param customized_attribution: Optional. This flag is to support user customized
        attribution. If not provided, the default is ``Machine Translated by Google``.
        Customized attribution should follow rules in
        https://cloud.google.com/translate/attribution#attribution_and_logos
    :param is_translate_native_pdf_only: Optional. Param for external customers.
        If true, the page limit of online native PDF translation is 300 and only native PDF pages
        will be translated.
    :param enable_shadow_removal_native_pdf: Optional. If true, use the text removal server to remove the
        shadow text on background image for native PDF translation.
        Shadow removal feature can only be enabled when both ``is_translate_native_pdf_only``,
        ``pdf_native_only`` are False.
    :param enable_rotation_correction: Optional. If true, enable auto rotation
        correction in DVS.
    :param model: Optional. The ``model`` type requested for this translation.
        If not provided, the default Google model (NMT) will be used.
        The format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``
        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``

        If not provided, the default Google model (NMT) will be used
        for translation.
    :param glossary_config: Optional. Glossary to be applied.
    :param labels: Optional. Labels forwarded unchanged to the translate-document request.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    operator_extra_links = (TranslateResultByOutputConfigLink(),)

    template_fields: Sequence[str] = (
        "source_language_code",
        "target_language_code",
        "document_input_config",
        "document_output_config",
        "model",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        location: str | None = None,
        project_id: str = PROVIDE_PROJECT_ID,
        source_language_code: str | None = None,
        target_language_code: str,
        document_input_config: DocumentInputConfig | dict,
        # Documented as optional, so it gets an explicit default.
        document_output_config: DocumentOutputConfig | dict | None = None,
        customized_attribution: str | None = None,
        is_translate_native_pdf_only: bool = False,
        enable_shadow_removal_native_pdf: bool = False,
        enable_rotation_correction: bool = False,
        model: str | None = None,
        glossary_config: TranslateTextGlossaryConfig | None = None,
        labels: str | None = None,
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault | None = DEFAULT,
        metadata: Sequence[tuple[str, str]] = (),
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.source_language_code = source_language_code
        self.target_language_code = target_language_code
        self.document_input_config = document_input_config
        self.document_output_config = document_output_config
        self.customized_attribution = customized_attribution
        self.is_translate_native_pdf_only = is_translate_native_pdf_only
        self.enable_shadow_removal_native_pdf = enable_shadow_removal_native_pdf
        self.enable_rotation_correction = enable_rotation_correction
        self.location = location
        self.labels = labels
        self.model = model
        self.glossary_config = glossary_config
        # Stored as ``metadata`` (previously misspelled) to match the sibling operators.
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Translate the configured document and return the response as a dict.

        When an output config is set, a link to the translation result is
        persisted for the task instance.

        :param context: The Airflow task execution context.
        :return: The translate-document response converted to a dict.
        :raises AirflowException: If the translate_document call fails with
            a ``GoogleAPICallError``.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        try:
            self.log.info("Starting the document translation")
            doc_translation_result = hook.translate_document(
                # Forward the configured project so it is not silently ignored.
                # NOTE(review): assumes the hook method accepts ``project_id`` like
                # the other TranslateHook calls in this module do - confirm.
                project_id=self.project_id,
                source_language_code=self.source_language_code,
                target_language_code=self.target_language_code,
                document_input_config=self.document_input_config,
                document_output_config=self.document_output_config,
                customized_attribution=self.customized_attribution,
                is_translate_native_pdf_only=self.is_translate_native_pdf_only,
                enable_shadow_removal_native_pdf=self.enable_shadow_removal_native_pdf,
                enable_rotation_correction=self.enable_rotation_correction,
                location=self.location,
                labels=self.labels,
                model=self.model,
                glossary_config=self.glossary_config,
                timeout=self.timeout,
                retry=self.retry,
                metadata=self.metadata,
            )
            self.log.info("Document translation completed")
        except GoogleAPICallError as e:
            self.log.error("An error occurred executing translate_document method: \n%s", e)
            # Chain explicitly so the original API error is kept as the cause.
            raise AirflowException(e) from e
        # Without an output config there is no stored result to link to.
        if self.document_output_config:
            TranslateResultByOutputConfigLink.persist(
                context=context,
                task_instance=self,
                project_id=self.project_id or hook.project_id,
                output_config=self.document_output_config,
            )
        return cast(dict, type(doc_translation_result).to_dict(doc_translation_result))
1146
+
1147
+
class TranslateDocumentBatchOperator(GoogleCloudBaseOperator):
    """
    Translate documents provided via input and output configurations.

    Up to 10 target languages per operation supported.
    Wraps the Google cloud Translate Text (Advanced) functionality.
    See https://cloud.google.com/translate/docs/advanced/batch-translation.

    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateDocumentBatchOperator`.

    :param project_id: Optional. The ID of the Google Cloud project that the service belongs to.
        If not provided default project_id is used.
    :param source_language_code: Required. The ISO-639 language code of the input documents.
    :param target_language_codes: The ISO-639 language codes to use
        for translation of the input document. Specify up to 10 language codes here.
        Defaults to ``None`` in this operator's signature and is forwarded unchanged.
    :param location: Optional. Project or location to make a call. Must refer to
        a caller's project. If not specified, 'global' is used.
        Non-global location is required for requests using AutoML models or custom glossaries.
        Models and glossaries must be within the same region (have the same location-id).
    :param input_configs: Input configurations. The total number of files matched should be <=
        100. The total content size to translate should be <= 100M Unicode codepoints.
        The files must use UTF-8 encoding.
    :param output_config: Output configuration. If 2 input configs match to the same file (that
        is, same input path), no output for duplicate inputs will be generated.
    :param format_conversions: Optional. The file format conversion map that is applied to
        all input files. The map key is the original mime_type.
        The map value is the target mime_type of translated documents.
        Supported file format conversion includes:

        - ``application/pdf`` to
          ``application/vnd.openxmlformats-officedocument.wordprocessingml.document``

        If nothing specified, output files will be in the same format as the original file.
    :param customized_attribution: Optional. This flag is to support user customized
        attribution. If not provided, the default is ``Machine Translated by Google``.
        Customized attribution should follow rules in
        https://cloud.google.com/translate/attribution#attribution_and_logos
    :param enable_shadow_removal_native_pdf: Optional. If true, use the text removal server to remove the
        shadow text on background image for native PDF translation.
        Shadow removal feature can only be enabled when both ``is_translate_native_pdf_only``,
        ``pdf_native_only`` are False.
    :param enable_rotation_correction: Optional. If true, enable auto rotation
        correction in DVS.
    :param models: Optional. The models to use for translation. Map's key is
        target language code. Map's value is the model name. Value
        can be a built-in general model, or an AutoML Translation model.
        The value format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``

        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``,

        If the map is empty or a specific model is not requested for
        a language pair, then default google model (NMT) is used.
    :param glossaries: Glossaries to be applied. It's keyed by target language code.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    operator_extra_links = (TranslateResultByOutputConfigLink(),)

    template_fields: Sequence[str] = (
        "input_configs",
        "output_config",
        "target_language_codes",
        "source_language_code",
        "models",
        "glossaries",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        source_language_code: str,
        target_language_codes: MutableSequence[str] | None = None,
        location: str | None = None,
        input_configs: MutableSequence[BatchDocumentInputConfig | dict],
        output_config: BatchDocumentOutputConfig | dict,
        customized_attribution: str | None = None,
        format_conversions: MutableMapping[str, str] | None = None,
        enable_shadow_removal_native_pdf: bool = False,
        enable_rotation_correction: bool = False,
        models: MutableMapping[str, str] | None = None,
        glossaries: MutableMapping[str, TranslateTextGlossaryConfig] | None = None,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault | None = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.location = location
        self.target_language_codes = target_language_codes
        self.source_language_code = source_language_code
        self.input_configs = input_configs
        self.output_config = output_config
        self.customized_attribution = customized_attribution
        self.format_conversions = format_conversions
        self.enable_shadow_removal_native_pdf = enable_shadow_removal_native_pdf
        self.enable_rotation_correction = enable_rotation_correction
        self.models = models
        self.glossaries = glossaries
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Start the batch document translation, wait for it and return the result.

        A link to the translation output is persisted as soon as the job has been
        submitted, before waiting for the operation result.

        :param context: The Airflow task execution context.
        :return: The batch translation result converted to a dict.
        :raises AirflowException: If submitting the batch_translate_document
            request fails with a ``GoogleAPICallError``.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        try:
            batch_document_translate_operation = hook.batch_translate_document(
                project_id=self.project_id,
                location=self.location,
                target_language_codes=self.target_language_codes,
                source_language_code=self.source_language_code,
                input_configs=self.input_configs,
                output_config=self.output_config,
                customized_attribution=self.customized_attribution,
                format_conversions=self.format_conversions,
                enable_shadow_removal_native_pdf=self.enable_shadow_removal_native_pdf,
                enable_rotation_correction=self.enable_rotation_correction,
                models=self.models,
                glossaries=self.glossaries,
                metadata=self.metadata,
                timeout=self.timeout,
                retry=self.retry,
            )
        except GoogleAPICallError as e:
            self.log.error("An error occurred executing batch_translate_document method: \n%s", e)
            # Chain explicitly so the original API error is kept as the cause.
            raise AirflowException(e) from e
        self.log.info("Batch document translation job started.")
        TranslateResultByOutputConfigLink.persist(
            context=context,
            task_instance=self,
            project_id=self.project_id or hook.project_id,
            output_config=self.output_config,
        )
        result = hook.wait_for_operation_result(batch_document_translate_operation)
        self.log.info("Batch document translation job finished")
        return cast(dict, type(result).to_dict(result))