apache-airflow-providers-google 10.26.0__py3-none-any.whl → 11.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +2 -1
  3. airflow/providers/google/ads/operators/ads.py +2 -1
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
  5. airflow/providers/google/assets/gcs.py +17 -1
  6. airflow/providers/google/cloud/hooks/automl.py +3 -6
  7. airflow/providers/google/cloud/hooks/bigquery.py +41 -1486
  8. airflow/providers/google/cloud/hooks/bigquery_dts.py +4 -11
  9. airflow/providers/google/cloud/hooks/bigtable.py +3 -6
  10. airflow/providers/google/cloud/hooks/cloud_batch.py +6 -3
  11. airflow/providers/google/cloud/hooks/cloud_build.py +3 -15
  12. airflow/providers/google/cloud/hooks/cloud_composer.py +2 -17
  13. airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -6
  14. airflow/providers/google/cloud/hooks/cloud_run.py +10 -5
  15. airflow/providers/google/cloud/hooks/cloud_sql.py +5 -7
  16. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -7
  17. airflow/providers/google/cloud/hooks/compute.py +3 -6
  18. airflow/providers/google/cloud/hooks/compute_ssh.py +0 -5
  19. airflow/providers/google/cloud/hooks/datacatalog.py +3 -6
  20. airflow/providers/google/cloud/hooks/dataflow.py +3 -14
  21. airflow/providers/google/cloud/hooks/dataform.py +2 -9
  22. airflow/providers/google/cloud/hooks/datafusion.py +4 -15
  23. airflow/providers/google/cloud/hooks/dataplex.py +4 -7
  24. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  25. airflow/providers/google/cloud/hooks/dataproc.py +77 -22
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +2 -9
  27. airflow/providers/google/cloud/hooks/datastore.py +3 -6
  28. airflow/providers/google/cloud/hooks/dlp.py +3 -6
  29. airflow/providers/google/cloud/hooks/functions.py +2 -6
  30. airflow/providers/google/cloud/hooks/gcs.py +2 -18
  31. airflow/providers/google/cloud/hooks/gdm.py +1 -17
  32. airflow/providers/google/cloud/hooks/kms.py +3 -6
  33. airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -97
  34. airflow/providers/google/cloud/hooks/life_sciences.py +2 -6
  35. airflow/providers/google/cloud/hooks/looker.py +2 -1
  36. airflow/providers/google/cloud/hooks/mlengine.py +0 -8
  37. airflow/providers/google/cloud/hooks/natural_language.py +3 -6
  38. airflow/providers/google/cloud/hooks/os_login.py +3 -6
  39. airflow/providers/google/cloud/hooks/pubsub.py +3 -6
  40. airflow/providers/google/cloud/hooks/secret_manager.py +3 -73
  41. airflow/providers/google/cloud/hooks/spanner.py +3 -6
  42. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -6
  43. airflow/providers/google/cloud/hooks/stackdriver.py +3 -6
  44. airflow/providers/google/cloud/hooks/tasks.py +3 -6
  45. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -6
  46. airflow/providers/google/cloud/hooks/translate.py +455 -9
  47. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -6
  48. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -6
  49. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +3 -6
  50. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +2 -9
  51. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -9
  52. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -14
  53. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -6
  54. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -9
  55. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -1
  56. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -1
  57. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -6
  58. airflow/providers/google/cloud/hooks/vision.py +3 -6
  59. airflow/providers/google/cloud/hooks/workflows.py +2 -9
  60. airflow/providers/google/cloud/links/dataproc.py +0 -1
  61. airflow/providers/google/cloud/links/translate.py +91 -0
  62. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  63. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -3
  64. airflow/providers/google/cloud/openlineage/utils.py +54 -21
  65. airflow/providers/google/cloud/operators/automl.py +5 -4
  66. airflow/providers/google/cloud/operators/bigquery.py +2 -341
  67. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  68. airflow/providers/google/cloud/operators/bigtable.py +2 -1
  69. airflow/providers/google/cloud/operators/cloud_batch.py +2 -1
  70. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  71. airflow/providers/google/cloud/operators/cloud_composer.py +2 -1
  72. airflow/providers/google/cloud/operators/cloud_memorystore.py +2 -1
  73. airflow/providers/google/cloud/operators/cloud_run.py +2 -1
  74. airflow/providers/google/cloud/operators/cloud_sql.py +2 -1
  75. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -1
  76. airflow/providers/google/cloud/operators/compute.py +2 -1
  77. airflow/providers/google/cloud/operators/datacatalog.py +2 -1
  78. airflow/providers/google/cloud/operators/dataflow.py +2 -517
  79. airflow/providers/google/cloud/operators/dataform.py +2 -1
  80. airflow/providers/google/cloud/operators/datafusion.py +2 -1
  81. airflow/providers/google/cloud/operators/dataplex.py +37 -31
  82. airflow/providers/google/cloud/operators/dataprep.py +2 -1
  83. airflow/providers/google/cloud/operators/dataproc.py +3 -633
  84. airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
  85. airflow/providers/google/cloud/operators/datastore.py +2 -1
  86. airflow/providers/google/cloud/operators/dlp.py +2 -1
  87. airflow/providers/google/cloud/operators/functions.py +2 -1
  88. airflow/providers/google/cloud/operators/gcs.py +5 -4
  89. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -11
  90. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  91. airflow/providers/google/cloud/operators/mlengine.py +2 -1
  92. airflow/providers/google/cloud/operators/natural_language.py +3 -2
  93. airflow/providers/google/cloud/operators/pubsub.py +2 -1
  94. airflow/providers/google/cloud/operators/spanner.py +2 -1
  95. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  96. airflow/providers/google/cloud/operators/stackdriver.py +2 -1
  97. airflow/providers/google/cloud/operators/tasks.py +3 -2
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  99. airflow/providers/google/cloud/operators/translate.py +622 -32
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -93
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -13
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -17
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +2 -1
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +2 -1
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +2 -1
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -13
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
  110. airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
  111. airflow/providers/google/cloud/operators/vision.py +3 -2
  112. airflow/providers/google/cloud/operators/workflows.py +3 -2
  113. airflow/providers/google/cloud/secrets/secret_manager.py +2 -19
  114. airflow/providers/google/cloud/sensors/bigquery.py +2 -81
  115. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  116. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  117. airflow/providers/google/cloud/sensors/cloud_composer.py +8 -94
  118. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  119. airflow/providers/google/cloud/sensors/dataflow.py +2 -1
  120. airflow/providers/google/cloud/sensors/dataform.py +2 -1
  121. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  122. airflow/providers/google/cloud/sensors/dataplex.py +2 -1
  123. airflow/providers/google/cloud/sensors/dataprep.py +2 -1
  124. airflow/providers/google/cloud/sensors/dataproc.py +2 -1
  125. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -1
  126. airflow/providers/google/cloud/sensors/gcs.py +4 -36
  127. airflow/providers/google/cloud/sensors/pubsub.py +2 -1
  128. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  129. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  130. airflow/providers/google/cloud/transfers/adls_to_gcs.py +2 -1
  131. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -1
  132. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -1
  133. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +75 -18
  134. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +9 -7
  135. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
  136. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -1
  137. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +2 -1
  138. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +2 -1
  139. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -1
  140. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
  141. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +13 -9
  142. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -1
  143. airflow/providers/google/cloud/transfers/gcs_to_local.py +2 -1
  144. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +2 -1
  145. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +2 -1
  146. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -1
  147. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -1
  148. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  149. airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
  150. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -1
  151. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +2 -1
  152. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -1
  153. airflow/providers/google/cloud/transfers/sql_to_gcs.py +2 -1
  154. airflow/providers/google/cloud/triggers/bigquery.py +2 -1
  155. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  156. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  157. airflow/providers/google/cloud/triggers/cloud_build.py +2 -1
  158. airflow/providers/google/cloud/triggers/cloud_composer.py +3 -2
  159. airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
  160. airflow/providers/google/cloud/triggers/cloud_sql.py +1 -1
  161. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  162. airflow/providers/google/cloud/triggers/dataflow.py +2 -1
  163. airflow/providers/google/cloud/triggers/datafusion.py +2 -1
  164. airflow/providers/google/cloud/triggers/dataplex.py +1 -1
  165. airflow/providers/google/cloud/triggers/dataproc.py +2 -1
  166. airflow/providers/google/cloud/triggers/gcs.py +3 -2
  167. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -1
  168. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  169. airflow/providers/google/cloud/triggers/pubsub.py +2 -1
  170. airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
  171. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  172. airflow/providers/google/cloud/utils/dataform.py +1 -1
  173. airflow/providers/google/cloud/utils/field_validator.py +2 -1
  174. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +2 -1
  175. airflow/providers/google/common/hooks/base_google.py +4 -11
  176. airflow/providers/google/common/hooks/discovery_api.py +1 -6
  177. airflow/providers/google/firebase/hooks/firestore.py +1 -1
  178. airflow/providers/google/firebase/operators/firestore.py +2 -1
  179. airflow/providers/google/get_provider_info.py +7 -22
  180. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -1
  181. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +2 -3
  182. airflow/providers/google/marketing_platform/hooks/display_video.py +4 -3
  183. airflow/providers/google/marketing_platform/hooks/search_ads.py +6 -6
  184. airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -1
  185. airflow/providers/google/marketing_platform/operators/campaign_manager.py +2 -42
  186. airflow/providers/google/marketing_platform/operators/display_video.py +2 -47
  187. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -1
  188. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -7
  189. airflow/providers/google/marketing_platform/sensors/display_video.py +2 -13
  190. airflow/providers/google/suite/hooks/calendar.py +2 -8
  191. airflow/providers/google/suite/hooks/drive.py +2 -6
  192. airflow/providers/google/suite/hooks/sheets.py +2 -7
  193. airflow/providers/google/suite/operators/sheets.py +2 -7
  194. airflow/providers/google/suite/sensors/drive.py +2 -7
  195. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -7
  196. airflow/providers/google/suite/transfers/gcs_to_sheets.py +2 -7
  197. airflow/providers/google/suite/transfers/local_to_drive.py +2 -7
  198. airflow/providers/google/suite/transfers/sql_to_sheets.py +2 -7
  199. {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/METADATA +10 -10
  200. apache_airflow_providers_google-11.0.0.dist-info/RECORD +315 -0
  201. airflow/providers/google/marketing_platform/hooks/analytics.py +0 -211
  202. airflow/providers/google/marketing_platform/operators/analytics.py +0 -551
  203. apache_airflow_providers_google-10.26.0.dist-info/RECORD +0 -317
  204. {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/WHEEL +0 -0
  205. {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/entry_points.txt +0 -0
@@ -19,13 +19,33 @@
19
19
 
20
20
  from __future__ import annotations
21
21
 
22
- from typing import TYPE_CHECKING, Sequence
22
+ from collections.abc import MutableMapping, MutableSequence, Sequence
23
+ from typing import TYPE_CHECKING
24
+
25
+ from google.api_core.exceptions import GoogleAPICallError
26
+ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
23
27
 
24
28
  from airflow.exceptions import AirflowException
25
- from airflow.providers.google.cloud.hooks.translate import CloudTranslateHook
29
+ from airflow.providers.google.cloud.hooks.translate import CloudTranslateHook, TranslateHook
30
+ from airflow.providers.google.cloud.links.translate import (
31
+ TranslateTextBatchLink,
32
+ TranslationDatasetsListLink,
33
+ TranslationNativeDatasetLink,
34
+ )
26
35
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
36
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
27
37
 
28
38
  if TYPE_CHECKING:
39
+ from google.api_core.retry import Retry
40
+ from google.cloud.translate_v3.types import (
41
+ DatasetInputConfig,
42
+ InputConfig,
43
+ OutputConfig,
44
+ TranslateTextGlossaryConfig,
45
+ TransliterationConfig,
46
+ automl_translation,
47
+ )
48
+
29
49
  from airflow.utils.context import Context
30
50
 
31
51
 
@@ -42,43 +62,27 @@ class CloudTranslateTextOperator(GoogleCloudBaseOperator):
42
62
  Execute method returns str or list.
43
63
 
44
64
  This is a list of dictionaries for each queried value. Each
45
- dictionary typically contains three keys (though not
46
- all will be present in all cases).
65
+ dictionary typically contains three keys (though not all will be present in all cases):
47
66
 
48
- * ``detectedSourceLanguage``: The detected language (as an
49
- ISO 639-1 language code) of the text.
50
- * ``translatedText``: The translation of the text into the
51
- target language.
67
+ * ``detectedSourceLanguage``: The detected language (as an ISO 639-1 language code) of the text.
68
+ * ``translatedText``: The translation of the text into the target language.
52
69
  * ``input``: The corresponding input value.
53
70
  * ``model``: The model used to translate the text.
54
71
 
55
72
  If only a single value is passed, then only a single
56
- dictionary is set as XCom return value.
73
+ dictionary is set as the XCom return value.
57
74
 
58
75
  :param values: String or list of strings to translate.
59
-
60
- :param target_language: The language to translate results into. This
61
- is required by the API and defaults to
62
- the target language of the current instance.
63
-
64
- :param format_: (Optional) One of ``text`` or ``html``, to specify
65
- if the input text is plain text or HTML.
66
-
67
- :param source_language: (Optional) The language of the text to
68
- be translated.
69
-
70
- :param model: (Optional) The model used to translate the text, such
71
- as ``'base'`` or ``'nmt'``.
72
-
73
- :param impersonation_chain: Optional service account to impersonate using short-term
74
- credentials, or chained list of accounts required to get the access_token
75
- of the last account in the list, which will be impersonated in the request.
76
- If set as a string, the account must grant the originating account
77
- the Service Account Token Creator IAM role.
78
- If set as a sequence, the identities from the list must grant
79
- Service Account Token Creator IAM role to the directly preceding identity, with first
80
- account from the list granting this role to the originating account (templated).
81
-
76
+ :param target_language: The language to translate results into. This is required by the API.
77
+ :param format_: (Optional) One of ``text`` or ``html``, to specify if the input text is plain text or HTML.
78
+ :param source_language: (Optional) The language of the text to be translated.
79
+ :param model: (Optional) The model used to translate the text, such as ``'base'`` or ``'nmt'``.
80
+ :param impersonation_chain: Optional service account to impersonate using short-term credentials, or
81
+ chained list of accounts required to get the access_token of the last account in the list, which
82
+ will be impersonated in the request. If set as a string, the account must grant the originating
83
+ account the Service Account Token Creator IAM role. If set as a sequence, the identities from
84
+ the list must grant Service Account Token Creator IAM role to the directly preceding identity,
85
+ with the first account from the list granting this role to the originating account (templated).
82
86
  """
83
87
 
84
88
  # [START translate_template_fields]
@@ -133,3 +137,589 @@ class CloudTranslateTextOperator(GoogleCloudBaseOperator):
133
137
  self.log.error("An error has been thrown from translate method:")
134
138
  self.log.error(e)
135
139
  raise AirflowException(e)
140
+
141
+
142
class TranslateTextOperator(GoogleCloudBaseOperator):
    """
    Translate text content of moderate amount; for larger volumes use the TranslateTextBatchOperator.

    Wraps the Google Cloud Translate Text (Advanced) functionality.
    See https://cloud.google.com/translate/docs/advanced/translating-text-v3

    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateTextOperator`.

    :param project_id: Optional. The ID of the Google Cloud project that the
        service belongs to. The value is forwarded to the hook.
    :param location: Optional. The ID of the Google Cloud location that the
        service belongs to. If not specified, 'global' is used.
        Non-global location is required for requests using AutoML models or custom glossaries.
    :param contents: Required. The sequence of content strings to be translated.
        Limited to 1024 items with 30_000 codepoints total recommended.
    :param mime_type: Optional. The format of the source text. If left blank,
        the MIME type defaults to "text/html".
    :param source_language_code: Optional. The ISO-639 language code of the
        input text if known. If not specified, the service attempts to recognize it automatically.
    :param target_language_code: Required. The ISO-639 language code to use
        for translation of the input text.
    :param model: Optional. The ``model`` type requested for this translation.
        If not provided, the default Google model (NMT) will be used.
        The format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``
        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``
        - Translation LLM models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/translation-llm``

        For global (non-region) requests, use 'global' ``location-id``.
    :param glossary_config: Optional. Glossary to be applied.
    :param transliteration_config: Optional. Transliteration to be applied.
    :param labels: Optional. The labels with user-defined metadata.
        See https://cloud.google.com/translate/docs/advanced/labels for more information.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "contents",
        "target_language_code",
        "mime_type",
        "source_language_code",
        "model",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        contents: Sequence[str],
        source_language_code: str | None = None,
        target_language_code: str,
        mime_type: str | None = None,
        location: str | None = None,
        project_id: str = PROVIDE_PROJECT_ID,
        model: str | None = None,
        transliteration_config: TransliterationConfig | None = None,
        glossary_config: TranslateTextGlossaryConfig | None = None,
        labels: MutableMapping[str, str] | None = None,
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault | None = DEFAULT,
        metadata: Sequence[tuple[str, str]] = (),
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.contents = contents
        self.source_language_code = source_language_code
        self.target_language_code = target_language_code
        self.mime_type = mime_type
        self.location = location
        self.labels = labels
        self.model = model
        self.transliteration_config = transliteration_config
        self.glossary_config = glossary_config
        # Named ``metadata`` to match the other Translate operators in this module.
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Run the translation through ``TranslateHook.translate_text`` and return its result.

        :param context: Airflow task context (not used by this method).
        :return: The value returned by ``TranslateHook.translate_text``.
        :raises AirflowException: If the API call raises ``GoogleAPICallError``.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        try:
            self.log.info("Starting the text translation run")
            translation_result = hook.translate_text(
                # Forward the explicitly configured project so it is not silently ignored.
                project_id=self.project_id,
                contents=self.contents,
                source_language_code=self.source_language_code,
                target_language_code=self.target_language_code,
                mime_type=self.mime_type,
                location=self.location,
                labels=self.labels,
                model=self.model,
                transliteration_config=self.transliteration_config,
                glossary_config=self.glossary_config,
                timeout=self.timeout,
                retry=self.retry,
                metadata=self.metadata,
            )
            self.log.info("Text translation run complete")
            return translation_result
        except GoogleAPICallError as e:
            self.log.error("An error occurred executing translate_text method: \n%s", e)
            # Chain the original error so the API traceback is preserved.
            raise AirflowException(e) from e
266
+
267
+
268
class TranslateTextBatchOperator(GoogleCloudBaseOperator):
    """
    Translate large volumes of text content, by the inputs provided.

    Wraps the Google Cloud Translate Text (Advanced) functionality.
    See https://cloud.google.com/translate/docs/advanced/batch-translation

    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateTextBatchOperator`.

    :param project_id: Optional. The ID of the Google Cloud project that the
        service belongs to. If not specified the hook project_id will be used.
    :param location: Required. The ID of the Google Cloud location (non-global) that the
        service belongs to.
    :param source_language_code: Required. Source language code.
    :param target_language_codes: Required. Up to 10 language codes allowed here.
    :param input_configs: Required. Input configurations.
        The total number of files matched should be <=100. The total content size should be
        <= 100M Unicode codepoints. The files must use UTF-8 encoding.
    :param models: Optional. The models to use for translation. Map's key is
        target language code. Map's value is model name. Value can
        be a built-in general model, or an AutoML Translation model.
        The value format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``
        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``

        If the map is empty or a specific model is not requested for
        a language pair, then the default Google model (NMT) is used.
    :param output_config: Required. Output configuration, either a dict or an ``OutputConfig``
        message. It must carry a ``gcs_destination`` entry, which is returned by ``execute``.
    :param glossaries: Optional. Glossaries to be applied for translation. It's keyed by target language code.
    :param labels: Optional. The labels with user-defined metadata.
        See https://cloud.google.com/translate/docs/advanced/labels for more information.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    operator_extra_links = (TranslateTextBatchLink(),)

    template_fields: Sequence[str] = (
        "input_configs",
        "target_language_codes",
        "source_language_code",
        "models",
        "glossaries",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        location: str,
        target_language_codes: MutableSequence[str],
        source_language_code: str,
        input_configs: MutableSequence[InputConfig | dict],
        output_config: OutputConfig | dict,
        models: MutableMapping[str, str] | None = None,
        glossaries: MutableMapping[str, TranslateTextGlossaryConfig] | None = None,
        labels: MutableMapping[str, str] | None = None,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault | None = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.location = location
        self.target_language_codes = target_language_codes
        self.source_language_code = source_language_code
        self.input_configs = input_configs
        self.output_config = output_config
        self.models = models
        self.glossaries = glossaries
        self.labels = labels
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Start the batch translation, wait for it to finish and return the output destination.

        :param context: Airflow task context, passed to ``TranslateTextBatchLink.persist``.
        :return: A dict with the single key ``batch_text_translate_results`` holding the
            ``gcs_destination`` entry of the output configuration.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        translate_operation = hook.batch_translate_text(
            project_id=self.project_id,
            location=self.location,
            target_language_codes=self.target_language_codes,
            source_language_code=self.source_language_code,
            input_configs=self.input_configs,
            output_config=self.output_config,
            models=self.models,
            glossaries=self.glossaries,
            labels=self.labels,
            metadata=self.metadata,
            timeout=self.timeout,
            retry=self.retry,
        )
        self.log.info("Translate text batch job started.")

        # ``output_config`` may be an ``OutputConfig`` message, which cannot be subscripted
        # like a dict. Normalise it to a plain dict for the link and the return value;
        # the hook above still receives the caller's original object.
        output_config = self.output_config
        if not isinstance(output_config, dict):
            output_config = type(output_config).to_dict(output_config)

        TranslateTextBatchLink.persist(
            context=context,
            task_instance=self,
            project_id=self.project_id or hook.project_id,
            output_config=output_config,
        )
        hook.wait_for_operation_result(translate_operation)
        self.log.info("Translate text batch job finished")
        return {"batch_text_translate_results": output_config["gcs_destination"]}
394
+
395
+
396
class TranslateCreateDatasetOperator(GoogleCloudBaseOperator):
    """
    Create a Google Cloud Translate dataset.

    Creates a `native` translation dataset, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateCreateDatasetOperator`.

    :param dataset: The dataset to create. If a dict is provided, it must correspond to
        the automl_translation.Dataset type.
    :param project_id: ID of the Google Cloud project where dataset is located.
        If not provided default project_id is used.
    :param location: The location of the project.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "dataset",
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    operator_extra_links = (TranslationNativeDatasetLink(),)

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        location: str,
        dataset: dict | automl_translation.Dataset,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault | None = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.project_id = project_id
        self.location = location
        self.dataset = dataset
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context) -> dict:
        """
        Create the dataset, wait for the operation to finish and publish its ID.

        The extracted dataset ID is pushed to XCom under the ``dataset_id`` key and
        the dataset link is persisted for the task instance.

        :param context: Airflow task execution context.
        :return: The operation result converted with the result type's ``to_dict``.
        """
        hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Dataset creation started %s...", self.dataset)
        result_operation = hook.create_dataset(
            dataset=self.dataset,
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        created_dataset = hook.wait_for_operation_result(result_operation)
        # Convert the returned message to its ``to_dict`` form; this converted
        # value is what the ID is extracted from and what the task returns.
        result = type(created_dataset).to_dict(created_dataset)
        dataset_id = hook.extract_object_id(result)
        self.xcom_push(context, key="dataset_id", value=dataset_id)
        self.log.info("Dataset creation complete. The dataset_id: %s.", dataset_id)

        # Fall back to the hook's project when none was set on the operator.
        project_id = self.project_id or hook.project_id
        TranslationNativeDatasetLink.persist(
            context=context,
            task_instance=self,
            dataset_id=dataset_id,
            project_id=project_id,
        )
        return result
484
+
485
+
486
class TranslateDatasetsListOperator(GoogleCloudBaseOperator):
    """
    Get a list of native Google Cloud Translation datasets in a project.

    Lists the project's `native` translation datasets, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateDatasetsListOperator`.

    :param project_id: ID of the Google Cloud project where dataset is located.
        If not provided default project_id is used.
    :param location: The location of the project.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    operator_extra_links = (TranslationDatasetsListLink(),)

    def __init__(
        self,
        *,
        project_id: str = PROVIDE_PROJECT_ID,
        location: str,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | _MethodDefault = DEFAULT,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Request target.
        self.project_id = project_id
        self.location = location
        # Per-call options forwarded to the hook.
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        # Connection settings.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context):
        """
        Persist the datasets-list link, then return the IDs of the listed datasets.

        :param context: Airflow task execution context.
        :return: List of the values ``extract_object_id`` yields for each listed dataset.
        """
        translate_hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        TranslationDatasetsListLink.persist(
            context=context,
            task_instance=self,
            # Fall back to the hook's project when none was set on the operator.
            project_id=self.project_id or translate_hook.project_id,
        )
        self.log.info("Requesting datasets list")
        datasets_pager = translate_hook.list_datasets(
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        # Each item is converted with its type's ``to_dict`` before the ID is extracted.
        dataset_ids = [
            translate_hook.extract_object_id(type(dataset).to_dict(dataset)) for dataset in datasets_pager
        ]

        self.log.info("Fetching the datasets list complete.")
        return dataset_ids
568
+
569
+
570
class TranslateImportDataOperator(GoogleCloudBaseOperator):
    """
    Import data to the translation dataset.

    Loads data to the translation dataset, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateImportDataOperator`.

    :param dataset_id: The dataset_id of target native dataset to import data to.
    :param input_config: The desired input location of translations language pairs file.
        If a dict is provided, it must follow the structure of DatasetInputConfig.
    :param project_id: ID of the Google Cloud project where dataset is located. If not provided
        default project_id is used.
    :param location: The location of the project.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "dataset_id",
        "input_config",
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    operator_extra_links = (TranslationNativeDatasetLink(),)

    def __init__(
        self,
        *,
        dataset_id: str,
        location: str,
        input_config: dict | DatasetInputConfig,
        project_id: str = PROVIDE_PROJECT_ID,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # What to import and where.
        self.dataset_id = dataset_id
        self.input_config = input_config
        self.project_id = project_id
        self.location = location
        # Per-call options forwarded to the hook.
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        # Connection settings.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context):
        """
        Start the import, persist the dataset link and wait for the operation to be done.

        :param context: Airflow task execution context.
        """
        translate_hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Importing data to dataset...")
        import_operation = translate_hook.import_dataset_data(
            dataset_id=self.dataset_id,
            input_config=self.input_config,
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        # The link is persisted before waiting on the operation.
        TranslationNativeDatasetLink.persist(
            context=context,
            task_instance=self,
            dataset_id=self.dataset_id,
            # Fall back to the hook's project when none was set on the operator.
            project_id=self.project_id or translate_hook.project_id,
        )
        # The same timeout value is used for the request and for the wait.
        translate_hook.wait_for_operation_done(operation=import_operation, timeout=self.timeout)
        self.log.info("Importing data finished!")
656
+
657
+
658
class TranslateDeleteDatasetOperator(GoogleCloudBaseOperator):
    """
    Delete translation dataset and all of its contents.

    Deletes the translation dataset and its data, using API V3.
    For more information on how to use this operator, take a look at the guide:
    :ref:`howto/operator:TranslateDeleteDatasetOperator`.

    :param dataset_id: The dataset_id of target native dataset to be deleted.
    :param location: The location of the project.
    :param project_id: ID of the Google Cloud project where dataset is located.
        If not provided default project_id is used.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    template_fields: Sequence[str] = (
        "dataset_id",
        "location",
        "project_id",
        "gcp_conn_id",
        "impersonation_chain",
    )

    def __init__(
        self,
        *,
        dataset_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        metadata: Sequence[tuple[str, str]] = (),
        timeout: float | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # What to delete and where.
        self.dataset_id = dataset_id
        self.project_id = project_id
        self.location = location
        # Per-call options forwarded to the hook.
        self.metadata = metadata
        self.timeout = timeout
        self.retry = retry
        # Connection settings.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain

    def execute(self, context: Context):
        """
        Request deletion of the dataset and wait for the operation to be done.

        :param context: Airflow task execution context.
        """
        translate_hook = TranslateHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Deleting the dataset %s...", self.dataset_id)
        delete_operation = translate_hook.delete_dataset(
            dataset_id=self.dataset_id,
            location=self.location,
            project_id=self.project_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        # The same timeout value is used for the request and for the wait.
        translate_hook.wait_for_operation_done(operation=delete_operation, timeout=self.timeout)
        self.log.info("Dataset deletion complete!")