apache-airflow-providers-google 10.26.0rc1__py3-none-any.whl → 11.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +2 -1
  3. airflow/providers/google/ads/operators/ads.py +2 -1
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
  5. airflow/providers/google/assets/gcs.py +17 -1
  6. airflow/providers/google/cloud/hooks/automl.py +3 -6
  7. airflow/providers/google/cloud/hooks/bigquery.py +41 -1486
  8. airflow/providers/google/cloud/hooks/bigquery_dts.py +4 -11
  9. airflow/providers/google/cloud/hooks/bigtable.py +3 -6
  10. airflow/providers/google/cloud/hooks/cloud_batch.py +6 -3
  11. airflow/providers/google/cloud/hooks/cloud_build.py +3 -15
  12. airflow/providers/google/cloud/hooks/cloud_composer.py +2 -17
  13. airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -6
  14. airflow/providers/google/cloud/hooks/cloud_run.py +10 -5
  15. airflow/providers/google/cloud/hooks/cloud_sql.py +5 -7
  16. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -7
  17. airflow/providers/google/cloud/hooks/compute.py +3 -6
  18. airflow/providers/google/cloud/hooks/compute_ssh.py +0 -5
  19. airflow/providers/google/cloud/hooks/datacatalog.py +3 -6
  20. airflow/providers/google/cloud/hooks/dataflow.py +3 -14
  21. airflow/providers/google/cloud/hooks/dataform.py +2 -9
  22. airflow/providers/google/cloud/hooks/datafusion.py +4 -15
  23. airflow/providers/google/cloud/hooks/dataplex.py +4 -7
  24. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  25. airflow/providers/google/cloud/hooks/dataproc.py +77 -22
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +2 -9
  27. airflow/providers/google/cloud/hooks/datastore.py +3 -6
  28. airflow/providers/google/cloud/hooks/dlp.py +3 -6
  29. airflow/providers/google/cloud/hooks/functions.py +2 -6
  30. airflow/providers/google/cloud/hooks/gcs.py +2 -18
  31. airflow/providers/google/cloud/hooks/gdm.py +1 -17
  32. airflow/providers/google/cloud/hooks/kms.py +3 -6
  33. airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -97
  34. airflow/providers/google/cloud/hooks/life_sciences.py +2 -6
  35. airflow/providers/google/cloud/hooks/looker.py +2 -1
  36. airflow/providers/google/cloud/hooks/mlengine.py +0 -8
  37. airflow/providers/google/cloud/hooks/natural_language.py +3 -6
  38. airflow/providers/google/cloud/hooks/os_login.py +3 -6
  39. airflow/providers/google/cloud/hooks/pubsub.py +3 -6
  40. airflow/providers/google/cloud/hooks/secret_manager.py +3 -73
  41. airflow/providers/google/cloud/hooks/spanner.py +3 -6
  42. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -6
  43. airflow/providers/google/cloud/hooks/stackdriver.py +3 -6
  44. airflow/providers/google/cloud/hooks/tasks.py +3 -6
  45. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -6
  46. airflow/providers/google/cloud/hooks/translate.py +455 -9
  47. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -6
  48. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -6
  49. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +3 -6
  50. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +2 -9
  51. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -9
  52. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -14
  53. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -6
  54. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -9
  55. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -1
  56. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -1
  57. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -6
  58. airflow/providers/google/cloud/hooks/vision.py +3 -6
  59. airflow/providers/google/cloud/hooks/workflows.py +2 -9
  60. airflow/providers/google/cloud/links/dataproc.py +0 -1
  61. airflow/providers/google/cloud/links/translate.py +91 -0
  62. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  63. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -3
  64. airflow/providers/google/cloud/openlineage/utils.py +54 -21
  65. airflow/providers/google/cloud/operators/automl.py +5 -4
  66. airflow/providers/google/cloud/operators/bigquery.py +2 -341
  67. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  68. airflow/providers/google/cloud/operators/bigtable.py +2 -1
  69. airflow/providers/google/cloud/operators/cloud_batch.py +2 -1
  70. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  71. airflow/providers/google/cloud/operators/cloud_composer.py +2 -1
  72. airflow/providers/google/cloud/operators/cloud_memorystore.py +2 -1
  73. airflow/providers/google/cloud/operators/cloud_run.py +2 -1
  74. airflow/providers/google/cloud/operators/cloud_sql.py +2 -1
  75. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -1
  76. airflow/providers/google/cloud/operators/compute.py +2 -1
  77. airflow/providers/google/cloud/operators/datacatalog.py +2 -1
  78. airflow/providers/google/cloud/operators/dataflow.py +2 -517
  79. airflow/providers/google/cloud/operators/dataform.py +2 -1
  80. airflow/providers/google/cloud/operators/datafusion.py +2 -1
  81. airflow/providers/google/cloud/operators/dataplex.py +37 -31
  82. airflow/providers/google/cloud/operators/dataprep.py +2 -1
  83. airflow/providers/google/cloud/operators/dataproc.py +3 -633
  84. airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
  85. airflow/providers/google/cloud/operators/datastore.py +2 -1
  86. airflow/providers/google/cloud/operators/dlp.py +2 -1
  87. airflow/providers/google/cloud/operators/functions.py +2 -1
  88. airflow/providers/google/cloud/operators/gcs.py +5 -4
  89. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -11
  90. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  91. airflow/providers/google/cloud/operators/mlengine.py +2 -1
  92. airflow/providers/google/cloud/operators/natural_language.py +3 -2
  93. airflow/providers/google/cloud/operators/pubsub.py +2 -1
  94. airflow/providers/google/cloud/operators/spanner.py +2 -1
  95. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  96. airflow/providers/google/cloud/operators/stackdriver.py +2 -1
  97. airflow/providers/google/cloud/operators/tasks.py +3 -2
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  99. airflow/providers/google/cloud/operators/translate.py +622 -32
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -93
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -13
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -17
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +2 -1
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +2 -1
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +2 -1
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -13
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
  110. airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
  111. airflow/providers/google/cloud/operators/vision.py +3 -2
  112. airflow/providers/google/cloud/operators/workflows.py +3 -2
  113. airflow/providers/google/cloud/secrets/secret_manager.py +2 -19
  114. airflow/providers/google/cloud/sensors/bigquery.py +2 -81
  115. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  116. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  117. airflow/providers/google/cloud/sensors/cloud_composer.py +8 -94
  118. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  119. airflow/providers/google/cloud/sensors/dataflow.py +2 -1
  120. airflow/providers/google/cloud/sensors/dataform.py +2 -1
  121. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  122. airflow/providers/google/cloud/sensors/dataplex.py +2 -1
  123. airflow/providers/google/cloud/sensors/dataprep.py +2 -1
  124. airflow/providers/google/cloud/sensors/dataproc.py +2 -1
  125. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -1
  126. airflow/providers/google/cloud/sensors/gcs.py +4 -36
  127. airflow/providers/google/cloud/sensors/pubsub.py +2 -1
  128. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  129. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  130. airflow/providers/google/cloud/transfers/adls_to_gcs.py +2 -1
  131. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -1
  132. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -1
  133. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +75 -18
  134. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +9 -7
  135. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
  136. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -1
  137. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +2 -1
  138. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +2 -1
  139. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -1
  140. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
  141. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +13 -9
  142. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -1
  143. airflow/providers/google/cloud/transfers/gcs_to_local.py +2 -1
  144. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +2 -1
  145. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +2 -1
  146. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -1
  147. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -1
  148. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  149. airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
  150. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -1
  151. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +2 -1
  152. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -1
  153. airflow/providers/google/cloud/transfers/sql_to_gcs.py +2 -1
  154. airflow/providers/google/cloud/triggers/bigquery.py +2 -1
  155. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  156. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  157. airflow/providers/google/cloud/triggers/cloud_build.py +2 -1
  158. airflow/providers/google/cloud/triggers/cloud_composer.py +3 -2
  159. airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
  160. airflow/providers/google/cloud/triggers/cloud_sql.py +1 -1
  161. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  162. airflow/providers/google/cloud/triggers/dataflow.py +2 -1
  163. airflow/providers/google/cloud/triggers/datafusion.py +2 -1
  164. airflow/providers/google/cloud/triggers/dataplex.py +1 -1
  165. airflow/providers/google/cloud/triggers/dataproc.py +2 -1
  166. airflow/providers/google/cloud/triggers/gcs.py +3 -2
  167. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -1
  168. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  169. airflow/providers/google/cloud/triggers/pubsub.py +2 -1
  170. airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
  171. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  172. airflow/providers/google/cloud/utils/dataform.py +1 -1
  173. airflow/providers/google/cloud/utils/field_validator.py +2 -1
  174. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +2 -1
  175. airflow/providers/google/common/hooks/base_google.py +4 -11
  176. airflow/providers/google/common/hooks/discovery_api.py +1 -6
  177. airflow/providers/google/firebase/hooks/firestore.py +1 -1
  178. airflow/providers/google/firebase/operators/firestore.py +2 -1
  179. airflow/providers/google/get_provider_info.py +7 -22
  180. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -1
  181. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +2 -3
  182. airflow/providers/google/marketing_platform/hooks/display_video.py +4 -3
  183. airflow/providers/google/marketing_platform/hooks/search_ads.py +6 -6
  184. airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -1
  185. airflow/providers/google/marketing_platform/operators/campaign_manager.py +2 -42
  186. airflow/providers/google/marketing_platform/operators/display_video.py +2 -47
  187. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -1
  188. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -7
  189. airflow/providers/google/marketing_platform/sensors/display_video.py +2 -13
  190. airflow/providers/google/suite/hooks/calendar.py +2 -8
  191. airflow/providers/google/suite/hooks/drive.py +2 -6
  192. airflow/providers/google/suite/hooks/sheets.py +2 -7
  193. airflow/providers/google/suite/operators/sheets.py +2 -7
  194. airflow/providers/google/suite/sensors/drive.py +2 -7
  195. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -7
  196. airflow/providers/google/suite/transfers/gcs_to_sheets.py +2 -7
  197. airflow/providers/google/suite/transfers/local_to_drive.py +2 -7
  198. airflow/providers/google/suite/transfers/sql_to_sheets.py +2 -7
  199. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/METADATA +17 -17
  200. apache_airflow_providers_google-11.0.0.dist-info/RECORD +315 -0
  201. airflow/providers/google/marketing_platform/hooks/analytics.py +0 -211
  202. airflow/providers/google/marketing_platform/operators/analytics.py +0 -551
  203. apache_airflow_providers_google-10.26.0rc1.dist-info/RECORD +0 -317
  204. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/WHEEL +0 -0
  205. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/entry_points.txt +0 -0
@@ -20,18 +20,15 @@
 from __future__ import annotations
 
 import inspect
-import ntpath
-import os
 import re
 import time
-import uuid
 import warnings
-from collections.abc import MutableSequence
+from collections.abc import MutableSequence, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from enum import Enum
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any
 
 from google.api_core.exceptions import AlreadyExists, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -45,7 +42,6 @@ from airflow.providers.google.cloud.hooks.dataproc import (
     DataProcJobBuilder,
     DataprocResourceIsNotReadyError,
 )
-from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.dataproc import (
     DATAPROC_BATCH_LINK,
     DATAPROC_CLUSTER_LINK_DEPRECATED,
@@ -822,7 +818,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         try:
             # First try to create a new cluster
            operation = self._create_cluster(hook)
-            if not self.deferrable:
+            if not self.deferrable and type(operation) is not str:
                 cluster = hook.wait_for_operation(
                     timeout=self.timeout, result_retry=self.retry, operation=operation
                 )
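
For context, the operator touched by this hunk is DataprocCreateClusterOperator; the added guard simply skips the synchronous wait_for_operation() call when _create_cluster() returns a plain string rather than a google.api_core Operation. A minimal usage sketch follows — project, region, and cluster names are hypothetical:

from airflow.providers.google.cloud.operators.dataproc import DataprocCreateClusterOperator

# Hypothetical names; cluster_config follows the Dataproc ClusterConfig schema.
create_cluster = DataprocCreateClusterOperator(
    task_id="create_cluster",
    project_id="my-project",
    region="europe-west1",
    cluster_name="cluster-1",
    cluster_config={
        "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-4"},
        "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
    },
    # With deferrable=False, the operator now waits only when _create_cluster()
    # returned an Operation (see the guard added in the hunk above).
    deferrable=False,
)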
@@ -1547,632 +1543,6 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         self.hook.cancel_job(project_id=self.project_id, job_id=self.dataproc_job_id, region=self.region)
 
 
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Pig query Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    The parameters of the operation will be passed to the cluster.
-
-    It's a good practice to define dataproc_* parameters in the default_args of the dag
-    like the cluster name and UDFs.
-
-    .. code-block:: python
-
-        default_args = {
-            "cluster_name": "cluster-1",
-            "dataproc_pig_jars": [
-                "gs://example/udf/jar/datafu/1.2.0/datafu.jar",
-                "gs://example/udf/jar/gpig/1.2/gpig.jar",
-            ],
-        }
-
-    You can pass a pig script as string or file reference. Use variables to pass on
-    variables for the pig script to be resolved on the cluster or use the parameters to
-    be resolved in the script as template parameters.
-
-    .. code-block:: python
-
-        t1 = DataProcPigOperator(
-            task_id="dataproc_pig",
-            query="a_pig_script.pig",
-            variables={"out": "gs://example/output/{{ds}}"},
-        )
-
-    .. seealso::
-        For more detail on about job submission have a look at the reference:
-        https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs
-
-    :param query: The query or reference to the query
-        file (pg or pig extension). (templated)
-    :param query_uri: The HCFS URI of the script that contains the Pig queries.
-    :param variables: Map of named parameters for the query. (templated)
-    """
-
-    template_fields: Sequence[str] = (
-        "query",
-        "variables",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    template_ext = (".pg", ".pig")
-    ui_color = "#0273d4"
-    job_type = "pig_job"
-
-    operator_extra_links = (DataprocLink(),)
-
-    def __init__(
-        self,
-        *,
-        query: str | None = None,
-        query_uri: str | None = None,
-        variables: dict | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.query = query
-        self.query_uri = query_uri
-        self.variables = variables
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to `DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-
-        if self.query is None:
-            if self.query_uri is None:
-                raise AirflowException("One of query or query_uri should be set here")
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        if self.query is None:
-            if self.query_uri is None:
-                raise AirflowException("One of query or query_uri should be set here")
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-
-        super().execute(context)
-
-
-# TODO: Remove one day
-
-
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Hive query Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    :param query: The query or reference to the query file (q extension).
-    :param query_uri: The HCFS URI of the script that contains the Hive queries.
-    :param variables: Map of named parameters for the query.
-    """
-
-    template_fields: Sequence[str] = (
-        "query",
-        "variables",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    template_ext = (".q", ".hql")
-    ui_color = "#0273d4"
-    job_type = "hive_job"
-
-    def __init__(
-        self,
-        *,
-        query: str | None = None,
-        query_uri: str | None = None,
-        variables: dict | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.query = query
-        self.query_uri = query_uri
-        self.variables = variables
-        if self.query is not None and self.query_uri is not None:
-            raise AirflowException("Only one of `query` and `query_uri` can be passed.")
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to `DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-        if self.query is None:
-            if self.query_uri is None:
-                raise AirflowException("One of query or query_uri should be set here")
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        if self.query is None:
-            if self.query_uri is None:
-                raise AirflowException("One of query or query_uri should be set here")
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-        super().execute(context)
-
-
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Spark SQL query Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    :param query: The query or reference to the query file (q extension). (templated)
-    :param query_uri: The HCFS URI of the script that contains the SQL queries.
-    :param variables: Map of named parameters for the query. (templated)
-    """
-
-    template_fields: Sequence[str] = (
-        "query",
-        "variables",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    template_ext = (".q",)
-    template_fields_renderers = {"sql": "sql"}
-    ui_color = "#0273d4"
-    job_type = "spark_sql_job"
-
-    def __init__(
-        self,
-        *,
-        query: str | None = None,
-        query_uri: str | None = None,
-        variables: dict | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.query = query
-        self.query_uri = query_uri
-        self.variables = variables
-        if self.query is not None and self.query_uri is not None:
-            raise AirflowException("Only one of `query` and `query_uri` can be passed.")
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to `DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-        if self.query is None:
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        if self.query is None:
-            if self.query_uri is None:
-                raise AirflowException("One of query or query_uri should be set here")
-            job_template.add_query_uri(self.query_uri)
-        else:
-            job_template.add_query(self.query)
-        job_template.add_variables(self.variables)
-        super().execute(context)
-
-
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Spark Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    :param main_jar: The HCFS URI of the jar file that contains the main class
-        (use this or the main_class, not both together).
-    :param main_class: Name of the job class. (use this or the main_jar, not both
-        together).
-    :param arguments: Arguments for the job. (templated)
-    :param archives: List of archived files that will be unpacked in the work
-        directory. Should be stored in Cloud Storage.
-    :param files: List of files to be copied to the working directory
-    """
-
-    template_fields: Sequence[str] = (
-        "arguments",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    ui_color = "#0273d4"
-    job_type = "spark_job"
-
-    def __init__(
-        self,
-        *,
-        main_jar: str | None = None,
-        main_class: str | None = None,
-        arguments: list | None = None,
-        archives: list | None = None,
-        files: list | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.main_jar = main_jar
-        self.main_class = main_class
-        self.arguments = arguments
-        self.archives = archives
-        self.files = files
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to `DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-        job_template.set_main(self.main_jar, self.main_class)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        job_template.set_main(self.main_jar, self.main_class)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        super().execute(context)
-
-
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Hadoop Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    :param main_jar: The HCFS URI of the jar file containing the main class
-        (use this or the main_class, not both together).
-    :param main_class: Name of the job class. (use this or the main_jar, not both
-        together).
-    :param arguments: Arguments for the job. (templated)
-    :param archives: List of archived files that will be unpacked in the work
-        directory. Should be stored in Cloud Storage.
-    :param files: List of files to be copied to the working directory
-    """
-
-    template_fields: Sequence[str] = (
-        "arguments",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    ui_color = "#0273d4"
-    job_type = "hadoop_job"
-
-    def __init__(
-        self,
-        *,
-        main_jar: str | None = None,
-        main_class: str | None = None,
-        arguments: list | None = None,
-        archives: list | None = None,
-        files: list | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.main_jar = main_jar
-        self.main_class = main_class
-        self.arguments = arguments
-        self.archives = archives
-        self.files = files
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to `DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-        job_template.set_main(self.main_jar, self.main_class)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        job_template.set_main(self.main_jar, self.main_class)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        super().execute(context)
-
-
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="November 01, 2024",
-    use_instead="DataprocSubmitJobOperator",
-    instructions="You can use `generate_job` method to generate dictionary representing your job "
-    "and use it with the new operator.",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
-    """
-    Start a PySpark Job on a Cloud DataProc cluster.
-
-    .. seealso::
-        This operator is deprecated, please use
-        :class:`~airflow.providers.google.cloud.operators.dataproc.DataprocSubmitJobOperator`:
-
-    :param main: [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main
-        Python file to use as the driver. Must be a .py file. (templated)
-    :param arguments: Arguments for the job. (templated)
-    :param archives: List of archived files that will be unpacked in the work
-        directory. Should be stored in Cloud Storage.
-    :param files: List of files to be copied to the working directory
-    :param pyfiles: List of Python files to pass to the PySpark framework.
-        Supported file types: .py, .egg, and .zip
-    """
-
-    template_fields: Sequence[str] = (
-        "main",
-        "arguments",
-        "job_name",
-        "cluster_name",
-        "region",
-        "dataproc_jars",
-        "dataproc_properties",
-        "impersonation_chain",
-    )
-    ui_color = "#0273d4"
-    job_type = "pyspark_job"
-
-    @staticmethod
-    def _generate_temp_filename(filename):
-        return f"{time.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4()!s:.8}_{ntpath.basename(filename)}"
-
-    def _upload_file_temp(self, bucket, local_file):
-        """Upload a local file to a Google Cloud Storage bucket."""
-        temp_filename = self._generate_temp_filename(local_file)
-        if not bucket:
-            raise AirflowException(
-                "If you want Airflow to upload the local file to a temporary bucket, set "
-                "the 'temp_bucket' key in the connection string"
-            )
-
-        self.log.info("Uploading %s to %s", local_file, temp_filename)
-
-        GCSHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain).upload(
-            bucket_name=bucket,
-            object_name=temp_filename,
-            mime_type="application/x-python",
-            filename=local_file,
-        )
-        return f"gs://{bucket}/{temp_filename}"
-
-    def __init__(
-        self,
-        *,
-        main: str,
-        arguments: list | None = None,
-        archives: list | None = None,
-        pyfiles: list | None = None,
-        files: list | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        region: str,
-        job_name: str = "{{task.task_id}}_{{ds_nodash}}",
-        cluster_name: str = "cluster-1",
-        dataproc_properties: dict | None = None,
-        dataproc_jars: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(
-            impersonation_chain=impersonation_chain,
-            region=region,
-            job_name=job_name,
-            cluster_name=cluster_name,
-            dataproc_properties=dataproc_properties,
-            dataproc_jars=dataproc_jars,
-            **kwargs,
-        )
-        self.main = main
-        self.arguments = arguments
-        self.archives = archives
-        self.files = files
-        self.pyfiles = pyfiles
-
-    def generate_job(self):
-        """
-        Act as a helper method for easier migration to :class:`DataprocSubmitJobOperator`.
-
-        :return: Dict representing Dataproc job
-        """
-        job_template = self.create_job_template()
-        # Check if the file is local, if that is the case, upload it to a bucket
-        if os.path.isfile(self.main):
-            cluster_info = self.hook.get_cluster(
-                project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
-            )
-            bucket = cluster_info["config"]["config_bucket"]
-            self.main = f"gs://{bucket}/{self.main}"
-        job_template.set_python_main(self.main)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        job_template.add_python_file_uris(self.pyfiles)
-
-        return self._generate_job_template()
-
-    def execute(self, context: Context):
-        job_template = self.create_job_template()
-        # Check if the file is local, if that is the case, upload it to a bucket
-        if os.path.isfile(self.main):
-            cluster_info = self.hook.get_cluster(
-                project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
-            )
-            bucket = cluster_info["config"]["config_bucket"]
-            self.main = self._upload_file_temp(bucket, self.main)
-
-        job_template.set_python_main(self.main)
-        job_template.add_args(self.arguments)
-        job_template.add_archive_uris(self.archives)
-        job_template.add_file_uris(self.files)
-        job_template.add_python_file_uris(self.pyfiles)
-        super().execute(context)
-
-
 class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
     """
     Creates new workflow template.
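
All six operators removed above point their deprecation notices at DataprocSubmitJobOperator, whose job argument accepts the same dictionary the old generate_job() helper produced. A minimal migration sketch for the Pig example from the removed docstring — project, region, cluster, and query contents are hypothetical, and the job dict follows the Dataproc Job resource schema (https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs):

from airflow.providers.google.cloud.operators.dataproc import DataprocSubmitJobOperator

# Rough equivalent of the removed DataprocSubmitPigJobOperator example;
# names below are made up for illustration.
pig_job = {
    "reference": {"project_id": "my-project"},
    "placement": {"cluster_name": "cluster-1"},
    "pig_job": {
        "query_list": {"queries": ["a_pig_query;"]},
        # Replaces the old operator's `variables` argument:
        "script_variables": {"out": "gs://example/output/{{ ds }}"},
    },
}

submit_pig = DataprocSubmitJobOperator(
    task_id="dataproc_pig",
    project_id="my-project",
    region="europe-west1",
    job=pig_job,
)

On a 10.x installation, the same dictionary can be captured once by calling generate_job() on the old operator and inlining the result, which is what the deprecation instructions suggest.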
@@ -20,7 +20,8 @@
 from __future__ import annotations
 
 import time
-from typing import TYPE_CHECKING, Sequence
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
 
 from google.api_core.exceptions import AlreadyExists
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -19,7 +19,8 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Sequence
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.datastore import DatastoreHook
@@ -19,7 +19,8 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Sequence
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
 
 from google.api_core.exceptions import AlreadyExists, InvalidArgument, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
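
The last three hunks apply the same mechanical change that recurs across most of the files listed at the top: Sequence moves from typing to collections.abc, since the typing generic aliases were deprecated by PEP 585 in Python 3.9. The resulting import pattern, shown on a hypothetical snippet:

from collections.abc import Sequence
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Sequence is only needed for annotations, e.g. an impersonation chain:
    impersonation_chain: str | Sequence[str] | None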