apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -25,20 +25,27 @@ from functools import cached_property
25
25
  from pathlib import Path
26
26
  from typing import TYPE_CHECKING
27
27
 
28
+ import attrs
29
+
30
+ # Make mypy happy by importing as aliases
31
+ import google.cloud.storage as storage
32
+
28
33
  from airflow.configuration import conf
29
34
  from airflow.exceptions import AirflowNotFoundException
30
35
  from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
31
- from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
36
+ from airflow.providers.google.cloud.utils.credentials_provider import (
37
+ get_credentials_and_project_id,
38
+ )
32
39
  from airflow.providers.google.common.consts import CLIENT_INFO
33
40
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
41
+ from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
34
42
  from airflow.utils.log.file_task_handler import FileTaskHandler
35
43
  from airflow.utils.log.logging_mixin import LoggingMixin
36
44
 
37
- # not sure why but mypy complains on missing `storage` but it is clearly there and is importable
38
- from google.cloud import storage # type: ignore[attr-defined]
39
-
40
45
  if TYPE_CHECKING:
41
46
  from airflow.models.taskinstance import TaskInstance
47
+ from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
48
+ from airflow.utils.log.file_task_handler import LogMessages, LogSourceInfo
42
49
 
43
50
  _DEFAULT_SCOPESS = frozenset(
44
51
  [
@@ -49,6 +56,128 @@ _DEFAULT_SCOPESS = frozenset(
49
56
  logger = logging.getLogger(__name__)
50
57
 
51
58
 
59
+ @attrs.define
60
+ class GCSRemoteLogIO(LoggingMixin): # noqa: D101
61
+ remote_base: str
62
+ base_log_folder: Path = attrs.field(converter=Path)
63
+ delete_local_copy: bool
64
+ project_id: str | None = None
65
+
66
+ gcp_key_path: str | None = None
67
+ gcp_keyfile_dict: dict | None = None
68
+ scopes: Collection[str] | None = _DEFAULT_SCOPESS
69
+
70
+ processors = ()
71
+
72
+ def upload(self, path: os.PathLike | str, ti: RuntimeTI):
73
+ """Upload the given log path to the remote storage."""
74
+ path = Path(path)
75
+ if path.is_absolute():
76
+ local_loc = path
77
+ remote_loc = os.path.join(self.remote_base, path.relative_to(self.base_log_folder))
78
+ else:
79
+ local_loc = self.base_log_folder.joinpath(path)
80
+ remote_loc = os.path.join(self.remote_base, path)
81
+
82
+ if local_loc.is_file():
83
+ # read log and remove old logs to get just the latest additions
84
+ log = local_loc.read_text()
85
+ has_uploaded = self.write(log, remote_loc)
86
+ if has_uploaded and self.delete_local_copy:
87
+ shutil.rmtree(os.path.dirname(local_loc))
88
+
89
+ @cached_property
90
+ def hook(self) -> GCSHook | None:
91
+ """Returns GCSHook if remote_log_conn_id configured."""
92
+ conn_id = conf.get("logging", "remote_log_conn_id", fallback=None)
93
+ if conn_id:
94
+ try:
95
+ return GCSHook(gcp_conn_id=conn_id)
96
+ except AirflowNotFoundException:
97
+ pass
98
+ return None
99
+
100
+ @cached_property
101
+ def client(self) -> storage.Client:
102
+ """Returns GCS Client."""
103
+ if self.hook:
104
+ credentials, project_id = self.hook.get_credentials_and_project_id()
105
+ else:
106
+ credentials, project_id = get_credentials_and_project_id(
107
+ key_path=self.gcp_key_path,
108
+ keyfile_dict=self.gcp_keyfile_dict,
109
+ scopes=self.scopes,
110
+ disable_logging=True,
111
+ )
112
+ return storage.Client(
113
+ credentials=credentials,
114
+ client_info=CLIENT_INFO,
115
+ project=self.project_id if self.project_id else project_id,
116
+ )
117
+
118
+ def write(self, log: str, remote_log_location: str) -> bool:
119
+ """
120
+ Write the log to the remote location and return `True`; fail silently and return `False` on error.
121
+
122
+ :param log: the log to write to the remote_log_location
123
+ :param remote_log_location: the log's location in remote storage
124
+ :return: whether the log is successfully written to remote location or not.
125
+ """
126
+ try:
127
+ blob = storage.Blob.from_string(remote_log_location, self.client)
128
+ old_log = blob.download_as_bytes().decode()
129
+ log = f"{old_log}\n{log}" if old_log else log
130
+ except Exception as e:
131
+ if not self.no_log_found(e):
132
+ self.log.warning("Error checking for previous log: %s", e)
133
+ try:
134
+ blob = storage.Blob.from_string(remote_log_location, self.client)
135
+ blob.upload_from_string(log, content_type="text/plain")
136
+ except Exception as e:
137
+ self.log.error("Could not write logs to %s: %s", remote_log_location, e)
138
+ return False
139
+ return True
140
+
141
+ @staticmethod
142
+ def no_log_found(exc):
143
+ """
144
+ Given exception, determine whether it is result of log not found.
145
+
146
+ :meta private:
147
+ """
148
+ return (exc.args and isinstance(exc.args[0], str) and "No such object" in exc.args[0]) or getattr(
149
+ exc, "resp", {}
150
+ ).get("status") == "404"
151
+
152
+ def read(self, relative_path: str, ti: RuntimeTI) -> tuple[LogSourceInfo, LogMessages | None]:
153
+ messages = []
154
+ logs = []
155
+ remote_loc = os.path.join(self.remote_base, relative_path)
156
+ uris = []
157
+ bucket, prefix = _parse_gcs_url(remote_loc)
158
+ blobs = list(self.client.list_blobs(bucket_or_name=bucket, prefix=prefix))
159
+
160
+ if blobs:
161
+ uris = [f"gs://{bucket}/{b.name}" for b in blobs]
162
+ if AIRFLOW_V_3_0_PLUS:
163
+ messages = uris
164
+ else:
165
+ messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]])
166
+ else:
167
+ return messages, None
168
+
169
+ try:
170
+ for key in sorted(uris):
171
+ blob = storage.Blob.from_string(key, self.client)
172
+ remote_log = blob.download_as_bytes().decode()
173
+ if remote_log:
174
+ logs.append(remote_log)
175
+ except Exception as e:
176
+ if not AIRFLOW_V_3_0_PLUS:
177
+ messages.append(f"Unable to read remote log {e}")
178
+ return messages, logs
179
+
180
+
52
181
  class GCSTaskHandler(FileTaskHandler, LoggingMixin):
53
182
  """
54
183
  GCSTaskHandler is a python log handler that handles and reads task instance logs.
@@ -84,49 +213,29 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
84
213
  gcp_keyfile_dict: dict | None = None,
85
214
  gcp_scopes: Collection[str] | None = _DEFAULT_SCOPESS,
86
215
  project_id: str = PROVIDE_PROJECT_ID,
216
+ max_bytes: int = 0,
217
+ backup_count: int = 0,
218
+ delay: bool = False,
87
219
  **kwargs,
88
- ):
89
- super().__init__(base_log_folder)
220
+ ) -> None:
221
+ # support log file size handling of FileTaskHandler
222
+ super().__init__(
223
+ base_log_folder=base_log_folder, max_bytes=max_bytes, backup_count=backup_count, delay=delay
224
+ )
90
225
  self.handler: logging.FileHandler | None = None
91
- self.remote_base = gcs_log_folder
92
226
  self.log_relative_path = ""
93
227
  self.closed = False
94
228
  self.upload_on_close = True
95
- self.gcp_key_path = gcp_key_path
96
- self.gcp_keyfile_dict = gcp_keyfile_dict
97
- self.scopes = gcp_scopes
98
- self.project_id = project_id
99
- self.delete_local_copy = kwargs.get(
100
- "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
101
- )
102
-
103
- @cached_property
104
- def hook(self) -> GCSHook | None:
105
- """Returns GCSHook if remote_log_conn_id configured."""
106
- conn_id = conf.get("logging", "remote_log_conn_id", fallback=None)
107
- if conn_id:
108
- try:
109
- return GCSHook(gcp_conn_id=conn_id)
110
- except AirflowNotFoundException:
111
- pass
112
- return None
113
-
114
- @cached_property
115
- def client(self) -> storage.Client:
116
- """Returns GCS Client."""
117
- if self.hook:
118
- credentials, project_id = self.hook.get_credentials_and_project_id()
119
- else:
120
- credentials, project_id = get_credentials_and_project_id(
121
- key_path=self.gcp_key_path,
122
- keyfile_dict=self.gcp_keyfile_dict,
123
- scopes=self.scopes,
124
- disable_logging=True,
125
- )
126
- return storage.Client(
127
- credentials=credentials,
128
- client_info=CLIENT_INFO,
129
- project=self.project_id if self.project_id else project_id,
229
+ self.io = GCSRemoteLogIO(
230
+ base_log_folder=base_log_folder,
231
+ remote_base=gcs_log_folder,
232
+ delete_local_copy=kwargs.get(
233
+ "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
234
+ ),
235
+ gcp_key_path=gcp_key_path,
236
+ gcp_keyfile_dict=gcp_keyfile_dict,
237
+ scopes=gcp_scopes,
238
+ project_id=project_id,
130
239
  )
131
240
 
132
241
  def set_context(self, ti: TaskInstance, *, identifier: str | None = None) -> None:
@@ -137,6 +246,8 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
137
246
  if TYPE_CHECKING:
138
247
  assert self.handler is not None
139
248
 
249
+ self.ti = ti
250
+
140
251
  full_path = self.handler.baseFilename
141
252
  self.log_relative_path = Path(full_path).relative_to(self.local_base).as_posix()
142
253
  is_trigger_log_context = getattr(ti, "is_trigger_log_context", False)
@@ -156,86 +267,23 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
156
267
  if not self.upload_on_close:
157
268
  return
158
269
 
159
- local_loc = os.path.join(self.local_base, self.log_relative_path)
160
- remote_loc = os.path.join(self.remote_base, self.log_relative_path)
161
- if os.path.exists(local_loc):
162
- # read log and remove old logs to get just the latest additions
163
- with open(local_loc) as logfile:
164
- log = logfile.read()
165
- gcs_write = self.gcs_write(log, remote_loc)
166
- if gcs_write and self.delete_local_copy:
167
- shutil.rmtree(os.path.dirname(local_loc))
270
+ if hasattr(self, "ti"):
271
+ self.io.upload(self.log_relative_path, self.ti)
168
272
 
169
273
  # Mark closed so we don't double write if close is called twice
170
274
  self.closed = True
171
275
 
172
- def _add_message(self, msg):
173
- filename, lineno, func, stackinfo = logger.findCaller()
174
- record = logging.LogRecord("", logging.INFO, filename, lineno, msg + "\n", None, None, func=func)
175
- return self.format(record)
176
-
177
- def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[list[str], list[str]]:
178
- # Explicitly getting log relative path is necessary because this method
179
- # is called from webserver from TaskLogReader, where we don't call set_context
180
- # and can read logs for different TIs in each request
181
- messages = []
182
- logs = []
183
- worker_log_relative_path = self._render_filename(ti, try_number)
184
- remote_loc = os.path.join(self.remote_base, worker_log_relative_path)
185
- uris = []
186
- bucket, prefix = _parse_gcs_url(remote_loc)
187
- blobs = list(self.client.list_blobs(bucket_or_name=bucket, prefix=prefix))
188
-
189
- if blobs:
190
- uris = [f"gs://{bucket}/{b.name}" for b in blobs]
191
- messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]])
192
- else:
193
- messages.append(f"No logs found in GCS; ti=%s {ti}")
194
- try:
195
- for key in sorted(uris):
196
- blob = storage.Blob.from_string(key, self.client)
197
- remote_log = blob.download_as_bytes().decode()
198
- if remote_log:
199
- logs.append(remote_log)
200
- except Exception as e:
201
- messages.append(f"Unable to read remote log {e}")
202
- return messages, logs
276
+ def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[LogSourceInfo, LogMessages]:
277
+ # Explicitly getting log relative path is necessary as the given
278
+ # task instance might be different than task instance passed in
279
+ # in set_context method.
280
+ worker_log_rel_path = self._render_filename(ti, try_number)
203
281
 
204
- def gcs_write(self, log, remote_log_location) -> bool:
205
- """
206
- Write the log to the remote location and return `True`; fail silently and return `False` on error.
282
+ messages, logs = self.io.read(worker_log_rel_path, ti)
207
283
 
208
- :param log: the log to write to the remote_log_location
209
- :param remote_log_location: the log's location in remote storage
210
- :return: whether the log is successfully written to remote location or not.
211
- """
212
- try:
213
- blob = storage.Blob.from_string(remote_log_location, self.client)
214
- old_log = blob.download_as_bytes().decode()
215
- log = f"{old_log}\n{log}" if old_log else log
216
- except Exception as e:
217
- if not self.no_log_found(e):
218
- log += self._add_message(
219
- f"Error checking for previous log; if exists, may be overwritten: {e}"
220
- )
221
- self.log.warning("Error checking for previous log: %s", e)
222
- try:
223
- blob = storage.Blob.from_string(remote_log_location, self.client)
224
- blob.upload_from_string(log, content_type="text/plain")
225
- except Exception as e:
226
- self.log.error("Could not write logs to %s: %s", remote_log_location, e)
227
- return False
228
- return True
229
-
230
- @staticmethod
231
- def no_log_found(exc):
232
- """
233
- Given exception, determine whether it is result of log not found.
284
+ if logs is None:
285
+ logs = []
286
+ if not AIRFLOW_V_3_0_PLUS:
287
+ messages.append(f"No logs found in GCS; ti={ti}")
234
288
 
235
- :meta private:
236
- """
237
- if (exc.args and isinstance(exc.args[0], str) and "No such object" in exc.args[0]) or getattr(
238
- exc, "resp", {}
239
- ).get("status") == "404":
240
- return True
241
- return False
289
+ return messages, logs
@@ -25,25 +25,30 @@ from functools import cached_property
25
25
  from typing import TYPE_CHECKING
26
26
  from urllib.parse import urlencode
27
27
 
28
- from airflow.exceptions import AirflowProviderDeprecationWarning
29
- from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
30
- from airflow.providers.google.common.consts import CLIENT_INFO
31
- from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
32
- from airflow.utils.types import NOTSET, ArgNotSet
33
28
  from google.cloud import logging as gcp_logging
34
29
  from google.cloud.logging import Resource
35
30
  from google.cloud.logging.handlers.transports import BackgroundThreadTransport, Transport
36
31
  from google.cloud.logging_v2.services.logging_service_v2 import LoggingServiceV2Client
37
32
  from google.cloud.logging_v2.types import ListLogEntriesRequest, ListLogEntriesResponse
38
33
 
39
- if TYPE_CHECKING:
40
- from airflow.models import TaskInstance
41
- from google.auth.credentials import Credentials
34
+ from airflow.exceptions import AirflowProviderDeprecationWarning
35
+ from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
36
+ from airflow.providers.google.common.consts import CLIENT_INFO
37
+ from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
42
38
 
39
+ try:
40
+ from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
41
+ except ImportError:
42
+ from airflow.utils.types import NOTSET, ArgNotSet # type: ignore[attr-defined,no-redef]
43
43
 
44
44
  if not AIRFLOW_V_3_0_PLUS:
45
45
  from airflow.utils.log.trigger_handler import ctx_indiv_trigger
46
46
 
47
+ if TYPE_CHECKING:
48
+ from google.auth.credentials import Credentials
49
+
50
+ from airflow.models import TaskInstance
51
+
47
52
  DEFAULT_LOGGER_NAME = "airflow"
48
53
  _GLOBAL_RESOURCE = Resource(type="global", labels={})
49
54
 
@@ -157,7 +162,7 @@ class StackdriverTaskHandler(logging.Handler):
157
162
  """Object responsible for sending data to Stackdriver."""
158
163
  # The Transport object is badly defined (no init) but in the docs client/name as constructor
159
164
  # arguments are a requirement for any class that derives from Transport class, hence ignore:
160
- return self.transport_type(self._client, self.gcp_log_name) # type: ignore[call-arg]
165
+ return self.transport_type(self._client, self.gcp_log_name)
161
166
 
162
167
  def _get_labels(self, task_instance=None):
163
168
  if task_instance:
@@ -0,0 +1,68 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$defs": {
4
+ "CloudStorageTransferJobFacet": {
5
+ "allOf": [
6
+ {
7
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
8
+ },
9
+ {
10
+ "type": "object",
11
+ "properties": {
12
+ "jobName": {
13
+ "type": "string",
14
+ "description": "Transfer job name assigned by GCP Storage Transfer Service."
15
+ },
16
+ "projectId": {
17
+ "type": "string",
18
+ "description": "GCP project ID."
19
+ },
20
+ "description": {
21
+ "type": "string",
22
+ "description": "Optional description of the transfer job."
23
+ },
24
+ "status": {
25
+ "type": "string",
26
+ "description": "Status of the transfer job (ENABLED, DISABLED)."
27
+ },
28
+ "sourceBucket": {
29
+ "type": "string",
30
+ "description": "Source AWS S3 bucket."
31
+ },
32
+ "sourcePath": {
33
+ "type": "string",
34
+ "description": "Prefix path inside the source bucket."
35
+ },
36
+ "targetBucket": {
37
+ "type": "string",
38
+ "description": "Target GCS bucket."
39
+ },
40
+ "targetPath": {
41
+ "type": "string",
42
+ "description": "Prefix path inside the target bucket."
43
+ },
44
+ "objectConditions": {
45
+ "type": "object",
46
+ "description": "Filtering conditions for objects transferred."
47
+ },
48
+ "transferOptions": {
49
+ "type": "object",
50
+ "description": "Transfer options such as overwrite or delete."
51
+ },
52
+ "schedule": {
53
+ "type": "object",
54
+ "description": "Transfer schedule details."
55
+ }
56
+ }
57
+ }
58
+ ],
59
+ "type": "object"
60
+ }
61
+ },
62
+ "type": "object",
63
+ "properties": {
64
+ "cloudStorageTransferJob": {
65
+ "$ref": "#/$defs/CloudStorageTransferJobFacet"
66
+ }
67
+ }
68
+ }
@@ -0,0 +1,60 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$defs": {
4
+ "CloudStorageTransferRunFacet": {
5
+ "allOf": [
6
+ {
7
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
8
+ },
9
+ {
10
+ "type": "object",
11
+ "properties": {
12
+ "jobName": {
13
+ "type": "string",
14
+ "description": "Transfer job name associated with this run."
15
+ },
16
+ "operationName": {
17
+ "type": "string",
18
+ "description": "Transfer operation name if available."
19
+ },
20
+ "status": {
21
+ "type": "string",
22
+ "description": "Run status if available."
23
+ },
24
+ "startTime": {
25
+ "type": "string",
26
+ "description": "Start time of the transfer operation."
27
+ },
28
+ "endTime": {
29
+ "type": "string",
30
+ "description": "End time of the transfer operation."
31
+ },
32
+ "wait": {
33
+ "type": "boolean",
34
+ "description": "Whether the operator waited for completion."
35
+ },
36
+ "timeout": {
37
+ "type": ["number", "null"],
38
+ "description": "Timeout in seconds."
39
+ },
40
+ "deferrable": {
41
+ "type": "boolean",
42
+ "description": "Whether the operator used deferrable mode."
43
+ },
44
+ "deleteJobAfterCompletion": {
45
+ "type": "boolean",
46
+ "description": "Whether the transfer job was deleted after completion."
47
+ }
48
+ }
49
+ }
50
+ ],
51
+ "type": "object"
52
+ }
53
+ },
54
+ "type": "object",
55
+ "properties": {
56
+ "cloudStorageTransferRun": {
57
+ "$ref": "#/$defs/CloudStorageTransferRunFacet"
58
+ }
59
+ }
60
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$defs": {
4
+ "DataFusionRunFacet": {
5
+ "allOf": [
6
+ {
7
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
8
+ },
9
+ {
10
+ "type": "object",
11
+ "properties": {
12
+ "runId": {
13
+ "type": "string",
14
+ "description": "Pipeline run ID assigned by Cloud Data Fusion."
15
+ },
16
+ "runtimeArgs": {
17
+ "type": "object",
18
+ "description": "Runtime arguments provided when starting the pipeline."
19
+ }
20
+ }
21
+ }
22
+ ],
23
+ "type": "object"
24
+ }
25
+ },
26
+ "type": "object",
27
+ "properties": {
28
+ "dataFusionRun": {
29
+ "$ref": "#/$defs/DataFusionRunFacet"
30
+ }
31
+ }
32
+ }