apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (257)
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -6
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
  10. airflow/providers/google/cloud/hooks/bigquery.py +176 -293
  11. airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
  12. airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
  13. airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
  14. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
  16. airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
  19. airflow/providers/google/cloud/hooks/compute.py +1 -1
  20. airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
  21. airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
  22. airflow/providers/google/cloud/hooks/dataflow.py +72 -95
  23. airflow/providers/google/cloud/hooks/dataform.py +1 -1
  24. airflow/providers/google/cloud/hooks/datafusion.py +21 -19
  25. airflow/providers/google/cloud/hooks/dataplex.py +2 -2
  26. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  27. airflow/providers/google/cloud/hooks/dataproc.py +73 -72
  28. airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
  29. airflow/providers/google/cloud/hooks/dlp.py +1 -1
  30. airflow/providers/google/cloud/hooks/functions.py +1 -1
  31. airflow/providers/google/cloud/hooks/gcs.py +112 -15
  32. airflow/providers/google/cloud/hooks/gdm.py +1 -1
  33. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  34. airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
  35. airflow/providers/google/cloud/hooks/looker.py +6 -2
  36. airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
  37. airflow/providers/google/cloud/hooks/mlengine.py +4 -3
  38. airflow/providers/google/cloud/hooks/pubsub.py +3 -0
  39. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  40. airflow/providers/google/cloud/hooks/spanner.py +74 -9
  41. airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
  42. airflow/providers/google/cloud/hooks/tasks.py +1 -1
  43. airflow/providers/google/cloud/hooks/translate.py +2 -2
  44. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
  45. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
  46. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
  47. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
  53. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  54. airflow/providers/google/cloud/hooks/vision.py +3 -3
  55. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  56. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  57. airflow/providers/google/cloud/links/base.py +77 -13
  58. airflow/providers/google/cloud/links/bigquery.py +0 -47
  59. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  60. airflow/providers/google/cloud/links/bigtable.py +0 -48
  61. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  62. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  63. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  64. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  65. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  66. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  67. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  68. airflow/providers/google/cloud/links/compute.py +0 -58
  69. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  70. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  71. airflow/providers/google/cloud/links/dataflow.py +0 -34
  72. airflow/providers/google/cloud/links/dataform.py +0 -64
  73. airflow/providers/google/cloud/links/datafusion.py +1 -96
  74. airflow/providers/google/cloud/links/dataplex.py +0 -154
  75. airflow/providers/google/cloud/links/dataprep.py +0 -24
  76. airflow/providers/google/cloud/links/dataproc.py +11 -95
  77. airflow/providers/google/cloud/links/datastore.py +0 -31
  78. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  79. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  80. airflow/providers/google/cloud/links/mlengine.py +0 -70
  81. airflow/providers/google/cloud/links/pubsub.py +0 -32
  82. airflow/providers/google/cloud/links/spanner.py +0 -33
  83. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  84. airflow/providers/google/cloud/links/translate.py +17 -187
  85. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  86. airflow/providers/google/cloud/links/workflows.py +0 -52
  87. airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
  88. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  89. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  90. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  91. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  92. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  93. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  94. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  95. airflow/providers/google/cloud/operators/alloy_db.py +71 -56
  96. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  97. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
  98. airflow/providers/google/cloud/operators/bigtable.py +37 -8
  99. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  100. airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
  101. airflow/providers/google/cloud/operators/cloud_build.py +76 -33
  102. airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
  103. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  104. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  105. airflow/providers/google/cloud/operators/cloud_run.py +24 -6
  106. airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
  107. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
  108. airflow/providers/google/cloud/operators/compute.py +9 -41
  109. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  110. airflow/providers/google/cloud/operators/dataflow.py +40 -16
  111. airflow/providers/google/cloud/operators/dataform.py +15 -5
  112. airflow/providers/google/cloud/operators/datafusion.py +42 -21
  113. airflow/providers/google/cloud/operators/dataplex.py +194 -110
  114. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  115. airflow/providers/google/cloud/operators/dataproc.py +80 -36
  116. airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
  117. airflow/providers/google/cloud/operators/datastore.py +23 -7
  118. airflow/providers/google/cloud/operators/dlp.py +6 -29
  119. airflow/providers/google/cloud/operators/functions.py +17 -8
  120. airflow/providers/google/cloud/operators/gcs.py +12 -9
  121. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  122. airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
  123. airflow/providers/google/cloud/operators/looker.py +2 -2
  124. airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
  125. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  126. airflow/providers/google/cloud/operators/pubsub.py +68 -15
  127. airflow/providers/google/cloud/operators/spanner.py +26 -13
  128. airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
  129. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  130. airflow/providers/google/cloud/operators/tasks.py +1 -12
  131. airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
  132. airflow/providers/google/cloud/operators/translate.py +41 -17
  133. airflow/providers/google/cloud/operators/translate_speech.py +2 -3
  134. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  135. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
  136. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
  137. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  138. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  139. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  140. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  141. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
  142. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
  143. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  144. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
  145. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  146. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  147. airflow/providers/google/cloud/operators/vision.py +2 -2
  148. airflow/providers/google/cloud/operators/workflows.py +18 -15
  149. airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
  150. airflow/providers/google/cloud/sensors/bigquery.py +3 -3
  151. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
  152. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  153. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
  154. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
  155. airflow/providers/google/cloud/sensors/dataflow.py +26 -10
  156. airflow/providers/google/cloud/sensors/dataform.py +2 -3
  157. airflow/providers/google/cloud/sensors/datafusion.py +4 -5
  158. airflow/providers/google/cloud/sensors/dataplex.py +2 -3
  159. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  160. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  161. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
  162. airflow/providers/google/cloud/sensors/gcs.py +4 -5
  163. airflow/providers/google/cloud/sensors/looker.py +2 -3
  164. airflow/providers/google/cloud/sensors/pubsub.py +4 -5
  165. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  166. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
  167. airflow/providers/google/cloud/sensors/workflows.py +2 -3
  168. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  169. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  170. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
  171. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  172. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
  173. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  174. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  175. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  176. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  177. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  178. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
  179. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  180. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
  181. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
  182. airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
  183. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
  184. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  185. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  186. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  187. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  188. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  189. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  190. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  191. airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
  192. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  193. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
  194. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  195. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  196. airflow/providers/google/cloud/triggers/bigquery.py +76 -35
  197. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  198. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  199. airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
  200. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
  201. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  202. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  203. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  204. airflow/providers/google/cloud/triggers/dataproc.py +123 -53
  205. airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
  206. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  207. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  208. airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
  209. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  210. airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
  211. airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
  212. airflow/providers/google/cloud/utils/field_validator.py +2 -3
  213. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  214. airflow/providers/google/common/deprecated.py +2 -1
  215. airflow/providers/google/common/hooks/base_google.py +27 -9
  216. airflow/providers/google/common/hooks/operation_helpers.py +1 -1
  217. airflow/providers/google/common/links/storage.py +0 -22
  218. airflow/providers/google/common/utils/get_secret.py +31 -0
  219. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  220. airflow/providers/google/firebase/hooks/firestore.py +1 -1
  221. airflow/providers/google/firebase/operators/firestore.py +3 -3
  222. airflow/providers/google/get_provider_info.py +56 -52
  223. airflow/providers/google/go_module_utils.py +35 -3
  224. airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
  225. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  226. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
  227. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  228. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  229. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  230. airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
  231. airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
  232. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  233. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  234. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  235. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
  236. airflow/providers/google/suite/hooks/calendar.py +2 -2
  237. airflow/providers/google/suite/hooks/sheets.py +16 -2
  238. airflow/providers/google/suite/operators/sheets.py +8 -3
  239. airflow/providers/google/suite/sensors/drive.py +2 -2
  240. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
  241. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  242. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  243. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  244. airflow/providers/google/version_compat.py +15 -1
  245. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
  246. apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
  247. apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
  248. airflow/providers/google/cloud/hooks/automl.py +0 -673
  249. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  250. airflow/providers/google/cloud/links/automl.py +0 -193
  251. airflow/providers/google/cloud/operators/automl.py +0 -1362
  252. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  253. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  254. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  255. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
  256. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
  257. {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
airflow/providers/google/cloud/log/gcs_task_handler.py

@@ -27,11 +27,11 @@ from typing import TYPE_CHECKING
 
 import attrs
 
-# not sure why but mypy complains on missing `storage` but it is clearly there and is importable
-from google.cloud import storage  # type: ignore[attr-defined]
+# Make mypy happy by importing as aliases
+import google.cloud.storage as storage
 
 from airflow.configuration import conf
-from airflow.exceptions import AirflowNotFoundException
+from airflow.providers.common.compat.sdk import AirflowNotFoundException
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
 from airflow.providers.google.cloud.utils.credentials_provider import (
     get_credentials_and_project_id,
@@ -43,9 +43,11 @@ from airflow.utils.log.file_task_handler import FileTaskHandler
 from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
+    from io import TextIOWrapper
+
     from airflow.models.taskinstance import TaskInstance
     from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
-    from airflow.utils.log.file_task_handler import LogMessages, LogSourceInfo
+    from airflow.utils.log.file_task_handler import LogResponse, RawLogStream, StreamingLogResponse
 
 _DEFAULT_SCOPESS = frozenset(
     [
@@ -61,13 +63,15 @@ class GCSRemoteLogIO(LoggingMixin):  # noqa: D101
     remote_base: str
     base_log_folder: Path = attrs.field(converter=Path)
     delete_local_copy: bool
+    project_id: str | None = None
+
+    gcp_key_path: str | None = None
+    gcp_keyfile_dict: dict | None = None
+    scopes: Collection[str] | None = _DEFAULT_SCOPESS
 
-    gcp_key_path: str | None
-    gcp_keyfile_dict: dict | None
-    scopes: Collection[str] | None
-    project_id: str
+    processors = ()
 
-    def upload(self, path: os.PathLike, ti: RuntimeTI):
+    def upload(self, path: os.PathLike | str, ti: RuntimeTI):
         """Upload the given log path to the remote storage."""
         path = Path(path)
         if path.is_absolute():
@@ -147,11 +151,26 @@ class GCSRemoteLogIO(LoggingMixin):  # noqa: D101
             exc, "resp", {}
         ).get("status") == "404"
 
-    def read(self, relative_path: str, ti: RuntimeTI) -> tuple[LogSourceInfo, LogMessages | None]:
-        messages = []
-        logs = []
+    def read(self, relative_path: str, ti: RuntimeTI) -> LogResponse:
+        messages, log_streams = self.stream(relative_path, ti)
+        if not log_streams:
+            return messages, None
+
+        logs: list[str] = []
+        try:
+            # for each log_stream, exhaust the generator into a string
+            logs = ["".join(line for line in log_stream) for log_stream in log_streams]
+        except Exception as e:
+            if not AIRFLOW_V_3_0_PLUS:
+                messages.append(f"Unable to read remote log {e}")
+
+        return messages, logs
+
+    def stream(self, relative_path: str, ti: RuntimeTI) -> StreamingLogResponse:
+        messages: list[str] = []
+        log_streams: list[RawLogStream] = []
         remote_loc = os.path.join(self.remote_base, relative_path)
-        uris = []
+        uris: list[str] = []
         bucket, prefix = _parse_gcs_url(remote_loc)
         blobs = list(self.client.list_blobs(bucket_or_name=bucket, prefix=prefix))
 
@@ -162,18 +181,29 @@ class GCSRemoteLogIO(LoggingMixin):  # noqa: D101
             else:
                 messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]])
         else:
-            return messages, None
+            return messages, []
 
         try:
             for key in sorted(uris):
                 blob = storage.Blob.from_string(key, self.client)
-                remote_log = blob.download_as_bytes().decode()
-                if remote_log:
-                    logs.append(remote_log)
+                stream = blob.open("r")
+                log_streams.append(self._get_log_stream(stream))
         except Exception as e:
             if not AIRFLOW_V_3_0_PLUS:
                 messages.append(f"Unable to read remote log {e}")
-        return messages, logs
+        return messages, log_streams
+
+    def _get_log_stream(self, stream: TextIOWrapper) -> RawLogStream:
+        """
+        Yield lines from the given stream.
+
+        :param stream: The opened stream to read from.
+        :yield: Lines of the log file.
+        """
+        try:
+            yield from stream
+        finally:
+            stream.close()
 
 
 class GCSTaskHandler(FileTaskHandler, LoggingMixin):
@@ -211,9 +241,15 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
         gcp_keyfile_dict: dict | None = None,
         gcp_scopes: Collection[str] | None = _DEFAULT_SCOPESS,
         project_id: str = PROVIDE_PROJECT_ID,
+        max_bytes: int = 0,
+        backup_count: int = 0,
+        delay: bool = False,
         **kwargs,
-    ):
-        super().__init__(base_log_folder)
+    ) -> None:
+        # support log file size handling of FileTaskHandler
+        super().__init__(
+            base_log_folder=base_log_folder, max_bytes=max_bytes, backup_count=backup_count, delay=delay
+        )
         self.handler: logging.FileHandler | None = None
         self.log_relative_path = ""
         self.closed = False
@@ -265,7 +301,7 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
         # Mark closed so we don't double write if close is called twice
         self.closed = True
 
-    def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[LogSourceInfo, LogMessages]:
+    def _read_remote_logs(self, ti, try_number, metadata=None) -> LogResponse:
         # Explicitly getting log relative path is necessary as the given
         # task instance might be different than task instance passed in
         # in set_context method.
@@ -275,7 +311,7 @@
 
         if logs is None:
             logs = []
-        if not AIRFLOW_V_3_0_PLUS:
+        if not AIRFLOW_V_3_0_PLUS and not messages:
            messages.append(f"No logs found in GCS; ti={ti}")
 
         return messages, logs
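
The gcs_task_handler.py changes above replace eager blob downloads with lazy, generator-based streaming: stream() opens each remote blob and wraps it in a line generator, while read() keeps the old behavior by exhausting those generators into strings. A minimal self-contained sketch of that pattern, using StringIO stand-ins for the blob.open("r") handles (names here are illustrative, not the provider's API):

    from collections.abc import Generator
    from io import StringIO

    def _get_log_stream(stream: StringIO) -> Generator[str, None, None]:
        # Yield lines lazily; close the handle once the consumer is done.
        try:
            yield from stream
        finally:
            stream.close()

    def stream_logs(remote_texts: list[str]) -> list[Generator[str, None, None]]:
        # One generator per remote log file; nothing is read yet.
        return [_get_log_stream(StringIO(text)) for text in remote_texts]

    def read_logs(remote_texts: list[str]) -> list[str]:
        # Eager variant built on the lazy one, mirroring how read() calls stream() above.
        return ["".join(line for line in s) for s in stream_logs(remote_texts)]

    print(read_logs(["line1\nline2\n", "other\n"]))  # ['line1\nline2\n', 'other\n']

The benefit is that a caller tailing a long log can consume it line by line without first pulling the whole blob into memory.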
airflow/providers/google/cloud/log/stackdriver_task_handler.py

@@ -35,17 +35,20 @@ from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
-from airflow.utils.types import NOTSET, ArgNotSet
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
+
+if not AIRFLOW_V_3_0_PLUS:
+    from airflow.utils.log.trigger_handler import ctx_indiv_trigger
 
 if TYPE_CHECKING:
     from google.auth.credentials import Credentials
 
     from airflow.models import TaskInstance
 
-
-if not AIRFLOW_V_3_0_PLUS:
-    from airflow.utils.log.trigger_handler import ctx_indiv_trigger
-
 DEFAULT_LOGGER_NAME = "airflow"
 _GLOBAL_RESOURCE = Resource(type="global", labels={})
 
@@ -159,7 +162,7 @@ class StackdriverTaskHandler(logging.Handler):
         """Object responsible for sending data to Stackdriver."""
         # The Transport object is badly defined (no init) but in the docs client/name as constructor
         # arguments are a requirement for any class that derives from Transport class, hence ignore:
-        return self.transport_type(self._client, self.gcp_log_name)  # type: ignore[call-arg]
+        return self.transport_type(self._client, self.gcp_log_name)
 
     def _get_labels(self, task_instance=None):
         if task_instance:
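
The notable change in stackdriver_task_handler.py is importing NOTSET/ArgNotSet from the Task SDK location first, with a fallback to airflow.utils.types on older installs. NOTSET is Airflow's "argument was not supplied" sentinel; a hedged sketch of the general pattern (a standalone re-implementation for illustration, not the Airflow classes themselves):

    class ArgNotSet:
        """Sentinel type distinguishing "not passed" from an explicit None."""

    NOTSET = ArgNotSet()

    def set_labels(labels: dict | None | ArgNotSet = NOTSET) -> str:
        # None is meaningful (clear the labels); NOTSET means "leave unchanged".
        if isinstance(labels, ArgNotSet):
            return "unchanged"
        if labels is None:
            return "cleared"
        return f"set to {labels}"

    print(set_labels())            # unchanged
    print(set_labels(None))        # cleared
    print(set_labels({"a": "1"}))  # set to {'a': '1'}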
airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json

@@ -0,0 +1,68 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferJobFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name assigned by GCP Storage Transfer Service."
+            },
+            "projectId": {
+              "type": "string",
+              "description": "GCP project ID."
+            },
+            "description": {
+              "type": "string",
+              "description": "Optional description of the transfer job."
+            },
+            "status": {
+              "type": "string",
+              "description": "Status of the transfer job (ENABLED, DISABLED)."
+            },
+            "sourceBucket": {
+              "type": "string",
+              "description": "Source AWS S3 bucket."
+            },
+            "sourcePath": {
+              "type": "string",
+              "description": "Prefix path inside the source bucket."
+            },
+            "targetBucket": {
+              "type": "string",
+              "description": "Target GCS bucket."
+            },
+            "targetPath": {
+              "type": "string",
+              "description": "Prefix path inside the target bucket."
+            },
+            "objectConditions": {
+              "type": "object",
+              "description": "Filtering conditions for objects transferred."
+            },
+            "transferOptions": {
+              "type": "object",
+              "description": "Transfer options such as overwrite or delete."
+            },
+            "schedule": {
+              "type": "object",
+              "description": "Transfer schedule details."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferJob": {
+      "$ref": "#/$defs/CloudStorageTransferJobFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json

@@ -0,0 +1,60 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name associated with this run."
+            },
+            "operationName": {
+              "type": "string",
+              "description": "Transfer operation name if available."
+            },
+            "status": {
+              "type": "string",
+              "description": "Run status if available."
+            },
+            "startTime": {
+              "type": "string",
+              "description": "Start time of the transfer operation."
+            },
+            "endTime": {
+              "type": "string",
+              "description": "End time of the transfer operation."
+            },
+            "wait": {
+              "type": "boolean",
+              "description": "Whether the operator waited for completion."
+            },
+            "timeout": {
+              "type": ["number", "null"],
+              "description": "Timeout in seconds."
+            },
+            "deferrable": {
+              "type": "boolean",
+              "description": "Whether the operator used deferrable mode."
+            },
+            "deleteJobAfterCompletion": {
+              "type": "boolean",
+              "description": "Whether the transfer job was deleted after completion."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferRun": {
+      "$ref": "#/$defs/CloudStorageTransferRunFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json

@@ -0,0 +1,32 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "DataFusionRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "runId": {
+              "type": "string",
+              "description": "Pipeline run ID assigned by Cloud Data Fusion."
+            },
+            "runtimeArgs": {
+              "type": "object",
+              "description": "Runtime arguments provided when starting the pipeline."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "dataFusionRun": {
+      "$ref": "#/$defs/DataFusionRunFacet"
+    }
+  }
+}
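
All three schema files follow the OpenLineage custom-facet convention: each facet definition extends the base JobFacet or RunFacet via allOf and is exposed under a single top-level property. As a rough illustration, a payload can be checked against a facet's own property definitions with the jsonschema package (the remote $ref to the OpenLineage base spec is omitted here to keep the example offline):

    import jsonschema

    # Inline subset of DataFusionRunFacet.json: only the facet's own properties.
    facet_schema = {
        "type": "object",
        "properties": {
            "runId": {"type": "string"},
            "runtimeArgs": {"type": "object"},
        },
    }

    payload = {"runId": "a1b2c3d4", "runtimeArgs": {"env": "prod"}}
    jsonschema.validate(instance=payload, schema=facet_schema)  # raises ValidationError on mismatch
    print("payload conforms to the facet's property schema")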
airflow/providers/google/cloud/openlineage/facets.py

@@ -24,13 +24,17 @@ from attr import define, field
 from airflow.providers.google import __version__ as provider_version
 
 if TYPE_CHECKING:
-    from openlineage.client.generated.base import RunFacet
+    from openlineage.client.generated.base import JobFacet, RunFacet
 
 try:
     try:
         from openlineage.client.generated.base import RunFacet
     except ImportError:  # Old OpenLineage client is used
         from openlineage.client.facet import BaseFacet as RunFacet  # type: ignore[assignment]
+    try:
+        from openlineage.client.generated.base import JobFacet
+    except ImportError:  # Old OpenLineage client is used
+        from openlineage.client.facet import BaseFacet as JobFacet  # type: ignore[assignment]
 
     @define
     class BigQueryJobRunFacet(RunFacet):
@@ -53,6 +57,100 @@ try:
                 f"providers-google/{provider_version}/airflow/providers/google/"
                 "openlineage/BigQueryJobRunFacet.json"
             )
+
+    @define
+    class CloudStorageTransferJobFacet(JobFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job configuration.
+
+        :param jobName: Unique name of the transfer job.
+        :param projectId: GCP project where the transfer job is defined.
+        :param description: User-provided description of the transfer job.
+        :param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
+        :param sourceBucket: Name of the source bucket (e.g. AWS S3).
+        :param sourcePath: Prefix/path inside the source bucket.
+        :param targetBucket: Name of the destination bucket (e.g. GCS).
+        :param targetPath: Prefix/path inside the destination bucket.
+        :param objectConditions: Object selection rules (e.g. include/exclude prefixes).
+        :param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
+            from the source after transfer.
+        :param schedule: Schedule for the transfer job (if recurring).
+        """
+
+        jobName: str | None = field(default=None)
+        projectId: str | None = field(default=None)
+        description: str | None = field(default=None)
+        status: str | None = field(default=None)
+        sourceBucket: str | None = field(default=None)
+        sourcePath: str | None = field(default=None)
+        targetBucket: str | None = field(default=None)
+        targetPath: str | None = field(default=None)
+        objectConditions: dict | None = field(default=None)
+        transferOptions: dict | None = field(default=None)
+        schedule: dict | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferJobFacet.json"
+            )
+
+    @define
+    class CloudStorageTransferRunFacet(RunFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job execution run.
+
+        :param jobName: Name of the transfer job being executed.
+        :param operationName: Name of the specific transfer operation instance.
+        :param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
+        :param startTime: Time when the transfer job execution started (ISO 8601 format).
+        :param endTime: Time when the transfer job execution finished (ISO 8601 format).
+        :param wait: Whether the operator waits for the job to complete before finishing.
+        :param timeout: Timeout (in seconds) for the transfer run to complete.
+        :param deferrable: Whether the operator defers execution until job completion.
+        :param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
+        """
+
+        jobName: str | None = field(default=None)
+        operationName: str | None = field(default=None)
+        status: str | None = field(default=None)
+        startTime: str | None = field(default=None)
+        endTime: str | None = field(default=None)
+        wait: bool = field(default=True)
+        timeout: float | None = field(default=None)
+        deferrable: bool = field(default=False)
+        deleteJobAfterCompletion: bool = field(default=False)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferRunFacet.json"
+            )
+
+    @define
+    class DataFusionRunFacet(RunFacet):
+        """
+        Facet that represents relevant details of a Cloud Data Fusion pipeline run.
+
+        :param runId: The pipeline execution id.
+        :param runtimeArgs: Runtime arguments passed to the pipeline.
+        """
+
+        runId: str | None = field(default=None)
+        runtimeArgs: dict[str, str] | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/DataFusionRunFacet.json"
+            )
+
 except ImportError:  # OpenLineage is not available
 
     def create_no_op(*_, **__) -> None:
@@ -65,3 +163,6 @@ except ImportError:  # OpenLineage is not available
         return None
 
     BigQueryJobRunFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferJobFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferRunFacet = create_no_op  # type: ignore[misc, assignment]
+    DataFusionRunFacet = create_no_op  # type: ignore[misc, assignment]
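
Because every facet class degrades to the create_no_op stub when the OpenLineage client is absent, operator code can construct them unconditionally. A hedged usage sketch (assuming openlineage-python is installed; all values are made up):

    from airflow.providers.google.cloud.openlineage.facets import (
        CloudStorageTransferJobFacet,
        CloudStorageTransferRunFacet,
    )

    # Keys match the top-level property names in the JSON schemas above.
    job_facets = {
        "cloudStorageTransferJob": CloudStorageTransferJobFacet(
            jobName="transferJobs/example-123",
            projectId="example-project",
            sourceBucket="example-s3-bucket",
            targetBucket="example-gcs-bucket",
            status="ENABLED",
        )
    }
    run_facets = {
        "cloudStorageTransferRun": CloudStorageTransferRunFacet(
            jobName="transferJobs/example-123",
            wait=True,
            deleteJobAfterCompletion=False,
        )
    }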
airflow/providers/google/cloud/openlineage/mixins.py

@@ -80,7 +80,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
         from airflow.providers.openlineage.sqlparser import SQLParser
 
         if not self.job_id:
-            self.log.warning("No BigQuery job_id was found by OpenLineage.")  # type: ignore[attr-defined]
+            self.log.warning("No BigQuery job_id was found by OpenLineage.")
             return OperatorLineage()
 
         if not self.hook:
@@ -92,14 +92,16 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
                 impersonation_chain=self.impersonation_chain,
             )
 
-        self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)  # type: ignore[attr-defined]
+        self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)
         inputs, outputs = [], []
         run_facets: dict[str, RunFacet] = {
             "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
         }
-        self._client = self.hook.get_client(project_id=self.hook.project_id, location=self.location)
+        self._client = self.hook.get_client(
+            project_id=self.project_id or self.hook.project_id, location=self.location
+        )
         try:
-            job_properties = self._client.get_job(job_id=self.job_id)._properties  # type: ignore
+            job_properties = self._client.get_job(job_id=self.job_id)._properties
 
             if get_from_nullable_chain(job_properties, ["status", "state"]) != "DONE":
                 raise ValueError(f"Trying to extract data from running bigquery job: `{self.job_id}`")
@@ -107,11 +109,11 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
             run_facets["bigQueryJob"] = self._get_bigquery_job_run_facet(job_properties)
 
             if get_from_nullable_chain(job_properties, ["statistics", "numChildJobs"]):
-                self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")  # type: ignore[attr-defined]
+                self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")
                 # SCRIPT job type has no input / output information but spawns child jobs that have one
                 # https://cloud.google.com/bigquery/docs/information-schema-jobs#multi-statement_query_job
                 for child_job_id in self._client.list_jobs(parent_job=self.job_id):
-                    child_job_properties = self._client.get_job(job_id=child_job_id)._properties  # type: ignore
+                    child_job_properties = self._client.get_job(job_id=child_job_id)._properties
                     child_inputs, child_outputs = self._get_inputs_and_outputs(child_job_properties)
                     inputs.extend(child_inputs)
                     outputs.extend(child_outputs)
@@ -119,7 +121,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
                 inputs, outputs = self._get_inputs_and_outputs(job_properties)
 
         except Exception as e:
-            self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)  # type: ignore[attr-defined]
+            self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)
             exception_msg = traceback.format_exc()
             run_facets.update(
                 {
@@ -173,7 +175,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
             if (
                 single_output.facets
                 and final_outputs[key].facets
-                and "columnLineage" in single_output.facets  # type: ignore
+                and "columnLineage" in single_output.facets
                 and "columnLineage" in final_outputs[key].facets  # type: ignore
             ):
                 single_output.facets["columnLineage"] = merge_column_lineage_facets(
airflow/providers/google/cloud/openlineage/utils.py

@@ -49,7 +49,7 @@ if TYPE_CHECKING:
     from google.cloud.bigquery.table import Table
 
     from airflow.providers.common.compat.openlineage.facet import Dataset
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 log = logging.getLogger(__name__)
@@ -214,7 +214,20 @@ def extract_ds_name_from_gcs_path(path: str) -> str:
 
 def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
     """Get facets from BigQuery table object."""
+    return get_facets_from_bq_table_for_given_fields(table, selected_fields=None)
+
+
+def get_facets_from_bq_table_for_given_fields(
+    table: Table, selected_fields: list[str] | None
+) -> dict[str, DatasetFacet]:
+    """
+    Get facets from BigQuery table object for selected fields only.
+
+    If selected_fields is None, include all fields.
+    """
     facets: dict[str, DatasetFacet] = {}
+    selected_fields_set = set(selected_fields) if selected_fields else None
+
     if table.schema:
         facets["schema"] = SchemaDatasetFacet(
             fields=[
@@ -222,6 +235,7 @@ def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
                     name=schema_field.name, type=schema_field.field_type, description=schema_field.description
                 )
                 for schema_field in table.schema
+                if selected_fields_set is None or schema_field.name in selected_fields_set
             ]
         )
     if table.description:
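
The new get_facets_from_bq_table_for_given_fields helper lets callers limit the schema facet to the columns a job actually read, with get_facets_from_bq_table delegating to it with selected_fields=None. A hedged sketch of the call shape (project, dataset, and column names are placeholders):

    from google.cloud import bigquery

    from airflow.providers.google.cloud.openlineage.utils import (
        get_facets_from_bq_table_for_given_fields,
    )

    client = bigquery.Client()
    table = client.get_table("example-project.example_dataset.example_table")

    # Schema facet restricted to the two columns actually selected.
    facets = get_facets_from_bq_table_for_given_fields(table, selected_fields=["id", "name"])

    # selected_fields=None keeps the old behavior and includes every column.
    all_facets = get_facets_from_bq_table_for_given_fields(table, selected_fields=None)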