apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -24,44 +24,145 @@ from attr import define, field
24
24
  from airflow.providers.google import __version__ as provider_version
25
25
 
26
26
  if TYPE_CHECKING:
27
- from openlineage.client.generated.base import RunFacet
28
- else:
27
+ from openlineage.client.generated.base import JobFacet, RunFacet
28
+
29
+ try:
30
+ try:
31
+ from openlineage.client.generated.base import RunFacet
32
+ except ImportError: # Old OpenLineage client is used
33
+ from openlineage.client.facet import BaseFacet as RunFacet # type: ignore[assignment]
29
34
  try:
30
- try:
31
- from openlineage.client.generated.base import RunFacet
32
- except ImportError: # Old OpenLineage client is used
33
- from openlineage.client.facet import BaseFacet as RunFacet
34
-
35
- @define
36
- class BigQueryJobRunFacet(RunFacet):
37
- """
38
- Facet that represents relevant statistics of bigquery run.
39
-
40
- :param cached: BigQuery caches query results. Rest of the statistics will not be provided for cached queries.
41
- :param billedBytes: How many bytes BigQuery bills for.
42
- :param properties: Full property tree of BigQUery run.
43
- """
44
-
45
- cached: bool
46
- billedBytes: int | None = field(default=None)
47
- properties: str | None = field(default=None)
48
-
49
- @staticmethod
50
- def _get_schema() -> str:
51
- return (
52
- "https://raw.githubusercontent.com/apache/airflow/"
53
- f"providers-google/{provider_version}/airflow/providers/google/"
54
- "openlineage/BigQueryJobRunFacet.json"
55
- )
56
- except ImportError: # OpenLineage is not available
57
-
58
- def create_no_op(*_, **__) -> None:
59
- """
60
- Create a no-op placeholder.
61
-
62
- This function creates and returns a None value, used as a placeholder when the OpenLineage client
63
- library is available. It represents an action that has no effect.
64
- """
65
- return None
66
-
67
- BigQueryJobRunFacet = create_no_op
35
+ from openlineage.client.generated.base import JobFacet
36
+ except ImportError: # Old OpenLineage client is used
37
+ from openlineage.client.facet import BaseFacet as JobFacet # type: ignore[assignment]
38
+
39
+ @define
40
+ class BigQueryJobRunFacet(RunFacet):
41
+ """
42
+ Facet that represents relevant statistics of bigquery run.
43
+
44
+ :param cached: BigQuery caches query results. Rest of the statistics will not be provided for cached queries.
45
+ :param billedBytes: How many bytes BigQuery bills for.
46
+ :param properties: Full property tree of BigQUery run.
47
+ """
48
+
49
+ cached: bool
50
+ billedBytes: int | None = field(default=None)
51
+ properties: str | None = field(default=None)
52
+
53
+ @staticmethod
54
+ def _get_schema() -> str:
55
+ return (
56
+ "https://raw.githubusercontent.com/apache/airflow/"
57
+ f"providers-google/{provider_version}/airflow/providers/google/"
58
+ "openlineage/BigQueryJobRunFacet.json"
59
+ )
60
+
61
+ @define
62
+ class CloudStorageTransferJobFacet(JobFacet):
63
+ """
64
+ Facet representing a Cloud Storage Transfer Service job configuration.
65
+
66
+ :param jobName: Unique name of the transfer job.
67
+ :param projectId: GCP project where the transfer job is defined.
68
+ :param description: User-provided description of the transfer job.
69
+ :param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
70
+ :param sourceBucket: Name of the source bucket (e.g. AWS S3).
71
+ :param sourcePath: Prefix/path inside the source bucket.
72
+ :param targetBucket: Name of the destination bucket (e.g. GCS).
73
+ :param targetPath: Prefix/path inside the destination bucket.
74
+ :param objectConditions: Object selection rules (e.g. include/exclude prefixes).
75
+ :param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
76
+ from the source after transfer.
77
+ :param schedule: Schedule for the transfer job (if recurring).
78
+ """
79
+
80
+ jobName: str | None = field(default=None)
81
+ projectId: str | None = field(default=None)
82
+ description: str | None = field(default=None)
83
+ status: str | None = field(default=None)
84
+ sourceBucket: str | None = field(default=None)
85
+ sourcePath: str | None = field(default=None)
86
+ targetBucket: str | None = field(default=None)
87
+ targetPath: str | None = field(default=None)
88
+ objectConditions: dict | None = field(default=None)
89
+ transferOptions: dict | None = field(default=None)
90
+ schedule: dict | None = field(default=None)
91
+
92
+ @staticmethod
93
+ def _get_schema() -> str:
94
+ return (
95
+ "https://raw.githubusercontent.com/apache/airflow/"
96
+ f"providers-google/{provider_version}/airflow/providers/google/"
97
+ "openlineage/CloudStorageTransferJobFacet.json"
98
+ )
99
+
100
+ @define
101
+ class CloudStorageTransferRunFacet(RunFacet):
102
+ """
103
+ Facet representing a Cloud Storage Transfer Service job execution run.
104
+
105
+ :param jobName: Name of the transfer job being executed.
106
+ :param operationName: Name of the specific transfer operation instance.
107
+ :param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
108
+ :param startTime: Time when the transfer job execution started (ISO 8601 format).
109
+ :param endTime: Time when the transfer job execution finished (ISO 8601 format).
110
+ :param wait: Whether the operator waits for the job to complete before finishing.
111
+ :param timeout: Timeout (in seconds) for the transfer run to complete.
112
+ :param deferrable: Whether the operator defers execution until job completion.
113
+ :param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
114
+ """
115
+
116
+ jobName: str | None = field(default=None)
117
+ operationName: str | None = field(default=None)
118
+ status: str | None = field(default=None)
119
+ startTime: str | None = field(default=None)
120
+ endTime: str | None = field(default=None)
121
+ wait: bool = field(default=True)
122
+ timeout: float | None = field(default=None)
123
+ deferrable: bool = field(default=False)
124
+ deleteJobAfterCompletion: bool = field(default=False)
125
+
126
+ @staticmethod
127
+ def _get_schema() -> str:
128
+ return (
129
+ "https://raw.githubusercontent.com/apache/airflow/"
130
+ f"providers-google/{provider_version}/airflow/providers/google/"
131
+ "openlineage/CloudStorageTransferRunFacet.json"
132
+ )
133
+
134
+ @define
135
+ class DataFusionRunFacet(RunFacet):
136
+ """
137
+ Facet that represents relevant details of a Cloud Data Fusion pipeline run.
138
+
139
+ :param runId: The pipeline execution id.
140
+ :param runtimeArgs: Runtime arguments passed to the pipeline.
141
+ """
142
+
143
+ runId: str | None = field(default=None)
144
+ runtimeArgs: dict[str, str] | None = field(default=None)
145
+
146
+ @staticmethod
147
+ def _get_schema() -> str:
148
+ return (
149
+ "https://raw.githubusercontent.com/apache/airflow/"
150
+ f"providers-google/{provider_version}/airflow/providers/google/"
151
+ "openlineage/DataFusionRunFacet.json"
152
+ )
153
+
154
+ except ImportError: # OpenLineage is not available
155
+
156
+ def create_no_op(*_, **__) -> None:
157
+ """
158
+ Create a no-op placeholder.
159
+
160
+ This function creates and returns a None value, used as a placeholder when the OpenLineage client
161
+ library is available. It represents an action that has no effect.
162
+ """
163
+ return None
164
+
165
+ BigQueryJobRunFacet = create_no_op # type: ignore[misc, assignment]
166
+ CloudStorageTransferJobFacet = create_no_op # type: ignore[misc, assignment]
167
+ CloudStorageTransferRunFacet = create_no_op # type: ignore[misc, assignment]
168
+ DataFusionRunFacet = create_no_op # type: ignore[misc, assignment]
@@ -80,7 +80,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
80
80
  from airflow.providers.openlineage.sqlparser import SQLParser
81
81
 
82
82
  if not self.job_id:
83
- self.log.warning("No BigQuery job_id was found by OpenLineage.") # type: ignore[attr-defined]
83
+ self.log.warning("No BigQuery job_id was found by OpenLineage.")
84
84
  return OperatorLineage()
85
85
 
86
86
  if not self.hook:
@@ -92,14 +92,16 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
92
92
  impersonation_chain=self.impersonation_chain,
93
93
  )
94
94
 
95
- self.log.debug("Extracting data from bigquery job: `%s`", self.job_id) # type: ignore[attr-defined]
95
+ self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)
96
96
  inputs, outputs = [], []
97
97
  run_facets: dict[str, RunFacet] = {
98
98
  "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
99
99
  }
100
- self._client = self.hook.get_client(project_id=self.hook.project_id, location=self.location)
100
+ self._client = self.hook.get_client(
101
+ project_id=self.project_id or self.hook.project_id, location=self.location
102
+ )
101
103
  try:
102
- job_properties = self._client.get_job(job_id=self.job_id)._properties # type: ignore
104
+ job_properties = self._client.get_job(job_id=self.job_id)._properties
103
105
 
104
106
  if get_from_nullable_chain(job_properties, ["status", "state"]) != "DONE":
105
107
  raise ValueError(f"Trying to extract data from running bigquery job: `{self.job_id}`")
@@ -107,11 +109,11 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
107
109
  run_facets["bigQueryJob"] = self._get_bigquery_job_run_facet(job_properties)
108
110
 
109
111
  if get_from_nullable_chain(job_properties, ["statistics", "numChildJobs"]):
110
- self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.") # type: ignore[attr-defined]
112
+ self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")
111
113
  # SCRIPT job type has no input / output information but spawns child jobs that have one
112
114
  # https://cloud.google.com/bigquery/docs/information-schema-jobs#multi-statement_query_job
113
115
  for child_job_id in self._client.list_jobs(parent_job=self.job_id):
114
- child_job_properties = self._client.get_job(job_id=child_job_id)._properties # type: ignore
116
+ child_job_properties = self._client.get_job(job_id=child_job_id)._properties
115
117
  child_inputs, child_outputs = self._get_inputs_and_outputs(child_job_properties)
116
118
  inputs.extend(child_inputs)
117
119
  outputs.extend(child_outputs)
@@ -119,7 +121,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
119
121
  inputs, outputs = self._get_inputs_and_outputs(job_properties)
120
122
 
121
123
  except Exception as e:
122
- self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True) # type: ignore[attr-defined]
124
+ self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)
123
125
  exception_msg = traceback.format_exc()
124
126
  run_facets.update(
125
127
  {
@@ -173,7 +175,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
173
175
  if (
174
176
  single_output.facets
175
177
  and final_outputs[key].facets
176
- and "columnLineage" in single_output.facets # type: ignore
178
+ and "columnLineage" in single_output.facets
177
179
  and "columnLineage" in final_outputs[key].facets # type: ignore
178
180
  ):
179
181
  single_output.facets["columnLineage"] = merge_column_lineage_facets(
@@ -188,10 +190,10 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
188
190
  return list(final_outputs.values())
189
191
 
190
192
  def _get_input_dataset(self, table: dict) -> InputDataset:
191
- return cast(InputDataset, self._get_dataset(table, "input"))
193
+ return cast("InputDataset", self._get_dataset(table, "input"))
192
194
 
193
195
  def _get_output_dataset(self, table: dict) -> OutputDataset:
194
- return cast(OutputDataset, self._get_dataset(table, "output"))
196
+ return cast("OutputDataset", self._get_dataset(table, "output"))
195
197
 
196
198
  def _get_dataset(self, table: dict, dataset_type: str) -> Dataset:
197
199
  project = table.get("projectId")
@@ -207,15 +209,14 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
207
209
  name=dataset_name,
208
210
  facets=dataset_facets,
209
211
  )
210
- elif dataset_type == "output":
212
+ if dataset_type == "output":
211
213
  # Logic specific to creating OutputDataset (if needed)
212
214
  return OutputDataset(
213
215
  namespace=BIGQUERY_NAMESPACE,
214
216
  name=dataset_name,
215
217
  facets=dataset_facets,
216
218
  )
217
- else:
218
- raise ValueError("Invalid dataset_type. Must be 'input' or 'output'")
219
+ raise ValueError("Invalid dataset_type. Must be 'input' or 'output'")
219
220
 
220
221
  def _get_table_facets_safely(self, table_name: str) -> dict[str, DatasetFacet]:
221
222
  try:
@@ -26,6 +26,8 @@ from collections import defaultdict
26
26
  from collections.abc import Iterable
27
27
  from typing import TYPE_CHECKING, Any
28
28
 
29
+ from google.cloud.dataproc_v1 import Batch, RuntimeConfig
30
+
29
31
  from airflow.providers.common.compat.openlineage.facet import (
30
32
  ColumnLineageDatasetFacet,
31
33
  DatasetFacet,
@@ -42,13 +44,13 @@ from airflow.providers.common.compat.openlineage.utils.spark import (
42
44
  inject_transport_information_into_spark_properties,
43
45
  )
44
46
  from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
45
- from google.cloud.dataproc_v1 import Batch, RuntimeConfig
46
47
 
47
48
  if TYPE_CHECKING:
48
- from airflow.providers.common.compat.openlineage.facet import Dataset
49
- from airflow.utils.context import Context
50
49
  from google.cloud.bigquery.table import Table
51
50
 
51
+ from airflow.providers.common.compat.openlineage.facet import Dataset
52
+ from airflow.providers.common.compat.sdk import Context
53
+
52
54
 
53
55
  log = logging.getLogger(__name__)
54
56
 
@@ -212,7 +214,20 @@ def extract_ds_name_from_gcs_path(path: str) -> str:
212
214
 
213
215
  def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
214
216
  """Get facets from BigQuery table object."""
217
+ return get_facets_from_bq_table_for_given_fields(table, selected_fields=None)
218
+
219
+
220
+ def get_facets_from_bq_table_for_given_fields(
221
+ table: Table, selected_fields: list[str] | None
222
+ ) -> dict[str, DatasetFacet]:
223
+ """
224
+ Get facets from BigQuery table object for selected fields only.
225
+
226
+ If selected_fields is None, include all fields.
227
+ """
215
228
  facets: dict[str, DatasetFacet] = {}
229
+ selected_fields_set = set(selected_fields) if selected_fields else None
230
+
216
231
  if table.schema:
217
232
  facets["schema"] = SchemaDatasetFacet(
218
233
  fields=[
@@ -220,6 +235,7 @@ def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
220
235
  name=schema_field.name, type=schema_field.field_type, description=schema_field.description
221
236
  )
222
237
  for schema_field in table.schema
238
+ if selected_fields_set is None or schema_field.name in selected_fields_set
223
239
  ]
224
240
  )
225
241
  if table.description:
@@ -21,7 +21,11 @@ from __future__ import annotations
21
21
 
22
22
  from collections.abc import Sequence
23
23
  from functools import cached_property
24
- from typing import TYPE_CHECKING
24
+ from typing import TYPE_CHECKING, Any
25
+
26
+ from google.api_core.exceptions import NotFound
27
+ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
28
+ from google.cloud import alloydb_v1
25
29
 
26
30
  from airflow.exceptions import AirflowException
27
31
  from airflow.providers.google.cloud.hooks.alloy_db import AlloyDbHook
@@ -31,18 +35,15 @@ from airflow.providers.google.cloud.links.alloy_db import (
31
35
  AlloyDBUsersLink,
32
36
  )
33
37
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
34
- from google.api_core.exceptions import NotFound
35
- from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
36
- from google.cloud import alloydb_v1
37
38
 
38
39
  if TYPE_CHECKING:
39
40
  import proto
40
-
41
- from airflow.utils.context import Context
42
41
  from google.api_core.operation import Operation
43
42
  from google.api_core.retry import Retry
44
43
  from google.protobuf.field_mask_pb2 import FieldMask
45
44
 
45
+ from airflow.providers.common.compat.sdk import Context
46
+
46
47
 
47
48
  class AlloyDBBaseOperator(GoogleCloudBaseOperator):
48
49
  """
@@ -144,8 +145,7 @@ class AlloyDBWriteBaseOperator(AlloyDBBaseOperator):
144
145
  if self.validate_request:
145
146
  # Validation requests are only validated and aren't executed, thus no operation result is expected
146
147
  return None
147
- else:
148
- return self.hook.wait_for_operation(timeout=self.timeout, operation=operation)
148
+ return self.hook.wait_for_operation(timeout=self.timeout, operation=operation)
149
149
 
150
150
 
151
151
  class AlloyDBCreateClusterOperator(AlloyDBWriteBaseOperator):
@@ -228,15 +228,16 @@ class AlloyDBCreateClusterOperator(AlloyDBWriteBaseOperator):
228
228
  return result
229
229
  return None
230
230
 
231
- def execute(self, context: Context) -> dict | None:
232
- AlloyDBClusterLink.persist(
233
- context=context,
234
- task_instance=self,
235
- location_id=self.location,
236
- cluster_id=self.cluster_id,
237
- project_id=self.project_id,
238
- )
231
+ @property
232
+ def extra_links_params(self) -> dict[str, Any]:
233
+ return {
234
+ "location_id": self.location,
235
+ "cluster_id": self.cluster_id,
236
+ "project_id": self.project_id,
237
+ }
239
238
 
239
+ def execute(self, context: Context) -> dict | None:
240
+ AlloyDBClusterLink.persist(context=context)
240
241
  if cluster := self._get_cluster():
241
242
  return cluster
242
243
 
@@ -334,14 +335,16 @@ class AlloyDBUpdateClusterOperator(AlloyDBWriteBaseOperator):
334
335
  self.update_mask = update_mask
335
336
  self.allow_missing = allow_missing
336
337
 
338
+ @property
339
+ def extra_links_params(self) -> dict[str, Any]:
340
+ return {
341
+ "location_id": self.location,
342
+ "cluster_id": self.cluster_id,
343
+ "project_id": self.project_id,
344
+ }
345
+
337
346
  def execute(self, context: Context) -> dict | None:
338
- AlloyDBClusterLink.persist(
339
- context=context,
340
- task_instance=self,
341
- location_id=self.location,
342
- cluster_id=self.cluster_id,
343
- project_id=self.project_id,
344
- )
347
+ AlloyDBClusterLink.persist(context=context)
345
348
  if self.validate_request:
346
349
  self.log.info("Validating an Update AlloyDB cluster request.")
347
350
  else:
@@ -545,14 +548,16 @@ class AlloyDBCreateInstanceOperator(AlloyDBWriteBaseOperator):
545
548
  return result
546
549
  return None
547
550
 
551
+ @property
552
+ def extra_links_params(self) -> dict[str, Any]:
553
+ return {
554
+ "location_id": self.location,
555
+ "cluster_id": self.cluster_id,
556
+ "project_id": self.project_id,
557
+ }
558
+
548
559
  def execute(self, context: Context) -> dict | None:
549
- AlloyDBClusterLink.persist(
550
- context=context,
551
- task_instance=self,
552
- location_id=self.location,
553
- cluster_id=self.cluster_id,
554
- project_id=self.project_id,
555
- )
560
+ AlloyDBClusterLink.persist(context=context)
556
561
  if instance := self._get_instance():
557
562
  return instance
558
563
 
@@ -654,14 +659,16 @@ class AlloyDBUpdateInstanceOperator(AlloyDBWriteBaseOperator):
654
659
  self.update_mask = update_mask
655
660
  self.allow_missing = allow_missing
656
661
 
662
+ @property
663
+ def extra_links_params(self) -> dict[str, Any]:
664
+ return {
665
+ "location_id": self.location,
666
+ "cluster_id": self.cluster_id,
667
+ "project_id": self.project_id,
668
+ }
669
+
657
670
  def execute(self, context: Context) -> dict | None:
658
- AlloyDBClusterLink.persist(
659
- context=context,
660
- task_instance=self,
661
- location_id=self.location,
662
- cluster_id=self.cluster_id,
663
- project_id=self.project_id,
664
- )
671
+ AlloyDBClusterLink.persist(context=context)
665
672
  if self.validate_request:
666
673
  self.log.info("Validating an Update AlloyDB instance request.")
667
674
  else:
@@ -861,14 +868,16 @@ class AlloyDBCreateUserOperator(AlloyDBWriteBaseOperator):
861
868
  return result
862
869
  return None
863
870
 
871
+ @property
872
+ def extra_links_params(self) -> dict[str, Any]:
873
+ return {
874
+ "location_id": self.location,
875
+ "cluster_id": self.cluster_id,
876
+ "project_id": self.project_id,
877
+ }
878
+
864
879
  def execute(self, context: Context) -> dict | None:
865
- AlloyDBUsersLink.persist(
866
- context=context,
867
- task_instance=self,
868
- location_id=self.location,
869
- cluster_id=self.cluster_id,
870
- project_id=self.project_id,
871
- )
880
+ AlloyDBUsersLink.persist(context=context)
872
881
  if (_user := self._get_user()) is not None:
873
882
  return _user
874
883
 
@@ -968,14 +977,16 @@ class AlloyDBUpdateUserOperator(AlloyDBWriteBaseOperator):
968
977
  self.update_mask = update_mask
969
978
  self.allow_missing = allow_missing
970
979
 
980
+ @property
981
+ def extra_links_params(self) -> dict[str, Any]:
982
+ return {
983
+ "location_id": self.location,
984
+ "cluster_id": self.cluster_id,
985
+ "project_id": self.project_id,
986
+ }
987
+
971
988
  def execute(self, context: Context) -> dict | None:
972
- AlloyDBUsersLink.persist(
973
- context=context,
974
- task_instance=self,
975
- location_id=self.location,
976
- cluster_id=self.cluster_id,
977
- project_id=self.project_id,
978
- )
989
+ AlloyDBUsersLink.persist(context=context)
979
990
  if self.validate_request:
980
991
  self.log.info("Validating an Update AlloyDB user request.")
981
992
  else:
@@ -1159,12 +1170,14 @@ class AlloyDBCreateBackupOperator(AlloyDBWriteBaseOperator):
1159
1170
  return result
1160
1171
  return None
1161
1172
 
1173
+ @property
1174
+ def extra_links_params(self) -> dict[str, Any]:
1175
+ return {
1176
+ "project_id": self.project_id,
1177
+ }
1178
+
1162
1179
  def execute(self, context: Context) -> dict | None:
1163
- AlloyDBBackupsLink.persist(
1164
- context=context,
1165
- task_instance=self,
1166
- project_id=self.project_id,
1167
- )
1180
+ AlloyDBBackupsLink.persist(context=context)
1168
1181
  if backup := self._get_backup():
1169
1182
  return backup
1170
1183
 
@@ -1259,12 +1272,14 @@ class AlloyDBUpdateBackupOperator(AlloyDBWriteBaseOperator):
1259
1272
  self.update_mask = update_mask
1260
1273
  self.allow_missing = allow_missing
1261
1274
 
1275
+ @property
1276
+ def extra_links_params(self) -> dict[str, Any]:
1277
+ return {
1278
+ "project_id": self.project_id,
1279
+ }
1280
+
1262
1281
  def execute(self, context: Context) -> dict | None:
1263
- AlloyDBBackupsLink.persist(
1264
- context=context,
1265
- task_instance=self,
1266
- project_id=self.project_id,
1267
- )
1282
+ AlloyDBBackupsLink.persist(context=context)
1268
1283
  if self.validate_request:
1269
1284
  self.log.info("Validating an Update AlloyDB backup request.")
1270
1285
  else: