apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -5
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/bigquery.py +166 -281
  10. airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
  11. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  12. airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
  13. airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
  14. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
  15. airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
  16. airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
  17. airflow/providers/google/cloud/hooks/dataflow.py +71 -94
  18. airflow/providers/google/cloud/hooks/datafusion.py +1 -1
  19. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  20. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  21. airflow/providers/google/cloud/hooks/dataproc.py +72 -71
  22. airflow/providers/google/cloud/hooks/gcs.py +111 -14
  23. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  24. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  25. airflow/providers/google/cloud/hooks/looker.py +6 -1
  26. airflow/providers/google/cloud/hooks/mlengine.py +3 -2
  27. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  28. airflow/providers/google/cloud/hooks/spanner.py +73 -8
  29. airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
  30. airflow/providers/google/cloud/hooks/translate.py +1 -1
  31. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
  32. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
  33. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
  34. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  35. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
  36. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  37. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  38. airflow/providers/google/cloud/hooks/vision.py +2 -2
  39. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  40. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  41. airflow/providers/google/cloud/links/base.py +77 -13
  42. airflow/providers/google/cloud/links/bigquery.py +0 -47
  43. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  44. airflow/providers/google/cloud/links/bigtable.py +0 -48
  45. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  46. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  47. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  48. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  49. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  50. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  51. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  52. airflow/providers/google/cloud/links/compute.py +0 -58
  53. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  54. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  55. airflow/providers/google/cloud/links/dataflow.py +0 -34
  56. airflow/providers/google/cloud/links/dataform.py +0 -64
  57. airflow/providers/google/cloud/links/datafusion.py +1 -96
  58. airflow/providers/google/cloud/links/dataplex.py +0 -154
  59. airflow/providers/google/cloud/links/dataprep.py +0 -24
  60. airflow/providers/google/cloud/links/dataproc.py +11 -95
  61. airflow/providers/google/cloud/links/datastore.py +0 -31
  62. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  63. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  64. airflow/providers/google/cloud/links/mlengine.py +0 -70
  65. airflow/providers/google/cloud/links/pubsub.py +0 -32
  66. airflow/providers/google/cloud/links/spanner.py +0 -33
  67. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  68. airflow/providers/google/cloud/links/translate.py +17 -187
  69. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  70. airflow/providers/google/cloud/links/workflows.py +0 -52
  71. airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
  72. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  73. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  74. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  75. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  76. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  77. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  78. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  79. airflow/providers/google/cloud/operators/alloy_db.py +70 -55
  80. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  81. airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
  82. airflow/providers/google/cloud/operators/bigtable.py +36 -7
  83. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  84. airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
  85. airflow/providers/google/cloud/operators/cloud_build.py +75 -32
  86. airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
  87. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  88. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  89. airflow/providers/google/cloud/operators/cloud_run.py +23 -5
  90. airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
  91. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
  92. airflow/providers/google/cloud/operators/compute.py +8 -40
  93. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  94. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  95. airflow/providers/google/cloud/operators/dataform.py +15 -5
  96. airflow/providers/google/cloud/operators/datafusion.py +41 -20
  97. airflow/providers/google/cloud/operators/dataplex.py +193 -109
  98. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  99. airflow/providers/google/cloud/operators/dataproc.py +78 -35
  100. airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
  101. airflow/providers/google/cloud/operators/datastore.py +22 -6
  102. airflow/providers/google/cloud/operators/dlp.py +6 -29
  103. airflow/providers/google/cloud/operators/functions.py +16 -7
  104. airflow/providers/google/cloud/operators/gcs.py +10 -8
  105. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  106. airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
  107. airflow/providers/google/cloud/operators/looker.py +1 -1
  108. airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
  109. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  110. airflow/providers/google/cloud/operators/pubsub.py +60 -14
  111. airflow/providers/google/cloud/operators/spanner.py +25 -12
  112. airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
  113. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  114. airflow/providers/google/cloud/operators/tasks.py +1 -12
  115. airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
  116. airflow/providers/google/cloud/operators/translate.py +40 -16
  117. airflow/providers/google/cloud/operators/translate_speech.py +1 -2
  118. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  119. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
  120. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
  121. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  122. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  123. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  124. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  125. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  126. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
  127. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  128. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
  129. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  130. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  131. airflow/providers/google/cloud/operators/vision.py +2 -2
  132. airflow/providers/google/cloud/operators/workflows.py +18 -15
  133. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  134. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
  135. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  136. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
  137. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
  138. airflow/providers/google/cloud/sensors/dataflow.py +26 -9
  139. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  140. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  141. airflow/providers/google/cloud/sensors/dataplex.py +2 -2
  142. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  143. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  144. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
  145. airflow/providers/google/cloud/sensors/gcs.py +4 -4
  146. airflow/providers/google/cloud/sensors/looker.py +2 -2
  147. airflow/providers/google/cloud/sensors/pubsub.py +4 -4
  148. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  149. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  150. airflow/providers/google/cloud/sensors/workflows.py +2 -2
  151. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  152. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  153. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  154. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  155. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
  156. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  157. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  158. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  159. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  160. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  161. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
  162. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
  163. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
  164. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  165. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  166. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  167. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  168. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  169. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  170. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  171. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  172. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  173. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  174. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  175. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  176. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
  177. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  178. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  179. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  180. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  181. airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
  182. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  183. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
  184. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  185. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  186. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  187. airflow/providers/google/cloud/triggers/dataproc.py +122 -52
  188. airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
  189. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  190. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  191. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  192. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  193. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  194. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  195. airflow/providers/google/common/deprecated.py +2 -1
  196. airflow/providers/google/common/hooks/base_google.py +27 -8
  197. airflow/providers/google/common/links/storage.py +0 -22
  198. airflow/providers/google/common/utils/get_secret.py +31 -0
  199. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  200. airflow/providers/google/firebase/operators/firestore.py +2 -2
  201. airflow/providers/google/get_provider_info.py +56 -52
  202. airflow/providers/google/go_module_utils.py +35 -3
  203. airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
  204. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  205. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  206. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  207. airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
  208. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  209. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  210. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  211. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  212. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
  213. airflow/providers/google/suite/hooks/calendar.py +1 -1
  214. airflow/providers/google/suite/hooks/sheets.py +15 -1
  215. airflow/providers/google/suite/operators/sheets.py +8 -3
  216. airflow/providers/google/suite/sensors/drive.py +2 -2
  217. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  218. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  219. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  220. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  221. airflow/providers/google/version_compat.py +15 -1
  222. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
  223. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  224. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  225. airflow/providers/google/cloud/hooks/automl.py +0 -673
  226. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  227. airflow/providers/google/cloud/links/automl.py +0 -193
  228. airflow/providers/google/cloud/operators/automl.py +0 -1362
  229. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  230. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  231. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  232. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
  233. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  234. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -24,13 +24,17 @@ from attr import define, field
24
24
  from airflow.providers.google import __version__ as provider_version
25
25
 
26
26
  if TYPE_CHECKING:
27
- from openlineage.client.generated.base import RunFacet
27
+ from openlineage.client.generated.base import JobFacet, RunFacet
28
28
 
29
29
  try:
30
30
  try:
31
31
  from openlineage.client.generated.base import RunFacet
32
32
  except ImportError: # Old OpenLineage client is used
33
33
  from openlineage.client.facet import BaseFacet as RunFacet # type: ignore[assignment]
34
+ try:
35
+ from openlineage.client.generated.base import JobFacet
36
+ except ImportError: # Old OpenLineage client is used
37
+ from openlineage.client.facet import BaseFacet as JobFacet # type: ignore[assignment]
34
38
 
35
39
  @define
36
40
  class BigQueryJobRunFacet(RunFacet):
@@ -53,6 +57,100 @@ try:
53
57
  f"providers-google/{provider_version}/airflow/providers/google/"
54
58
  "openlineage/BigQueryJobRunFacet.json"
55
59
  )
60
+
61
+ @define
62
+ class CloudStorageTransferJobFacet(JobFacet):
63
+ """
64
+ Facet representing a Cloud Storage Transfer Service job configuration.
65
+
66
+ :param jobName: Unique name of the transfer job.
67
+ :param projectId: GCP project where the transfer job is defined.
68
+ :param description: User-provided description of the transfer job.
69
+ :param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
70
+ :param sourceBucket: Name of the source bucket (e.g. AWS S3).
71
+ :param sourcePath: Prefix/path inside the source bucket.
72
+ :param targetBucket: Name of the destination bucket (e.g. GCS).
73
+ :param targetPath: Prefix/path inside the destination bucket.
74
+ :param objectConditions: Object selection rules (e.g. include/exclude prefixes).
75
+ :param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
76
+ from the source after transfer.
77
+ :param schedule: Schedule for the transfer job (if recurring).
78
+ """
79
+
80
+ jobName: str | None = field(default=None)
81
+ projectId: str | None = field(default=None)
82
+ description: str | None = field(default=None)
83
+ status: str | None = field(default=None)
84
+ sourceBucket: str | None = field(default=None)
85
+ sourcePath: str | None = field(default=None)
86
+ targetBucket: str | None = field(default=None)
87
+ targetPath: str | None = field(default=None)
88
+ objectConditions: dict | None = field(default=None)
89
+ transferOptions: dict | None = field(default=None)
90
+ schedule: dict | None = field(default=None)
91
+
92
+ @staticmethod
93
+ def _get_schema() -> str:
94
+ return (
95
+ "https://raw.githubusercontent.com/apache/airflow/"
96
+ f"providers-google/{provider_version}/airflow/providers/google/"
97
+ "openlineage/CloudStorageTransferJobFacet.json"
98
+ )
99
+
100
+ @define
101
+ class CloudStorageTransferRunFacet(RunFacet):
102
+ """
103
+ Facet representing a Cloud Storage Transfer Service job execution run.
104
+
105
+ :param jobName: Name of the transfer job being executed.
106
+ :param operationName: Name of the specific transfer operation instance.
107
+ :param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
108
+ :param startTime: Time when the transfer job execution started (ISO 8601 format).
109
+ :param endTime: Time when the transfer job execution finished (ISO 8601 format).
110
+ :param wait: Whether the operator waits for the job to complete before finishing.
111
+ :param timeout: Timeout (in seconds) for the transfer run to complete.
112
+ :param deferrable: Whether the operator defers execution until job completion.
113
+ :param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
114
+ """
115
+
116
+ jobName: str | None = field(default=None)
117
+ operationName: str | None = field(default=None)
118
+ status: str | None = field(default=None)
119
+ startTime: str | None = field(default=None)
120
+ endTime: str | None = field(default=None)
121
+ wait: bool = field(default=True)
122
+ timeout: float | None = field(default=None)
123
+ deferrable: bool = field(default=False)
124
+ deleteJobAfterCompletion: bool = field(default=False)
125
+
126
+ @staticmethod
127
+ def _get_schema() -> str:
128
+ return (
129
+ "https://raw.githubusercontent.com/apache/airflow/"
130
+ f"providers-google/{provider_version}/airflow/providers/google/"
131
+ "openlineage/CloudStorageTransferRunFacet.json"
132
+ )
133
+
134
+ @define
135
+ class DataFusionRunFacet(RunFacet):
136
+ """
137
+ Facet that represents relevant details of a Cloud Data Fusion pipeline run.
138
+
139
+ :param runId: The pipeline execution id.
140
+ :param runtimeArgs: Runtime arguments passed to the pipeline.
141
+ """
142
+
143
+ runId: str | None = field(default=None)
144
+ runtimeArgs: dict[str, str] | None = field(default=None)
145
+
146
+ @staticmethod
147
+ def _get_schema() -> str:
148
+ return (
149
+ "https://raw.githubusercontent.com/apache/airflow/"
150
+ f"providers-google/{provider_version}/airflow/providers/google/"
151
+ "openlineage/DataFusionRunFacet.json"
152
+ )
153
+
56
154
  except ImportError: # OpenLineage is not available
57
155
 
58
156
  def create_no_op(*_, **__) -> None:
@@ -65,3 +163,6 @@ except ImportError: # OpenLineage is not available
65
163
  return None
66
164
 
67
165
  BigQueryJobRunFacet = create_no_op # type: ignore[misc, assignment]
166
+ CloudStorageTransferJobFacet = create_no_op # type: ignore[misc, assignment]
167
+ CloudStorageTransferRunFacet = create_no_op # type: ignore[misc, assignment]
168
+ DataFusionRunFacet = create_no_op # type: ignore[misc, assignment]
@@ -80,7 +80,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
80
80
  from airflow.providers.openlineage.sqlparser import SQLParser
81
81
 
82
82
  if not self.job_id:
83
- self.log.warning("No BigQuery job_id was found by OpenLineage.") # type: ignore[attr-defined]
83
+ self.log.warning("No BigQuery job_id was found by OpenLineage.")
84
84
  return OperatorLineage()
85
85
 
86
86
  if not self.hook:
@@ -92,14 +92,16 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
92
92
  impersonation_chain=self.impersonation_chain,
93
93
  )
94
94
 
95
- self.log.debug("Extracting data from bigquery job: `%s`", self.job_id) # type: ignore[attr-defined]
95
+ self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)
96
96
  inputs, outputs = [], []
97
97
  run_facets: dict[str, RunFacet] = {
98
98
  "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
99
99
  }
100
- self._client = self.hook.get_client(project_id=self.hook.project_id, location=self.location)
100
+ self._client = self.hook.get_client(
101
+ project_id=self.project_id or self.hook.project_id, location=self.location
102
+ )
101
103
  try:
102
- job_properties = self._client.get_job(job_id=self.job_id)._properties # type: ignore
104
+ job_properties = self._client.get_job(job_id=self.job_id)._properties
103
105
 
104
106
  if get_from_nullable_chain(job_properties, ["status", "state"]) != "DONE":
105
107
  raise ValueError(f"Trying to extract data from running bigquery job: `{self.job_id}`")
@@ -107,11 +109,11 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
107
109
  run_facets["bigQueryJob"] = self._get_bigquery_job_run_facet(job_properties)
108
110
 
109
111
  if get_from_nullable_chain(job_properties, ["statistics", "numChildJobs"]):
110
- self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.") # type: ignore[attr-defined]
112
+ self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")
111
113
  # SCRIPT job type has no input / output information but spawns child jobs that have one
112
114
  # https://cloud.google.com/bigquery/docs/information-schema-jobs#multi-statement_query_job
113
115
  for child_job_id in self._client.list_jobs(parent_job=self.job_id):
114
- child_job_properties = self._client.get_job(job_id=child_job_id)._properties # type: ignore
116
+ child_job_properties = self._client.get_job(job_id=child_job_id)._properties
115
117
  child_inputs, child_outputs = self._get_inputs_and_outputs(child_job_properties)
116
118
  inputs.extend(child_inputs)
117
119
  outputs.extend(child_outputs)
@@ -119,7 +121,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
119
121
  inputs, outputs = self._get_inputs_and_outputs(job_properties)
120
122
 
121
123
  except Exception as e:
122
- self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True) # type: ignore[attr-defined]
124
+ self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)
123
125
  exception_msg = traceback.format_exc()
124
126
  run_facets.update(
125
127
  {
@@ -173,7 +175,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
173
175
  if (
174
176
  single_output.facets
175
177
  and final_outputs[key].facets
176
- and "columnLineage" in single_output.facets # type: ignore
178
+ and "columnLineage" in single_output.facets
177
179
  and "columnLineage" in final_outputs[key].facets # type: ignore
178
180
  ):
179
181
  single_output.facets["columnLineage"] = merge_column_lineage_facets(
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
49
49
  from google.cloud.bigquery.table import Table
50
50
 
51
51
  from airflow.providers.common.compat.openlineage.facet import Dataset
52
- from airflow.utils.context import Context
52
+ from airflow.providers.common.compat.sdk import Context
53
53
 
54
54
 
55
55
  log = logging.getLogger(__name__)
@@ -214,7 +214,20 @@ def extract_ds_name_from_gcs_path(path: str) -> str:
214
214
 
215
215
  def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
216
216
  """Get facets from BigQuery table object."""
217
+ return get_facets_from_bq_table_for_given_fields(table, selected_fields=None)
218
+
219
+
220
+ def get_facets_from_bq_table_for_given_fields(
221
+ table: Table, selected_fields: list[str] | None
222
+ ) -> dict[str, DatasetFacet]:
223
+ """
224
+ Get facets from BigQuery table object for selected fields only.
225
+
226
+ If selected_fields is None, include all fields.
227
+ """
217
228
  facets: dict[str, DatasetFacet] = {}
229
+ selected_fields_set = set(selected_fields) if selected_fields else None
230
+
218
231
  if table.schema:
219
232
  facets["schema"] = SchemaDatasetFacet(
220
233
  fields=[
@@ -222,6 +235,7 @@ def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
222
235
  name=schema_field.name, type=schema_field.field_type, description=schema_field.description
223
236
  )
224
237
  for schema_field in table.schema
238
+ if selected_fields_set is None or schema_field.name in selected_fields_set
225
239
  ]
226
240
  )
227
241
  if table.description:
@@ -21,7 +21,7 @@ from __future__ import annotations
21
21
 
22
22
  from collections.abc import Sequence
23
23
  from functools import cached_property
24
- from typing import TYPE_CHECKING
24
+ from typing import TYPE_CHECKING, Any
25
25
 
26
26
  from google.api_core.exceptions import NotFound
27
27
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -42,7 +42,7 @@ if TYPE_CHECKING:
42
42
  from google.api_core.retry import Retry
43
43
  from google.protobuf.field_mask_pb2 import FieldMask
44
44
 
45
- from airflow.utils.context import Context
45
+ from airflow.providers.common.compat.sdk import Context
46
46
 
47
47
 
48
48
  class AlloyDBBaseOperator(GoogleCloudBaseOperator):
@@ -228,15 +228,16 @@ class AlloyDBCreateClusterOperator(AlloyDBWriteBaseOperator):
228
228
  return result
229
229
  return None
230
230
 
231
- def execute(self, context: Context) -> dict | None:
232
- AlloyDBClusterLink.persist(
233
- context=context,
234
- task_instance=self,
235
- location_id=self.location,
236
- cluster_id=self.cluster_id,
237
- project_id=self.project_id,
238
- )
231
+ @property
232
+ def extra_links_params(self) -> dict[str, Any]:
233
+ return {
234
+ "location_id": self.location,
235
+ "cluster_id": self.cluster_id,
236
+ "project_id": self.project_id,
237
+ }
239
238
 
239
+ def execute(self, context: Context) -> dict | None:
240
+ AlloyDBClusterLink.persist(context=context)
240
241
  if cluster := self._get_cluster():
241
242
  return cluster
242
243
 
@@ -334,14 +335,16 @@ class AlloyDBUpdateClusterOperator(AlloyDBWriteBaseOperator):
334
335
  self.update_mask = update_mask
335
336
  self.allow_missing = allow_missing
336
337
 
338
+ @property
339
+ def extra_links_params(self) -> dict[str, Any]:
340
+ return {
341
+ "location_id": self.location,
342
+ "cluster_id": self.cluster_id,
343
+ "project_id": self.project_id,
344
+ }
345
+
337
346
  def execute(self, context: Context) -> dict | None:
338
- AlloyDBClusterLink.persist(
339
- context=context,
340
- task_instance=self,
341
- location_id=self.location,
342
- cluster_id=self.cluster_id,
343
- project_id=self.project_id,
344
- )
347
+ AlloyDBClusterLink.persist(context=context)
345
348
  if self.validate_request:
346
349
  self.log.info("Validating an Update AlloyDB cluster request.")
347
350
  else:
@@ -545,14 +548,16 @@ class AlloyDBCreateInstanceOperator(AlloyDBWriteBaseOperator):
545
548
  return result
546
549
  return None
547
550
 
551
+ @property
552
+ def extra_links_params(self) -> dict[str, Any]:
553
+ return {
554
+ "location_id": self.location,
555
+ "cluster_id": self.cluster_id,
556
+ "project_id": self.project_id,
557
+ }
558
+
548
559
  def execute(self, context: Context) -> dict | None:
549
- AlloyDBClusterLink.persist(
550
- context=context,
551
- task_instance=self,
552
- location_id=self.location,
553
- cluster_id=self.cluster_id,
554
- project_id=self.project_id,
555
- )
560
+ AlloyDBClusterLink.persist(context=context)
556
561
  if instance := self._get_instance():
557
562
  return instance
558
563
 
@@ -654,14 +659,16 @@ class AlloyDBUpdateInstanceOperator(AlloyDBWriteBaseOperator):
654
659
  self.update_mask = update_mask
655
660
  self.allow_missing = allow_missing
656
661
 
662
+ @property
663
+ def extra_links_params(self) -> dict[str, Any]:
664
+ return {
665
+ "location_id": self.location,
666
+ "cluster_id": self.cluster_id,
667
+ "project_id": self.project_id,
668
+ }
669
+
657
670
  def execute(self, context: Context) -> dict | None:
658
- AlloyDBClusterLink.persist(
659
- context=context,
660
- task_instance=self,
661
- location_id=self.location,
662
- cluster_id=self.cluster_id,
663
- project_id=self.project_id,
664
- )
671
+ AlloyDBClusterLink.persist(context=context)
665
672
  if self.validate_request:
666
673
  self.log.info("Validating an Update AlloyDB instance request.")
667
674
  else:
@@ -861,14 +868,16 @@ class AlloyDBCreateUserOperator(AlloyDBWriteBaseOperator):
861
868
  return result
862
869
  return None
863
870
 
871
+ @property
872
+ def extra_links_params(self) -> dict[str, Any]:
873
+ return {
874
+ "location_id": self.location,
875
+ "cluster_id": self.cluster_id,
876
+ "project_id": self.project_id,
877
+ }
878
+
864
879
  def execute(self, context: Context) -> dict | None:
865
- AlloyDBUsersLink.persist(
866
- context=context,
867
- task_instance=self,
868
- location_id=self.location,
869
- cluster_id=self.cluster_id,
870
- project_id=self.project_id,
871
- )
880
+ AlloyDBUsersLink.persist(context=context)
872
881
  if (_user := self._get_user()) is not None:
873
882
  return _user
874
883
 
@@ -968,14 +977,16 @@ class AlloyDBUpdateUserOperator(AlloyDBWriteBaseOperator):
968
977
  self.update_mask = update_mask
969
978
  self.allow_missing = allow_missing
970
979
 
980
+ @property
981
+ def extra_links_params(self) -> dict[str, Any]:
982
+ return {
983
+ "location_id": self.location,
984
+ "cluster_id": self.cluster_id,
985
+ "project_id": self.project_id,
986
+ }
987
+
971
988
  def execute(self, context: Context) -> dict | None:
972
- AlloyDBUsersLink.persist(
973
- context=context,
974
- task_instance=self,
975
- location_id=self.location,
976
- cluster_id=self.cluster_id,
977
- project_id=self.project_id,
978
- )
989
+ AlloyDBUsersLink.persist(context=context)
979
990
  if self.validate_request:
980
991
  self.log.info("Validating an Update AlloyDB user request.")
981
992
  else:
@@ -1159,12 +1170,14 @@ class AlloyDBCreateBackupOperator(AlloyDBWriteBaseOperator):
1159
1170
  return result
1160
1171
  return None
1161
1172
 
1173
+ @property
1174
+ def extra_links_params(self) -> dict[str, Any]:
1175
+ return {
1176
+ "project_id": self.project_id,
1177
+ }
1178
+
1162
1179
  def execute(self, context: Context) -> dict | None:
1163
- AlloyDBBackupsLink.persist(
1164
- context=context,
1165
- task_instance=self,
1166
- project_id=self.project_id,
1167
- )
1180
+ AlloyDBBackupsLink.persist(context=context)
1168
1181
  if backup := self._get_backup():
1169
1182
  return backup
1170
1183
 
@@ -1259,12 +1272,14 @@ class AlloyDBUpdateBackupOperator(AlloyDBWriteBaseOperator):
1259
1272
  self.update_mask = update_mask
1260
1273
  self.allow_missing = allow_missing
1261
1274
 
1275
+ @property
1276
+ def extra_links_params(self) -> dict[str, Any]:
1277
+ return {
1278
+ "project_id": self.project_id,
1279
+ }
1280
+
1262
1281
  def execute(self, context: Context) -> dict | None:
1263
- AlloyDBBackupsLink.persist(
1264
- context=context,
1265
- task_instance=self,
1266
- project_id=self.project_id,
1267
- )
1282
+ AlloyDBBackupsLink.persist(context=context)
1268
1283
  if self.validate_request:
1269
1284
  self.log.info("Validating an Update AlloyDB backup request.")
1270
1285
  else: