apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -5
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/bigquery.py +166 -281
  10. airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
  11. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  12. airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
  13. airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
  14. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
  15. airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
  16. airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
  17. airflow/providers/google/cloud/hooks/dataflow.py +71 -94
  18. airflow/providers/google/cloud/hooks/datafusion.py +1 -1
  19. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  20. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  21. airflow/providers/google/cloud/hooks/dataproc.py +72 -71
  22. airflow/providers/google/cloud/hooks/gcs.py +111 -14
  23. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  24. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  25. airflow/providers/google/cloud/hooks/looker.py +6 -1
  26. airflow/providers/google/cloud/hooks/mlengine.py +3 -2
  27. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  28. airflow/providers/google/cloud/hooks/spanner.py +73 -8
  29. airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
  30. airflow/providers/google/cloud/hooks/translate.py +1 -1
  31. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
  32. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
  33. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
  34. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  35. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
  36. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  37. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  38. airflow/providers/google/cloud/hooks/vision.py +2 -2
  39. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  40. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  41. airflow/providers/google/cloud/links/base.py +77 -13
  42. airflow/providers/google/cloud/links/bigquery.py +0 -47
  43. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  44. airflow/providers/google/cloud/links/bigtable.py +0 -48
  45. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  46. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  47. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  48. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  49. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  50. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  51. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  52. airflow/providers/google/cloud/links/compute.py +0 -58
  53. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  54. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  55. airflow/providers/google/cloud/links/dataflow.py +0 -34
  56. airflow/providers/google/cloud/links/dataform.py +0 -64
  57. airflow/providers/google/cloud/links/datafusion.py +1 -96
  58. airflow/providers/google/cloud/links/dataplex.py +0 -154
  59. airflow/providers/google/cloud/links/dataprep.py +0 -24
  60. airflow/providers/google/cloud/links/dataproc.py +11 -95
  61. airflow/providers/google/cloud/links/datastore.py +0 -31
  62. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  63. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  64. airflow/providers/google/cloud/links/mlengine.py +0 -70
  65. airflow/providers/google/cloud/links/pubsub.py +0 -32
  66. airflow/providers/google/cloud/links/spanner.py +0 -33
  67. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  68. airflow/providers/google/cloud/links/translate.py +17 -187
  69. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  70. airflow/providers/google/cloud/links/workflows.py +0 -52
  71. airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
  72. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  73. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  74. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  75. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  76. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  77. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  78. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  79. airflow/providers/google/cloud/operators/alloy_db.py +70 -55
  80. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  81. airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
  82. airflow/providers/google/cloud/operators/bigtable.py +36 -7
  83. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  84. airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
  85. airflow/providers/google/cloud/operators/cloud_build.py +75 -32
  86. airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
  87. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  88. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  89. airflow/providers/google/cloud/operators/cloud_run.py +23 -5
  90. airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
  91. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
  92. airflow/providers/google/cloud/operators/compute.py +8 -40
  93. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  94. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  95. airflow/providers/google/cloud/operators/dataform.py +15 -5
  96. airflow/providers/google/cloud/operators/datafusion.py +41 -20
  97. airflow/providers/google/cloud/operators/dataplex.py +193 -109
  98. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  99. airflow/providers/google/cloud/operators/dataproc.py +78 -35
  100. airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
  101. airflow/providers/google/cloud/operators/datastore.py +22 -6
  102. airflow/providers/google/cloud/operators/dlp.py +6 -29
  103. airflow/providers/google/cloud/operators/functions.py +16 -7
  104. airflow/providers/google/cloud/operators/gcs.py +10 -8
  105. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  106. airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
  107. airflow/providers/google/cloud/operators/looker.py +1 -1
  108. airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
  109. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  110. airflow/providers/google/cloud/operators/pubsub.py +60 -14
  111. airflow/providers/google/cloud/operators/spanner.py +25 -12
  112. airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
  113. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  114. airflow/providers/google/cloud/operators/tasks.py +1 -12
  115. airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
  116. airflow/providers/google/cloud/operators/translate.py +40 -16
  117. airflow/providers/google/cloud/operators/translate_speech.py +1 -2
  118. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  119. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
  120. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
  121. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  122. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  123. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  124. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  125. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  126. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
  127. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  128. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
  129. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  130. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  131. airflow/providers/google/cloud/operators/vision.py +2 -2
  132. airflow/providers/google/cloud/operators/workflows.py +18 -15
  133. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  134. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
  135. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  136. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
  137. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
  138. airflow/providers/google/cloud/sensors/dataflow.py +26 -9
  139. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  140. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  141. airflow/providers/google/cloud/sensors/dataplex.py +2 -2
  142. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  143. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  144. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
  145. airflow/providers/google/cloud/sensors/gcs.py +4 -4
  146. airflow/providers/google/cloud/sensors/looker.py +2 -2
  147. airflow/providers/google/cloud/sensors/pubsub.py +4 -4
  148. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  149. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  150. airflow/providers/google/cloud/sensors/workflows.py +2 -2
  151. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  152. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  153. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  154. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  155. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
  156. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  157. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  158. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  159. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  160. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  161. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
  162. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
  163. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
  164. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  165. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  166. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  167. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  168. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  169. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  170. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  171. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  172. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  173. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  174. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  175. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  176. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
  177. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  178. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  179. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  180. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  181. airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
  182. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  183. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
  184. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  185. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  186. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  187. airflow/providers/google/cloud/triggers/dataproc.py +122 -52
  188. airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
  189. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  190. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  191. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  192. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  193. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  194. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  195. airflow/providers/google/common/deprecated.py +2 -1
  196. airflow/providers/google/common/hooks/base_google.py +27 -8
  197. airflow/providers/google/common/links/storage.py +0 -22
  198. airflow/providers/google/common/utils/get_secret.py +31 -0
  199. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  200. airflow/providers/google/firebase/operators/firestore.py +2 -2
  201. airflow/providers/google/get_provider_info.py +56 -52
  202. airflow/providers/google/go_module_utils.py +35 -3
  203. airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
  204. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  205. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  206. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  207. airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
  208. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  209. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  210. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  211. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  212. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
  213. airflow/providers/google/suite/hooks/calendar.py +1 -1
  214. airflow/providers/google/suite/hooks/sheets.py +15 -1
  215. airflow/providers/google/suite/operators/sheets.py +8 -3
  216. airflow/providers/google/suite/sensors/drive.py +2 -2
  217. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  218. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  219. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  220. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  221. airflow/providers/google/version_compat.py +15 -1
  222. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
  223. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  224. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  225. airflow/providers/google/cloud/hooks/automl.py +0 -673
  226. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  227. airflow/providers/google/cloud/links/automl.py +0 -193
  228. airflow/providers/google/cloud/operators/automl.py +0 -1362
  229. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  230. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  231. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  232. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
  233. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  234. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -40,7 +40,7 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
40
40
  from airflow.version import version
41
41
 
42
42
  if TYPE_CHECKING:
43
- from airflow.utils.context import Context
43
+ from airflow.providers.common.compat.sdk import Context
44
44
 
45
45
 
46
46
  def _validate_available_memory_in_mb(value):
@@ -219,6 +219,13 @@ class CloudFunctionDeployFunctionOperator(GoogleCloudBaseOperator):
219
219
  self.body["labels"] = {}
220
220
  self.body["labels"].update({"airflow-version": "v" + version.replace(".", "-").replace("+", "-")})
221
221
 
222
+ @property
223
+ def extra_links_params(self) -> dict[str, Any]:
224
+ return {
225
+ "location": self.location,
226
+ "function_name": self.body["name"].split("/")[-1],
227
+ }
228
+
222
229
  def execute(self, context: Context):
223
230
  hook = CloudFunctionsHook(
224
231
  gcp_conn_id=self.gcp_conn_id,
@@ -237,7 +244,6 @@ class CloudFunctionDeployFunctionOperator(GoogleCloudBaseOperator):
237
244
  if project_id:
238
245
  CloudFunctionsDetailsLink.persist(
239
246
  context=context,
240
- task_instance=self,
241
247
  location=self.location,
242
248
  project_id=project_id,
243
249
  function_name=self.body["name"].split("/")[-1],
@@ -394,7 +400,6 @@ class CloudFunctionDeleteFunctionOperator(GoogleCloudBaseOperator):
394
400
  if project_id:
395
401
  CloudFunctionsListLink.persist(
396
402
  context=context,
397
- task_instance=self,
398
403
  project_id=project_id,
399
404
  )
400
405
  return hook.delete_function(self.name)
@@ -462,6 +467,13 @@ class CloudFunctionInvokeFunctionOperator(GoogleCloudBaseOperator):
462
467
  self.api_version = api_version
463
468
  self.impersonation_chain = impersonation_chain
464
469
 
470
+ @property
471
+ def extra_links_params(self) -> dict[str, Any]:
472
+ return {
473
+ "location": self.location,
474
+ "function_name": self.function_id,
475
+ }
476
+
465
477
  def execute(self, context: Context):
466
478
  hook = CloudFunctionsHook(
467
479
  api_version=self.api_version,
@@ -476,16 +488,13 @@ class CloudFunctionInvokeFunctionOperator(GoogleCloudBaseOperator):
476
488
  project_id=self.project_id,
477
489
  )
478
490
  self.log.info("Function called successfully. Execution id %s", result.get("executionId"))
479
- self.xcom_push(context=context, key="execution_id", value=result.get("executionId"))
491
+ context["ti"].xcom_push(key="execution_id", value=result.get("executionId"))
480
492
 
481
493
  project_id = self.project_id or hook.project_id
482
494
  if project_id:
483
495
  CloudFunctionsDetailsLink.persist(
484
496
  context=context,
485
- task_instance=self,
486
- location=self.location,
487
497
  project_id=project_id,
488
- function_name=self.function_id,
489
498
  )
490
499
 
491
500
  return result
@@ -33,7 +33,7 @@ import pendulum
33
33
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
34
34
 
35
35
  if TYPE_CHECKING:
36
- from airflow.utils.context import Context
36
+ from airflow.providers.common.compat.sdk import Context
37
37
 
38
38
  from google.api_core.exceptions import Conflict
39
39
  from google.cloud.exceptions import GoogleCloudError
@@ -110,6 +110,7 @@ class GCSCreateBucketOperator(GoogleCloudBaseOperator):
110
110
  "storage_class",
111
111
  "location",
112
112
  "project_id",
113
+ "gcp_conn_id",
113
114
  "impersonation_chain",
114
115
  )
115
116
  ui_color = "#f0eee4"
@@ -145,7 +146,6 @@ class GCSCreateBucketOperator(GoogleCloudBaseOperator):
145
146
  )
146
147
  StorageLink.persist(
147
148
  context=context,
148
- task_instance=self,
149
149
  uri=self.bucket_name,
150
150
  project_id=self.project_id or hook.project_id,
151
151
  )
@@ -204,6 +204,8 @@ class GCSListObjectsOperator(GoogleCloudBaseOperator):
204
204
  "bucket",
205
205
  "prefix",
206
206
  "delimiter",
207
+ "match_glob",
208
+ "gcp_conn_id",
207
209
  "impersonation_chain",
208
210
  )
209
211
 
@@ -259,7 +261,6 @@ class GCSListObjectsOperator(GoogleCloudBaseOperator):
259
261
 
260
262
  StorageLink.persist(
261
263
  context=context,
262
- task_instance=self,
263
264
  uri=self.bucket,
264
265
  project_id=hook.project_id,
265
266
  )
@@ -292,6 +293,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
292
293
  "bucket_name",
293
294
  "prefix",
294
295
  "objects",
296
+ "gcp_conn_id",
295
297
  "impersonation_chain",
296
298
  )
297
299
 
@@ -407,6 +409,7 @@ class GCSBucketCreateAclEntryOperator(GoogleCloudBaseOperator):
407
409
  "entity",
408
410
  "role",
409
411
  "user_project",
412
+ "gcp_conn_id",
410
413
  "impersonation_chain",
411
414
  )
412
415
  # [END gcs_bucket_create_acl_template_fields]
@@ -438,7 +441,6 @@ class GCSBucketCreateAclEntryOperator(GoogleCloudBaseOperator):
438
441
  )
439
442
  StorageLink.persist(
440
443
  context=context,
441
- task_instance=self,
442
444
  uri=self.bucket,
443
445
  project_id=hook.project_id,
444
446
  )
@@ -486,6 +488,7 @@ class GCSObjectCreateAclEntryOperator(GoogleCloudBaseOperator):
486
488
  "generation",
487
489
  "role",
488
490
  "user_project",
491
+ "gcp_conn_id",
489
492
  "impersonation_chain",
490
493
  )
491
494
  # [END gcs_object_create_acl_template_fields]
@@ -521,7 +524,6 @@ class GCSObjectCreateAclEntryOperator(GoogleCloudBaseOperator):
521
524
  )
522
525
  FileDetailsLink.persist(
523
526
  context=context,
524
- task_instance=self,
525
527
  uri=f"{self.bucket}/{self.object_name}",
526
528
  project_id=hook.project_id,
527
529
  )
@@ -574,6 +576,7 @@ class GCSFileTransformOperator(GoogleCloudBaseOperator):
574
576
  "destination_bucket",
575
577
  "destination_object",
576
578
  "transform_script",
579
+ "gcp_conn_id",
577
580
  "impersonation_chain",
578
581
  )
579
582
  operator_extra_links = (FileDetailsLink(),)
@@ -630,7 +633,6 @@ class GCSFileTransformOperator(GoogleCloudBaseOperator):
630
633
  self.log.info("Uploading file to %s as %s", self.destination_bucket, self.destination_object)
631
634
  FileDetailsLink.persist(
632
635
  context=context,
633
- task_instance=self,
634
636
  uri=f"{self.destination_bucket}/{self.destination_object}",
635
637
  project_id=hook.project_id,
636
638
  )
@@ -727,7 +729,9 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
727
729
  "destination_bucket",
728
730
  "destination_prefix",
729
731
  "transform_script",
732
+ "source_gcp_conn_id",
730
733
  "source_impersonation_chain",
734
+ "destination_gcp_conn_id",
731
735
  "destination_impersonation_chain",
732
736
  )
733
737
  operator_extra_links = (StorageLink(),)
@@ -828,7 +832,6 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
828
832
  )
829
833
  StorageLink.persist(
830
834
  context=context,
831
- task_instance=self,
832
835
  uri=self.destination_bucket,
833
836
  project_id=destination_hook.project_id,
834
837
  )
@@ -1079,7 +1082,6 @@ class GCSSynchronizeBucketsOperator(GoogleCloudBaseOperator):
1079
1082
  )
1080
1083
  StorageLink.persist(
1081
1084
  context=context,
1082
- task_instance=self,
1083
1085
  uri=self._get_uri(self.destination_bucket, self.destination_object),
1084
1086
  project_id=hook.project_id,
1085
1087
  )
@@ -0,0 +1,389 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ """This module contains Google Gen AI operators."""
19
+
20
+ from __future__ import annotations
21
+
22
+ from collections.abc import Sequence
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ from airflow.providers.google.cloud.hooks.gen_ai import (
26
+ GenAIGenerativeModelHook,
27
+ )
28
+ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
29
+
30
+ if TYPE_CHECKING:
31
+ from google.genai.types import (
32
+ ContentListUnion,
33
+ ContentListUnionDict,
34
+ CountTokensConfigOrDict,
35
+ CreateCachedContentConfigOrDict,
36
+ CreateTuningJobConfigOrDict,
37
+ EmbedContentConfigOrDict,
38
+ GenerateContentConfig,
39
+ TuningDatasetOrDict,
40
+ )
41
+
42
+ from airflow.providers.common.compat.sdk import Context
43
+
44
+
45
+ class GenAIGenerateEmbeddingsOperator(GoogleCloudBaseOperator):
46
+ """
47
+ Uses the Gemini AI Embeddings API to generate embeddings for words, phrases, sentences, and code.
48
+
49
+ :param project_id: Required. The ID of the Google Cloud project that the
50
+ service belongs to (templated).
51
+ :param location: Required. The ID of the Google Cloud location that the
52
+ service belongs to (templated).
53
+ :param model: Required. The name of the model to use for content generation,
54
+ which can be a text-only or multimodal model. For example, `gemini-pro` or
55
+ `gemini-pro-vision`.
56
+ :param contents: Required. The contents to use for embedding.
57
+ :param config: Optional. Configuration for embeddings.
58
+ :param gcp_conn_id: Optional. The connection ID to use when connecting to Google Cloud.
59
+ :param impersonation_chain: Optional. Service account to impersonate using short-term
60
+ credentials, or chained list of accounts required to get the access_token
61
+ of the last account in the list, which will be impersonated in the request.
62
+ If set as a string, the account must grant the originating account
63
+ the Service Account Token Creator IAM role.
64
+ If set as a sequence, the identities from the list must grant
65
+ Service Account Token Creator IAM role to the directly preceding identity, with first
66
+ account from the list granting this role to the originating account (templated).
67
+ """
68
+
69
+ template_fields = ("location", "project_id", "impersonation_chain", "contents", "model", "config")
70
+
71
+ def __init__(
72
+ self,
73
+ *,
74
+ project_id: str,
75
+ location: str,
76
+ model: str,
77
+ contents: ContentListUnion | ContentListUnionDict | list[str],
78
+ config: EmbedContentConfigOrDict | None = None,
79
+ gcp_conn_id: str = "google_cloud_default",
80
+ impersonation_chain: str | Sequence[str] | None = None,
81
+ **kwargs,
82
+ ) -> None:
83
+ super().__init__(**kwargs)
84
+ self.project_id = project_id
85
+ self.location = location
86
+ self.contents = contents
87
+ self.config = config
88
+ self.model = model
89
+ self.gcp_conn_id = gcp_conn_id
90
+ self.impersonation_chain = impersonation_chain
91
+
92
+ def execute(self, context: Context):
93
+ self.hook = GenAIGenerativeModelHook(
94
+ gcp_conn_id=self.gcp_conn_id,
95
+ impersonation_chain=self.impersonation_chain,
96
+ )
97
+
98
+ self.log.info("Generating text embeddings...")
99
+ response = self.hook.embed_content(
100
+ project_id=self.project_id,
101
+ location=self.location,
102
+ contents=self.contents,
103
+ model=self.model,
104
+ config=self.config,
105
+ )
106
+
107
+ self.log.info("Model response: %s", response)
108
+ context["ti"].xcom_push(key="model_response", value=response)
109
+
110
+ return response
111
+
112
+
113
+ class GenAIGenerateContentOperator(GoogleCloudBaseOperator):
114
+ """
115
+ Generate a model response based on given configuration. Input capabilities differ between models, including tuned models.
116
+
117
+ :param project_id: Required. The ID of the Google Cloud project that the
118
+ service belongs to (templated).
119
+ :param location: Required. The ID of the Google Cloud location that the
120
+ service belongs to (templated).
121
+ :param model: Required. The name of the model to use for content generation,
122
+ which can be a text-only or multimodal model. For example, `gemini-pro` or
123
+ `gemini-pro-vision`.
124
+ :param contents: Required. The multi-part content of a message that a user or a program
125
+ gives to the generative model, in order to elicit a specific response.
126
+ :param generation_config: Optional. Generation configuration settings.
127
+ :param gcp_conn_id: The connection ID to use when connecting to Google Cloud.
128
+ :param impersonation_chain: Optional service account to impersonate using short-term
129
+ credentials, or chained list of accounts required to get the access_token
130
+ of the last account in the list, which will be impersonated in the request.
131
+ If set as a string, the account must grant the originating account
132
+ the Service Account Token Creator IAM role.
133
+ If set as a sequence, the identities from the list must grant
134
+ Service Account Token Creator IAM role to the directly preceding identity, with first
135
+ account from the list granting this role to the originating account (templated).
136
+ """
137
+
138
+ template_fields = (
139
+ "generation_config",
140
+ "location",
141
+ "project_id",
142
+ "impersonation_chain",
143
+ "contents",
144
+ "model",
145
+ )
146
+
147
+ def __init__(
148
+ self,
149
+ *,
150
+ project_id: str,
151
+ location: str,
152
+ contents: ContentListUnionDict,
153
+ model: str,
154
+ generation_config: GenerateContentConfig | dict[str, Any] | None = None,
155
+ gcp_conn_id: str = "google_cloud_default",
156
+ impersonation_chain: str | Sequence[str] | None = None,
157
+ **kwargs,
158
+ ) -> None:
159
+ super().__init__(**kwargs)
160
+ self.project_id = project_id
161
+ self.location = location
162
+ self.contents = contents
163
+ self.generation_config = generation_config
164
+ self.model = model
165
+ self.gcp_conn_id = gcp_conn_id
166
+ self.impersonation_chain = impersonation_chain
167
+
168
+ def execute(self, context: Context):
169
+ self.hook = GenAIGenerativeModelHook(
170
+ gcp_conn_id=self.gcp_conn_id,
171
+ impersonation_chain=self.impersonation_chain,
172
+ )
173
+ response = self.hook.generate_content(
174
+ project_id=self.project_id,
175
+ location=self.location,
176
+ model=self.model,
177
+ contents=self.contents,
178
+ generation_config=self.generation_config,
179
+ )
180
+
181
+ self.log.info("Created Content: %s", response)
182
+ context["ti"].xcom_push(key="model_response", value=response)
183
+
184
+ return response
185
+
186
+
187
class GenAISupervisedFineTuningTrainOperator(GoogleCloudBaseOperator):
    """
    Create a supervised fine-tuning job that adapts a model's behavior using a labeled dataset.

    :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
    :param location: Required. The ID of the Google Cloud location that the service belongs to.
    :param source_model: Required. A pre-trained model optimized for performing natural
        language tasks such as classification, summarization, extraction, content
        creation, and ideation.
    :param training_dataset: Required. Cloud Storage URI of your training dataset. The dataset
        must be formatted as a JSONL file. For best results, provide at least 100 to 500 examples.
    :param tuning_job_config: Optional. Configuration of the Tuning job to be created.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    # Operator attributes that Airflow treats as templated fields (order preserved).
    template_fields = (
        "location",
        "project_id",
        "impersonation_chain",
        "training_dataset",
        "tuning_job_config",
        "source_model",
    )

    def __init__(
        self,
        *,
        project_id: str,
        location: str,
        source_model: str,
        training_dataset: TuningDatasetOrDict,
        tuning_job_config: CreateTuningJobConfigOrDict | dict[str, Any] | None = None,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        """Store the tuning parameters and connection settings on the operator instance."""
        super().__init__(**kwargs)
        # Connection / credential settings used when the hook is built in ``execute``.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        # Where the tuning job is created.
        self.project_id = project_id
        self.location = location
        # What is tuned, and with which data and configuration.
        self.source_model = source_model
        self.training_dataset = training_dataset
        self.tuning_job_config = tuning_job_config

    def execute(self, context: Context):
        """
        Call ``supervised_fine_tuning_train`` on the Gen AI hook and publish the tuned model.

        The tuned model name and endpoint are read from ``tuned_model`` on the hook's
        response, logged, and pushed to XCom under ``tuned_model_name`` and
        ``tuned_model_endpoint_name``.

        :return: A dict with the keys ``tuned_model_name`` and ``tuned_model_endpoint_name``.
        """
        self.hook = GenAIGenerativeModelHook(
            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
        )
        tuning_job = self.hook.supervised_fine_tuning_train(
            project_id=self.project_id,
            location=self.location,
            source_model=self.source_model,
            training_dataset=self.training_dataset,
            tuning_job_config=self.tuning_job_config,
        )

        # Each value is read immediately before it is first logged, so a missing
        # attribute fails at the same point in the log sequence as before.
        model_name = tuning_job.tuned_model.model  # type: ignore[union-attr,arg-type]
        self.log.info("Tuned Model Name: %s", model_name)
        endpoint_name = tuning_job.tuned_model.endpoint  # type: ignore[union-attr,arg-type]
        self.log.info("Tuned Model EndpointName: %s", endpoint_name)

        task_instance = context["ti"]
        task_instance.xcom_push(key="tuned_model_name", value=model_name)
        task_instance.xcom_push(key="tuned_model_endpoint_name", value=endpoint_name)

        return {
            "tuned_model_name": model_name,
            "tuned_model_endpoint_name": endpoint_name,
        }
265
+
266
+
267
class GenAICountTokensOperator(GoogleCloudBaseOperator):
    """
    Calculate the number of input tokens with the Count Tokens API before sending a request to Gemini API.

    :param project_id: Required. The ID of the Google Cloud project that the
        service belongs to (templated).
    :param location: Required. The ID of the Google Cloud location that the
        service belongs to (templated).
    :param contents: Required. The multi-part content of a message that a user or a program
        gives to the generative model, in order to elicit a specific response.
    :param model: Required. Model, supporting prompts with text-only input,
        including natural language tasks, multi-turn text and code chat,
        and code generation. It can output text and code.
    :param config: Optional. Configuration for Count Tokens.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    # Operator attributes that Airflow treats as templated fields (order preserved).
    template_fields = (
        "location",
        "project_id",
        "impersonation_chain",
        "contents",
        "model",
        "config",
    )

    def __init__(
        self,
        *,
        project_id: str,
        location: str,
        contents: ContentListUnion | ContentListUnionDict,
        model: str,
        config: CountTokensConfigOrDict | None = None,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        """Store the request parameters and connection settings on the operator instance."""
        super().__init__(**kwargs)
        # Connection / credential settings used when the hook is built in ``execute``.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        # Where the request is sent.
        self.project_id = project_id
        self.location = location
        # What is counted, against which model, and with which configuration.
        self.model = model
        self.contents = contents
        self.config = config

    def execute(self, context: Context):
        """
        Call ``count_tokens`` on the Gen AI hook and publish the total.

        ``total_tokens`` from the hook's response is logged and pushed to XCom under the
        ``total_tokens`` key. The method itself returns nothing.
        """
        self.hook = GenAIGenerativeModelHook(
            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
        )
        token_count = self.hook.count_tokens(
            project_id=self.project_id,
            location=self.location,
            contents=self.contents,
            model=self.model,
            config=self.config,
        )

        total = token_count.total_tokens
        self.log.info("Total tokens: %s", total)

        task_instance = context["ti"]
        task_instance.xcom_push(key="total_tokens", value=total)
330
+
331
+
332
class GenAICreateCachedContentOperator(GoogleCloudBaseOperator):
    """
    Create a CachedContent resource.

    Cached content reduces the cost of requests that contain repeat content with high
    input token counts.

    :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
    :param location: Required. The ID of the Google Cloud location that the service belongs to.
    :param model: Required. The name of the publisher model to use for cached content.
    :param cached_content_config: Optional. Configuration of the Cached Content.
    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials, or chained list of accounts required to get the access_token
        of the last account in the list, which will be impersonated in the request.
        If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role.
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
    """

    # Operator attributes that Airflow treats as templated fields (order preserved).
    template_fields = (
        "location",
        "project_id",
        "impersonation_chain",
        "model",
        "cached_content_config",
    )

    def __init__(
        self,
        *,
        project_id: str,
        location: str,
        model: str,
        cached_content_config: CreateCachedContentConfigOrDict | None = None,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        """Store the cache parameters and connection settings on the operator instance."""
        super().__init__(**kwargs)
        # Connection / credential settings used when the hook is built in ``execute``.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        # Where the cached content is created.
        self.project_id = project_id
        self.location = location
        # Which model the cache is for, and how it is configured.
        self.model = model
        self.cached_content_config = cached_content_config

    def execute(self, context: Context):
        """
        Call ``create_cached_content`` on the Gen AI hook and publish the result.

        The value returned by the hook is logged, pushed to XCom under the
        ``cached_content`` key, and returned.
        """
        self.hook = GenAIGenerativeModelHook(
            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
        )
        created_name = self.hook.create_cached_content(
            project_id=self.project_id,
            location=self.location,
            model=self.model,
            cached_content_config=self.cached_content_config,
        )
        self.log.info("Cached Content Name: %s", created_name)

        task_instance = context["ti"]
        task_instance.xcom_push(key="cached_content", value=created_name)
        return created_name