apache-airflow-providers-google 10.20.0rc1__py3-none-any.whl → 10.21.0rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Files changed (69)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +16 -8
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -3
  5. airflow/providers/google/cloud/hooks/bigquery.py +158 -79
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +12 -6
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +34 -17
  8. airflow/providers/google/cloud/hooks/dataflow.py +30 -26
  9. airflow/providers/google/cloud/hooks/dataform.py +2 -1
  10. airflow/providers/google/cloud/hooks/datafusion.py +4 -2
  11. airflow/providers/google/cloud/hooks/dataproc.py +102 -51
  12. airflow/providers/google/cloud/hooks/functions.py +20 -10
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -11
  14. airflow/providers/google/cloud/hooks/os_login.py +2 -1
  15. airflow/providers/google/cloud/hooks/secret_manager.py +18 -9
  16. airflow/providers/google/cloud/hooks/translate.py +2 -1
  17. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -1
  18. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +141 -0
  19. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +2 -1
  20. airflow/providers/google/cloud/links/base.py +2 -1
  21. airflow/providers/google/cloud/links/datafusion.py +2 -1
  22. airflow/providers/google/cloud/log/stackdriver_task_handler.py +4 -2
  23. airflow/providers/google/cloud/openlineage/mixins.py +10 -0
  24. airflow/providers/google/cloud/openlineage/utils.py +4 -2
  25. airflow/providers/google/cloud/operators/bigquery.py +55 -21
  26. airflow/providers/google/cloud/operators/cloud_batch.py +3 -1
  27. airflow/providers/google/cloud/operators/cloud_sql.py +22 -11
  28. airflow/providers/google/cloud/operators/dataform.py +2 -1
  29. airflow/providers/google/cloud/operators/dataproc.py +75 -34
  30. airflow/providers/google/cloud/operators/dataproc_metastore.py +24 -12
  31. airflow/providers/google/cloud/operators/gcs.py +2 -1
  32. airflow/providers/google/cloud/operators/pubsub.py +10 -5
  33. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +3 -3
  34. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +12 -9
  35. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +243 -0
  36. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
  37. airflow/providers/google/cloud/operators/vision.py +36 -18
  38. airflow/providers/google/cloud/sensors/gcs.py +11 -2
  39. airflow/providers/google/cloud/sensors/pubsub.py +2 -1
  40. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +21 -12
  41. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +1 -1
  42. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
  43. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +17 -5
  44. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +12 -6
  45. airflow/providers/google/cloud/transfers/local_to_gcs.py +5 -1
  46. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +2 -1
  47. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +2 -1
  48. airflow/providers/google/cloud/transfers/presto_to_gcs.py +2 -1
  49. airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
  50. airflow/providers/google/cloud/transfers/trino_to_gcs.py +2 -1
  51. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  52. airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
  53. airflow/providers/google/cloud/triggers/dataflow.py +2 -1
  54. airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
  55. airflow/providers/google/cloud/utils/external_token_supplier.py +4 -2
  56. airflow/providers/google/cloud/utils/field_sanitizer.py +4 -2
  57. airflow/providers/google/cloud/utils/field_validator.py +6 -3
  58. airflow/providers/google/cloud/utils/helpers.py +2 -1
  59. airflow/providers/google/common/hooks/base_google.py +2 -1
  60. airflow/providers/google/common/utils/id_token_credentials.py +2 -1
  61. airflow/providers/google/get_provider_info.py +3 -2
  62. airflow/providers/google/go_module_utils.py +4 -2
  63. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +12 -6
  64. airflow/providers/google/marketing_platform/links/analytics_admin.py +2 -1
  65. airflow/providers/google/suite/transfers/local_to_drive.py +2 -1
  66. {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0rc1.dist-info}/METADATA +8 -8
  67. {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0rc1.dist-info}/RECORD +69 -69
  68. {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0rc1.dist-info}/WHEEL +0 -0
  69. {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0rc1.dist-info}/entry_points.txt +0 -0

--- a/airflow/providers/google/cloud/hooks/cloud_sql.py
+++ b/airflow/providers/google/cloud/hooks/cloud_sql.py
@@ -908,7 +908,8 @@ class CloudSQLDatabaseHook(BaseHook):
     def _set_temporary_ssl_file(
         self, cert_name: str, cert_path: str | None = None, cert_value: str | None = None
     ) -> str | None:
-        """Save the certificate as a temporary file.
+        """
+        Save the certificate as a temporary file.
 
         This method was implemented in order to overcome psql connection error caused by excessive file
         permissions: "private key file "..." has group or world access; file must have permissions
@@ -1019,7 +1020,8 @@ class CloudSQLDatabaseHook(BaseHook):
 
     @staticmethod
     def _generate_unique_path() -> str:
-        """Generate a unique path.
+        """
+        Generate a unique path.
 
         We don't using mkdtemp here since it can generate paths close to 60
         characters. We append project/location/instance to the path, Postgres
@@ -1106,7 +1108,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return instance_specification
 
     def create_connection(self) -> Connection:
-        """Create a connection.
+        """
+        Create a connection.
 
         Connection ID will be randomly generated according to whether it uses
         proxy, TCP, UNIX sockets, SSL.
@@ -1117,7 +1120,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return connection
 
     def get_sqlproxy_runner(self) -> CloudSqlProxyRunner:
-        """Retrieve Cloud SQL Proxy runner.
+        """
+        Retrieve Cloud SQL Proxy runner.
 
         It is used to manage the proxy lifecycle per task.
 
@@ -1137,7 +1141,8 @@ class CloudSQLDatabaseHook(BaseHook):
         )
 
     def get_database_hook(self, connection: Connection) -> PostgresHook | MySqlHook:
-        """Retrieve database hook.
+        """
+        Retrieve database hook.
 
         This is the actual Postgres or MySQL database hook that uses proxy or
         connects directly to the Google Cloud SQL database.
@@ -1168,7 +1173,8 @@ class CloudSQLDatabaseHook(BaseHook):
         self.sql_proxy_tcp_port = self.reserved_tcp_socket.getsockname()[1]
 
     def free_reserved_port(self) -> None:
-        """Free TCP port.
+        """
+        Free TCP port.
 
         Makes it immediately ready to be used by Cloud SQL Proxy.
         """

--- a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py
+++ b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py
@@ -134,7 +134,8 @@ NEGATIVE_STATUSES = {GcpTransferOperationStatus.FAILED, GcpTransferOperationStat
 
 
 def gen_job_name(job_name: str) -> str:
-    """Add a unique suffix to the job name.
+    """
+    Add a unique suffix to the job name.
 
     :param job_name:
     :return: job_name with suffix
@@ -144,7 +145,8 @@ def gen_job_name(job_name: str) -> str:
 
 
 class CloudDataTransferServiceHook(GoogleBaseHook):
-    """Google Storage Transfer Service functionalities.
+    """
+    Google Storage Transfer Service functionalities.
 
     All methods in the hook with *project_id* in the signature must be called
     with keyword arguments rather than positional.
@@ -171,7 +173,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         self._conn = None
 
     def get_conn(self) -> Resource:
-        """Retrieve connection to Google Storage Transfer service.
+        """
+        Retrieve connection to Google Storage Transfer service.
 
         :return: Google Storage Transfer service object
         """
@@ -183,7 +186,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         return self._conn
 
     def create_transfer_job(self, body: dict) -> dict:
-        """Create a transfer job that runs periodically.
+        """
+        Create a transfer job that runs periodically.
 
         :param body: (Required) The request body, as described in
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body
@@ -225,7 +229,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def get_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """Get latest state of a long-running Google Storage Transfer Service job.
+        """
+        Get latest state of a long-running Google Storage Transfer Service job.
 
         :param job_name: (Required) Name of the job to be fetched
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -241,7 +246,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )
 
     def list_transfer_job(self, request_filter: dict | None = None, **kwargs) -> list[dict]:
-        """List long-running operations in Google Storage Transfer Service.
+        """
+        List long-running operations in Google Storage Transfer Service.
 
         A filter can be specified to match only certain entries.
 
@@ -279,7 +285,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def enable_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """Make new transfers be performed based on the schedule.
+        """
+        Make new transfers be performed based on the schedule.
 
         :param job_name: (Required) Name of the job to be updated
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -302,7 +309,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )
 
     def update_transfer_job(self, job_name: str, body: dict) -> dict:
-        """Update a transfer job that runs periodically.
+        """
+        Update a transfer job that runs periodically.
 
         :param job_name: (Required) Name of the job to be updated
         :param body: A request body, as described in
@@ -319,7 +327,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def delete_transfer_job(self, job_name: str, project_id: str) -> None:
-        """Delete a transfer job.
+        """
+        Delete a transfer job.
 
         This is a soft delete. After a transfer job is deleted, the job and all
         the transfer executions are subject to garbage collection. Transfer jobs
@@ -346,7 +355,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def run_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """Run Google Storage Transfer Service job.
+        """
+        Run Google Storage Transfer Service job.
 
         :param job_name: (Required) Name of the job to be fetched
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -371,14 +381,16 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )
 
     def cancel_transfer_operation(self, operation_name: str) -> None:
-        """Cancel a transfer operation in Google Storage Transfer Service.
+        """
+        Cancel a transfer operation in Google Storage Transfer Service.
 
         :param operation_name: Name of the transfer operation.
         """
         self.get_conn().transferOperations().cancel(name=operation_name).execute(num_retries=self.num_retries)
 
     def get_transfer_operation(self, operation_name: str) -> dict:
-        """Get a transfer operation in Google Storage Transfer Service.
+        """
+        Get a transfer operation in Google Storage Transfer Service.
 
         :param operation_name: (Required) Name of the transfer operation.
         :return: transfer operation
@@ -393,7 +405,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )
 
     def list_transfer_operations(self, request_filter: dict | None = None, **kwargs) -> list[dict]:
-        """Get a transfer operation in Google Storage Transfer Service.
+        """
+        Get a transfer operation in Google Storage Transfer Service.
 
         :param request_filter: (Required) A request filter, as described in
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter
@@ -440,14 +453,16 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         return operations
 
     def pause_transfer_operation(self, operation_name: str) -> None:
-        """Pause a transfer operation in Google Storage Transfer Service.
+        """
+        Pause a transfer operation in Google Storage Transfer Service.
 
         :param operation_name: (Required) Name of the transfer operation.
         """
         self.get_conn().transferOperations().pause(name=operation_name).execute(num_retries=self.num_retries)
 
     def resume_transfer_operation(self, operation_name: str) -> None:
-        """Resume a transfer operation in Google Storage Transfer Service.
+        """
+        Resume a transfer operation in Google Storage Transfer Service.
 
         :param operation_name: (Required) Name of the transfer operation.
         """
@@ -459,7 +474,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         expected_statuses: set[str] | None = None,
         timeout: float | timedelta | None = None,
     ) -> None:
-        """Wait until the job reaches the expected state.
+        """
+        Wait until the job reaches the expected state.
 
         :param job: The transfer job to wait for. See:
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob
@@ -503,7 +519,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
     def operations_contain_expected_statuses(
         operations: list[dict], expected_statuses: set[str] | str
     ) -> bool:
-        """Check whether an operation exists with the expected status.
+        """
+        Check whether an operation exists with the expected status.
 
         :param operations: (Required) List of transfer operations to check.
         :param expected_statuses: (Required) The expected status. See:
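
The CloudDataTransferServiceHook class docstring above notes that every method with *project_id* in its signature must be called with keyword arguments. A hedged usage sketch; the connection ID, job name, and project ID below are placeholders, and a configured Google Cloud connection is assumed:

from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
    CloudDataTransferServiceHook,
)

hook = CloudDataTransferServiceHook(gcp_conn_id="google_cloud_default")

# Keyword arguments let the fallback_to_default_project_id decorator shown in
# the hunks above substitute the connection's default project when project_id
# is omitted; positional arguments are not supported for these methods.
job = hook.get_transfer_job(job_name="transferJobs/example-job", project_id="example-project")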

--- a/airflow/providers/google/cloud/hooks/dataflow.py
+++ b/airflow/providers/google/cloud/hooks/dataflow.py
@@ -80,7 +80,8 @@ T = TypeVar("T", bound=Callable)
 def process_line_and_extract_dataflow_job_id_callback(
     on_new_job_id_callback: Callable[[str], None] | None,
 ) -> Callable[[str], None]:
-    """Build callback that triggers the specified function.
+    """
+    Build callback that triggers the specified function.
 
     The returned callback is intended to be used as ``process_line_callback`` in
     :py:class:`~airflow.providers.apache.beam.hooks.beam.BeamCommandRunner`.
@@ -418,32 +419,34 @@ class _DataflowJobsController(LoggingMixin):
         current_state = job["currentState"]
         is_streaming = job.get("type") == DataflowJobType.JOB_TYPE_STREAMING
 
-        if self._expected_terminal_state is None:
+        current_expected_state = self._expected_terminal_state
+
+        if current_expected_state is None:
             if is_streaming:
-                self._expected_terminal_state = DataflowJobStatus.JOB_STATE_RUNNING
+                current_expected_state = DataflowJobStatus.JOB_STATE_RUNNING
             else:
-                self._expected_terminal_state = DataflowJobStatus.JOB_STATE_DONE
-        else:
-            terminal_states = DataflowJobStatus.TERMINAL_STATES | {DataflowJobStatus.JOB_STATE_RUNNING}
-            if self._expected_terminal_state not in terminal_states:
-                raise AirflowException(
-                    f"Google Cloud Dataflow job's expected terminal state "
-                    f"'{self._expected_terminal_state}' is invalid."
-                    f" The value should be any of the following: {terminal_states}"
-                )
-            elif is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DONE:
-                raise AirflowException(
-                    "Google Cloud Dataflow job's expected terminal state cannot be "
-                    "JOB_STATE_DONE while it is a streaming job"
-                )
-            elif not is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DRAINED:
-                raise AirflowException(
-                    "Google Cloud Dataflow job's expected terminal state cannot be "
-                    "JOB_STATE_DRAINED while it is a batch job"
-                )
+                current_expected_state = DataflowJobStatus.JOB_STATE_DONE
 
-        if current_state == self._expected_terminal_state:
-            if self._expected_terminal_state == DataflowJobStatus.JOB_STATE_RUNNING:
+        terminal_states = DataflowJobStatus.TERMINAL_STATES | {DataflowJobStatus.JOB_STATE_RUNNING}
+        if current_expected_state not in terminal_states:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state "
+                f"'{current_expected_state}' is invalid."
+                f" The value should be any of the following: {terminal_states}"
+            )
+        elif is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DONE:
+            raise AirflowException(
+                "Google Cloud Dataflow job's expected terminal state cannot be "
+                "JOB_STATE_DONE while it is a streaming job"
+            )
+        elif not is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DRAINED:
+            raise AirflowException(
+                "Google Cloud Dataflow job's expected terminal state cannot be "
+                "JOB_STATE_DRAINED while it is a batch job"
+            )
+
+        if current_state == current_expected_state:
+            if current_expected_state == DataflowJobStatus.JOB_STATE_RUNNING:
                 return not self._wait_until_finished
             return True
 
@@ -453,7 +456,7 @@ class _DataflowJobsController(LoggingMixin):
         self.log.debug("Current job: %s", job)
         raise AirflowException(
             f"Google Cloud Dataflow job {job['name']} is in an unexpected terminal state: {current_state}, "
-            f"expected terminal state: {self._expected_terminal_state}"
+            f"expected terminal state: {current_expected_state}"
         )
 
     def wait_for_done(self) -> None:
@@ -1583,7 +1586,8 @@ class AsyncDataflowHook(GoogleBaseAsyncHook):
         page_size: int | None = None,
         page_token: str | None = None,
     ) -> ListJobsAsyncPager:
-        """List jobs.
+        """
+        List jobs.
 
         For detail see:
         https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.ListJobsRequest
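
The _DataflowJobsController hunks above replace assignments to self._expected_terminal_state with a local current_expected_state, so a default derived from one job (for example JOB_STATE_RUNNING for a streaming job) no longer persists on the controller and leaks into checks for other jobs. A simplified, hypothetical sketch of that idea; JobStateChecker and the literal state strings are illustrative, not the Airflow class:

from __future__ import annotations


class JobStateChecker:
    """Toy stand-in for the controller's terminal-state check."""

    def __init__(self, expected_terminal_state: str | None = None) -> None:
        self._expected_terminal_state = expected_terminal_state

    def is_job_done(self, job: dict) -> bool:
        # Derive the default into a local variable; the instance attribute is
        # left untouched, so the next job gets a fresh default.
        expected = self._expected_terminal_state
        if expected is None:
            is_streaming = job.get("type") == "JOB_TYPE_STREAMING"
            expected = "JOB_STATE_RUNNING" if is_streaming else "JOB_STATE_DONE"
        return job["currentState"] == expected


checker = JobStateChecker()
print(checker.is_job_done({"type": "JOB_TYPE_STREAMING", "currentState": "JOB_STATE_RUNNING"}))  # True
print(checker.is_job_done({"type": "JOB_TYPE_BATCH", "currentState": "JOB_STATE_DONE"}))  # True
# Had is_job_done assigned the derived default back to the instance attribute,
# the second (batch) call would still expect JOB_STATE_RUNNING and print False.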

--- a/airflow/providers/google/cloud/hooks/dataform.py
+++ b/airflow/providers/google/cloud/hooks/dataform.py
@@ -663,7 +663,8 @@ class DataformHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> InstallNpmPackagesResponse:
-        """Install NPM dependencies in the provided workspace.
+        """
+        Install NPM dependencies in the provided workspace.
 
         Requires "package.json" to be created in the workspace.
 

--- a/airflow/providers/google/cloud/hooks/datafusion.py
+++ b/airflow/providers/google/cloud/hooks/datafusion.py
@@ -523,7 +523,8 @@ class DataFusionHook(GoogleBaseHook):
 
     @staticmethod
     def cdap_program_type(pipeline_type: DataFusionPipelineType) -> str:
-        """Retrieve CDAP Program type depending on the pipeline type.
+        """
+        Retrieve CDAP Program type depending on the pipeline type.
 
         :param pipeline_type: Pipeline type.
         """
@@ -535,7 +536,8 @@ class DataFusionHook(GoogleBaseHook):
 
     @staticmethod
     def cdap_program_id(pipeline_type: DataFusionPipelineType) -> str:
-        """Retrieve CDAP Program id depending on the pipeline type.
+        """
+        Retrieve CDAP Program id depending on the pipeline type.
 
         :param pipeline_type: Pipeline type.
         """