apache-airflow-providers-google 10.20.0rc1__py3-none-any.whl → 10.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +16 -8
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -3
- airflow/providers/google/cloud/hooks/bigquery.py +158 -79
- airflow/providers/google/cloud/hooks/cloud_sql.py +12 -6
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +34 -17
- airflow/providers/google/cloud/hooks/dataflow.py +30 -26
- airflow/providers/google/cloud/hooks/dataform.py +2 -1
- airflow/providers/google/cloud/hooks/datafusion.py +4 -2
- airflow/providers/google/cloud/hooks/dataproc.py +102 -51
- airflow/providers/google/cloud/hooks/functions.py +20 -10
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -11
- airflow/providers/google/cloud/hooks/os_login.py +2 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +18 -9
- airflow/providers/google/cloud/hooks/translate.py +2 -1
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -1
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +141 -0
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +2 -1
- airflow/providers/google/cloud/links/base.py +2 -1
- airflow/providers/google/cloud/links/datafusion.py +2 -1
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +4 -2
- airflow/providers/google/cloud/openlineage/mixins.py +10 -0
- airflow/providers/google/cloud/openlineage/utils.py +4 -2
- airflow/providers/google/cloud/operators/bigquery.py +55 -21
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -1
- airflow/providers/google/cloud/operators/cloud_sql.py +22 -11
- airflow/providers/google/cloud/operators/dataform.py +2 -1
- airflow/providers/google/cloud/operators/dataproc.py +75 -34
- airflow/providers/google/cloud/operators/dataproc_metastore.py +24 -12
- airflow/providers/google/cloud/operators/gcs.py +2 -1
- airflow/providers/google/cloud/operators/pubsub.py +10 -5
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +3 -3
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +12 -9
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +243 -0
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
- airflow/providers/google/cloud/operators/vision.py +36 -18
- airflow/providers/google/cloud/sensors/gcs.py +11 -2
- airflow/providers/google/cloud/sensors/pubsub.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +21 -12
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +1 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +17 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/local_to_gcs.py +5 -1
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/presto_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +4 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +4 -2
- airflow/providers/google/cloud/utils/field_validator.py +6 -3
- airflow/providers/google/cloud/utils/helpers.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +2 -1
- airflow/providers/google/common/utils/id_token_credentials.py +2 -1
- airflow/providers/google/get_provider_info.py +3 -2
- airflow/providers/google/go_module_utils.py +4 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +12 -6
- airflow/providers/google/marketing_platform/links/analytics_admin.py +2 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +2 -1
- {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0.dist-info}/METADATA +14 -14
- {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0.dist-info}/RECORD +69 -69
- {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.20.0rc1.dist-info → apache_airflow_providers_google-10.21.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/cloud_sql.py

@@ -908,7 +908,8 @@ class CloudSQLDatabaseHook(BaseHook):
     def _set_temporary_ssl_file(
         self, cert_name: str, cert_path: str | None = None, cert_value: str | None = None
     ) -> str | None:
-        """
+        """
+        Save the certificate as a temporary file.

         This method was implemented in order to overcome psql connection error caused by excessive file
         permissions: "private key file "..." has group or world access; file must have permissions
@@ -1019,7 +1020,8 @@ class CloudSQLDatabaseHook(BaseHook):

     @staticmethod
     def _generate_unique_path() -> str:
-        """
+        """
+        Generate a unique path.

         We don't using mkdtemp here since it can generate paths close to 60
         characters. We append project/location/instance to the path, Postgres
@@ -1106,7 +1108,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return instance_specification

     def create_connection(self) -> Connection:
-        """
+        """
+        Create a connection.

         Connection ID will be randomly generated according to whether it uses
         proxy, TCP, UNIX sockets, SSL.
@@ -1117,7 +1120,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return connection

     def get_sqlproxy_runner(self) -> CloudSqlProxyRunner:
-        """
+        """
+        Retrieve Cloud SQL Proxy runner.

         It is used to manage the proxy lifecycle per task.

@@ -1137,7 +1141,8 @@ class CloudSQLDatabaseHook(BaseHook):
         )

     def get_database_hook(self, connection: Connection) -> PostgresHook | MySqlHook:
-        """
+        """
+        Retrieve database hook.

         This is the actual Postgres or MySQL database hook that uses proxy or
         connects directly to the Google Cloud SQL database.
@@ -1168,7 +1173,8 @@ class CloudSQLDatabaseHook(BaseHook):
         self.sql_proxy_tcp_port = self.reserved_tcp_socket.getsockname()[1]

     def free_reserved_port(self) -> None:
-        """
+        """
+        Free TCP port.

         Makes it immediately ready to be used by Cloud SQL Proxy.
         """
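The docstring hunks above cover the CloudSQLDatabaseHook methods that build a database connection and hook; a minimal usage sketch of that flow, assuming a pre-configured Cloud SQL connection ID and a placeholder query (neither comes from this diff):

from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLDatabaseHook

# Hedged sketch: connection IDs and the SQL statement are placeholders.
db_hook = CloudSQLDatabaseHook(
    gcp_cloudsql_conn_id="my_cloudsql_connection",   # assumed pre-configured Cloud SQL connection
    gcp_conn_id="google_cloud_default",
)
connection = db_hook.create_connection()  # ID reflects proxy/TCP/UNIX socket/SSL settings
database_hook = db_hook.get_database_hook(connection=connection)  # PostgresHook or MySqlHook
database_hook.run("SELECT 1")  # run a trivial statement through the chosen hook

When the connection goes through the Cloud SQL Proxy, get_sqlproxy_runner() returns the runner that manages the proxy lifecycle per task, and free_reserved_port() releases the TCP port reserved for it, as described in the docstrings above.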
airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py

@@ -134,7 +134,8 @@ NEGATIVE_STATUSES = {GcpTransferOperationStatus.FAILED, GcpTransferOperationStat


 def gen_job_name(job_name: str) -> str:
-    """
+    """
+    Add a unique suffix to the job name.

     :param job_name:
     :return: job_name with suffix
@@ -144,7 +145,8 @@ def gen_job_name(job_name: str) -> str:


 class CloudDataTransferServiceHook(GoogleBaseHook):
-    """
+    """
+    Google Storage Transfer Service functionalities.

     All methods in the hook with *project_id* in the signature must be called
     with keyword arguments rather than positional.
@@ -171,7 +173,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         self._conn = None

     def get_conn(self) -> Resource:
-        """
+        """
+        Retrieve connection to Google Storage Transfer service.

         :return: Google Storage Transfer service object
         """
@@ -183,7 +186,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         return self._conn

     def create_transfer_job(self, body: dict) -> dict:
-        """
+        """
+        Create a transfer job that runs periodically.

         :param body: (Required) The request body, as described in
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body
@@ -225,7 +229,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):

     @GoogleBaseHook.fallback_to_default_project_id
     def get_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """
+        """
+        Get latest state of a long-running Google Storage Transfer Service job.

         :param job_name: (Required) Name of the job to be fetched
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -241,7 +246,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )

     def list_transfer_job(self, request_filter: dict | None = None, **kwargs) -> list[dict]:
-        """
+        """
+        List long-running operations in Google Storage Transfer Service.

         A filter can be specified to match only certain entries.

@@ -279,7 +285,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):

     @GoogleBaseHook.fallback_to_default_project_id
     def enable_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """
+        """
+        Make new transfers be performed based on the schedule.

         :param job_name: (Required) Name of the job to be updated
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -302,7 +309,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )

     def update_transfer_job(self, job_name: str, body: dict) -> dict:
-        """
+        """
+        Update a transfer job that runs periodically.

         :param job_name: (Required) Name of the job to be updated
         :param body: A request body, as described in
@@ -319,7 +327,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):

     @GoogleBaseHook.fallback_to_default_project_id
     def delete_transfer_job(self, job_name: str, project_id: str) -> None:
-        """
+        """
+        Delete a transfer job.

         This is a soft delete. After a transfer job is deleted, the job and all
         the transfer executions are subject to garbage collection. Transfer jobs
@@ -346,7 +355,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):

     @GoogleBaseHook.fallback_to_default_project_id
     def run_transfer_job(self, job_name: str, project_id: str) -> dict:
-        """
+        """
+        Run Google Storage Transfer Service job.

         :param job_name: (Required) Name of the job to be fetched
         :param project_id: (Optional) the ID of the project that owns the Transfer
@@ -371,14 +381,16 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )

     def cancel_transfer_operation(self, operation_name: str) -> None:
-        """
+        """
+        Cancel a transfer operation in Google Storage Transfer Service.

         :param operation_name: Name of the transfer operation.
         """
         self.get_conn().transferOperations().cancel(name=operation_name).execute(num_retries=self.num_retries)

     def get_transfer_operation(self, operation_name: str) -> dict:
-        """
+        """
+        Get a transfer operation in Google Storage Transfer Service.

         :param operation_name: (Required) Name of the transfer operation.
         :return: transfer operation
@@ -393,7 +405,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         )

     def list_transfer_operations(self, request_filter: dict | None = None, **kwargs) -> list[dict]:
-        """
+        """
+        Get a transfer operation in Google Storage Transfer Service.

         :param request_filter: (Required) A request filter, as described in
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter
@@ -440,14 +453,16 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         return operations

     def pause_transfer_operation(self, operation_name: str) -> None:
-        """
+        """
+        Pause a transfer operation in Google Storage Transfer Service.

         :param operation_name: (Required) Name of the transfer operation.
         """
         self.get_conn().transferOperations().pause(name=operation_name).execute(num_retries=self.num_retries)

     def resume_transfer_operation(self, operation_name: str) -> None:
-        """
+        """
+        Resume a transfer operation in Google Storage Transfer Service.

         :param operation_name: (Required) Name of the transfer operation.
         """
@@ -459,7 +474,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
         expected_statuses: set[str] | None = None,
         timeout: float | timedelta | None = None,
     ) -> None:
-        """
+        """
+        Wait until the job reaches the expected state.

         :param job: The transfer job to wait for. See:
             https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob
@@ -503,7 +519,8 @@ class CloudDataTransferServiceHook(GoogleBaseHook):
     def operations_contain_expected_statuses(
         operations: list[dict], expected_statuses: set[str] | str
     ) -> bool:
-        """
+        """
+        Check whether an operation exists with the expected status.

         :param operations: (Required) List of transfer operations to check.
         :param expected_statuses: (Required) The expected status. See:
airflow/providers/google/cloud/hooks/dataflow.py

@@ -80,7 +80,8 @@ T = TypeVar("T", bound=Callable)
 def process_line_and_extract_dataflow_job_id_callback(
     on_new_job_id_callback: Callable[[str], None] | None,
 ) -> Callable[[str], None]:
-    """
+    """
+    Build callback that triggers the specified function.

     The returned callback is intended to be used as ``process_line_callback`` in
     :py:class:`~airflow.providers.apache.beam.hooks.beam.BeamCommandRunner`.
@@ -418,32 +419,34 @@ class _DataflowJobsController(LoggingMixin):
         current_state = job["currentState"]
         is_streaming = job.get("type") == DataflowJobType.JOB_TYPE_STREAMING

-        if self._expected_terminal_state is None:
+        current_expected_state = self._expected_terminal_state
+
+        if current_expected_state is None:
             if is_streaming:
-                self._expected_terminal_state = DataflowJobStatus.JOB_STATE_RUNNING
+                current_expected_state = DataflowJobStatus.JOB_STATE_RUNNING
             else:
-                self._expected_terminal_state = DataflowJobStatus.JOB_STATE_DONE
-        else:
-            terminal_states = DataflowJobStatus.TERMINAL_STATES | {DataflowJobStatus.JOB_STATE_RUNNING}
-            if self._expected_terminal_state not in terminal_states:
-                raise AirflowException(
-                    f"Google Cloud Dataflow job's expected terminal state "
-                    f"'{self._expected_terminal_state}' is invalid."
-                    f" The value should be any of the following: {terminal_states}"
-                )
-            elif is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DONE:
-                raise AirflowException(
-                    "Google Cloud Dataflow job's expected terminal state cannot be "
-                    "JOB_STATE_DONE while it is a streaming job"
-                )
-            elif not is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DRAINED:
-                raise AirflowException(
-                    "Google Cloud Dataflow job's expected terminal state cannot be "
-                    "JOB_STATE_DRAINED while it is a batch job"
-                )
+                current_expected_state = DataflowJobStatus.JOB_STATE_DONE

-        if current_state == self._expected_terminal_state:
-            if self._expected_terminal_state == DataflowJobStatus.JOB_STATE_RUNNING:
+        terminal_states = DataflowJobStatus.TERMINAL_STATES | {DataflowJobStatus.JOB_STATE_RUNNING}
+        if current_expected_state not in terminal_states:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state "
+                f"'{current_expected_state}' is invalid."
+                f" The value should be any of the following: {terminal_states}"
+            )
+        elif is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DONE:
+            raise AirflowException(
+                "Google Cloud Dataflow job's expected terminal state cannot be "
+                "JOB_STATE_DONE while it is a streaming job"
+            )
+        elif not is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DRAINED:
+            raise AirflowException(
+                "Google Cloud Dataflow job's expected terminal state cannot be "
+                "JOB_STATE_DRAINED while it is a batch job"
+            )
+
+        if current_state == current_expected_state:
+            if current_expected_state == DataflowJobStatus.JOB_STATE_RUNNING:
                 return not self._wait_until_finished
             return True

@@ -453,7 +456,7 @@ class _DataflowJobsController(LoggingMixin):
         self.log.debug("Current job: %s", job)
         raise AirflowException(
             f"Google Cloud Dataflow job {job['name']} is in an unexpected terminal state: {current_state}, "
-            f"expected terminal state: {self._expected_terminal_state}"
+            f"expected terminal state: {current_expected_state}"
         )

     def wait_for_done(self) -> None:
@@ -1583,7 +1586,8 @@ class AsyncDataflowHook(GoogleBaseAsyncHook):
         page_size: int | None = None,
         page_token: str | None = None,
     ) -> ListJobsAsyncPager:
-        """
+        """
+        List jobs.

         For detail see:
         https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.ListJobsRequest
airflow/providers/google/cloud/hooks/dataform.py

@@ -663,7 +663,8 @@ class DataformHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> InstallNpmPackagesResponse:
-        """
+        """
+        Install NPM dependencies in the provided workspace.

         Requires "package.json" to be created in the workspace.

airflow/providers/google/cloud/hooks/datafusion.py

@@ -523,7 +523,8 @@ class DataFusionHook(GoogleBaseHook):

     @staticmethod
     def cdap_program_type(pipeline_type: DataFusionPipelineType) -> str:
-        """
+        """
+        Retrieve CDAP Program type depending on the pipeline type.

         :param pipeline_type: Pipeline type.
         """
@@ -535,7 +536,8 @@ class DataFusionHook(GoogleBaseHook):

     @staticmethod
     def cdap_program_id(pipeline_type: DataFusionPipelineType) -> str:
-        """
+        """
+        Retrieve CDAP Program id depending on the pipeline type.

         :param pipeline_type: Pipeline type.
         """