apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
@@ -59,6 +59,7 @@ from airflow.providers.google.cloud.links.data_loss_prevention import (
|
|
59
59
|
CloudDLPPossibleInfoTypesListLink,
|
60
60
|
)
|
61
61
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
62
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
62
63
|
|
63
64
|
if TYPE_CHECKING:
|
64
65
|
from google.api_core.retry import Retry
|
@@ -108,7 +109,7 @@ class CloudDLPCancelDLPJobOperator(GoogleCloudBaseOperator):
|
|
108
109
|
self,
|
109
110
|
*,
|
110
111
|
dlp_job_id: str,
|
111
|
-
project_id: str
|
112
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
112
113
|
retry: Retry | _MethodDefault = DEFAULT,
|
113
114
|
timeout: float | None = None,
|
114
115
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -195,7 +196,7 @@ class CloudDLPCreateDeidentifyTemplateOperator(GoogleCloudBaseOperator):
|
|
195
196
|
self,
|
196
197
|
*,
|
197
198
|
organization_id: str | None = None,
|
198
|
-
project_id: str
|
199
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
199
200
|
deidentify_template: dict | DeidentifyTemplate | None = None,
|
200
201
|
template_id: str | None = None,
|
201
202
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -304,7 +305,7 @@ class CloudDLPCreateDLPJobOperator(GoogleCloudBaseOperator):
|
|
304
305
|
def __init__(
|
305
306
|
self,
|
306
307
|
*,
|
307
|
-
project_id: str
|
308
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
308
309
|
inspect_job: dict | InspectJobConfig | None = None,
|
309
310
|
risk_job: dict | RiskAnalysisJobConfig | None = None,
|
310
311
|
job_id: str | None = None,
|
@@ -416,7 +417,7 @@ class CloudDLPCreateInspectTemplateOperator(GoogleCloudBaseOperator):
|
|
416
417
|
self,
|
417
418
|
*,
|
418
419
|
organization_id: str | None = None,
|
419
|
-
project_id: str
|
420
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
420
421
|
inspect_template: InspectTemplate | None = None,
|
421
422
|
template_id: str | None = None,
|
422
423
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -522,7 +523,7 @@ class CloudDLPCreateJobTriggerOperator(GoogleCloudBaseOperator):
|
|
522
523
|
def __init__(
|
523
524
|
self,
|
524
525
|
*,
|
525
|
-
project_id: str
|
526
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
526
527
|
job_trigger: dict | JobTrigger | None = None,
|
527
528
|
trigger_id: str | None = None,
|
528
529
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -631,7 +632,7 @@ class CloudDLPCreateStoredInfoTypeOperator(GoogleCloudBaseOperator):
|
|
631
632
|
self,
|
632
633
|
*,
|
633
634
|
organization_id: str | None = None,
|
634
|
-
project_id: str
|
635
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
635
636
|
config: StoredInfoTypeConfig | None = None,
|
636
637
|
stored_info_type_id: str | None = None,
|
637
638
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -752,7 +753,7 @@ class CloudDLPDeidentifyContentOperator(GoogleCloudBaseOperator):
|
|
752
753
|
def __init__(
|
753
754
|
self,
|
754
755
|
*,
|
755
|
-
project_id: str
|
756
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
756
757
|
deidentify_config: dict | DeidentifyConfig | None = None,
|
757
758
|
inspect_config: dict | InspectConfig | None = None,
|
758
759
|
item: dict | ContentItem | None = None,
|
@@ -842,7 +843,7 @@ class CloudDLPDeleteDeidentifyTemplateOperator(GoogleCloudBaseOperator):
|
|
842
843
|
*,
|
843
844
|
template_id: str,
|
844
845
|
organization_id: str | None = None,
|
845
|
-
project_id: str
|
846
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
846
847
|
retry: Retry | _MethodDefault = DEFAULT,
|
847
848
|
timeout: float | None = None,
|
848
849
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -929,7 +930,7 @@ class CloudDLPDeleteDLPJobOperator(GoogleCloudBaseOperator):
|
|
929
930
|
self,
|
930
931
|
*,
|
931
932
|
dlp_job_id: str,
|
932
|
-
project_id: str
|
933
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
933
934
|
retry: Retry | _MethodDefault = DEFAULT,
|
934
935
|
timeout: float | None = None,
|
935
936
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1017,7 +1018,7 @@ class CloudDLPDeleteInspectTemplateOperator(GoogleCloudBaseOperator):
|
|
1017
1018
|
*,
|
1018
1019
|
template_id: str,
|
1019
1020
|
organization_id: str | None = None,
|
1020
|
-
project_id: str
|
1021
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1021
1022
|
retry: Retry | _MethodDefault = DEFAULT,
|
1022
1023
|
timeout: float | None = None,
|
1023
1024
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1103,7 +1104,7 @@ class CloudDLPDeleteJobTriggerOperator(GoogleCloudBaseOperator):
|
|
1103
1104
|
self,
|
1104
1105
|
*,
|
1105
1106
|
job_trigger_id: str,
|
1106
|
-
project_id: str
|
1107
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1107
1108
|
retry: Retry | _MethodDefault = DEFAULT,
|
1108
1109
|
timeout: float | None = None,
|
1109
1110
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1191,7 +1192,7 @@ class CloudDLPDeleteStoredInfoTypeOperator(GoogleCloudBaseOperator):
|
|
1191
1192
|
*,
|
1192
1193
|
stored_info_type_id: str,
|
1193
1194
|
organization_id: str | None = None,
|
1194
|
-
project_id: str
|
1195
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1195
1196
|
retry: Retry | _MethodDefault = DEFAULT,
|
1196
1197
|
timeout: float | None = None,
|
1197
1198
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1281,7 +1282,7 @@ class CloudDLPGetDeidentifyTemplateOperator(GoogleCloudBaseOperator):
|
|
1281
1282
|
*,
|
1282
1283
|
template_id: str,
|
1283
1284
|
organization_id: str | None = None,
|
1284
|
-
project_id: str
|
1285
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1285
1286
|
retry: Retry | _MethodDefault = DEFAULT,
|
1286
1287
|
timeout: float | None = None,
|
1287
1288
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1364,7 +1365,7 @@ class CloudDLPGetDLPJobOperator(GoogleCloudBaseOperator):
|
|
1364
1365
|
self,
|
1365
1366
|
*,
|
1366
1367
|
dlp_job_id: str,
|
1367
|
-
project_id: str
|
1368
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1368
1369
|
retry: Retry | _MethodDefault = DEFAULT,
|
1369
1370
|
timeout: float | None = None,
|
1370
1371
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1452,7 +1453,7 @@ class CloudDLPGetInspectTemplateOperator(GoogleCloudBaseOperator):
|
|
1452
1453
|
*,
|
1453
1454
|
template_id: str,
|
1454
1455
|
organization_id: str | None = None,
|
1455
|
-
project_id: str
|
1456
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1456
1457
|
retry: Retry | _MethodDefault = DEFAULT,
|
1457
1458
|
timeout: float | None = None,
|
1458
1459
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1538,7 +1539,7 @@ class CloudDLPGetDLPJobTriggerOperator(GoogleCloudBaseOperator):
|
|
1538
1539
|
self,
|
1539
1540
|
*,
|
1540
1541
|
job_trigger_id: str,
|
1541
|
-
project_id: str
|
1542
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1542
1543
|
retry: Retry | _MethodDefault = DEFAULT,
|
1543
1544
|
timeout: float | None = None,
|
1544
1545
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1626,7 +1627,7 @@ class CloudDLPGetStoredInfoTypeOperator(GoogleCloudBaseOperator):
|
|
1626
1627
|
*,
|
1627
1628
|
stored_info_type_id: str,
|
1628
1629
|
organization_id: str | None = None,
|
1629
|
-
project_id: str
|
1630
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1630
1631
|
retry: Retry | _MethodDefault = DEFAULT,
|
1631
1632
|
timeout: float | None = None,
|
1632
1633
|
metadata: Sequence[tuple[str, str]] = (),
|
@@ -1716,7 +1717,7 @@ class CloudDLPInspectContentOperator(GoogleCloudBaseOperator):
|
|
1716
1717
|
def __init__(
|
1717
1718
|
self,
|
1718
1719
|
*,
|
1719
|
-
project_id: str
|
1720
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1720
1721
|
inspect_config: dict | InspectConfig | None = None,
|
1721
1722
|
item: dict | ContentItem | None = None,
|
1722
1723
|
inspect_template_name: str | None = None,
|
@@ -1802,7 +1803,7 @@ class CloudDLPListDeidentifyTemplatesOperator(GoogleCloudBaseOperator):
|
|
1802
1803
|
self,
|
1803
1804
|
*,
|
1804
1805
|
organization_id: str | None = None,
|
1805
|
-
project_id: str
|
1806
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1806
1807
|
page_size: int | None = None,
|
1807
1808
|
order_by: str | None = None,
|
1808
1809
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -1894,7 +1895,7 @@ class CloudDLPListDLPJobsOperator(GoogleCloudBaseOperator):
|
|
1894
1895
|
def __init__(
|
1895
1896
|
self,
|
1896
1897
|
*,
|
1897
|
-
project_id: str
|
1898
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1898
1899
|
results_filter: str | None = None,
|
1899
1900
|
page_size: int | None = None,
|
1900
1901
|
job_type: str | None = None,
|
@@ -1986,7 +1987,7 @@ class CloudDLPListInfoTypesOperator(GoogleCloudBaseOperator):
|
|
1986
1987
|
def __init__(
|
1987
1988
|
self,
|
1988
1989
|
*,
|
1989
|
-
project_id: str
|
1990
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
1990
1991
|
language_code: str | None = None,
|
1991
1992
|
results_filter: str | None = None,
|
1992
1993
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2077,7 +2078,7 @@ class CloudDLPListInspectTemplatesOperator(GoogleCloudBaseOperator):
|
|
2077
2078
|
self,
|
2078
2079
|
*,
|
2079
2080
|
organization_id: str | None = None,
|
2080
|
-
project_id: str
|
2081
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2081
2082
|
page_size: int | None = None,
|
2082
2083
|
order_by: str | None = None,
|
2083
2084
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2168,7 +2169,7 @@ class CloudDLPListJobTriggersOperator(GoogleCloudBaseOperator):
|
|
2168
2169
|
def __init__(
|
2169
2170
|
self,
|
2170
2171
|
*,
|
2171
|
-
project_id: str
|
2172
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2172
2173
|
page_size: int | None = None,
|
2173
2174
|
order_by: str | None = None,
|
2174
2175
|
results_filter: str | None = None,
|
@@ -2263,7 +2264,7 @@ class CloudDLPListStoredInfoTypesOperator(GoogleCloudBaseOperator):
|
|
2263
2264
|
self,
|
2264
2265
|
*,
|
2265
2266
|
organization_id: str | None = None,
|
2266
|
-
project_id: str
|
2267
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2267
2268
|
page_size: int | None = None,
|
2268
2269
|
order_by: str | None = None,
|
2269
2270
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2359,7 +2360,7 @@ class CloudDLPRedactImageOperator(GoogleCloudBaseOperator):
|
|
2359
2360
|
def __init__(
|
2360
2361
|
self,
|
2361
2362
|
*,
|
2362
|
-
project_id: str
|
2363
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2363
2364
|
inspect_config: dict | InspectConfig | None = None,
|
2364
2365
|
image_redaction_configs: None | (list[dict] | list[RedactImageRequest.ImageRedactionConfig]) = None,
|
2365
2366
|
include_findings: bool | None = None,
|
@@ -2453,7 +2454,7 @@ class CloudDLPReidentifyContentOperator(GoogleCloudBaseOperator):
|
|
2453
2454
|
def __init__(
|
2454
2455
|
self,
|
2455
2456
|
*,
|
2456
|
-
project_id: str
|
2457
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2457
2458
|
reidentify_config: dict | DeidentifyConfig | None = None,
|
2458
2459
|
inspect_config: dict | InspectConfig | None = None,
|
2459
2460
|
item: dict | ContentItem | None = None,
|
@@ -2548,7 +2549,7 @@ class CloudDLPUpdateDeidentifyTemplateOperator(GoogleCloudBaseOperator):
|
|
2548
2549
|
*,
|
2549
2550
|
template_id: str,
|
2550
2551
|
organization_id: str | None = None,
|
2551
|
-
project_id: str
|
2552
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2552
2553
|
deidentify_template: dict | DeidentifyTemplate | None = None,
|
2553
2554
|
update_mask: dict | FieldMask | None = None,
|
2554
2555
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2648,7 +2649,7 @@ class CloudDLPUpdateInspectTemplateOperator(GoogleCloudBaseOperator):
|
|
2648
2649
|
*,
|
2649
2650
|
template_id: str,
|
2650
2651
|
organization_id: str | None = None,
|
2651
|
-
project_id: str
|
2652
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2652
2653
|
inspect_template: dict | InspectTemplate | None = None,
|
2653
2654
|
update_mask: dict | FieldMask | None = None,
|
2654
2655
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2744,7 +2745,7 @@ class CloudDLPUpdateJobTriggerOperator(GoogleCloudBaseOperator):
|
|
2744
2745
|
self,
|
2745
2746
|
*,
|
2746
2747
|
job_trigger_id,
|
2747
|
-
project_id: str
|
2748
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2748
2749
|
job_trigger: dict | JobTrigger | None = None,
|
2749
2750
|
update_mask: dict | FieldMask | None = None,
|
2750
2751
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -2843,7 +2844,7 @@ class CloudDLPUpdateStoredInfoTypeOperator(GoogleCloudBaseOperator):
|
|
2843
2844
|
*,
|
2844
2845
|
stored_info_type_id,
|
2845
2846
|
organization_id: str | None = None,
|
2846
|
-
project_id: str
|
2847
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
2847
2848
|
config: dict | StoredInfoTypeConfig | None = None,
|
2848
2849
|
update_mask: dict | FieldMask | None = None,
|
2849
2850
|
retry: Retry | _MethodDefault = DEFAULT,
|
@@ -35,6 +35,7 @@ from airflow.providers.google.cloud.utils.field_validator import (
|
|
35
35
|
GcpBodyFieldValidator,
|
36
36
|
GcpFieldValidationException,
|
37
37
|
)
|
38
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
38
39
|
from airflow.version import version
|
39
40
|
|
40
41
|
if TYPE_CHECKING:
|
@@ -155,7 +156,7 @@ class CloudFunctionDeployFunctionOperator(GoogleCloudBaseOperator):
|
|
155
156
|
*,
|
156
157
|
location: str,
|
157
158
|
body: dict,
|
158
|
-
project_id: str
|
159
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
159
160
|
gcp_conn_id: str = "google_cloud_default",
|
160
161
|
api_version: str = "v1",
|
161
162
|
zip_path: str | None = None,
|
@@ -363,7 +364,7 @@ class CloudFunctionDeleteFunctionOperator(GoogleCloudBaseOperator):
|
|
363
364
|
gcp_conn_id: str = "google_cloud_default",
|
364
365
|
api_version: str = "v1",
|
365
366
|
impersonation_chain: str | Sequence[str] | None = None,
|
366
|
-
project_id: str
|
367
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
367
368
|
**kwargs,
|
368
369
|
) -> None:
|
369
370
|
self.name = name
|
@@ -447,7 +448,7 @@ class CloudFunctionInvokeFunctionOperator(GoogleCloudBaseOperator):
|
|
447
448
|
function_id: str,
|
448
449
|
input_data: dict,
|
449
450
|
location: str,
|
450
|
-
project_id: str
|
451
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
451
452
|
gcp_conn_id: str = "google_cloud_default",
|
452
453
|
api_version: str = "v1",
|
453
454
|
impersonation_chain: str | Sequence[str] | None = None,
|
@@ -29,6 +29,8 @@ from typing import TYPE_CHECKING, Sequence
|
|
29
29
|
|
30
30
|
import pendulum
|
31
31
|
|
32
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
33
|
+
|
32
34
|
if TYPE_CHECKING:
|
33
35
|
from airflow.utils.context import Context
|
34
36
|
|
@@ -119,7 +121,7 @@ class GCSCreateBucketOperator(GoogleCloudBaseOperator):
|
|
119
121
|
resource: dict | None = None,
|
120
122
|
storage_class: str = "MULTI_REGIONAL",
|
121
123
|
location: str = "US",
|
122
|
-
project_id: str
|
124
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
123
125
|
labels: dict | None = None,
|
124
126
|
gcp_conn_id: str = "google_cloud_default",
|
125
127
|
impersonation_chain: str | Sequence[str] | None = None,
|
@@ -297,7 +299,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
|
|
297
299
|
*,
|
298
300
|
bucket_name: str,
|
299
301
|
objects: list[str] | None = None,
|
300
|
-
prefix: str | None = None,
|
302
|
+
prefix: str | list[str] | None = None,
|
301
303
|
gcp_conn_id: str = "google_cloud_default",
|
302
304
|
impersonation_chain: str | Sequence[str] | None = None,
|
303
305
|
**kwargs,
|
@@ -309,12 +311,14 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
|
|
309
311
|
self.impersonation_chain = impersonation_chain
|
310
312
|
|
311
313
|
if objects is None and prefix is None:
|
312
|
-
err_message = "(Task {task_id}) Either
|
314
|
+
err_message = "(Task {task_id}) Either objects or prefix should be set. Both are None.".format(
|
313
315
|
**kwargs
|
314
316
|
)
|
315
317
|
raise ValueError(err_message)
|
318
|
+
if objects is not None and prefix is not None:
|
319
|
+
err_message = "(Task {task_id}) Objects or prefix should be set. Both provided.".format(**kwargs)
|
320
|
+
raise ValueError(err_message)
|
316
321
|
|
317
|
-
self._objects: list[str] = []
|
318
322
|
super().__init__(**kwargs)
|
319
323
|
|
320
324
|
def execute(self, context: Context) -> None:
|
@@ -324,15 +328,14 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
|
|
324
328
|
)
|
325
329
|
|
326
330
|
if self.objects is not None:
|
327
|
-
|
331
|
+
objects = self.objects
|
328
332
|
else:
|
329
|
-
|
330
|
-
self.log.info("Deleting %s objects from %s", len(
|
331
|
-
for object_name in
|
333
|
+
objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
|
334
|
+
self.log.info("Deleting %s objects from %s", len(objects), self.bucket_name)
|
335
|
+
for object_name in objects:
|
332
336
|
hook.delete(bucket_name=self.bucket_name, object_name=object_name)
|
333
337
|
|
334
|
-
def
|
335
|
-
"""Implement on_complete as execute() resolves object names."""
|
338
|
+
def get_openlineage_facets_on_start(self):
|
336
339
|
from openlineage.client.facet import (
|
337
340
|
LifecycleStateChange,
|
338
341
|
LifecycleStateChangeDatasetFacet,
|
@@ -342,8 +345,17 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
|
|
342
345
|
|
343
346
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
344
347
|
|
345
|
-
|
346
|
-
|
348
|
+
objects = []
|
349
|
+
if self.objects is not None:
|
350
|
+
objects = self.objects
|
351
|
+
elif self.prefix is not None:
|
352
|
+
prefixes = [self.prefix] if isinstance(self.prefix, str) else self.prefix
|
353
|
+
for pref in prefixes:
|
354
|
+
# Use parent if not a file (dot not in name) and not a dir (ends with slash)
|
355
|
+
if "." not in pref.split("/")[-1] and not pref.endswith("/"):
|
356
|
+
pref = Path(pref).parent.as_posix()
|
357
|
+
pref = "/" if pref in (".", "", "/") else pref.rstrip("/")
|
358
|
+
objects.append(pref)
|
347
359
|
|
348
360
|
bucket_url = f"gs://{self.bucket_name}"
|
349
361
|
input_datasets = [
|
@@ -360,7 +372,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
|
|
360
372
|
)
|
361
373
|
},
|
362
374
|
)
|
363
|
-
for object_name in
|
375
|
+
for object_name in objects
|
364
376
|
]
|
365
377
|
|
366
378
|
return OperatorLineage(inputs=input_datasets)
|
@@ -774,8 +786,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
774
786
|
self.upload_continue_on_fail = upload_continue_on_fail
|
775
787
|
self.upload_num_attempts = upload_num_attempts
|
776
788
|
|
777
|
-
self.
|
778
|
-
self.
|
789
|
+
self._source_prefix_interp: str | None = None
|
790
|
+
self._destination_prefix_interp: str | None = None
|
779
791
|
|
780
792
|
def execute(self, context: Context) -> list[str]:
|
781
793
|
# Define intervals and prefixes.
|
@@ -803,11 +815,11 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
803
815
|
timespan_start = timespan_start.in_timezone(timezone.utc)
|
804
816
|
timespan_end = timespan_end.in_timezone(timezone.utc)
|
805
817
|
|
806
|
-
|
818
|
+
self._source_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
|
807
819
|
self.source_prefix,
|
808
820
|
timespan_start,
|
809
821
|
)
|
810
|
-
|
822
|
+
self._destination_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
|
811
823
|
self.destination_prefix,
|
812
824
|
timespan_start,
|
813
825
|
)
|
@@ -828,9 +840,9 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
828
840
|
)
|
829
841
|
|
830
842
|
# Fetch list of files.
|
831
|
-
|
843
|
+
blobs_to_transform = source_hook.list_by_timespan(
|
832
844
|
bucket_name=self.source_bucket,
|
833
|
-
prefix=
|
845
|
+
prefix=self._source_prefix_interp,
|
834
846
|
timespan_start=timespan_start,
|
835
847
|
timespan_end=timespan_end,
|
836
848
|
)
|
@@ -840,7 +852,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
840
852
|
temp_output_dir_path = Path(temp_output_dir)
|
841
853
|
|
842
854
|
# TODO: download in parallel.
|
843
|
-
for blob_to_transform in
|
855
|
+
for blob_to_transform in blobs_to_transform:
|
844
856
|
destination_file = temp_input_dir_path / blob_to_transform
|
845
857
|
destination_file.parent.mkdir(parents=True, exist_ok=True)
|
846
858
|
try:
|
@@ -877,6 +889,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
877
889
|
|
878
890
|
self.log.info("Transformation succeeded. Output temporarily located at %s", temp_output_dir_path)
|
879
891
|
|
892
|
+
files_uploaded = []
|
893
|
+
|
880
894
|
# TODO: upload in parallel.
|
881
895
|
for upload_file in temp_output_dir_path.glob("**/*"):
|
882
896
|
if upload_file.is_dir():
|
@@ -884,8 +898,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
884
898
|
|
885
899
|
upload_file_name = str(upload_file.relative_to(temp_output_dir_path))
|
886
900
|
|
887
|
-
if self.
|
888
|
-
upload_file_name = f"{
|
901
|
+
if self._destination_prefix_interp is not None:
|
902
|
+
upload_file_name = f"{self._destination_prefix_interp.rstrip('/')}/{upload_file_name}"
|
889
903
|
|
890
904
|
self.log.info("Uploading file %s to %s", upload_file, upload_file_name)
|
891
905
|
|
@@ -897,35 +911,46 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
897
911
|
chunk_size=self.chunk_size,
|
898
912
|
num_max_attempts=self.upload_num_attempts,
|
899
913
|
)
|
900
|
-
|
914
|
+
files_uploaded.append(str(upload_file_name))
|
901
915
|
except GoogleCloudError:
|
902
916
|
if not self.upload_continue_on_fail:
|
903
917
|
raise
|
904
918
|
|
905
|
-
return
|
919
|
+
return files_uploaded
|
906
920
|
|
907
921
|
def get_openlineage_facets_on_complete(self, task_instance):
|
908
|
-
"""Implement on_complete as execute() resolves object
|
922
|
+
"""Implement on_complete as execute() resolves object prefixes."""
|
909
923
|
from openlineage.client.run import Dataset
|
910
924
|
|
911
925
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
912
926
|
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
)
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
)
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
927
|
+
def _parse_prefix(pref):
|
928
|
+
# Use parent if not a file (dot not in name) and not a dir (ends with slash)
|
929
|
+
if "." not in pref.split("/")[-1] and not pref.endswith("/"):
|
930
|
+
pref = Path(pref).parent.as_posix()
|
931
|
+
return "/" if pref in (".", "/", "") else pref.rstrip("/")
|
932
|
+
|
933
|
+
input_prefix, output_prefix = "/", "/"
|
934
|
+
if self._source_prefix_interp is not None:
|
935
|
+
input_prefix = _parse_prefix(self._source_prefix_interp)
|
936
|
+
|
937
|
+
if self._destination_prefix_interp is not None:
|
938
|
+
output_prefix = _parse_prefix(self._destination_prefix_interp)
|
939
|
+
|
940
|
+
return OperatorLineage(
|
941
|
+
inputs=[
|
942
|
+
Dataset(
|
943
|
+
namespace=f"gs://{self.source_bucket}",
|
944
|
+
name=input_prefix,
|
945
|
+
)
|
946
|
+
],
|
947
|
+
outputs=[
|
948
|
+
Dataset(
|
949
|
+
namespace=f"gs://{self.destination_bucket}",
|
950
|
+
name=output_prefix,
|
951
|
+
)
|
952
|
+
],
|
953
|
+
)
|
929
954
|
|
930
955
|
|
931
956
|
class GCSDeleteBucketOperator(GoogleCloudBaseOperator):
|