apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (91)
  1. airflow/providers/google/__init__.py +5 -8
  2. airflow/providers/google/cloud/hooks/automl.py +35 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +126 -41
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
  19. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  20. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  21. airflow/providers/google/cloud/operators/automl.py +243 -37
  22. airflow/providers/google/cloud/operators/bigquery.py +164 -62
  23. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  24. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  25. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  26. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  27. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  28. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  29. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  30. airflow/providers/google/cloud/operators/compute.py +12 -11
  31. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  32. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  33. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  34. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  35. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  36. airflow/providers/google/cloud/operators/dataproc.py +20 -17
  37. airflow/providers/google/cloud/operators/datastore.py +8 -7
  38. airflow/providers/google/cloud/operators/dlp.py +31 -30
  39. airflow/providers/google/cloud/operators/functions.py +4 -3
  40. airflow/providers/google/cloud/operators/gcs.py +66 -41
  41. airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
  42. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  43. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  44. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  45. airflow/providers/google/cloud/operators/spanner.py +7 -6
  46. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  47. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  48. airflow/providers/google/cloud/operators/tasks.py +14 -13
  49. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  51. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  52. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  53. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  54. airflow/providers/google/cloud/operators/vision.py +13 -12
  55. airflow/providers/google/cloud/operators/workflows.py +12 -14
  56. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  58. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  59. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  60. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  61. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  62. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  63. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  64. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  65. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  66. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  67. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  71. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  72. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  73. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  74. airflow/providers/google/cloud/triggers/bigquery.py +75 -6
  75. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  76. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  77. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  78. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  79. airflow/providers/google/cloud/triggers/dataproc.py +190 -27
  80. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
  81. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  82. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  83. airflow/providers/google/common/hooks/base_google.py +45 -7
  84. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  85. airflow/providers/google/firebase/operators/firestore.py +2 -1
  86. airflow/providers/google/get_provider_info.py +5 -3
  87. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
  89. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  90. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
  91. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/dlp.py
@@ -59,6 +59,7 @@ from airflow.providers.google.cloud.links.data_loss_prevention import (
     CloudDLPPossibleInfoTypesListLink,
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
@@ -108,7 +109,7 @@ class CloudDLPCancelDLPJobOperator(GoogleCloudBaseOperator):
         self,
         *,
         dlp_job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -195,7 +196,7 @@ class CloudDLPCreateDeidentifyTemplateOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deidentify_template: dict | DeidentifyTemplate | None = None,
         template_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -304,7 +305,7 @@ class CloudDLPCreateDLPJobOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_job: dict | InspectJobConfig | None = None,
         risk_job: dict | RiskAnalysisJobConfig | None = None,
         job_id: str | None = None,
@@ -416,7 +417,7 @@ class CloudDLPCreateInspectTemplateOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_template: InspectTemplate | None = None,
         template_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -522,7 +523,7 @@ class CloudDLPCreateJobTriggerOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_trigger: dict | JobTrigger | None = None,
         trigger_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -631,7 +632,7 @@ class CloudDLPCreateStoredInfoTypeOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         config: StoredInfoTypeConfig | None = None,
         stored_info_type_id: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -752,7 +753,7 @@ class CloudDLPDeidentifyContentOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deidentify_config: dict | DeidentifyConfig | None = None,
         inspect_config: dict | InspectConfig | None = None,
         item: dict | ContentItem | None = None,
@@ -842,7 +843,7 @@ class CloudDLPDeleteDeidentifyTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -929,7 +930,7 @@ class CloudDLPDeleteDLPJobOperator(GoogleCloudBaseOperator):
         self,
         *,
         dlp_job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1017,7 +1018,7 @@ class CloudDLPDeleteInspectTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1103,7 +1104,7 @@ class CloudDLPDeleteJobTriggerOperator(GoogleCloudBaseOperator):
         self,
         *,
         job_trigger_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1191,7 +1192,7 @@ class CloudDLPDeleteStoredInfoTypeOperator(GoogleCloudBaseOperator):
         *,
         stored_info_type_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1281,7 +1282,7 @@ class CloudDLPGetDeidentifyTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1364,7 +1365,7 @@ class CloudDLPGetDLPJobOperator(GoogleCloudBaseOperator):
         self,
         *,
         dlp_job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1452,7 +1453,7 @@ class CloudDLPGetInspectTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1538,7 +1539,7 @@ class CloudDLPGetDLPJobTriggerOperator(GoogleCloudBaseOperator):
         self,
         *,
         job_trigger_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1626,7 +1627,7 @@ class CloudDLPGetStoredInfoTypeOperator(GoogleCloudBaseOperator):
         *,
         stored_info_type_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
@@ -1716,7 +1717,7 @@ class CloudDLPInspectContentOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_config: dict | InspectConfig | None = None,
         item: dict | ContentItem | None = None,
         inspect_template_name: str | None = None,
@@ -1802,7 +1803,7 @@ class CloudDLPListDeidentifyTemplatesOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -1894,7 +1895,7 @@ class CloudDLPListDLPJobsOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         results_filter: str | None = None,
         page_size: int | None = None,
         job_type: str | None = None,
@@ -1986,7 +1987,7 @@ class CloudDLPListInfoTypesOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         language_code: str | None = None,
         results_filter: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2077,7 +2078,7 @@ class CloudDLPListInspectTemplatesOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2168,7 +2169,7 @@ class CloudDLPListJobTriggersOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         results_filter: str | None = None,
@@ -2263,7 +2264,7 @@ class CloudDLPListStoredInfoTypesOperator(GoogleCloudBaseOperator):
         self,
         *,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         page_size: int | None = None,
         order_by: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2359,7 +2360,7 @@ class CloudDLPRedactImageOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_config: dict | InspectConfig | None = None,
         image_redaction_configs: None | (list[dict] | list[RedactImageRequest.ImageRedactionConfig]) = None,
         include_findings: bool | None = None,
@@ -2453,7 +2454,7 @@ class CloudDLPReidentifyContentOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         reidentify_config: dict | DeidentifyConfig | None = None,
         inspect_config: dict | InspectConfig | None = None,
         item: dict | ContentItem | None = None,
@@ -2548,7 +2549,7 @@ class CloudDLPUpdateDeidentifyTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         deidentify_template: dict | DeidentifyTemplate | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2648,7 +2649,7 @@ class CloudDLPUpdateInspectTemplateOperator(GoogleCloudBaseOperator):
         *,
         template_id: str,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         inspect_template: dict | InspectTemplate | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2744,7 +2745,7 @@ class CloudDLPUpdateJobTriggerOperator(GoogleCloudBaseOperator):
         self,
         *,
         job_trigger_id,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         job_trigger: dict | JobTrigger | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -2843,7 +2844,7 @@ class CloudDLPUpdateStoredInfoTypeOperator(GoogleCloudBaseOperator):
         *,
         stored_info_type_id,
         organization_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         config: dict | StoredInfoTypeConfig | None = None,
         update_mask: dict | FieldMask | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
airflow/providers/google/cloud/operators/functions.py
@@ -35,6 +35,7 @@ from airflow.providers.google.cloud.utils.field_validator import (
     GcpBodyFieldValidator,
     GcpFieldValidationException,
 )
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.version import version
 
 if TYPE_CHECKING:
@@ -155,7 +156,7 @@ class CloudFunctionDeployFunctionOperator(GoogleCloudBaseOperator):
         *,
         location: str,
         body: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         api_version: str = "v1",
         zip_path: str | None = None,
@@ -363,7 +364,7 @@ class CloudFunctionDeleteFunctionOperator(GoogleCloudBaseOperator):
         gcp_conn_id: str = "google_cloud_default",
         api_version: str = "v1",
         impersonation_chain: str | Sequence[str] | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         **kwargs,
     ) -> None:
         self.name = name
@@ -447,7 +448,7 @@ class CloudFunctionInvokeFunctionOperator(GoogleCloudBaseOperator):
         function_id: str,
         input_data: dict,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         api_version: str = "v1",
         impersonation_chain: str | Sequence[str] | None = None,
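
The recurring signature change in the dlp.py and functions.py hunks above replaces project_id: str | None = None with project_id: str = PROVIDE_PROJECT_ID. PROVIDE_PROJECT_ID is the typed sentinel imported from airflow.providers.google.common.hooks.base_google, so the runtime behavior is unchanged: when project_id is omitted, the hook still falls back to the default project of the GCP connection (or Application Default Credentials), while the annotation is now a plain str for type checkers. A minimal usage sketch of what this means for DAG authors (DAG id, job id, and project id below are illustrative, not taken from the diff):

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPCancelDLPJobOperator

with DAG(dag_id="example_dlp_cancel", start_date=datetime(2024, 1, 1), schedule=None):
    # project_id omitted: resolved at runtime from the "google_cloud_default" connection / ADC.
    CloudDLPCancelDLPJobOperator(
        task_id="cancel_dlp_job",
        dlp_job_id="example-dlp-job",
    )

    # An explicit project id still works and now matches the plain `str` annotation.
    CloudDLPCancelDLPJobOperator(
        task_id="cancel_dlp_job_explicit",
        dlp_job_id="example-dlp-job",
        project_id="my-gcp-project",
    )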
airflow/providers/google/cloud/operators/gcs.py
@@ -29,6 +29,8 @@ from typing import TYPE_CHECKING, Sequence
 
 import pendulum
 
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
@@ -119,7 +121,7 @@ class GCSCreateBucketOperator(GoogleCloudBaseOperator):
         resource: dict | None = None,
         storage_class: str = "MULTI_REGIONAL",
         location: str = "US",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         labels: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -297,7 +299,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
         *,
         bucket_name: str,
         objects: list[str] | None = None,
-        prefix: str | None = None,
+        prefix: str | list[str] | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -309,12 +311,14 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
         self.impersonation_chain = impersonation_chain
 
         if objects is None and prefix is None:
-            err_message = "(Task {task_id}) Either object or prefix should be set. Both are None.".format(
+            err_message = "(Task {task_id}) Either objects or prefix should be set. Both are None.".format(
                 **kwargs
             )
             raise ValueError(err_message)
+        if objects is not None and prefix is not None:
+            err_message = "(Task {task_id}) Objects or prefix should be set. Both provided.".format(**kwargs)
+            raise ValueError(err_message)
 
-        self._objects: list[str] = []
         super().__init__(**kwargs)
 
     def execute(self, context: Context) -> None:
@@ -324,15 +328,14 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
         )
 
         if self.objects is not None:
-            self._objects = self.objects
+            objects = self.objects
         else:
-            self._objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
-        self.log.info("Deleting %s objects from %s", len(self._objects), self.bucket_name)
-        for object_name in self._objects:
+            objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
+        self.log.info("Deleting %s objects from %s", len(objects), self.bucket_name)
+        for object_name in objects:
             hook.delete(bucket_name=self.bucket_name, object_name=object_name)
 
-    def get_openlineage_facets_on_complete(self, task_instance):
-        """Implement on_complete as execute() resolves object names."""
+    def get_openlineage_facets_on_start(self):
         from openlineage.client.facet import (
             LifecycleStateChange,
             LifecycleStateChangeDatasetFacet,
@@ -342,8 +345,17 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
 
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        if not self._objects:
-            return OperatorLineage()
+        objects = []
+        if self.objects is not None:
+            objects = self.objects
+        elif self.prefix is not None:
+            prefixes = [self.prefix] if isinstance(self.prefix, str) else self.prefix
+            for pref in prefixes:
+                # Use parent if not a file (dot not in name) and not a dir (ends with slash)
+                if "." not in pref.split("/")[-1] and not pref.endswith("/"):
+                    pref = Path(pref).parent.as_posix()
+                pref = "/" if pref in (".", "", "/") else pref.rstrip("/")
+                objects.append(pref)
 
         bucket_url = f"gs://{self.bucket_name}"
         input_datasets = [
@@ -360,7 +372,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
                     )
                 },
             )
-            for object_name in self._objects
+            for object_name in objects
         ]
 
        return OperatorLineage(inputs=input_datasets)
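
The GCSDeleteObjectsOperator hunks above make objects and prefix mutually exclusive, widen prefix to also accept a list of prefixes, and move lineage collection to get_openlineage_facets_on_start so it is computed from the operator's arguments rather than from state captured during execute(). A usage sketch under those assumptions (bucket, object, and prefix names below are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator

with DAG(dag_id="example_gcs_delete", start_date=datetime(2024, 1, 1), schedule=None):
    # Exactly one of `objects` or `prefix` must be supplied; passing both
    # (or neither) now raises ValueError at construction time.
    GCSDeleteObjectsOperator(
        task_id="delete_named_objects",
        bucket_name="example-bucket",
        objects=["reports/2024/a.csv", "reports/2024/b.csv"],
    )

    # `prefix` now also accepts a list of prefixes to delete under.
    GCSDeleteObjectsOperator(
        task_id="delete_by_prefixes",
        bucket_name="example-bucket",
        prefix=["staging/", "tmp/exports"],
    )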
@@ -774,8 +786,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         self.upload_continue_on_fail = upload_continue_on_fail
         self.upload_num_attempts = upload_num_attempts
 
-        self._source_object_names: list[str] = []
-        self._destination_object_names: list[str] = []
+        self._source_prefix_interp: str | None = None
+        self._destination_prefix_interp: str | None = None
 
     def execute(self, context: Context) -> list[str]:
         # Define intervals and prefixes.
@@ -803,11 +815,11 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         timespan_start = timespan_start.in_timezone(timezone.utc)
         timespan_end = timespan_end.in_timezone(timezone.utc)
 
-        source_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
+        self._source_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
             self.source_prefix,
             timespan_start,
         )
-        destination_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
+        self._destination_prefix_interp = GCSTimeSpanFileTransformOperator.interpolate_prefix(
             self.destination_prefix,
             timespan_start,
         )
@@ -828,9 +840,9 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         )
 
         # Fetch list of files.
-        self._source_object_names = source_hook.list_by_timespan(
+        blobs_to_transform = source_hook.list_by_timespan(
            bucket_name=self.source_bucket,
-            prefix=source_prefix_interp,
+            prefix=self._source_prefix_interp,
            timespan_start=timespan_start,
            timespan_end=timespan_end,
        )
@@ -840,7 +852,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
             temp_output_dir_path = Path(temp_output_dir)
 
             # TODO: download in parallel.
-            for blob_to_transform in self._source_object_names:
+            for blob_to_transform in blobs_to_transform:
                 destination_file = temp_input_dir_path / blob_to_transform
                 destination_file.parent.mkdir(parents=True, exist_ok=True)
                 try:
@@ -877,6 +889,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
 
             self.log.info("Transformation succeeded. Output temporarily located at %s", temp_output_dir_path)
 
+            files_uploaded = []
+
             # TODO: upload in parallel.
             for upload_file in temp_output_dir_path.glob("**/*"):
                 if upload_file.is_dir():
@@ -884,8 +898,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                     continue
 
                 upload_file_name = str(upload_file.relative_to(temp_output_dir_path))
 
-                if self.destination_prefix is not None:
-                    upload_file_name = f"{destination_prefix_interp}/{upload_file_name}"
+                if self._destination_prefix_interp is not None:
+                    upload_file_name = f"{self._destination_prefix_interp.rstrip('/')}/{upload_file_name}"
 
                 self.log.info("Uploading file %s to %s", upload_file, upload_file_name)
 
@@ -897,35 +911,46 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                         chunk_size=self.chunk_size,
                         num_max_attempts=self.upload_num_attempts,
                     )
-                    self._destination_object_names.append(str(upload_file_name))
+                    files_uploaded.append(str(upload_file_name))
                 except GoogleCloudError:
                     if not self.upload_continue_on_fail:
                         raise
 
-        return self._destination_object_names
+        return files_uploaded
 
     def get_openlineage_facets_on_complete(self, task_instance):
-        """Implement on_complete as execute() resolves object names."""
+        """Implement on_complete as execute() resolves object prefixes."""
         from openlineage.client.run import Dataset
 
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        input_datasets = [
-            Dataset(
-                namespace=f"gs://{self.source_bucket}",
-                name=object_name,
-            )
-            for object_name in self._source_object_names
-        ]
-        output_datasets = [
-            Dataset(
-                namespace=f"gs://{self.destination_bucket}",
-                name=object_name,
-            )
-            for object_name in self._destination_object_names
-        ]
-
-        return OperatorLineage(inputs=input_datasets, outputs=output_datasets)
+        def _parse_prefix(pref):
+            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
+            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
+                pref = Path(pref).parent.as_posix()
+            return "/" if pref in (".", "/", "") else pref.rstrip("/")
+
+        input_prefix, output_prefix = "/", "/"
+        if self._source_prefix_interp is not None:
+            input_prefix = _parse_prefix(self._source_prefix_interp)
+
+        if self._destination_prefix_interp is not None:
+            output_prefix = _parse_prefix(self._destination_prefix_interp)
+
+        return OperatorLineage(
+            inputs=[
+                Dataset(
+                    namespace=f"gs://{self.source_bucket}",
+                    name=input_prefix,
+                )
+            ],
+            outputs=[
+                Dataset(
+                    namespace=f"gs://{self.destination_bucket}",
+                    name=output_prefix,
+                )
+            ],
+        )
 
 
 class GCSDeleteBucketOperator(GoogleCloudBaseOperator):
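
The final hunks change GCSTimeSpanFileTransformOperator lineage from one dataset per transferred object to a single input and a single output dataset named after the interpolated source and destination prefixes, normalized by the _parse_prefix helper shown above. A standalone sketch of that normalization, mirroring the logic in the hunk (function name and sample prefixes are illustrative):

from pathlib import Path


def parse_prefix(pref: str) -> str:
    """Normalize a GCS prefix to a directory-like dataset name (mirrors _parse_prefix above)."""
    # Use the parent directory if the last segment is neither a file
    # (no dot in its name) nor a directory (no trailing slash).
    if "." not in pref.split("/")[-1] and not pref.endswith("/"):
        pref = Path(pref).parent.as_posix()
    return "/" if pref in (".", "/", "") else pref.rstrip("/")


assert parse_prefix("data/2024-01-01/") == "data/2024-01-01"
assert parse_prefix("data/2024-01-01/part") == "data/2024-01-01"
assert parse_prefix("data/2024-01-01/file.csv") == "data/2024-01-01/file.csv"
assert parse_prefix("") == "/"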