apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/cloud/hooks/automl.py +1 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +64 -33
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  19. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  20. airflow/providers/google/cloud/operators/automl.py +13 -12
  21. airflow/providers/google/cloud/operators/bigquery.py +36 -22
  22. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  23. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  24. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  25. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  26. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  27. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  28. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  29. airflow/providers/google/cloud/operators/compute.py +12 -11
  30. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  31. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  32. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  33. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  34. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  35. airflow/providers/google/cloud/operators/dataproc.py +19 -16
  36. airflow/providers/google/cloud/operators/datastore.py +8 -7
  37. airflow/providers/google/cloud/operators/dlp.py +31 -30
  38. airflow/providers/google/cloud/operators/functions.py +4 -3
  39. airflow/providers/google/cloud/operators/gcs.py +66 -41
  40. airflow/providers/google/cloud/operators/kubernetes_engine.py +232 -12
  41. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  42. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  43. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  44. airflow/providers/google/cloud/operators/spanner.py +7 -6
  45. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  46. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  47. airflow/providers/google/cloud/operators/tasks.py +14 -13
  48. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  49. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  51. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  52. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  53. airflow/providers/google/cloud/operators/vision.py +13 -12
  54. airflow/providers/google/cloud/operators/workflows.py +10 -9
  55. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  56. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  58. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  59. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  60. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  61. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  62. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  63. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  64. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  65. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  66. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  67. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  71. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  72. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  73. airflow/providers/google/cloud/triggers/bigquery.py +14 -3
  74. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  75. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  76. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  77. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  78. airflow/providers/google/cloud/triggers/dataproc.py +110 -26
  79. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  80. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  81. airflow/providers/google/common/hooks/base_google.py +45 -7
  82. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  83. airflow/providers/google/firebase/operators/firestore.py +2 -1
  84. airflow/providers/google/get_provider_info.py +3 -2
  85. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA +8 -8
  86. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/RECORD +88 -89
  87. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/WHEEL +0 -0
  89. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py

@@ -30,6 +30,7 @@ from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts
 
 if TYPE_CHECKING:
@@ -104,7 +105,7 @@ class BigQueryToGCSOperator(BaseOperator):
         *,
         source_project_dataset_table: str,
         destination_cloud_storage_uris: list[str],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         compression: str = "NONE",
         export_format: str = "CSV",
         field_delimiter: str = ",",
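
Several hunks in this release swap `project_id: str | None = None` for `project_id: str = PROVIDE_PROJECT_ID`. Below is a minimal sketch of that sentinel pattern, assuming (as in `airflow/providers/google/common/hooks/base_google.py`) that the constant is roughly `cast(str, None)`: type checkers see a required `str`, while the hooks' fallback logic still detects the "not provided" case and substitutes the project from the GCP connection. The `resolve_project` helper is illustrative, not part of the provider.

    from typing import cast

    # Assumed definition, mirroring base_google.py: a None that satisfies `str` typing.
    PROVIDE_PROJECT_ID: str = cast(str, None)

    def resolve_project(project_id: str = PROVIDE_PROJECT_ID) -> str:
        # Hypothetical stand-in for the hooks' fallback-to-default-project logic.
        return project_id or "default-project-from-connection"

    print(resolve_project())              # default-project-from-connection
    print(resolve_project("my-project"))  # my-project
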
airflow/providers/google/cloud/transfers/bigquery_to_mssql.py

@@ -91,7 +91,7 @@ class BigQueryToMsSqlOperator(BigQueryToSqlBaseOperator):
         self.source_project_dataset_table = source_project_dataset_table
 
     def get_sql_hook(self) -> MsSqlHook:
-        return MsSqlHook(schema=self.database, mysql_conn_id=self.mssql_conn_id)
+        return MsSqlHook(schema=self.database, mssql_conn_id=self.mssql_conn_id)
 
     def persist_links(self, context: Context) -> None:
         project_id, dataset_id, table_id = self.source_project_dataset_table.split(".")
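
The `mysql_conn_id=` keyword in the old line was a typo: `MsSqlHook` is a `DbApiHook` subclass whose connection keyword is `mssql_conn_id`, so the misspelled argument was most likely dropped and the hook fell back to its default connection. A toy sketch of that lookup pattern (not the real hook, names illustrative):

    class DemoDbHook:
        """Stand-in for a DbApiHook subclass such as MsSqlHook."""

        conn_name_attr = "mssql_conn_id"
        default_conn_name = "mssql_default"

        def __init__(self, **kwargs):
            # Only the keyword named by conn_name_attr is honoured; other keywords
            # (e.g. a misspelled mysql_conn_id) are silently ignored.
            setattr(self, self.conn_name_attr, kwargs.get(self.conn_name_attr, self.default_conn_name))

    print(DemoDbHook(mysql_conn_id="my_mssql").mssql_conn_id)  # mssql_default  (old behaviour)
    print(DemoDbHook(mssql_conn_id="my_mssql").mssql_conn_id)  # my_mssql       (fixed keyword)
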
airflow/providers/google/cloud/transfers/bigquery_to_sql.py

@@ -133,4 +133,5 @@ class BigQueryToSqlBaseOperator(BaseOperator):
                 rows=rows,
                 target_fields=self.selected_fields,
                 replace=self.replace,
+                commit_every=self.batch_size,
             )
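
`BigQueryToSqlBaseOperator` already exposes a `batch_size` argument; this change forwards it to the SQL hook's `insert_rows(..., commit_every=...)`, so inserts are committed in chunks of `batch_size` rows rather than the hook's default. A rough model of that batching, with made-up names and no real database:

    def count_commits(n_rows: int, commit_every: int = 1000) -> int:
        """Toy model of DbApiHook.insert_rows batching: one commit per
        commit_every rows, plus a final commit for any remainder."""
        commits = n_rows // commit_every
        if n_rows % commit_every:
            commits += 1
        return commits

    print(count_commits(2500, commit_every=1000))  # 3 commits for 2500 rows at batch_size=1000
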
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py

@@ -42,6 +42,7 @@ from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQuery
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts
 
 if TYPE_CHECKING:
@@ -229,7 +230,7 @@ class GCSToBigQueryOperator(BaseOperator):
         job_id: str | None = None,
         force_rerun: bool = True,
         reattach_states: set[str] | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -749,7 +750,6 @@ class GCSToBigQueryOperator(BaseOperator):
         )
         from openlineage.client.run import Dataset
 
-        from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
         from airflow.providers.google.cloud.utils.openlineage import (
             get_facets_from_bq_table,
             get_identity_column_lineage_facet,
@@ -766,8 +766,7 @@ class GCSToBigQueryOperator(BaseOperator):
             "schema": output_dataset_facets["schema"],
         }
         input_datasets = []
-        for uri in sorted(self.source_uris):
-            bucket, blob = _parse_gcs_url(uri)
+        for blob in sorted(self.source_objects):
             additional_facets = {}
 
             if "*" in blob:
@@ -777,7 +776,7 @@ class GCSToBigQueryOperator(BaseOperator):
                     "symlink": SymlinksDatasetFacet(
                         identifiers=[
                             SymlinksDatasetFacetIdentifiers(
-                                namespace=f"gs://{bucket}", name=blob, type="file"
+                                namespace=f"gs://{self.bucket}", name=blob, type="file"
                             )
                         ]
                     ),
@@ -788,7 +787,7 @@ class GCSToBigQueryOperator(BaseOperator):
                     blob = "/"
 
             dataset = Dataset(
-                namespace=f"gs://{bucket}",
+                namespace=f"gs://{self.bucket}",
                 name=blob,
                 facets=merge_dicts(input_dataset_facets, additional_facets),
             )
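
`GCSToBigQueryOperator` now builds its OpenLineage input datasets from `self.bucket` and `self.source_objects` instead of re-parsing the derived `source_uris` with `_parse_gcs_url`. A sketch of the naming rule visible in these hunks (the helper name is illustrative, and the wildcard branch is an assumption based on the surrounding context: the pattern is kept in the symlink facet, the dataset name is reduced to the pattern's directory, and the bucket root is shown as "/"):

    from pathlib import Path

    def lineage_dataset_name(bucket: str, blob: str) -> tuple[str, str]:
        """Return (namespace, name) for one source object."""
        namespace = f"gs://{bucket}"
        if "*" in blob:
            # The wildcard pattern itself is recorded via SymlinksDatasetFacet;
            # the dataset name becomes the pattern's directory (or the bucket root).
            parent = Path(blob).parent.as_posix()
            return namespace, "/" if parent == "." else parent
        return namespace, blob

    print(lineage_dataset_name("my-bucket", "part-*.csv"))      # ('gs://my-bucket', '/')
    print(lineage_dataset_name("my-bucket", "raw/part-*.csv"))  # ('gs://my-bucket', 'raw')
    print(lineage_dataset_name("my-bucket", "raw/file.csv"))    # ('gs://my-bucket', 'raw/file.csv')
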
airflow/providers/google/cloud/transfers/gcs_to_gcs.py

@@ -234,8 +234,6 @@ class GCSToGCSOperator(BaseOperator):
         self.source_object_required = source_object_required
         self.exact_match = exact_match
         self.match_glob = match_glob
-        self.resolved_source_objects: set[str] = set()
-        self.resolved_target_objects: set[str] = set()
 
     def execute(self, context: Context):
         hook = GCSHook(
@@ -540,13 +538,6 @@
             self.destination_bucket,
             destination_object,
         )
-
-        self.resolved_source_objects.add(source_object)
-        if not destination_object:
-            self.resolved_target_objects.add(source_object)
-        else:
-            self.resolved_target_objects.add(destination_object)
-
         hook.rewrite(self.source_bucket, source_object, self.destination_bucket, destination_object)
 
         if self.move_object:
@@ -559,17 +550,36 @@
         This means we won't have to normalize self.source_object and self.source_objects,
         destination bucket and so on.
         """
+        from pathlib import Path
+
         from openlineage.client.run import Dataset
 
         from airflow.providers.openlineage.extractors import OperatorLineage
 
+        def _process_prefix(pref):
+            if WILDCARD in pref:
+                pref = pref.split(WILDCARD)[0]
+            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
+            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
+                pref = Path(pref).parent.as_posix()
+            return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]  # Adjust root path
+
+        inputs = []
+        for prefix in self.source_objects:
+            result = _process_prefix(prefix)
+            inputs.extend(result)
+
+        if self.destination_object is None:
+            outputs = inputs.copy()
+        else:
+            outputs = _process_prefix(self.destination_object)
+
         return OperatorLineage(
             inputs=[
-                Dataset(namespace=f"gs://{self.source_bucket}", name=source)
-                for source in sorted(self.resolved_source_objects)
+                Dataset(namespace=f"gs://{self.source_bucket}", name=source) for source in sorted(set(inputs))
             ],
             outputs=[
                 Dataset(namespace=f"gs://{self.destination_bucket}", name=target)
-                for target in sorted(self.resolved_target_objects)
+                for target in sorted(set(outputs))
             ],
         )
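
With the per-object tracking sets (`resolved_source_objects` / `resolved_target_objects`) removed, `GCSToGCSOperator` now reports lineage at prefix level. The worked examples below re-run the `_process_prefix` helper exactly as it appears in the diff (with `WILDCARD = "*"`, the module-level constant of `gcs_to_gcs.py`) to show how prefixes normalize:

    from pathlib import Path

    WILDCARD = "*"  # module-level constant in gcs_to_gcs.py

    def _process_prefix(pref):
        if WILDCARD in pref:
            pref = pref.split(WILDCARD)[0]
        # Use parent if not a file (dot not in name) and not a dir (ends with slash)
        if "." not in pref.split("/")[-1] and not pref.endswith("/"):
            pref = Path(pref).parent.as_posix()
        return ["/" if pref in ("", "/", ".") else pref.rstrip("/")]  # Adjust root path

    print(_process_prefix("data/*.csv"))     # ['data']           wildcard cut, trailing slash stripped
    print(_process_prefix("data/file.csv"))  # ['data/file.csv']  looks like a file, kept as-is
    print(_process_prefix("data/subdir"))    # ['data']           no dot and no slash -> parent directory
    print(_process_prefix(""))               # ['/']              empty prefix means the bucket root
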
airflow/providers/google/cloud/triggers/bigquery.py

@@ -51,12 +51,13 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self,
         conn_id: str,
         job_id: str | None,
-        project_id: str | None,
+        project_id: str,
         location: str | None,
         dataset_id: str | None = None,
         table_id: str | None = None,
         poll_interval: float = 4.0,
         impersonation_chain: str | Sequence[str] | None = None,
+        cancel_on_kill: bool = True,
     ):
         super().__init__()
         self.log.info("Using the connection %s .", conn_id)
@@ -69,6 +70,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         self.table_id = table_id
         self.poll_interval = poll_interval
         self.impersonation_chain = impersonation_chain
+        self.cancel_on_kill = cancel_on_kill
 
     def serialize(self) -> tuple[str, dict[str, Any]]:
         """Serialize BigQueryInsertJobTrigger arguments and classpath."""
@@ -83,6 +85,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 "table_id": self.table_id,
                 "poll_interval": self.poll_interval,
                 "impersonation_chain": self.impersonation_chain,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )
 
@@ -113,6 +116,14 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                         self.poll_interval,
                     )
                     await asyncio.sleep(self.poll_interval)
+        except asyncio.CancelledError:
+            self.log.info("Task was killed.")
+            if self.job_id and self.cancel_on_kill:
+                await hook.cancel_job(  # type: ignore[union-attr]
+                    job_id=self.job_id, project_id=self.project_id, location=self.location
+                )
+            else:
+                self.log.info("Skipping to cancel job: %s:%s.%s", self.project_id, self.location, self.job_id)
         except Exception as e:
             self.log.exception("Exception occurred while checking for query completion")
             yield TriggerEvent({"status": "error", "message": str(e)})
@@ -282,7 +293,7 @@ class BigQueryIntervalCheckTrigger(BigQueryInsertJobTrigger):
         conn_id: str,
         first_job_id: str,
         second_job_id: str,
-        project_id: str | None,
+        project_id: str,
         table: str,
         metrics_thresholds: dict[str, int],
         location: str | None = None,
@@ -443,7 +454,7 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
         sql: str,
         pass_value: int | float | str,
         job_id: str | None,
-        project_id: str | None,
+        project_id: str,
         tolerance: Any = None,
         dataset_id: str | None = None,
         table_id: str | None = None,
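
With `cancel_on_kill` now serialized into `BigQueryInsertJobTrigger`, a deferred task that is killed can cancel its BigQuery job from the triggerer via the new `asyncio.CancelledError` branch above. A hedged usage sketch of the operator side; project, location and query are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

    insert_job = BigQueryInsertJobOperator(
        task_id="insert_query_job",
        configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
        project_id="my-project",
        location="US",
        deferrable=True,       # run via the trigger shown above
        cancel_on_kill=True,   # killing the task while deferred also cancels the BigQuery job
    )
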
airflow/providers/google/cloud/triggers/cloud_composer.py

@@ -78,3 +78,71 @@ class CloudComposerExecutionTrigger(BaseTrigger):
                 "operation_done": operation.done,
             }
         )
+
+
+class CloudComposerAirflowCLICommandTrigger(BaseTrigger):
+    """The trigger wait for the Airflow CLI command result."""
+
+    def __init__(
+        self,
+        project_id: str,
+        region: str,
+        environment_id: str,
+        execution_cmd_info: dict,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        poll_interval: int = 10,
+    ):
+        super().__init__()
+        self.project_id = project_id
+        self.region = region
+        self.environment_id = environment_id
+        self.execution_cmd_info = execution_cmd_info
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.poll_interval = poll_interval
+
+        self.gcp_hook = CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerAirflowCLICommandTrigger",
+            {
+                "project_id": self.project_id,
+                "region": self.region,
+                "environment_id": self.environment_id,
+                "execution_cmd_info": self.execution_cmd_info,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "poll_interval": self.poll_interval,
+            },
+        )
+
+    async def run(self):
+        try:
+            result = await self.gcp_hook.wait_command_execution_result(
+                project_id=self.project_id,
+                region=self.region,
+                environment_id=self.environment_id,
+                execution_cmd_info=self.execution_cmd_info,
+                poll_interval=self.poll_interval,
+            )
+        except AirflowException as ex:
+            yield TriggerEvent(
+                {
+                    "status": "error",
+                    "message": str(ex),
+                }
+            )
+            return
+
+        yield TriggerEvent(
+            {
+                "status": "success",
+                "result": result,
+            }
+        )
+        return
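
A hedged sketch of how this new trigger is meant to be used: an operator defers with it after starting an Airflow CLI command in the environment, the scheduler stores the `serialize()` output, and the triggerer reinstantiates the class from that classpath/kwargs pair and awaits `run()` for a success or error `TriggerEvent`. All values below are placeholders, and the `execution_cmd_info` keys are an assumption based on the shape of the Composer `ExecuteAirflowCommandResponse`.

    from airflow.providers.google.cloud.triggers.cloud_composer import (
        CloudComposerAirflowCLICommandTrigger,
    )

    trigger = CloudComposerAirflowCLICommandTrigger(
        project_id="my-project",
        region="us-central1",
        environment_id="my-composer-env",
        execution_cmd_info={"execution_id": "...", "pod": "...", "pod_namespace": "..."},  # placeholder dict
        poll_interval=10,
    )

    classpath, kwargs = trigger.serialize()
    # classpath == "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerAirflowCLICommandTrigger"
    # The triggerer rebuilds the trigger as CloudComposerAirflowCLICommandTrigger(**kwargs)
    # and iterates run(), which resolves via the hook's wait_command_execution_result().
    print(classpath)
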
airflow/providers/google/cloud/triggers/cloud_sql.py

@@ -23,6 +23,7 @@ import asyncio
 from typing import Sequence
 
 from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLAsyncHook, CloudSqlOperationStatus
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -36,7 +37,7 @@ class CloudSQLExportTrigger(BaseTrigger):
     def __init__(
         self,
         operation_name: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         poke_interval: int = 20,

airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py

@@ -27,6 +27,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     CloudDataTransferServiceAsyncHook,
 )
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -43,7 +44,7 @@ class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
     def __init__(
         self,
         job_names: list[str],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         poll_interval: int = 10,
         gcp_conn_id: str = "google_cloud_default",
     ) -> None: