apache-airflow-providers-google 10.26.0__py3-none-any.whl → 11.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +2 -1
- airflow/providers/google/ads/operators/ads.py +2 -1
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
- airflow/providers/google/assets/gcs.py +17 -1
- airflow/providers/google/cloud/hooks/automl.py +3 -6
- airflow/providers/google/cloud/hooks/bigquery.py +41 -1486
- airflow/providers/google/cloud/hooks/bigquery_dts.py +4 -11
- airflow/providers/google/cloud/hooks/bigtable.py +3 -6
- airflow/providers/google/cloud/hooks/cloud_batch.py +6 -3
- airflow/providers/google/cloud/hooks/cloud_build.py +3 -15
- airflow/providers/google/cloud/hooks/cloud_composer.py +2 -17
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -6
- airflow/providers/google/cloud/hooks/cloud_run.py +10 -5
- airflow/providers/google/cloud/hooks/cloud_sql.py +5 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +0 -5
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -6
- airflow/providers/google/cloud/hooks/dataflow.py +3 -14
- airflow/providers/google/cloud/hooks/dataform.py +2 -9
- airflow/providers/google/cloud/hooks/datafusion.py +4 -15
- airflow/providers/google/cloud/hooks/dataplex.py +4 -7
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +77 -22
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +2 -9
- airflow/providers/google/cloud/hooks/datastore.py +3 -6
- airflow/providers/google/cloud/hooks/dlp.py +3 -6
- airflow/providers/google/cloud/hooks/functions.py +2 -6
- airflow/providers/google/cloud/hooks/gcs.py +2 -18
- airflow/providers/google/cloud/hooks/gdm.py +1 -17
- airflow/providers/google/cloud/hooks/kms.py +3 -6
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -97
- airflow/providers/google/cloud/hooks/life_sciences.py +2 -6
- airflow/providers/google/cloud/hooks/looker.py +2 -1
- airflow/providers/google/cloud/hooks/mlengine.py +0 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -6
- airflow/providers/google/cloud/hooks/os_login.py +3 -6
- airflow/providers/google/cloud/hooks/pubsub.py +3 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +3 -73
- airflow/providers/google/cloud/hooks/spanner.py +3 -6
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -6
- airflow/providers/google/cloud/hooks/stackdriver.py +3 -6
- airflow/providers/google/cloud/hooks/tasks.py +3 -6
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -6
- airflow/providers/google/cloud/hooks/translate.py +455 -9
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -14
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -1
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -1
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -6
- airflow/providers/google/cloud/hooks/vision.py +3 -6
- airflow/providers/google/cloud/hooks/workflows.py +2 -9
- airflow/providers/google/cloud/links/dataproc.py +0 -1
- airflow/providers/google/cloud/links/translate.py +91 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -3
- airflow/providers/google/cloud/openlineage/utils.py +54 -21
- airflow/providers/google/cloud/operators/automl.py +5 -4
- airflow/providers/google/cloud/operators/bigquery.py +2 -341
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/bigtable.py +2 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +2 -1
- airflow/providers/google/cloud/operators/cloud_memorystore.py +2 -1
- airflow/providers/google/cloud/operators/cloud_run.py +2 -1
- airflow/providers/google/cloud/operators/cloud_sql.py +2 -1
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/operators/compute.py +2 -1
- airflow/providers/google/cloud/operators/datacatalog.py +2 -1
- airflow/providers/google/cloud/operators/dataflow.py +2 -517
- airflow/providers/google/cloud/operators/dataform.py +2 -1
- airflow/providers/google/cloud/operators/datafusion.py +2 -1
- airflow/providers/google/cloud/operators/dataplex.py +37 -31
- airflow/providers/google/cloud/operators/dataprep.py +2 -1
- airflow/providers/google/cloud/operators/dataproc.py +3 -633
- airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
- airflow/providers/google/cloud/operators/datastore.py +2 -1
- airflow/providers/google/cloud/operators/dlp.py +2 -1
- airflow/providers/google/cloud/operators/functions.py +2 -1
- airflow/providers/google/cloud/operators/gcs.py +5 -4
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -11
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +2 -1
- airflow/providers/google/cloud/operators/natural_language.py +3 -2
- airflow/providers/google/cloud/operators/pubsub.py +2 -1
- airflow/providers/google/cloud/operators/spanner.py +2 -1
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +2 -1
- airflow/providers/google/cloud/operators/tasks.py +3 -2
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate.py +622 -32
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -93
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -13
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -17
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -13
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
- airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
- airflow/providers/google/cloud/operators/vision.py +3 -2
- airflow/providers/google/cloud/operators/workflows.py +3 -2
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -19
- airflow/providers/google/cloud/sensors/bigquery.py +2 -81
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_composer.py +8 -94
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +2 -1
- airflow/providers/google/cloud/sensors/dataform.py +2 -1
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataplex.py +2 -1
- airflow/providers/google/cloud/sensors/dataprep.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -1
- airflow/providers/google/cloud/sensors/gcs.py +4 -36
- airflow/providers/google/cloud/sensors/pubsub.py +2 -1
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +75 -18
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +9 -7
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +2 -1
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +13 -9
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_local.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +2 -1
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -1
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +2 -1
- airflow/providers/google/cloud/triggers/bigquery.py +2 -1
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_sql.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +2 -1
- airflow/providers/google/cloud/triggers/datafusion.py +2 -1
- airflow/providers/google/cloud/triggers/dataplex.py +1 -1
- airflow/providers/google/cloud/triggers/dataproc.py +2 -1
- airflow/providers/google/cloud/triggers/gcs.py +3 -2
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -1
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/pubsub.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -1
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +4 -11
- airflow/providers/google/common/hooks/discovery_api.py +1 -6
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +7 -22
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -1
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +2 -3
- airflow/providers/google/marketing_platform/hooks/display_video.py +4 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +6 -6
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -1
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +2 -42
- airflow/providers/google/marketing_platform/operators/display_video.py +2 -47
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -1
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -7
- airflow/providers/google/marketing_platform/sensors/display_video.py +2 -13
- airflow/providers/google/suite/hooks/calendar.py +2 -8
- airflow/providers/google/suite/hooks/drive.py +2 -6
- airflow/providers/google/suite/hooks/sheets.py +2 -7
- airflow/providers/google/suite/operators/sheets.py +2 -7
- airflow/providers/google/suite/sensors/drive.py +2 -7
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -7
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +2 -7
- airflow/providers/google/suite/transfers/local_to_drive.py +2 -7
- airflow/providers/google/suite/transfers/sql_to_sheets.py +2 -7
- {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/METADATA +10 -10
- apache_airflow_providers_google-11.0.0.dist-info/RECORD +315 -0
- airflow/providers/google/marketing_platform/hooks/analytics.py +0 -211
- airflow/providers/google/marketing_platform/operators/analytics.py +0 -551
- apache_airflow_providers_google-10.26.0.dist-info/RECORD +0 -317
- {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.26.0.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/entry_points.txt +0 -0
@@ -20,21 +20,18 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import json
|
23
|
+
from collections.abc import Iterable, Sequence
|
23
24
|
from datetime import datetime, timedelta
|
24
|
-
from typing import TYPE_CHECKING, Any, Iterable, Sequence
|
25
|
+
from typing import TYPE_CHECKING
|
25
26
|
|
26
27
|
from dateutil import parser
|
27
28
|
from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse
|
28
29
|
|
29
30
|
from airflow.configuration import conf
|
30
|
-
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
31
|
+
from airflow.exceptions import AirflowException
|
31
32
|
from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerHook
|
32
|
-
from airflow.providers.google.cloud.triggers.cloud_composer import (
|
33
|
-
CloudComposerDAGRunTrigger,
|
34
|
-
CloudComposerExecutionTrigger,
|
35
|
-
)
|
33
|
+
from airflow.providers.google.cloud.triggers.cloud_composer import CloudComposerDAGRunTrigger
|
36
34
|
from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
|
37
|
-
from airflow.providers.google.common.deprecated import deprecated
|
38
35
|
from airflow.sensors.base import BaseSensorOperator
|
39
36
|
from airflow.utils.state import TaskInstanceState
|
40
37
|
|
@@ -42,89 +39,6 @@ if TYPE_CHECKING:
|
|
42
39
|
from airflow.utils.context import Context
|
43
40
|
|
44
41
|
|
45
|
-
@deprecated(
|
46
|
-
planned_removal_date="November 01, 2024",
|
47
|
-
use_instead="CloudComposerCreateEnvironmentOperator, CloudComposerDeleteEnvironmentOperator, "
|
48
|
-
"CloudComposerUpdateEnvironmentOperator",
|
49
|
-
instructions="Please use CloudComposerCreateEnvironmentOperator, CloudComposerDeleteEnvironmentOperator "
|
50
|
-
"or CloudComposerUpdateEnvironmentOperator in deferrable or non-deferrable mode, "
|
51
|
-
"since since every operator gives user a possibility to wait (asynchronously or synchronously) "
|
52
|
-
"until the Operation is finished.",
|
53
|
-
category=AirflowProviderDeprecationWarning,
|
54
|
-
)
|
55
|
-
class CloudComposerEnvironmentSensor(BaseSensorOperator):
|
56
|
-
"""
|
57
|
-
Check the status of the Cloud Composer Environment task.
|
58
|
-
|
59
|
-
This Sensor is deprecated. You can achieve the same functionality by using Cloud Composer Operators
|
60
|
-
CloudComposerCreateEnvironmentOperator, CloudComposerDeleteEnvironmentOperator and
|
61
|
-
CloudComposerUpdateEnvironmentOperator in deferrable or non-deferrable mode, since every operator
|
62
|
-
gives user a possibility to wait (asynchronously or synchronously) until Operation will be finished.
|
63
|
-
|
64
|
-
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
65
|
-
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
66
|
-
:param operation_name: The name of the operation resource
|
67
|
-
:param gcp_conn_id: The connection ID to use when fetching connection info.
|
68
|
-
:param impersonation_chain: Optional service account to impersonate using short-term
|
69
|
-
credentials, or chained list of accounts required to get the access_token
|
70
|
-
of the last account in the list, which will be impersonated in the request.
|
71
|
-
If set as a string, the account must grant the originating account
|
72
|
-
the Service Account Token Creator IAM role.
|
73
|
-
If set as a sequence, the identities from the list must grant
|
74
|
-
Service Account Token Creator IAM role to the directly preceding identity, with first
|
75
|
-
account from the list granting this role to the originating account (templated).
|
76
|
-
:param pooling_period_seconds: Optional: Control the rate of the poll for the result of deferrable run.
|
77
|
-
"""
|
78
|
-
|
79
|
-
def __init__(
|
80
|
-
self,
|
81
|
-
*,
|
82
|
-
project_id: str,
|
83
|
-
region: str,
|
84
|
-
operation_name: str,
|
85
|
-
gcp_conn_id: str = "google_cloud_default",
|
86
|
-
impersonation_chain: str | Sequence[str] | None = None,
|
87
|
-
pooling_period_seconds: int = 30,
|
88
|
-
**kwargs,
|
89
|
-
):
|
90
|
-
super().__init__(**kwargs)
|
91
|
-
self.project_id = project_id
|
92
|
-
self.region = region
|
93
|
-
self.operation_name = operation_name
|
94
|
-
self.pooling_period_seconds = pooling_period_seconds
|
95
|
-
self.gcp_conn_id = gcp_conn_id
|
96
|
-
self.impersonation_chain = impersonation_chain
|
97
|
-
|
98
|
-
def execute(self, context: Context) -> None:
|
99
|
-
"""Airflow runs this method on the worker and defers using the trigger."""
|
100
|
-
self.defer(
|
101
|
-
trigger=CloudComposerExecutionTrigger(
|
102
|
-
project_id=self.project_id,
|
103
|
-
region=self.region,
|
104
|
-
operation_name=self.operation_name,
|
105
|
-
gcp_conn_id=self.gcp_conn_id,
|
106
|
-
impersonation_chain=self.impersonation_chain,
|
107
|
-
pooling_period_seconds=self.pooling_period_seconds,
|
108
|
-
),
|
109
|
-
method_name="execute_complete",
|
110
|
-
)
|
111
|
-
|
112
|
-
def execute_complete(self, context: dict[str, Any], event: dict[str, str] | None = None) -> str:
|
113
|
-
"""
|
114
|
-
Act as a callback for when the trigger fires - returns immediately.
|
115
|
-
|
116
|
-
Relies on trigger to throw an exception, otherwise it assumes execution was successful.
|
117
|
-
"""
|
118
|
-
if event:
|
119
|
-
if event.get("operation_done"):
|
120
|
-
return event["operation_done"]
|
121
|
-
|
122
|
-
raise AirflowException(event["message"])
|
123
|
-
|
124
|
-
message = "No event received in trigger callback"
|
125
|
-
raise AirflowException(message)
|
126
|
-
|
127
|
-
|
128
42
|
class CloudComposerDAGRunSensor(BaseSensorOperator):
|
129
43
|
"""
|
130
44
|
Check if a DAG run has completed.
|
@@ -189,7 +103,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
189
103
|
self.deferrable = deferrable
|
190
104
|
self.poll_interval = poll_interval
|
191
105
|
|
192
|
-
def
|
106
|
+
def _get_logical_dates(self, context) -> tuple[datetime, datetime]:
|
193
107
|
if isinstance(self.execution_range, timedelta):
|
194
108
|
if self.execution_range < timedelta(0):
|
195
109
|
return context["logical_date"], context["logical_date"] - self.execution_range
|
@@ -203,7 +117,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
203
117
|
return context["logical_date"] - timedelta(1), context["logical_date"]
|
204
118
|
|
205
119
|
def poke(self, context: Context) -> bool:
|
206
|
-
start_date, end_date = self.
|
120
|
+
start_date, end_date = self._get_logical_dates(context)
|
207
121
|
|
208
122
|
if datetime.now(end_date.tzinfo) < end_date:
|
209
123
|
return False
|
@@ -251,7 +165,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
251
165
|
for dag_run in dag_runs:
|
252
166
|
if (
|
253
167
|
start_date.timestamp()
|
254
|
-
< parser.parse(dag_run["
|
168
|
+
< parser.parse(dag_run["logical_date"]).timestamp()
|
255
169
|
< end_date.timestamp()
|
256
170
|
) and dag_run["state"] not in self.allowed_states:
|
257
171
|
return False
|
@@ -259,7 +173,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
259
173
|
|
260
174
|
def execute(self, context: Context) -> None:
|
261
175
|
if self.deferrable:
|
262
|
-
start_date, end_date = self.
|
176
|
+
start_date, end_date = self._get_logical_dates(context)
|
263
177
|
self.defer(
|
264
178
|
trigger=CloudComposerDAGRunTrigger(
|
265
179
|
project_id=self.project_id,
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Sequence
|
22
|
+
from collections.abc import Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
|
25
26
|
COUNTERS,
|
@@ -19,8 +19,9 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
+
from collections.abc import Sequence
|
22
23
|
from functools import cached_property
|
23
|
-
from typing import TYPE_CHECKING, Any, Callable, Sequence
|
24
|
+
from typing import TYPE_CHECKING, Any, Callable
|
24
25
|
|
25
26
|
from airflow.configuration import conf
|
26
27
|
from airflow.exceptions import AirflowException
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Iterable, Sequence
|
22
|
+
from collections.abc import Iterable, Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.exceptions import AirflowException
|
25
26
|
from airflow.providers.google.cloud.hooks.dataform import DataformHook
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Iterable, Sequence
|
22
|
+
from collections.abc import Iterable, Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.exceptions import AirflowException, AirflowNotFoundException
|
25
26
|
from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Sequence
|
22
|
+
from collections.abc import Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.providers.google.cloud.hooks.dataprep import GoogleDataprepHook, JobGroupStatuses
|
25
26
|
from airflow.sensors.base import BaseSensorOperator
|
@@ -20,7 +20,8 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import time
|
23
|
-
from typing import TYPE_CHECKING, Sequence
|
23
|
+
from collections.abc import Sequence
|
24
|
+
from typing import TYPE_CHECKING
|
24
25
|
|
25
26
|
from google.api_core.exceptions import ServerError
|
26
27
|
from google.cloud.dataproc_v1.types import Batch, JobStatus
|
@@ -17,7 +17,8 @@
|
|
17
17
|
# under the License.
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
|
-
from typing import TYPE_CHECKING, Sequence
|
20
|
+
from collections.abc import Sequence
|
21
|
+
from typing import TYPE_CHECKING
|
21
22
|
|
22
23
|
from airflow.exceptions import AirflowException
|
23
24
|
from airflow.providers.google.cloud.hooks.dataproc_metastore import DataprocMetastoreHook
|
@@ -21,13 +21,14 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
import os
|
23
23
|
import textwrap
|
24
|
+
from collections.abc import Sequence
|
24
25
|
from datetime import datetime, timedelta
|
25
|
-
from typing import TYPE_CHECKING, Any, Callable, Sequence
|
26
|
+
from typing import TYPE_CHECKING, Any, Callable
|
26
27
|
|
27
28
|
from google.cloud.storage.retry import DEFAULT_RETRY
|
28
29
|
|
29
30
|
from airflow.configuration import conf
|
30
|
-
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
31
|
+
from airflow.exceptions import AirflowException
|
31
32
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
32
33
|
from airflow.providers.google.cloud.triggers.gcs import (
|
33
34
|
GCSBlobTrigger,
|
@@ -35,7 +36,6 @@ from airflow.providers.google.cloud.triggers.gcs import (
|
|
35
36
|
GCSPrefixBlobTrigger,
|
36
37
|
GCSUploadSessionTrigger,
|
37
38
|
)
|
38
|
-
from airflow.providers.google.common.deprecated import deprecated
|
39
39
|
from airflow.sensors.base import BaseSensorOperator, poke_mode_only
|
40
40
|
|
41
41
|
if TYPE_CHECKING:
|
@@ -142,38 +142,6 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
|
|
142
142
|
return True
|
143
143
|
|
144
144
|
|
145
|
-
@deprecated(
|
146
|
-
planned_removal_date="November 01, 2024",
|
147
|
-
use_instead="GCSObjectExistenceSensor",
|
148
|
-
instructions="Please use GCSObjectExistenceSensor and set deferrable attribute to True.",
|
149
|
-
category=AirflowProviderDeprecationWarning,
|
150
|
-
)
|
151
|
-
class GCSObjectExistenceAsyncSensor(GCSObjectExistenceSensor):
|
152
|
-
"""
|
153
|
-
Checks for the existence of a file in Google Cloud Storage.
|
154
|
-
|
155
|
-
This class is deprecated and will be removed in a future release.
|
156
|
-
|
157
|
-
Please use :class:`airflow.providers.google.cloud.sensors.gcs.GCSObjectExistenceSensor`
|
158
|
-
and set *deferrable* attribute to *True* instead.
|
159
|
-
|
160
|
-
:param bucket: The Google Cloud Storage bucket where the object is.
|
161
|
-
:param object: The name of the object to check in the Google cloud storage bucket.
|
162
|
-
:param google_cloud_conn_id: The connection ID to use when connecting to Google Cloud Storage.
|
163
|
-
:param impersonation_chain: Optional service account to impersonate using short-term
|
164
|
-
credentials, or chained list of accounts required to get the access_token
|
165
|
-
of the last account in the list, which will be impersonated in the request.
|
166
|
-
If set as a string, the account must grant the originating account
|
167
|
-
the Service Account Token Creator IAM role.
|
168
|
-
If set as a sequence, the identities from the list must grant
|
169
|
-
Service Account Token Creator IAM role to the directly preceding identity, with first
|
170
|
-
account from the list granting this role to the originating account (templated).
|
171
|
-
"""
|
172
|
-
|
173
|
-
def __init__(self, **kwargs: Any) -> None:
|
174
|
-
super().__init__(deferrable=True, **kwargs)
|
175
|
-
|
176
|
-
|
177
145
|
def ts_function(context):
|
178
146
|
"""
|
179
147
|
Act as a default callback for the GoogleCloudStorageObjectUpdatedSensor.
|
@@ -192,7 +160,7 @@ class GCSObjectUpdateSensor(BaseSensorOperator):
|
|
192
160
|
:param object: The name of the object to download in the Google cloud
|
193
161
|
storage bucket.
|
194
162
|
:param ts_func: Callback for defining the update condition. The default callback
|
195
|
-
returns
|
163
|
+
returns logical_date + schedule_interval. The callback takes the context
|
196
164
|
as parameter.
|
197
165
|
:param google_cloud_conn_id: The connection ID to use when
|
198
166
|
connecting to Google Cloud Storage.
|
@@ -19,8 +19,9 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
+
from collections.abc import Sequence
|
22
23
|
from datetime import timedelta
|
23
|
-
from typing import TYPE_CHECKING, Any, Callable, Sequence
|
24
|
+
from typing import TYPE_CHECKING, Any, Callable
|
24
25
|
|
25
26
|
from google.cloud import pubsub_v1
|
26
27
|
from google.cloud.pubsub_v1.types import ReceivedMessage
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Sequence
|
22
|
+
from collections.abc import Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook
|
25
26
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
@@ -16,7 +16,8 @@
|
|
16
16
|
# under the License.
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
|
-
from typing import TYPE_CHECKING, Sequence
|
19
|
+
from collections.abc import Sequence
|
20
|
+
from typing import TYPE_CHECKING
|
20
21
|
|
21
22
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
22
23
|
from google.cloud.workflows.executions_v1beta import Execution
|
@@ -20,8 +20,9 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import os
|
23
|
+
from collections.abc import Sequence
|
23
24
|
from tempfile import NamedTemporaryFile
|
24
|
-
from typing import TYPE_CHECKING, Sequence
|
25
|
+
from typing import TYPE_CHECKING
|
25
26
|
|
26
27
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
|
27
28
|
|
@@ -18,7 +18,8 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
import tempfile
|
21
|
-
from typing import TYPE_CHECKING, Sequence
|
21
|
+
from collections.abc import Sequence
|
22
|
+
from typing import TYPE_CHECKING
|
22
23
|
|
23
24
|
from airflow.models import BaseOperator
|
24
25
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
@@ -18,8 +18,9 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
import warnings
|
21
|
+
from collections.abc import Sequence
|
21
22
|
from tempfile import NamedTemporaryFile
|
22
|
-
from typing import TYPE_CHECKING, Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
25
26
|
from airflow.models import BaseOperator
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from typing import TYPE_CHECKING, Sequence
|
22
|
+
from collections.abc import Sequence
|
23
|
+
from typing import TYPE_CHECKING
|
23
24
|
|
24
25
|
from airflow.models import BaseOperator
|
25
26
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
|
@@ -110,6 +111,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
|
|
110
111
|
self.location = location
|
111
112
|
self.impersonation_chain = impersonation_chain
|
112
113
|
self.hook: BigQueryHook | None = None
|
114
|
+
self._job_conf: dict = {}
|
113
115
|
|
114
116
|
def _prepare_job_configuration(self):
|
115
117
|
self.source_project_dataset_tables = (
|
@@ -154,39 +156,94 @@ class BigQueryToBigQueryOperator(BaseOperator):
|
|
154
156
|
|
155
157
|
return configuration
|
156
158
|
|
157
|
-
def _submit_job(
|
158
|
-
self,
|
159
|
-
hook: BigQueryHook,
|
160
|
-
configuration: dict,
|
161
|
-
) -> str:
|
162
|
-
job = hook.insert_job(configuration=configuration, project_id=hook.project_id)
|
163
|
-
return job.job_id
|
164
|
-
|
165
159
|
def execute(self, context: Context) -> None:
|
166
160
|
self.log.info(
|
167
161
|
"Executing copy of %s into: %s",
|
168
162
|
self.source_project_dataset_tables,
|
169
163
|
self.destination_project_dataset_table,
|
170
164
|
)
|
171
|
-
hook = BigQueryHook(
|
165
|
+
self.hook = BigQueryHook(
|
172
166
|
gcp_conn_id=self.gcp_conn_id,
|
173
167
|
location=self.location,
|
174
168
|
impersonation_chain=self.impersonation_chain,
|
175
169
|
)
|
176
|
-
self.hook = hook
|
177
170
|
|
178
|
-
if not hook.project_id:
|
171
|
+
if not self.hook.project_id:
|
179
172
|
raise ValueError("The project_id should be set")
|
180
173
|
|
181
174
|
configuration = self._prepare_job_configuration()
|
182
|
-
|
175
|
+
self._job_conf = self.hook.insert_job(
|
176
|
+
configuration=configuration, project_id=self.hook.project_id
|
177
|
+
).to_api_repr()
|
183
178
|
|
184
|
-
|
185
|
-
conf = job["configuration"]["copy"]["destinationTable"]
|
179
|
+
dest_table_info = self._job_conf["configuration"]["copy"]["destinationTable"]
|
186
180
|
BigQueryTableLink.persist(
|
187
181
|
context=context,
|
188
182
|
task_instance=self,
|
189
|
-
dataset_id=conf["datasetId"],
|
190
|
-
project_id=conf["projectId"],
|
191
|
-
table_id=conf["tableId"],
|
183
|
+
dataset_id=dest_table_info["datasetId"],
|
184
|
+
project_id=dest_table_info["projectId"],
|
185
|
+
table_id=dest_table_info["tableId"],
|
186
|
+
)
|
187
|
+
|
188
|
+
def get_openlineage_facets_on_complete(self, task_instance):
|
189
|
+
"""Implement on_complete as we will include final BQ job id."""
|
190
|
+
from airflow.providers.common.compat.openlineage.facet import (
|
191
|
+
Dataset,
|
192
|
+
ExternalQueryRunFacet,
|
193
|
+
)
|
194
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
195
|
+
BIGQUERY_NAMESPACE,
|
196
|
+
get_facets_from_bq_table,
|
197
|
+
get_identity_column_lineage_facet,
|
192
198
|
)
|
199
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
200
|
+
|
201
|
+
if not self.hook:
|
202
|
+
self.hook = BigQueryHook(
|
203
|
+
gcp_conn_id=self.gcp_conn_id,
|
204
|
+
location=self.location,
|
205
|
+
impersonation_chain=self.impersonation_chain,
|
206
|
+
)
|
207
|
+
|
208
|
+
if not self._job_conf:
|
209
|
+
self.log.debug("OpenLineage could not find BQ job configuration.")
|
210
|
+
return OperatorLineage()
|
211
|
+
|
212
|
+
bq_job_id = self._job_conf["jobReference"]["jobId"]
|
213
|
+
source_tables_info = self._job_conf["configuration"]["copy"]["sourceTables"]
|
214
|
+
dest_table_info = self._job_conf["configuration"]["copy"]["destinationTable"]
|
215
|
+
|
216
|
+
run_facets = {
|
217
|
+
"externalQuery": ExternalQueryRunFacet(externalQueryId=bq_job_id, source="bigquery"),
|
218
|
+
}
|
219
|
+
|
220
|
+
input_datasets = []
|
221
|
+
for in_table_info in source_tables_info:
|
222
|
+
table_id = ".".join(
|
223
|
+
(in_table_info["projectId"], in_table_info["datasetId"], in_table_info["tableId"])
|
224
|
+
)
|
225
|
+
table_object = self.hook.get_client().get_table(table_id)
|
226
|
+
input_datasets.append(
|
227
|
+
Dataset(
|
228
|
+
namespace=BIGQUERY_NAMESPACE, name=table_id, facets=get_facets_from_bq_table(table_object)
|
229
|
+
)
|
230
|
+
)
|
231
|
+
|
232
|
+
out_table_id = ".".join(
|
233
|
+
(dest_table_info["projectId"], dest_table_info["datasetId"], dest_table_info["tableId"])
|
234
|
+
)
|
235
|
+
out_table_object = self.hook.get_client().get_table(out_table_id)
|
236
|
+
output_dataset_facets = {
|
237
|
+
**get_facets_from_bq_table(out_table_object),
|
238
|
+
**get_identity_column_lineage_facet(
|
239
|
+
dest_field_names=[field.name for field in out_table_object.schema],
|
240
|
+
input_datasets=input_datasets,
|
241
|
+
),
|
242
|
+
}
|
243
|
+
output_dataset = Dataset(
|
244
|
+
namespace=BIGQUERY_NAMESPACE,
|
245
|
+
name=out_table_id,
|
246
|
+
facets=output_dataset_facets,
|
247
|
+
)
|
248
|
+
|
249
|
+
return OperatorLineage(inputs=input_datasets, outputs=[output_dataset], run_facets=run_facets)
|
@@ -19,7 +19,8 @@
|
|
19
19
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
-
from
|
22
|
+
from collections.abc import Sequence
|
23
|
+
from typing import TYPE_CHECKING, Any
|
23
24
|
|
24
25
|
from google.api_core.exceptions import Conflict
|
25
26
|
from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
|
@@ -294,6 +295,7 @@ class BigQueryToGCSOperator(BaseOperator):
|
|
294
295
|
from pathlib import Path
|
295
296
|
|
296
297
|
from airflow.providers.common.compat.openlineage.facet import (
|
298
|
+
BaseFacet,
|
297
299
|
Dataset,
|
298
300
|
ExternalQueryRunFacet,
|
299
301
|
Identifier,
|
@@ -322,12 +324,12 @@ class BigQueryToGCSOperator(BaseOperator):
|
|
322
324
|
facets=get_facets_from_bq_table(table_object),
|
323
325
|
)
|
324
326
|
|
325
|
-
output_dataset_facets =
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
327
|
+
output_dataset_facets: dict[str, BaseFacet] = get_identity_column_lineage_facet(
|
328
|
+
dest_field_names=[field.name for field in table_object.schema], input_datasets=[input_dataset]
|
329
|
+
)
|
330
|
+
if "schema" in input_dataset.facets:
|
331
|
+
output_dataset_facets["schema"] = input_dataset.facets["schema"]
|
332
|
+
|
331
333
|
output_datasets = []
|
332
334
|
for uri in sorted(self.destination_cloud_storage_uris):
|
333
335
|
bucket, blob = _parse_gcs_url(uri)
|
@@ -20,7 +20,8 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import warnings
|
23
|
-
from
|
23
|
+
from collections.abc import Sequence
|
24
|
+
from typing import TYPE_CHECKING
|
24
25
|
|
25
26
|
from airflow.exceptions import AirflowProviderDeprecationWarning
|
26
27
|
from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
|
@@ -20,7 +20,7 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import warnings
|
23
|
-
from
|
23
|
+
from collections.abc import Sequence
|
24
24
|
|
25
25
|
from airflow.exceptions import AirflowProviderDeprecationWarning
|
26
26
|
from airflow.providers.google.cloud.transfers.bigquery_to_sql import BigQueryToSqlBaseOperator
|
@@ -20,7 +20,8 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import abc
|
23
|
-
from
|
23
|
+
from collections.abc import Sequence
|
24
|
+
from typing import TYPE_CHECKING
|
24
25
|
|
25
26
|
from airflow.models import BaseOperator
|
26
27
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
|
@@ -17,8 +17,9 @@
|
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
19
|
import json
|
20
|
+
from collections.abc import Sequence
|
20
21
|
from tempfile import NamedTemporaryFile
|
21
|
-
from typing import TYPE_CHECKING, Any
|
22
|
+
from typing import TYPE_CHECKING, Any
|
22
23
|
|
23
24
|
from airflow.models import BaseOperator
|
24
25
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
@@ -21,10 +21,11 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
import json
|
23
23
|
from base64 import b64encode
|
24
|
+
from collections.abc import Iterable, Sequence
|
24
25
|
from datetime import datetime
|
25
26
|
from decimal import Decimal
|
26
27
|
from tempfile import NamedTemporaryFile
|
27
|
-
from typing import TYPE_CHECKING, Any,
|
28
|
+
from typing import TYPE_CHECKING, Any, NewType
|
28
29
|
from uuid import UUID
|
29
30
|
|
30
31
|
from cassandra.util import Date, OrderedMapSerializedKey, SortedSet, Time
|
@@ -21,8 +21,9 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
import csv
|
23
23
|
import tempfile
|
24
|
+
from collections.abc import Sequence
|
24
25
|
from enum import Enum
|
25
|
-
from typing import TYPE_CHECKING, Any
|
26
|
+
from typing import TYPE_CHECKING, Any
|
26
27
|
|
27
28
|
from airflow.exceptions import AirflowException
|
28
29
|
from airflow.models import BaseOperator
|
@@ -20,7 +20,8 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import json
|
23
|
-
from
|
23
|
+
from collections.abc import Sequence
|
24
|
+
from typing import TYPE_CHECKING, Any
|
24
25
|
|
25
26
|
from google.api_core.exceptions import BadRequest, Conflict
|
26
27
|
from google.cloud.bigquery import (
|
@@ -784,9 +785,10 @@ class GCSToBigQueryOperator(BaseOperator):
|
|
784
785
|
source_objects = (
|
785
786
|
self.source_objects if isinstance(self.source_objects, list) else [self.source_objects]
|
786
787
|
)
|
787
|
-
input_dataset_facets = {
|
788
|
-
|
789
|
-
|
788
|
+
input_dataset_facets = {}
|
789
|
+
if "schema" in output_dataset_facets:
|
790
|
+
input_dataset_facets["schema"] = output_dataset_facets["schema"]
|
791
|
+
|
790
792
|
input_datasets = []
|
791
793
|
for blob in sorted(source_objects):
|
792
794
|
additional_facets = {}
|
@@ -811,14 +813,16 @@ class GCSToBigQueryOperator(BaseOperator):
|
|
811
813
|
)
|
812
814
|
input_datasets.append(dataset)
|
813
815
|
|
814
|
-
output_dataset_facets["columnLineage"] = get_identity_column_lineage_facet(
|
815
|
-
field_names=[field.name for field in table_object.schema], input_datasets=input_datasets
|
816
|
-
)
|
817
|
-
|
818
816
|
output_dataset = Dataset(
|
819
817
|
namespace="bigquery",
|
820
818
|
name=str(table_object.reference),
|
821
|
-
facets=
|
819
|
+
facets={
|
820
|
+
**output_dataset_facets,
|
821
|
+
**get_identity_column_lineage_facet(
|
822
|
+
dest_field_names=[field.name for field in table_object.schema],
|
823
|
+
input_datasets=input_datasets,
|
824
|
+
),
|
825
|
+
},
|
822
826
|
)
|
823
827
|
|
824
828
|
run_facets = {}
|