apache-airflow-providers-google 18.0.0__py3-none-any.whl → 18.1.0rc1__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release: this version of apache-airflow-providers-google is a release candidate and might be problematic.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +5 -5
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +45 -42
- airflow/providers/google/cloud/hooks/cloud_composer.py +131 -1
- airflow/providers/google/cloud/hooks/cloud_sql.py +88 -13
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +16 -0
- airflow/providers/google/cloud/hooks/dataflow.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +3 -0
- airflow/providers/google/cloud/hooks/gcs.py +107 -3
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/looker.py +1 -1
- airflow/providers/google/cloud/hooks/spanner.py +45 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +30 -0
- airflow/providers/google/cloud/links/base.py +11 -11
- airflow/providers/google/cloud/links/dataproc.py +2 -10
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +3 -1
- airflow/providers/google/cloud/operators/bigquery.py +2 -9
- airflow/providers/google/cloud/operators/cloud_run.py +2 -1
- airflow/providers/google/cloud/operators/cloud_sql.py +1 -1
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +89 -6
- airflow/providers/google/cloud/operators/datafusion.py +36 -7
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/spanner.py +22 -6
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +7 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +30 -0
- airflow/providers/google/cloud/operators/workflows.py +17 -6
- airflow/providers/google/cloud/sensors/bigquery.py +1 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +1 -6
- airflow/providers/google/cloud/sensors/bigtable.py +1 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +65 -31
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +1 -6
- airflow/providers/google/cloud/sensors/dataflow.py +1 -1
- airflow/providers/google/cloud/sensors/dataform.py +1 -6
- airflow/providers/google/cloud/sensors/datafusion.py +1 -6
- airflow/providers/google/cloud/sensors/dataplex.py +1 -6
- airflow/providers/google/cloud/sensors/dataprep.py +1 -6
- airflow/providers/google/cloud/sensors/dataproc.py +1 -6
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -6
- airflow/providers/google/cloud/sensors/gcs.py +1 -7
- airflow/providers/google/cloud/sensors/looker.py +1 -6
- airflow/providers/google/cloud/sensors/pubsub.py +1 -6
- airflow/providers/google/cloud/sensors/tasks.py +1 -6
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +1 -6
- airflow/providers/google/cloud/sensors/workflows.py +1 -6
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +2 -1
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +11 -2
- airflow/providers/google/cloud/triggers/bigquery.py +15 -3
- airflow/providers/google/cloud/triggers/cloud_composer.py +51 -21
- airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +90 -0
- airflow/providers/google/cloud/triggers/pubsub.py +14 -18
- airflow/providers/google/common/hooks/base_google.py +1 -1
- airflow/providers/google/get_provider_info.py +15 -0
- airflow/providers/google/leveldb/hooks/leveldb.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +2 -8
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +1 -6
- airflow/providers/google/marketing_platform/sensors/display_video.py +1 -6
- airflow/providers/google/suite/sensors/drive.py +1 -6
- airflow/providers/google/version_compat.py +0 -20
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/METADATA +15 -15
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/RECORD +72 -65
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "18.0.0"
+__version__ = "18.1.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
airflow/providers/google/ads/hooks/ads.py
@@ -28,14 +28,14 @@ from google.ads.googleads.errors import GoogleAdsException
 from google.auth.exceptions import GoogleAuthError
 
 from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import BaseHook
 from airflow.providers.google.common.hooks.base_google import get_field
-from airflow.providers.google.version_compat import BaseHook
 
 if TYPE_CHECKING:
-    from google.ads.googleads.
-    from google.ads.googleads.
-    from google.ads.googleads.
-    from google.ads.googleads.
+    from google.ads.googleads.v21.services.services.customer_service import CustomerServiceClient
+    from google.ads.googleads.v21.services.services.google_ads_service import GoogleAdsServiceClient
+    from google.ads.googleads.v21.services.services.google_ads_service.pagers import SearchPager
+    from google.ads.googleads.v21.services.types.google_ads_service import GoogleAdsRow
 
 
 class GoogleAdsHook(BaseHook):
airflow/providers/google/assets/gcs.py
@@ -18,23 +18,13 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
+from airflow.providers.common.compat.sdk import Asset
 from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
-from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 
 if TYPE_CHECKING:
     from urllib.parse import SplitResult
 
-    from airflow.providers.common.compat.assets import Asset
     from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset
-else:
-    try:
-        from airflow.providers.common.compat.assets import Asset
-    except ImportError:
-        if AIRFLOW_V_3_0_PLUS:
-            from airflow.sdk.definitions.asset import Asset
-        else:
-            # dataset is renamed to asset since Airflow 3.0
-            from airflow.datasets import Dataset as Asset
 
 
 def create_asset(*, bucket: str, key: str, extra: dict | None = None) -> Asset:
airflow/providers/google/cloud/bundles/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
airflow/providers/google/cloud/bundles/gcs.py
@@ -0,0 +1,161 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import structlog
+from google.api_core.exceptions import NotFound
+
+from airflow.dag_processing.bundles.base import BaseDagBundle
+from airflow.exceptions import AirflowException
+from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
+
+
+class GCSDagBundle(BaseDagBundle):
+    """
+    GCS Dag bundle - exposes a directory in GCS as a Dag bundle.
+
+    This allows Airflow to load Dags directly from a GCS bucket.
+
+    :param gcp_conn_id: Airflow connection ID for GCS. Defaults to GoogleBaseHook.default_conn_name.
+    :param bucket_name: The name of the GCS bucket containing the Dag files.
+    :param prefix: Optional subdirectory within the GCS bucket where the Dags are stored.
+        If None, Dags are assumed to be at the root of the bucket (Optional).
+    """
+
+    supports_versioning = False
+
+    def __init__(
+        self,
+        *,
+        gcp_conn_id: str = GoogleBaseHook.default_conn_name,
+        bucket_name: str,
+        prefix: str = "",
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.gcp_conn_id = gcp_conn_id
+        self.bucket_name = bucket_name
+        self.prefix = prefix
+        # Local path where GCS Dags are downloaded
+        self.gcs_dags_dir: Path = self.base_dir
+
+        log = structlog.get_logger(__name__)
+        self._log = log.bind(
+            bundle_name=self.name,
+            version=self.version,
+            bucket_name=self.bucket_name,
+            prefix=self.prefix,
+            gcp_conn_id=self.gcp_conn_id,
+        )
+        self._gcs_hook: GCSHook | None = None
+
+    def _initialize(self):
+        with self.lock():
+            if not self.gcs_dags_dir.exists():
+                self._log.info("Creating local Dags directory: %s", self.gcs_dags_dir)
+                os.makedirs(self.gcs_dags_dir)
+
+            if not self.gcs_dags_dir.is_dir():
+                raise NotADirectoryError(f"Local Dags path: {self.gcs_dags_dir} is not a directory.")
+
+            try:
+                self.gcs_hook.get_bucket(bucket_name=self.bucket_name)
+            except NotFound:
+                raise ValueError(f"GCS bucket '{self.bucket_name}' does not exist.")
+
+            if self.prefix:
+                # don't check when prefix is ""
+                if not self.gcs_hook.list(bucket_name=self.bucket_name, prefix=self.prefix):
+                    raise ValueError(f"GCS prefix 'gs://{self.bucket_name}/{self.prefix}' does not exist.")
+            self.refresh()
+
+    def initialize(self) -> None:
+        self._initialize()
+        super().initialize()
+
+    @property
+    def gcs_hook(self):
+        if self._gcs_hook is None:
+            try:
+                self._gcs_hook: GCSHook = GCSHook(gcp_conn_id=self.gcp_conn_id)  # Initialize GCS hook.
+            except AirflowException as e:
+                self._log.warning("Could not create GCSHook for connection %s: %s", self.gcp_conn_id, e)
+        return self._gcs_hook
+
+    def __repr__(self):
+        return (
+            f"<GCSDagBundle("
+            f"name={self.name!r}, "
+            f"bucket_name={self.bucket_name!r}, "
+            f"prefix={self.prefix!r}, "
+            f"version={self.version!r}"
+            f")>"
+        )
+
+    def get_current_version(self) -> str | None:
+        """Return the current version of the Dag bundle. Currently not supported."""
+        return None
+
+    @property
+    def path(self) -> Path:
+        """Return the local path to the Dag files."""
+        return self.gcs_dags_dir  # Path where Dags are downloaded.
+
+    def refresh(self) -> None:
+        """Refresh the Dag bundle by re-downloading the Dags from GCS."""
+        if self.version:
+            raise ValueError("Refreshing a specific version is not supported")
+
+        with self.lock():
+            self._log.debug(
+                "Downloading Dags from gs://%s/%s to %s", self.bucket_name, self.prefix, self.gcs_dags_dir
+            )
+            self.gcs_hook.sync_to_local_dir(
+                bucket_name=self.bucket_name,
+                prefix=self.prefix,
+                local_dir=self.gcs_dags_dir,
+                delete_stale=True,
+            )
+
+    def view_url(self, version: str | None = None) -> str | None:
+        """
+        Return a URL for viewing the Dags in GCS. Currently, versioning is not supported.
+
+        This method is deprecated and will be removed when the minimum supported Airflow version is 3.1.
+        Use `view_url_template` instead.
+        """
+        return self.view_url_template()
+
+    def view_url_template(self) -> str | None:
+        """Return a URL for viewing the Dags in GCS. Currently, versioning is not supported."""
+        if self.version:
+            raise ValueError("GCS url with version is not supported")
+        if hasattr(self, "_view_url_template") and self._view_url_template:
+            # Because we use this method in the view_url method, we need to handle
+            # backward compatibility for Airflow versions that doesn't have the
+            # _view_url_template attribute. Should be removed when we drop support for Airflow 3.0
+            return self._view_url_template
+        # https://console.cloud.google.com/storage/browser/<bucket-name>/<prefix>
+        url = f"https://console.cloud.google.com/storage/browser/{self.bucket_name}"
+        if self.prefix:
+            url += f"/{self.prefix}"
+
+        return url
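The new `GCSDagBundle` downloads a bucket prefix into the bundle's local `base_dir` and re-syncs it on every `refresh()`. Below is a minimal, hedged sketch of exercising the class directly; the bundle name, connection ID, bucket, and prefix are illustrative assumptions, and in a real deployment the bundle would normally be configured through Airflow's DAG bundle configuration rather than instantiated by hand.

```python
# Hypothetical, minimal sketch: instantiating the bundle directly.
# Assumes a GCP connection "google_cloud_default" and a bucket "my-dags-bucket" exist.
from airflow.providers.google.cloud.bundles.gcs import GCSDagBundle

bundle = GCSDagBundle(
    name="gcs-dags",                  # bundle name, passed through to BaseDagBundle
    gcp_conn_id="google_cloud_default",
    bucket_name="my-dags-bucket",
    prefix="dags/",                   # only this prefix is synced locally
)

bundle.initialize()                    # validates bucket/prefix, then downloads the Dags
print(bundle.path)                     # local directory the DAG processor will parse
print(bundle.view_url_template())      # GCS console URL for browsing the bundle
```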
airflow/providers/google/cloud/hooks/bigquery.py
@@ -20,17 +20,18 @@
 
 from __future__ import annotations
 
-import asyncio
 import json
 import logging
 import re
 import time
 import uuid
+import warnings
 from collections.abc import Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Literal, NoReturn, cast, overload
 
+import pendulum
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
 from google.cloud.bigquery import (
@@ -75,6 +76,7 @@ from airflow.providers.google.common.hooks.base_google import (
     GoogleBaseHook,
     get_field,
 )
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils.hashlib_wrapper import md5
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin
@@ -86,6 +88,8 @@ if TYPE_CHECKING:
     from google.api_core.retry import Retry
     from requests import Session
 
+    from airflow.sdk import Context
+
 log = logging.getLogger(__name__)
 
 BigQueryJob = CopyJob | QueryJob | LoadJob | ExtractJob
@@ -1274,7 +1278,16 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         job_api_repr.result(timeout=timeout, retry=retry)
         return job_api_repr
 
-    def generate_job_id(
+    def generate_job_id(
+        self,
+        job_id: str | None,
+        dag_id: str,
+        task_id: str,
+        logical_date: datetime | None,
+        configuration: dict,
+        run_after: pendulum.DateTime | None = None,
+        force_rerun: bool = False,
+    ) -> str:
         if force_rerun:
             hash_base = str(uuid.uuid4())
         else:
@@ -1285,10 +1298,35 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         if job_id:
             return f"{job_id}_{uniqueness_suffix}"
 
-
-
+        if logical_date is not None:
+            if AIRFLOW_V_3_0_PLUS:
+                warnings.warn(
+                    "The 'logical_date' parameter is deprecated. Please use 'run_after' instead.",
+                    AirflowProviderDeprecationWarning,
+                    stacklevel=1,
+                )
+            job_id_timestamp = logical_date
+        elif run_after is not None:
+            job_id_timestamp = run_after
+        else:
+            job_id_timestamp = pendulum.now("UTC")
+
+        job_id = f"airflow_{dag_id}_{task_id}_{job_id_timestamp.isoformat()}_{uniqueness_suffix}"
         return re.sub(r"[:\-+.]", "_", job_id)
 
+    def get_run_after_or_logical_date(self, context: Context) -> pendulum.DateTime:
+        if AIRFLOW_V_3_0_PLUS:
+            if dag_run := context.get("dag_run"):
+                run_after = pendulum.instance(dag_run.run_after)
+            else:
+                run_after = pendulum.now("UTC")
+        else:
+            if logical_date := context.get("logical_date"):
+                run_after = logical_date
+            else:
+                run_after = pendulum.now("UTC")
+        return run_after
+
     def split_tablename(
         self, table_input: str, default_project_id: str, var_name: str | None = None
     ) -> tuple[str, str, str]:
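The reworked `generate_job_id` keeps `logical_date` only for backward compatibility (warning on Airflow 3) and prefers the new `run_after` timestamp. A hedged sketch of how a caller might build a rerun-safe job ID follows; the connection ID, DAG/task names, and configuration dict are illustrative placeholders.

```python
import pendulum

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # assumed connection ID

configuration = {"query": {"query": "SELECT 1", "useLegacySql": False}}

# With force_rerun=False the uniqueness suffix is derived from the task and
# configuration, so a retried task can reattach to the job it already started;
# force_rerun=True salts the ID with a random UUID instead.
job_id = hook.generate_job_id(
    job_id=None,
    dag_id="example_dag",
    task_id="run_query",
    logical_date=None,  # deprecated on Airflow 3; pass run_after instead
    configuration=configuration,
    run_after=pendulum.datetime(2025, 1, 1, tz="UTC"),
    force_rerun=False,
)
print(job_id)
```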
@@ -1975,46 +2013,11 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
     async def _get_job(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> BigQueryJob | UnknownJob:
-        """
-
-
-        WARNING.
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This method was developed, because neither the `google-cloud-bigquery` nor the `gcloud-aio-bigquery`
-        provides asynchronous access to a BigQuery jobs with location parameter. That's why this method wraps
-        synchronous client call with the event loop's run_in_executor() method.
-
-        This workaround must be deleted along with the method _get_job_sync() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        loop = asyncio.get_event_loop()
-        job = await loop.run_in_executor(None, self._get_job_sync, job_id, project_id, location)
+        """Get BigQuery job by its ID, project ID and location."""
+        sync_hook = await self.get_sync_hook()
+        job = sync_hook.get_job(job_id=job_id, project_id=project_id, location=location)
         return job
 
-    def _get_job_sync(self, job_id, project_id, location):
-        """
-        Get BigQuery job by its ID, project ID and location synchronously.
-
-        WARNING
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This workaround must be deleted along with the method _get_job() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        hook = BigQueryHook(**self._hook_kwargs)
-        return hook.get_job(job_id=job_id, project_id=project_id, location=location)
-
     async def get_job_status(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> dict[str, str]:
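`_get_job` no longer wraps a synchronous client call in `run_in_executor`; it simply reuses the synchronous hook obtained via `get_sync_hook()`. Callers keep using the public `get_job_status` coroutine, roughly as in this sketch; the connection ID, job ID, project, and the exact shape of the returned dict are placeholders or assumptions.

```python
import asyncio

from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook


async def main() -> None:
    hook = BigQueryAsyncHook(gcp_conn_id="google_cloud_default")  # assumed connection ID
    # Returns a dict describing the job state (annotated as dict[str, str] in the hook).
    status = await hook.get_job_status(
        job_id="airflow_example_dag_run_query_20250101_abc123",  # placeholder job ID
        project_id="my-project",
        location="US",
    )
    print(status)


asyncio.run(main())
```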
airflow/providers/google/cloud/hooks/cloud_composer.py
@@ -24,9 +24,10 @@ from collections.abc import MutableSequence, Sequence
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin
 
+from aiohttp import ClientSession
 from google.api_core.client_options import ClientOptions
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.auth.transport.requests import AuthorizedSession
+from google.auth.transport.requests import AuthorizedSession, Request
 from google.cloud.orchestration.airflow.service_v1 import (
     EnvironmentsAsyncClient,
     EnvironmentsClient,
@@ -472,6 +473,38 @@ class CloudComposerHook(GoogleBaseHook, OperationHelper):
 
         return response.json()
 
+    def get_dag_runs(
+        self,
+        composer_airflow_uri: str,
+        composer_dag_id: str,
+        timeout: float | None = None,
+    ) -> dict:
+        """
+        Get the list of dag runs for provided DAG.
+
+        :param composer_airflow_uri: The URI of the Apache Airflow Web UI hosted within Composer environment.
+        :param composer_dag_id: The ID of DAG.
+        :param timeout: The timeout for this request.
+        """
+        response = self.make_composer_airflow_api_request(
+            method="GET",
+            airflow_uri=composer_airflow_uri,
+            path=f"/api/v1/dags/{composer_dag_id}/dagRuns",
+            timeout=timeout,
+        )
+
+        if response.status_code != 200:
+            self.log.error(
+                "Failed to get DAG runs for dag_id=%s from %s (status=%s): %s",
+                composer_dag_id,
+                composer_airflow_uri,
+                response.status_code,
+                response.text,
+            )
+            response.raise_for_status()
+
+        return response.json()
+
 
 class CloudComposerAsyncHook(GoogleBaseAsyncHook):
     """Hook for Google Cloud Composer async APIs."""
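The synchronous `get_dag_runs` is a thin wrapper over `make_composer_airflow_api_request`, calling the stable REST endpoint `/api/v1/dags/{dag_id}/dagRuns` of the environment's Airflow web server. A hedged sketch of calling it directly; the connection ID, web server URI, DAG ID, and response keys shown are placeholders or assumptions.

```python
from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerHook

hook = CloudComposerHook(gcp_conn_id="google_cloud_default")  # assumed connection ID

# composer_airflow_uri is the Airflow web UI address reported by the Composer
# environment (placeholder value below).
dag_runs = hook.get_dag_runs(
    composer_airflow_uri="https://example-dot-us-central1.composer.googleusercontent.com",
    composer_dag_id="example_dag",
    timeout=60,
)
# The stable REST API returns {"dag_runs": [...], "total_entries": N}.
for run in dag_runs.get("dag_runs", []):
    print(run["dag_run_id"], run["state"])
```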
@@ -489,6 +522,42 @@ class CloudComposerAsyncHook(GoogleBaseAsyncHook):
             client_options=self.client_options,
         )
 
+    async def make_composer_airflow_api_request(
+        self,
+        method: str,
+        airflow_uri: str,
+        path: str,
+        data: Any | None = None,
+        timeout: float | None = None,
+    ):
+        """
+        Make a request to Cloud Composer environment's web server.
+
+        :param method: The request method to use ('GET', 'OPTIONS', 'HEAD', 'POST', 'PUT', 'PATCH', 'DELETE').
+        :param airflow_uri: The URI of the Apache Airflow Web UI hosted within this environment.
+        :param path: The path to send the request.
+        :param data: Dictionary, list of tuples, bytes, or file-like object to send in the body of the request.
+        :param timeout: The timeout for this request.
+        """
+        sync_hook = await self.get_sync_hook()
+        credentials = sync_hook.get_credentials()
+
+        if not credentials.valid:
+            credentials.refresh(Request())
+
+        async with ClientSession() as session:
+            async with session.request(
+                method=method,
+                url=urljoin(airflow_uri, path),
+                data=data,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {credentials.token}",
+                },
+                timeout=timeout,
+            ) as response:
+                return await response.json(), response.status
+
     def get_environment_name(self, project_id, region, environment_id):
         return f"projects/{project_id}/locations/{region}/environments/{environment_id}"
 
@@ -594,6 +663,35 @@ class CloudComposerAsyncHook(GoogleBaseAsyncHook):
             metadata=metadata,
         )
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    async def get_environment(
+        self,
+        project_id: str,
+        region: str,
+        environment_id: str,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Environment:
+        """
+        Get an existing environment.
+
+        :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
+        :param region: Required. The ID of the Google Cloud region that the service belongs to.
+        :param environment_id: Required. The ID of the Google Cloud environment that the service belongs to.
+        :param retry: Designation of what errors, if any, should be retried.
+        :param timeout: The timeout for this request.
+        :param metadata: Strings which should be sent along with the request as metadata.
+        """
+        client = await self.get_environment_client()
+
+        return await client.get_environment(
+            request={"name": self.get_environment_name(project_id, region, environment_id)},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
     @GoogleBaseHook.fallback_to_default_project_id
     async def execute_airflow_command(
         self,
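The new async `get_environment` lets deferrable code look up a Composer environment (for example, to resolve the web server URI) without blocking the event loop. A rough sketch with placeholder project, region, environment, and connection values:

```python
import asyncio

from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerAsyncHook


async def main() -> None:
    hook = CloudComposerAsyncHook(gcp_conn_id="google_cloud_default")  # assumed connection ID
    environment = await hook.get_environment(
        project_id="my-project",
        region="us-central1",
        environment_id="my-composer-env",
    )
    # The returned Environment proto carries the Airflow web server URI in its config.
    print(environment.config.airflow_uri)


asyncio.run(main())
```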
@@ -719,3 +817,35 @@ class CloudComposerAsyncHook(GoogleBaseAsyncHook):
 
             self.log.info("Sleeping for %s seconds.", poll_interval)
             await asyncio.sleep(poll_interval)
+
+    async def get_dag_runs(
+        self,
+        composer_airflow_uri: str,
+        composer_dag_id: str,
+        timeout: float | None = None,
+    ) -> dict:
+        """
+        Get the list of dag runs for provided DAG.
+
+        :param composer_airflow_uri: The URI of the Apache Airflow Web UI hosted within Composer environment.
+        :param composer_dag_id: The ID of DAG.
+        :param timeout: The timeout for this request.
+        """
+        response_body, response_status_code = await self.make_composer_airflow_api_request(
+            method="GET",
+            airflow_uri=composer_airflow_uri,
+            path=f"/api/v1/dags/{composer_dag_id}/dagRuns",
+            timeout=timeout,
+        )
+
+        if response_status_code != 200:
+            self.log.error(
+                "Failed to get DAG runs for dag_id=%s from %s (status=%s): %s",
+                composer_dag_id,
+                composer_airflow_uri,
+                response_status_code,
+                response_body["title"],
+            )
+            raise AirflowException(response_body["title"])
+
+        return response_body