apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
- airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/_vendor/__init__.py +0 -0
- airflow/providers/google/_vendor/json_merge_patch.py +91 -0
- airflow/providers/google/ads/hooks/ads.py +52 -43
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
- airflow/providers/google/cloud/hooks/bigquery.py +195 -318
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +3 -2
- airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
- airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
- airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
- airflow/providers/google/cloud/hooks/compute.py +7 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
- airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
- airflow/providers/google/cloud/hooks/dataflow.py +87 -242
- airflow/providers/google/cloud/hooks/dataform.py +9 -14
- airflow/providers/google/cloud/hooks/datafusion.py +7 -9
- airflow/providers/google/cloud/hooks/dataplex.py +13 -12
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +76 -74
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/hooks/dlp.py +5 -4
- airflow/providers/google/cloud/hooks/gcs.py +144 -33
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kms.py +3 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
- airflow/providers/google/cloud/hooks/mlengine.py +7 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -2
- airflow/providers/google/cloud/hooks/os_login.py +3 -2
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
- airflow/providers/google/cloud/hooks/spanner.py +75 -10
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
- airflow/providers/google/cloud/hooks/tasks.py +4 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
- airflow/providers/google/cloud/hooks/translate.py +8 -17
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
- airflow/providers/google/cloud/hooks/vision.py +7 -7
- airflow/providers/google/cloud/hooks/workflows.py +4 -3
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -7
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -90
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -89
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
- airflow/providers/google/cloud/links/managed_kafka.py +11 -51
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +141 -40
- airflow/providers/google/cloud/openlineage/mixins.py +14 -13
- airflow/providers/google/cloud/openlineage/utils.py +19 -3
- airflow/providers/google/cloud/operators/alloy_db.py +76 -61
- airflow/providers/google/cloud/operators/bigquery.py +104 -667
- airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
- airflow/providers/google/cloud/operators/bigtable.py +38 -7
- airflow/providers/google/cloud/operators/cloud_base.py +22 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
- airflow/providers/google/cloud/operators/cloud_build.py +80 -36
- airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
- airflow/providers/google/cloud/operators/cloud_run.py +39 -20
- airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
- airflow/providers/google/cloud/operators/compute.py +18 -50
- airflow/providers/google/cloud/operators/datacatalog.py +167 -29
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +19 -7
- airflow/providers/google/cloud/operators/datafusion.py +43 -43
- airflow/providers/google/cloud/operators/dataplex.py +212 -126
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +134 -207
- airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +24 -45
- airflow/providers/google/cloud/operators/functions.py +21 -14
- airflow/providers/google/cloud/operators/gcs.py +15 -12
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
- airflow/providers/google/cloud/operators/natural_language.py +5 -3
- airflow/providers/google/cloud/operators/pubsub.py +69 -21
- airflow/providers/google/cloud/operators/spanner.py +53 -45
- airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
- airflow/providers/google/cloud/operators/stackdriver.py +5 -11
- airflow/providers/google/cloud/operators/tasks.py +6 -15
- airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
- airflow/providers/google/cloud/operators/translate.py +46 -20
- airflow/providers/google/cloud/operators/translate_speech.py +4 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
- airflow/providers/google/cloud/operators/vision.py +7 -5
- airflow/providers/google/cloud/operators/workflows.py +24 -19
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
- airflow/providers/google/cloud/sensors/bigtable.py +14 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
- airflow/providers/google/cloud/sensors/dataflow.py +27 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +7 -5
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +10 -9
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/sensors/gcs.py +22 -21
- airflow/providers/google/cloud/sensors/looker.py +5 -5
- airflow/providers/google/cloud/sensors/pubsub.py +20 -20
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +6 -4
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
- airflow/providers/google/cloud/triggers/dataflow.py +125 -2
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +16 -3
- airflow/providers/google/cloud/triggers/dataproc.py +124 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +17 -20
- airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
- airflow/providers/google/cloud/utils/bigquery.py +5 -7
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/cloud/utils/validators.py +43 -0
- airflow/providers/google/common/auth_backend/google_openid.py +26 -9
- airflow/providers/google/common/consts.py +2 -1
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +40 -43
- airflow/providers/google/common/hooks/operation_helpers.py +78 -0
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +4 -5
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +61 -216
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
- airflow/providers/google/cloud/hooks/automl.py +0 -679
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1360
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -1515
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
- /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -26,19 +26,26 @@ import os
|
|
|
26
26
|
import shutil
|
|
27
27
|
import time
|
|
28
28
|
import warnings
|
|
29
|
-
from collections.abc import Generator, Sequence
|
|
29
|
+
from collections.abc import Callable, Generator, Sequence
|
|
30
30
|
from contextlib import contextmanager
|
|
31
|
+
from datetime import datetime
|
|
31
32
|
from functools import partial
|
|
32
33
|
from io import BytesIO
|
|
34
|
+
from pathlib import Path
|
|
33
35
|
from tempfile import NamedTemporaryFile
|
|
34
|
-
from typing import IO, TYPE_CHECKING, Any,
|
|
36
|
+
from typing import IO, TYPE_CHECKING, Any, ParamSpec, TypeVar, cast, overload
|
|
35
37
|
from urllib.parse import urlsplit
|
|
36
38
|
|
|
39
|
+
# Make mypy happy by importing as aliases
|
|
40
|
+
import google.cloud.storage as storage
|
|
37
41
|
from gcloud.aio.storage import Storage
|
|
38
|
-
from
|
|
42
|
+
from google.api_core.exceptions import GoogleAPICallError, NotFound
|
|
43
|
+
from google.cloud.exceptions import GoogleCloudError
|
|
44
|
+
from google.cloud.storage.retry import DEFAULT_RETRY
|
|
39
45
|
|
|
40
46
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
|
41
47
|
from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
|
|
48
|
+
from airflow.providers.common.compat.sdk import timezone
|
|
42
49
|
from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
|
|
43
50
|
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
44
51
|
from airflow.providers.google.common.hooks.base_google import (
|
|
@@ -46,23 +53,13 @@ from airflow.providers.google.common.hooks.base_google import (
|
|
|
46
53
|
GoogleBaseAsyncHook,
|
|
47
54
|
GoogleBaseHook,
|
|
48
55
|
)
|
|
49
|
-
from airflow.typing_compat import ParamSpec
|
|
50
|
-
from airflow.utils import timezone
|
|
51
56
|
from airflow.version import version
|
|
52
|
-
from google.api_core.exceptions import GoogleAPICallError, NotFound
|
|
53
|
-
|
|
54
|
-
# not sure why but mypy complains on missing `storage` but it is clearly there and is importable
|
|
55
|
-
from google.cloud import storage # type: ignore[attr-defined]
|
|
56
|
-
from google.cloud.exceptions import GoogleCloudError
|
|
57
|
-
from google.cloud.storage.retry import DEFAULT_RETRY
|
|
58
57
|
|
|
59
58
|
if TYPE_CHECKING:
|
|
60
|
-
from datetime import datetime
|
|
61
|
-
|
|
62
59
|
from aiohttp import ClientSession
|
|
63
|
-
|
|
64
60
|
from google.api_core.retry import Retry
|
|
65
61
|
from google.cloud.storage.blob import Blob
|
|
62
|
+
from requests import Session
|
|
66
63
|
|
|
67
64
|
|
|
68
65
|
RT = TypeVar("RT")
|
|
@@ -136,16 +133,16 @@ def _fallback_object_url_to_object_name_and_bucket_name(
|
|
|
136
133
|
|
|
137
134
|
return func(self, *args, **kwargs)
|
|
138
135
|
|
|
139
|
-
return cast(Callable[FParams, RT], _inner_wrapper)
|
|
136
|
+
return cast("Callable[FParams, RT]", _inner_wrapper)
|
|
140
137
|
|
|
141
|
-
return cast(Callable[[T], T], _wrapper)
|
|
138
|
+
return cast("Callable[[T], T]", _wrapper)
|
|
142
139
|
|
|
143
140
|
|
|
144
141
|
# A fake bucket to use in functions decorated by _fallback_object_url_to_object_name_and_bucket_name.
|
|
145
142
|
# This allows the 'bucket' argument to be of type str instead of str | None,
|
|
146
143
|
# making it easier to type hint the function body without dealing with the None
|
|
147
144
|
# case that can never happen at runtime.
|
|
148
|
-
PROVIDE_BUCKET: str = cast(str, None)
|
|
145
|
+
PROVIDE_BUCKET: str = cast("str", None)
|
|
149
146
|
|
|
150
147
|
|
|
151
148
|
class GCSHook(GoogleBaseHook):
|
|
@@ -359,11 +356,10 @@ class GCSHook(GoogleBaseHook):
|
|
|
359
356
|
)
|
|
360
357
|
self.log.info("File downloaded to %s", filename)
|
|
361
358
|
return filename
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
return blob.download_as_bytes()
|
|
359
|
+
get_hook_lineage_collector().add_input_asset(
|
|
360
|
+
context=self, scheme="gs", asset_kwargs={"bucket": bucket.name, "key": blob.name}
|
|
361
|
+
)
|
|
362
|
+
return blob.download_as_bytes()
|
|
367
363
|
|
|
368
364
|
except GoogleCloudError:
|
|
369
365
|
if attempt == num_max_attempts - 1:
|
|
@@ -375,8 +371,7 @@ class GCSHook(GoogleBaseHook):
|
|
|
375
371
|
num_max_attempts,
|
|
376
372
|
)
|
|
377
373
|
raise
|
|
378
|
-
|
|
379
|
-
raise NotImplementedError # should not reach this, but makes mypy happy
|
|
374
|
+
raise NotImplementedError # should not reach this, but makes mypy happy
|
|
380
375
|
|
|
381
376
|
def download_as_byte_array(
|
|
382
377
|
self,
|
|
@@ -551,13 +546,13 @@ class GCSHook(GoogleBaseHook):
|
|
|
551
546
|
if cache_control:
|
|
552
547
|
blob.cache_control = cache_control
|
|
553
548
|
|
|
554
|
-
if filename and data:
|
|
549
|
+
if filename is not None and data is not None:
|
|
555
550
|
raise ValueError(
|
|
556
551
|
"'filename' and 'data' parameter provided. Please "
|
|
557
552
|
"specify a single parameter, either 'filename' for "
|
|
558
553
|
"local file uploads or 'data' for file content uploads."
|
|
559
554
|
)
|
|
560
|
-
|
|
555
|
+
if filename is not None:
|
|
561
556
|
if not mime_type:
|
|
562
557
|
mime_type = "application/octet-stream"
|
|
563
558
|
if gzip:
|
|
@@ -577,7 +572,7 @@ class GCSHook(GoogleBaseHook):
|
|
|
577
572
|
if gzip:
|
|
578
573
|
os.remove(filename)
|
|
579
574
|
self.log.info("File %s uploaded to %s in %s bucket", filename, object_name, bucket_name)
|
|
580
|
-
elif data:
|
|
575
|
+
elif data is not None:
|
|
581
576
|
if not mime_type:
|
|
582
577
|
mime_type = "text/plain"
|
|
583
578
|
if gzip:
|
|
@@ -598,7 +593,13 @@ class GCSHook(GoogleBaseHook):
|
|
|
598
593
|
context=self, scheme="gs", asset_kwargs={"bucket": bucket.name, "key": blob.name}
|
|
599
594
|
)
|
|
600
595
|
|
|
601
|
-
def exists(
|
|
596
|
+
def exists(
|
|
597
|
+
self,
|
|
598
|
+
bucket_name: str,
|
|
599
|
+
object_name: str,
|
|
600
|
+
retry: Retry = DEFAULT_RETRY,
|
|
601
|
+
user_project: str | None = None,
|
|
602
|
+
) -> bool:
|
|
602
603
|
"""
|
|
603
604
|
Check for the existence of a file in Google Cloud Storage.
|
|
604
605
|
|
|
@@ -606,9 +607,11 @@ class GCSHook(GoogleBaseHook):
|
|
|
606
607
|
:param object_name: The name of the blob_name to check in the Google cloud
|
|
607
608
|
storage bucket.
|
|
608
609
|
:param retry: (Optional) How to retry the RPC
|
|
610
|
+
:param user_project: The identifier of the Google Cloud project to bill for the request.
|
|
611
|
+
Required for Requester Pays buckets.
|
|
609
612
|
"""
|
|
610
613
|
client = self.get_conn()
|
|
611
|
-
bucket = client.bucket(bucket_name)
|
|
614
|
+
bucket = client.bucket(bucket_name, user_project=user_project)
|
|
612
615
|
blob = bucket.blob(blob_name=object_name)
|
|
613
616
|
return blob.exists(retry=retry)
|
|
614
617
|
|
|
@@ -625,7 +628,7 @@ class GCSHook(GoogleBaseHook):
|
|
|
625
628
|
|
|
626
629
|
def is_updated_after(self, bucket_name: str, object_name: str, ts: datetime) -> bool:
|
|
627
630
|
"""
|
|
628
|
-
Check if
|
|
631
|
+
Check if a blob_name is updated in Google Cloud Storage.
|
|
629
632
|
|
|
630
633
|
:param bucket_name: The Google Cloud Storage bucket where the object is.
|
|
631
634
|
:param object_name: The name of the object to check in the Google cloud
|
|
@@ -645,7 +648,7 @@ class GCSHook(GoogleBaseHook):
|
|
|
645
648
|
self, bucket_name: str, object_name: str, min_ts: datetime, max_ts: datetime
|
|
646
649
|
) -> bool:
|
|
647
650
|
"""
|
|
648
|
-
Check if
|
|
651
|
+
Check if a blob_name is updated in Google Cloud Storage.
|
|
649
652
|
|
|
650
653
|
:param bucket_name: The Google Cloud Storage bucket where the object is.
|
|
651
654
|
:param object_name: The name of the object to check in the Google cloud
|
|
@@ -666,7 +669,7 @@ class GCSHook(GoogleBaseHook):
|
|
|
666
669
|
|
|
667
670
|
def is_updated_before(self, bucket_name: str, object_name: str, ts: datetime) -> bool:
|
|
668
671
|
"""
|
|
669
|
-
Check if
|
|
672
|
+
Check if a blob_name is updated before given time in Google Cloud Storage.
|
|
670
673
|
|
|
671
674
|
:param bucket_name: The Google Cloud Storage bucket where the object is.
|
|
672
675
|
:param object_name: The name of the object to check in the Google cloud
|
|
@@ -719,6 +722,14 @@ class GCSHook(GoogleBaseHook):
|
|
|
719
722
|
|
|
720
723
|
self.log.info("Blob %s deleted.", object_name)
|
|
721
724
|
|
|
725
|
+
def get_bucket(self, bucket_name: str) -> storage.Bucket:
|
|
726
|
+
"""
|
|
727
|
+
Get a bucket object from the Google Cloud Storage.
|
|
728
|
+
|
|
729
|
+
:param bucket_name: name of the bucket
|
|
730
|
+
"""
|
|
731
|
+
return self.get_conn().bucket(bucket_name)
|
|
732
|
+
|
|
722
733
|
def delete_bucket(self, bucket_name: str, force: bool = False, user_project: str | None = None) -> None:
|
|
723
734
|
"""
|
|
724
735
|
Delete a bucket object from the Google Cloud Storage.
|
|
@@ -1237,6 +1248,106 @@ class GCSHook(GoogleBaseHook):
|
|
|
1237
1248
|
|
|
1238
1249
|
self.log.info("Completed successfully.")
|
|
1239
1250
|
|
|
1251
|
+
def _sync_to_local_dir_delete_stale_local_files(self, current_gcs_objects: List[Path], local_dir: Path):
|
|
1252
|
+
current_gcs_keys = {key.resolve() for key in current_gcs_objects}
|
|
1253
|
+
|
|
1254
|
+
for item in local_dir.rglob("*"):
|
|
1255
|
+
if item.is_file():
|
|
1256
|
+
if item.resolve() not in current_gcs_keys:
|
|
1257
|
+
self.log.debug("Deleting stale local file: %s", item)
|
|
1258
|
+
item.unlink()
|
|
1259
|
+
# Clean up empty directories
|
|
1260
|
+
for root, dirs, _ in os.walk(local_dir, topdown=False):
|
|
1261
|
+
for d in dirs:
|
|
1262
|
+
dir_path = os.path.join(root, d)
|
|
1263
|
+
if not os.listdir(dir_path):
|
|
1264
|
+
self.log.debug("Deleting stale empty directory: %s", dir_path)
|
|
1265
|
+
os.rmdir(dir_path)
|
|
1266
|
+
|
|
1267
|
+
def _sync_to_local_dir_if_changed(self, blob: Blob, local_target_path: Path):
|
|
1268
|
+
should_download = False
|
|
1269
|
+
download_msg = ""
|
|
1270
|
+
if not local_target_path.exists():
|
|
1271
|
+
should_download = True
|
|
1272
|
+
download_msg = f"Local file {local_target_path} does not exist."
|
|
1273
|
+
else:
|
|
1274
|
+
local_stats = local_target_path.stat()
|
|
1275
|
+
# Reload blob to get fresh metadata, including size and updated time
|
|
1276
|
+
blob.reload()
|
|
1277
|
+
|
|
1278
|
+
if blob.size != local_stats.st_size:
|
|
1279
|
+
should_download = True
|
|
1280
|
+
download_msg = (
|
|
1281
|
+
f"GCS object size ({blob.size}) and local file size ({local_stats.st_size}) differ."
|
|
1282
|
+
)
|
|
1283
|
+
|
|
1284
|
+
gcs_last_modified = blob.updated
|
|
1285
|
+
if (
|
|
1286
|
+
not should_download
|
|
1287
|
+
and gcs_last_modified
|
|
1288
|
+
and local_stats.st_mtime < gcs_last_modified.timestamp()
|
|
1289
|
+
):
|
|
1290
|
+
should_download = True
|
|
1291
|
+
download_msg = f"GCS object last modified ({gcs_last_modified}) is newer than local file last modified ({datetime.fromtimestamp(local_stats.st_mtime, tz=timezone.utc)})."
|
|
1292
|
+
|
|
1293
|
+
if should_download:
|
|
1294
|
+
self.log.debug("%s Downloading %s to %s", download_msg, blob.name, local_target_path.as_posix())
|
|
1295
|
+
self.download(
|
|
1296
|
+
bucket_name=blob.bucket.name, object_name=blob.name, filename=str(local_target_path)
|
|
1297
|
+
)
|
|
1298
|
+
else:
|
|
1299
|
+
self.log.debug(
|
|
1300
|
+
"Local file %s is up-to-date with GCS object %s. Skipping download.",
|
|
1301
|
+
local_target_path.as_posix(),
|
|
1302
|
+
blob.name,
|
|
1303
|
+
)
|
|
1304
|
+
|
|
1305
|
+
def sync_to_local_dir(
|
|
1306
|
+
self,
|
|
1307
|
+
bucket_name: str,
|
|
1308
|
+
local_dir: str | Path,
|
|
1309
|
+
prefix: str | None = None,
|
|
1310
|
+
delete_stale: bool = False,
|
|
1311
|
+
) -> None:
|
|
1312
|
+
"""
|
|
1313
|
+
Download files from a GCS bucket to a local directory.
|
|
1314
|
+
|
|
1315
|
+
It will download all files from the given ``prefix`` and create the corresponding
|
|
1316
|
+
directory structure in the ``local_dir``.
|
|
1317
|
+
|
|
1318
|
+
If ``delete_stale`` is ``True``, it will delete all local files that do not exist in the GCS bucket.
|
|
1319
|
+
|
|
1320
|
+
:param bucket_name: The name of the GCS bucket.
|
|
1321
|
+
:param local_dir: The local directory to which the files will be downloaded.
|
|
1322
|
+
:param prefix: The prefix of the files to be downloaded.
|
|
1323
|
+
:param delete_stale: If ``True``, deletes local files that don't exist in the bucket.
|
|
1324
|
+
"""
|
|
1325
|
+
prefix = prefix or ""
|
|
1326
|
+
local_dir_path = Path(local_dir)
|
|
1327
|
+
self.log.debug("Downloading data from gs://%s/%s to %s", bucket_name, prefix, local_dir_path)
|
|
1328
|
+
|
|
1329
|
+
gcs_bucket = self.get_bucket(bucket_name)
|
|
1330
|
+
local_gcs_objects = []
|
|
1331
|
+
|
|
1332
|
+
for blob in gcs_bucket.list_blobs(prefix=prefix):
|
|
1333
|
+
# GCS lists "directories" as objects ending with a slash. We should skip them.
|
|
1334
|
+
if blob.name.endswith("/"):
|
|
1335
|
+
continue
|
|
1336
|
+
|
|
1337
|
+
blob_path = Path(blob.name)
|
|
1338
|
+
local_target_path = local_dir_path.joinpath(blob_path.relative_to(prefix))
|
|
1339
|
+
if not local_target_path.parent.exists():
|
|
1340
|
+
local_target_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1341
|
+
self.log.debug("Created local directory: %s", local_target_path.parent)
|
|
1342
|
+
|
|
1343
|
+
self._sync_to_local_dir_if_changed(blob=blob, local_target_path=local_target_path)
|
|
1344
|
+
local_gcs_objects.append(local_target_path)
|
|
1345
|
+
|
|
1346
|
+
if delete_stale:
|
|
1347
|
+
self._sync_to_local_dir_delete_stale_local_files(
|
|
1348
|
+
current_gcs_objects=local_gcs_objects, local_dir=local_dir_path
|
|
1349
|
+
)
|
|
1350
|
+
|
|
1240
1351
|
def sync(
|
|
1241
1352
|
self,
|
|
1242
1353
|
source_bucket: str,
|
|
@@ -1487,5 +1598,5 @@ class GCSAsyncHook(GoogleBaseAsyncHook):
|
|
|
1487
1598
|
token = await self.get_token(session=session)
|
|
1488
1599
|
return Storage(
|
|
1489
1600
|
token=token,
|
|
1490
|
-
session=cast(Session, session),
|
|
1601
|
+
session=cast("Session", session),
|
|
1491
1602
|
)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
|
4
|
+
# distributed with this work for additional information
|
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
|
7
|
+
# "License"); you may not use this file except in compliance
|
|
8
|
+
# with the License. You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing,
|
|
13
|
+
# software distributed under the License is distributed on an
|
|
14
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
# KIND, either express or implied. See the License for the
|
|
16
|
+
# specific language governing permissions and limitations
|
|
17
|
+
# under the License.
|
|
18
|
+
"""This module contains a Google Cloud GenAI Generative Model hook."""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import time
|
|
23
|
+
from typing import TYPE_CHECKING, Any
|
|
24
|
+
|
|
25
|
+
from google import genai
|
|
26
|
+
|
|
27
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from google.genai.types import (
|
|
31
|
+
ContentListUnion,
|
|
32
|
+
ContentListUnionDict,
|
|
33
|
+
CountTokensConfigOrDict,
|
|
34
|
+
CountTokensResponse,
|
|
35
|
+
CreateCachedContentConfigOrDict,
|
|
36
|
+
CreateTuningJobConfigOrDict,
|
|
37
|
+
EmbedContentConfigOrDict,
|
|
38
|
+
EmbedContentResponse,
|
|
39
|
+
GenerateContentConfig,
|
|
40
|
+
TuningDatasetOrDict,
|
|
41
|
+
TuningJob,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class GenAIGenerativeModelHook(GoogleBaseHook):
|
|
46
|
+
"""Class for Google Cloud Generative AI Vertex AI hook."""
|
|
47
|
+
|
|
48
|
+
def get_genai_client(self, project_id: str, location: str):
|
|
49
|
+
return genai.Client(
|
|
50
|
+
vertexai=True,
|
|
51
|
+
project=project_id,
|
|
52
|
+
location=location,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
|
56
|
+
def embed_content(
|
|
57
|
+
self,
|
|
58
|
+
model: str,
|
|
59
|
+
location: str,
|
|
60
|
+
contents: ContentListUnion | ContentListUnionDict | list[str],
|
|
61
|
+
config: EmbedContentConfigOrDict | None = None,
|
|
62
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
|
63
|
+
) -> EmbedContentResponse:
|
|
64
|
+
"""
|
|
65
|
+
Generate embeddings for words, phrases, sentences, and code.
|
|
66
|
+
|
|
67
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
68
|
+
:param location: Required. The ID of the Google Cloud location that the service belongs to.
|
|
69
|
+
:param model: Required. The model to use.
|
|
70
|
+
:param contents: Required. The contents to use for embedding.
|
|
71
|
+
:param config: Optional. Configuration for embeddings.
|
|
72
|
+
"""
|
|
73
|
+
client = self.get_genai_client(project_id=project_id, location=location)
|
|
74
|
+
|
|
75
|
+
resp = client.models.embed_content(model=model, contents=contents, config=config)
|
|
76
|
+
return resp
|
|
77
|
+
|
|
78
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
|
79
|
+
def generate_content(
|
|
80
|
+
self,
|
|
81
|
+
location: str,
|
|
82
|
+
model: str,
|
|
83
|
+
contents: ContentListUnionDict,
|
|
84
|
+
generation_config: GenerateContentConfig | None = None,
|
|
85
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
|
86
|
+
) -> str:
|
|
87
|
+
"""
|
|
88
|
+
Make an API request to generate content using a model.
|
|
89
|
+
|
|
90
|
+
:param location: Required. The ID of the Google Cloud location that the service belongs to.
|
|
91
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
92
|
+
:param model: Required. The model to use.
|
|
93
|
+
:param contents: Required. The multi-part content of a message that a user or a program
|
|
94
|
+
gives to the generative model, in order to elicit a specific response.
|
|
95
|
+
:param generation_config: Optional. Generation configuration settings.
|
|
96
|
+
"""
|
|
97
|
+
client = self.get_genai_client(project_id=project_id, location=location)
|
|
98
|
+
response = client.models.generate_content(
|
|
99
|
+
model=model,
|
|
100
|
+
contents=contents,
|
|
101
|
+
config=generation_config,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
return response.text
|
|
105
|
+
|
|
106
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
|
107
|
+
def supervised_fine_tuning_train(
|
|
108
|
+
self,
|
|
109
|
+
source_model: str,
|
|
110
|
+
location: str,
|
|
111
|
+
training_dataset: TuningDatasetOrDict,
|
|
112
|
+
tuning_job_config: CreateTuningJobConfigOrDict | dict[str, Any] | None = None,
|
|
113
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
|
114
|
+
) -> TuningJob:
|
|
115
|
+
"""
|
|
116
|
+
Create a tuning job to adapt model behavior with a labeled dataset.
|
|
117
|
+
|
|
118
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
119
|
+
:param location: Required. The ID of the Google Cloud location that the service belongs to.
|
|
120
|
+
:param source_model: Required. A pre-trained model optimized for performing natural
|
|
121
|
+
language tasks such as classification, summarization, extraction, content
|
|
122
|
+
creation, and ideation.
|
|
123
|
+
:param training_dataset: Required. Cloud Storage URI of your training dataset. The dataset
|
|
124
|
+
must be formatted as a JSONL file. For best results, provide at least 100 to 500 examples.
|
|
125
|
+
:param tuning_job_config: Optional. Configuration of the Tuning job to be created.
|
|
126
|
+
"""
|
|
127
|
+
client = self.get_genai_client(project_id=project_id, location=location)
|
|
128
|
+
|
|
129
|
+
tuning_job = client.tunings.tune(
|
|
130
|
+
base_model=source_model,
|
|
131
|
+
training_dataset=training_dataset,
|
|
132
|
+
config=tuning_job_config,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Poll until completion
|
|
136
|
+
running = {"JOB_STATE_PENDING", "JOB_STATE_RUNNING"}
|
|
137
|
+
while tuning_job.state in running:
|
|
138
|
+
time.sleep(60)
|
|
139
|
+
tuning_job = client.tunings.get(name=tuning_job.name)
|
|
140
|
+
|
|
141
|
+
return tuning_job
|
|
142
|
+
|
|
143
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
|
144
|
+
def count_tokens(
|
|
145
|
+
self,
|
|
146
|
+
location: str,
|
|
147
|
+
model: str,
|
|
148
|
+
contents: ContentListUnion | ContentListUnionDict,
|
|
149
|
+
config: CountTokensConfigOrDict | None = None,
|
|
150
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
|
151
|
+
) -> CountTokensResponse:
|
|
152
|
+
"""
|
|
153
|
+
Use Count Tokens API to calculate the number of input tokens before sending a request to Gemini API.
|
|
154
|
+
|
|
155
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
156
|
+
:param location: Required. The ID of the Google Cloud location that the service belongs to.
|
|
157
|
+
:param contents: Required. The multi-part content of a message that a user or a program
|
|
158
|
+
gives to the generative model, in order to elicit a specific response.
|
|
159
|
+
:param model: Required. The model to use,
|
|
160
|
+
supporting prompts with text-only input, including natural language
|
|
161
|
+
tasks, multi-turn text and code chat, and code generation. It can
|
|
162
|
+
output text and code.
|
|
163
|
+
:param config: Optional. Configuration for Count Tokens.
|
|
164
|
+
"""
|
|
165
|
+
client = self.get_genai_client(project_id=project_id, location=location)
|
|
166
|
+
response = client.models.count_tokens(
|
|
167
|
+
model=model,
|
|
168
|
+
contents=contents,
|
|
169
|
+
config=config,
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
return response
|
|
173
|
+
|
|
174
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
|
175
|
+
def create_cached_content(
|
|
176
|
+
self,
|
|
177
|
+
model: str,
|
|
178
|
+
location: str,
|
|
179
|
+
cached_content_config: CreateCachedContentConfigOrDict | None = None,
|
|
180
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
|
181
|
+
) -> str:
|
|
182
|
+
"""
|
|
183
|
+
Create CachedContent to reduce the cost of requests containing repeat content.
|
|
184
|
+
|
|
185
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
|
186
|
+
:param location: Required. The ID of the Google Cloud location that the service belongs to.
|
|
187
|
+
:param model: Required. The name of the publisher model to use for cached content.
|
|
188
|
+
:param cached_content_config: Optional. Configuration of the Cached Content.
|
|
189
|
+
"""
|
|
190
|
+
client = self.get_genai_client(project_id=project_id, location=location)
|
|
191
|
+
resp = client.caches.create(
|
|
192
|
+
model=model,
|
|
193
|
+
config=cached_content_config,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
return resp.name
|
|
@@ -23,11 +23,12 @@ import base64
|
|
|
23
23
|
from collections.abc import Sequence
|
|
24
24
|
from typing import TYPE_CHECKING
|
|
25
25
|
|
|
26
|
-
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
27
|
-
from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
|
|
28
26
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
29
27
|
from google.cloud.kms_v1 import KeyManagementServiceClient
|
|
30
28
|
|
|
29
|
+
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
30
|
+
from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
|
|
31
|
+
|
|
31
32
|
if TYPE_CHECKING:
|
|
32
33
|
from google.api_core.retry import Retry
|
|
33
34
|
|
|
@@ -25,6 +25,14 @@ import time
|
|
|
25
25
|
from collections.abc import Sequence
|
|
26
26
|
from typing import TYPE_CHECKING, Any
|
|
27
27
|
|
|
28
|
+
from google.api_core.exceptions import NotFound
|
|
29
|
+
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
30
|
+
from google.auth.transport import requests as google_requests
|
|
31
|
+
|
|
32
|
+
# not sure why but mypy complains on missing `container_v1` but it is clearly there and is importable
|
|
33
|
+
from google.cloud import exceptions
|
|
34
|
+
from google.cloud.container_v1 import ClusterManagerAsyncClient, ClusterManagerClient
|
|
35
|
+
from google.cloud.container_v1.types import Cluster, Operation
|
|
28
36
|
from kubernetes import client
|
|
29
37
|
from kubernetes_asyncio import client as async_client
|
|
30
38
|
from kubernetes_asyncio.config.kube_config import FileOrData
|
|
@@ -39,14 +47,6 @@ from airflow.providers.google.common.hooks.base_google import (
|
|
|
39
47
|
GoogleBaseAsyncHook,
|
|
40
48
|
GoogleBaseHook,
|
|
41
49
|
)
|
|
42
|
-
from google.api_core.exceptions import NotFound
|
|
43
|
-
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
44
|
-
from google.auth.transport import requests as google_requests
|
|
45
|
-
|
|
46
|
-
# not sure why but mypy complains on missing `container_v1` but it is clearly there and is importable
|
|
47
|
-
from google.cloud import exceptions # type: ignore[attr-defined]
|
|
48
|
-
from google.cloud.container_v1 import ClusterManagerAsyncClient, ClusterManagerClient
|
|
49
|
-
from google.cloud.container_v1.types import Cluster, Operation
|
|
50
50
|
|
|
51
51
|
if TYPE_CHECKING:
|
|
52
52
|
import google.auth.credentials
|
|
@@ -64,11 +64,13 @@ class GKEClusterConnection:
|
|
|
64
64
|
ssl_ca_cert: str,
|
|
65
65
|
credentials: google.auth.credentials.Credentials,
|
|
66
66
|
enable_tcp_keepalive: bool = False,
|
|
67
|
+
use_dns_endpoint: bool = False,
|
|
67
68
|
):
|
|
68
69
|
self._cluster_url = cluster_url
|
|
69
70
|
self._ssl_ca_cert = ssl_ca_cert
|
|
70
71
|
self._credentials = credentials
|
|
71
72
|
self.enable_tcp_keepalive = enable_tcp_keepalive
|
|
73
|
+
self.use_dns_endpoint = use_dns_endpoint
|
|
72
74
|
|
|
73
75
|
def get_conn(self) -> client.ApiClient:
|
|
74
76
|
configuration = self._get_config()
|
|
@@ -86,12 +88,13 @@ class GKEClusterConnection:
|
|
|
86
88
|
api_key_prefix={"authorization": "Bearer"},
|
|
87
89
|
api_key={"authorization": self._get_token(self._credentials)},
|
|
88
90
|
)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
if not self.use_dns_endpoint:
|
|
92
|
+
configuration.ssl_ca_cert = FileOrData(
|
|
93
|
+
{
|
|
94
|
+
"certificate-authority-data": self._ssl_ca_cert,
|
|
95
|
+
},
|
|
96
|
+
file_key_name="certificate-authority",
|
|
97
|
+
).as_file()
|
|
95
98
|
return configuration
|
|
96
99
|
|
|
97
100
|
@staticmethod
|
|
@@ -349,8 +352,7 @@ class GKEHook(GoogleBaseHook):
|
|
|
349
352
|
or node_pools_autoscaled
|
|
350
353
|
):
|
|
351
354
|
return True
|
|
352
|
-
|
|
353
|
-
return False
|
|
355
|
+
return False
|
|
354
356
|
|
|
355
357
|
|
|
356
358
|
class GKEAsyncHook(GoogleBaseAsyncHook):
|
|
@@ -417,6 +419,7 @@ class GKEKubernetesHook(GoogleBaseHook, KubernetesHook):
|
|
|
417
419
|
cluster_url: str,
|
|
418
420
|
ssl_ca_cert: str,
|
|
419
421
|
enable_tcp_keepalive: bool = False,
|
|
422
|
+
use_dns_endpoint: bool = False,
|
|
420
423
|
*args,
|
|
421
424
|
**kwargs,
|
|
422
425
|
):
|
|
@@ -424,6 +427,7 @@ class GKEKubernetesHook(GoogleBaseHook, KubernetesHook):
|
|
|
424
427
|
self._cluster_url = cluster_url
|
|
425
428
|
self._ssl_ca_cert = ssl_ca_cert
|
|
426
429
|
self.enable_tcp_keepalive = enable_tcp_keepalive
|
|
430
|
+
self.use_dns_endpoint = use_dns_endpoint
|
|
427
431
|
|
|
428
432
|
def get_conn(self) -> client.ApiClient:
|
|
429
433
|
return GKEClusterConnection(
|
|
@@ -431,6 +435,7 @@ class GKEKubernetesHook(GoogleBaseHook, KubernetesHook):
|
|
|
431
435
|
ssl_ca_cert=self._ssl_ca_cert,
|
|
432
436
|
credentials=self.get_credentials(),
|
|
433
437
|
enable_tcp_keepalive=self.enable_tcp_keepalive,
|
|
438
|
+
use_dns_endpoint=self.use_dns_endpoint,
|
|
434
439
|
).get_conn()
|
|
435
440
|
|
|
436
441
|
def apply_from_yaml_file(
|
|
@@ -493,7 +498,7 @@ class GKEKubernetesAsyncHook(GoogleBaseAsyncHook, AsyncKubernetesHook):
|
|
|
493
498
|
)
|
|
494
499
|
|
|
495
500
|
@contextlib.asynccontextmanager
|
|
496
|
-
async def get_conn(self) -> async_client.ApiClient:
|
|
501
|
+
async def get_conn(self) -> async_client.ApiClient:
|
|
497
502
|
kube_client = None
|
|
498
503
|
try:
|
|
499
504
|
kube_client = await self._load_config()
|
|
@@ -29,7 +29,7 @@ from looker_sdk.sdk.api40 import methods as methods40
|
|
|
29
29
|
from packaging.version import parse as parse_version
|
|
30
30
|
|
|
31
31
|
from airflow.exceptions import AirflowException
|
|
32
|
-
from airflow.
|
|
32
|
+
from airflow.providers.common.compat.sdk import BaseHook
|
|
33
33
|
from airflow.version import version
|
|
34
34
|
|
|
35
35
|
if TYPE_CHECKING:
|
|
@@ -39,6 +39,11 @@ if TYPE_CHECKING:
|
|
|
39
39
|
class LookerHook(BaseHook):
|
|
40
40
|
"""Hook for Looker APIs."""
|
|
41
41
|
|
|
42
|
+
conn_name_attr = "looker_conn_id"
|
|
43
|
+
default_conn_name = "looker_default"
|
|
44
|
+
conn_type = "gcp_looker"
|
|
45
|
+
hook_name = "Google Looker"
|
|
46
|
+
|
|
42
47
|
def __init__(
|
|
43
48
|
self,
|
|
44
49
|
looker_conn_id: str,
|