apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
- airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/_vendor/__init__.py +0 -0
- airflow/providers/google/_vendor/json_merge_patch.py +91 -0
- airflow/providers/google/ads/hooks/ads.py +52 -43
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
- airflow/providers/google/cloud/hooks/bigquery.py +195 -318
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +3 -2
- airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
- airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
- airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
- airflow/providers/google/cloud/hooks/compute.py +7 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
- airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
- airflow/providers/google/cloud/hooks/dataflow.py +87 -242
- airflow/providers/google/cloud/hooks/dataform.py +9 -14
- airflow/providers/google/cloud/hooks/datafusion.py +7 -9
- airflow/providers/google/cloud/hooks/dataplex.py +13 -12
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +76 -74
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/hooks/dlp.py +5 -4
- airflow/providers/google/cloud/hooks/gcs.py +144 -33
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kms.py +3 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
- airflow/providers/google/cloud/hooks/mlengine.py +7 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -2
- airflow/providers/google/cloud/hooks/os_login.py +3 -2
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
- airflow/providers/google/cloud/hooks/spanner.py +75 -10
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
- airflow/providers/google/cloud/hooks/tasks.py +4 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
- airflow/providers/google/cloud/hooks/translate.py +8 -17
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
- airflow/providers/google/cloud/hooks/vision.py +7 -7
- airflow/providers/google/cloud/hooks/workflows.py +4 -3
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -7
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -90
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -89
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
- airflow/providers/google/cloud/links/managed_kafka.py +11 -51
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +141 -40
- airflow/providers/google/cloud/openlineage/mixins.py +14 -13
- airflow/providers/google/cloud/openlineage/utils.py +19 -3
- airflow/providers/google/cloud/operators/alloy_db.py +76 -61
- airflow/providers/google/cloud/operators/bigquery.py +104 -667
- airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
- airflow/providers/google/cloud/operators/bigtable.py +38 -7
- airflow/providers/google/cloud/operators/cloud_base.py +22 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
- airflow/providers/google/cloud/operators/cloud_build.py +80 -36
- airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
- airflow/providers/google/cloud/operators/cloud_run.py +39 -20
- airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
- airflow/providers/google/cloud/operators/compute.py +18 -50
- airflow/providers/google/cloud/operators/datacatalog.py +167 -29
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +19 -7
- airflow/providers/google/cloud/operators/datafusion.py +43 -43
- airflow/providers/google/cloud/operators/dataplex.py +212 -126
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +134 -207
- airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +24 -45
- airflow/providers/google/cloud/operators/functions.py +21 -14
- airflow/providers/google/cloud/operators/gcs.py +15 -12
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
- airflow/providers/google/cloud/operators/natural_language.py +5 -3
- airflow/providers/google/cloud/operators/pubsub.py +69 -21
- airflow/providers/google/cloud/operators/spanner.py +53 -45
- airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
- airflow/providers/google/cloud/operators/stackdriver.py +5 -11
- airflow/providers/google/cloud/operators/tasks.py +6 -15
- airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
- airflow/providers/google/cloud/operators/translate.py +46 -20
- airflow/providers/google/cloud/operators/translate_speech.py +4 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
- airflow/providers/google/cloud/operators/vision.py +7 -5
- airflow/providers/google/cloud/operators/workflows.py +24 -19
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
- airflow/providers/google/cloud/sensors/bigtable.py +14 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
- airflow/providers/google/cloud/sensors/dataflow.py +27 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +7 -5
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +10 -9
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/sensors/gcs.py +22 -21
- airflow/providers/google/cloud/sensors/looker.py +5 -5
- airflow/providers/google/cloud/sensors/pubsub.py +20 -20
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +6 -4
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
- airflow/providers/google/cloud/triggers/dataflow.py +125 -2
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +16 -3
- airflow/providers/google/cloud/triggers/dataproc.py +124 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +17 -20
- airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
- airflow/providers/google/cloud/utils/bigquery.py +5 -7
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/cloud/utils/validators.py +43 -0
- airflow/providers/google/common/auth_backend/google_openid.py +26 -9
- airflow/providers/google/common/consts.py +2 -1
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +40 -43
- airflow/providers/google/common/hooks/operation_helpers.py +78 -0
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +4 -5
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +61 -216
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
- airflow/providers/google/cloud/hooks/automl.py +0 -679
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1360
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -1515
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
- /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -25,20 +25,27 @@ from functools import cached_property
|
|
|
25
25
|
from pathlib import Path
|
|
26
26
|
from typing import TYPE_CHECKING
|
|
27
27
|
|
|
28
|
+
import attrs
|
|
29
|
+
|
|
30
|
+
# Make mypy happy by importing as aliases
|
|
31
|
+
import google.cloud.storage as storage
|
|
32
|
+
|
|
28
33
|
from airflow.configuration import conf
|
|
29
34
|
from airflow.exceptions import AirflowNotFoundException
|
|
30
35
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
|
|
31
|
-
from airflow.providers.google.cloud.utils.credentials_provider import
|
|
36
|
+
from airflow.providers.google.cloud.utils.credentials_provider import (
|
|
37
|
+
get_credentials_and_project_id,
|
|
38
|
+
)
|
|
32
39
|
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
33
40
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
41
|
+
from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
|
|
34
42
|
from airflow.utils.log.file_task_handler import FileTaskHandler
|
|
35
43
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
36
44
|
|
|
37
|
-
# not sure why but mypy complains on missing `storage` but it is clearly there and is importable
|
|
38
|
-
from google.cloud import storage # type: ignore[attr-defined]
|
|
39
|
-
|
|
40
45
|
if TYPE_CHECKING:
|
|
41
46
|
from airflow.models.taskinstance import TaskInstance
|
|
47
|
+
from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
|
|
48
|
+
from airflow.utils.log.file_task_handler import LogMessages, LogSourceInfo
|
|
42
49
|
|
|
43
50
|
_DEFAULT_SCOPESS = frozenset(
|
|
44
51
|
[
|
|
@@ -49,6 +56,128 @@ _DEFAULT_SCOPESS = frozenset(
|
|
|
49
56
|
logger = logging.getLogger(__name__)
|
|
50
57
|
|
|
51
58
|
|
|
59
|
+
@attrs.define
|
|
60
|
+
class GCSRemoteLogIO(LoggingMixin): # noqa: D101
|
|
61
|
+
remote_base: str
|
|
62
|
+
base_log_folder: Path = attrs.field(converter=Path)
|
|
63
|
+
delete_local_copy: bool
|
|
64
|
+
project_id: str | None = None
|
|
65
|
+
|
|
66
|
+
gcp_key_path: str | None = None
|
|
67
|
+
gcp_keyfile_dict: dict | None = None
|
|
68
|
+
scopes: Collection[str] | None = _DEFAULT_SCOPESS
|
|
69
|
+
|
|
70
|
+
processors = ()
|
|
71
|
+
|
|
72
|
+
def upload(self, path: os.PathLike | str, ti: RuntimeTI):
|
|
73
|
+
"""Upload the given log path to the remote storage."""
|
|
74
|
+
path = Path(path)
|
|
75
|
+
if path.is_absolute():
|
|
76
|
+
local_loc = path
|
|
77
|
+
remote_loc = os.path.join(self.remote_base, path.relative_to(self.base_log_folder))
|
|
78
|
+
else:
|
|
79
|
+
local_loc = self.base_log_folder.joinpath(path)
|
|
80
|
+
remote_loc = os.path.join(self.remote_base, path)
|
|
81
|
+
|
|
82
|
+
if local_loc.is_file():
|
|
83
|
+
# read log and remove old logs to get just the latest additions
|
|
84
|
+
log = local_loc.read_text()
|
|
85
|
+
has_uploaded = self.write(log, remote_loc)
|
|
86
|
+
if has_uploaded and self.delete_local_copy:
|
|
87
|
+
shutil.rmtree(os.path.dirname(local_loc))
|
|
88
|
+
|
|
89
|
+
@cached_property
|
|
90
|
+
def hook(self) -> GCSHook | None:
|
|
91
|
+
"""Returns GCSHook if remote_log_conn_id configured."""
|
|
92
|
+
conn_id = conf.get("logging", "remote_log_conn_id", fallback=None)
|
|
93
|
+
if conn_id:
|
|
94
|
+
try:
|
|
95
|
+
return GCSHook(gcp_conn_id=conn_id)
|
|
96
|
+
except AirflowNotFoundException:
|
|
97
|
+
pass
|
|
98
|
+
return None
|
|
99
|
+
|
|
100
|
+
@cached_property
|
|
101
|
+
def client(self) -> storage.Client:
|
|
102
|
+
"""Returns GCS Client."""
|
|
103
|
+
if self.hook:
|
|
104
|
+
credentials, project_id = self.hook.get_credentials_and_project_id()
|
|
105
|
+
else:
|
|
106
|
+
credentials, project_id = get_credentials_and_project_id(
|
|
107
|
+
key_path=self.gcp_key_path,
|
|
108
|
+
keyfile_dict=self.gcp_keyfile_dict,
|
|
109
|
+
scopes=self.scopes,
|
|
110
|
+
disable_logging=True,
|
|
111
|
+
)
|
|
112
|
+
return storage.Client(
|
|
113
|
+
credentials=credentials,
|
|
114
|
+
client_info=CLIENT_INFO,
|
|
115
|
+
project=self.project_id if self.project_id else project_id,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
def write(self, log: str, remote_log_location: str) -> bool:
|
|
119
|
+
"""
|
|
120
|
+
Write the log to the remote location and return `True`; fail silently and return `False` on error.
|
|
121
|
+
|
|
122
|
+
:param log: the log to write to the remote_log_location
|
|
123
|
+
:param remote_log_location: the log's location in remote storage
|
|
124
|
+
:return: whether the log is successfully written to remote location or not.
|
|
125
|
+
"""
|
|
126
|
+
try:
|
|
127
|
+
blob = storage.Blob.from_string(remote_log_location, self.client)
|
|
128
|
+
old_log = blob.download_as_bytes().decode()
|
|
129
|
+
log = f"{old_log}\n{log}" if old_log else log
|
|
130
|
+
except Exception as e:
|
|
131
|
+
if not self.no_log_found(e):
|
|
132
|
+
self.log.warning("Error checking for previous log: %s", e)
|
|
133
|
+
try:
|
|
134
|
+
blob = storage.Blob.from_string(remote_log_location, self.client)
|
|
135
|
+
blob.upload_from_string(log, content_type="text/plain")
|
|
136
|
+
except Exception as e:
|
|
137
|
+
self.log.error("Could not write logs to %s: %s", remote_log_location, e)
|
|
138
|
+
return False
|
|
139
|
+
return True
|
|
140
|
+
|
|
141
|
+
@staticmethod
|
|
142
|
+
def no_log_found(exc):
|
|
143
|
+
"""
|
|
144
|
+
Given exception, determine whether it is result of log not found.
|
|
145
|
+
|
|
146
|
+
:meta private:
|
|
147
|
+
"""
|
|
148
|
+
return (exc.args and isinstance(exc.args[0], str) and "No such object" in exc.args[0]) or getattr(
|
|
149
|
+
exc, "resp", {}
|
|
150
|
+
).get("status") == "404"
|
|
151
|
+
|
|
152
|
+
def read(self, relative_path: str, ti: RuntimeTI) -> tuple[LogSourceInfo, LogMessages | None]:
|
|
153
|
+
messages = []
|
|
154
|
+
logs = []
|
|
155
|
+
remote_loc = os.path.join(self.remote_base, relative_path)
|
|
156
|
+
uris = []
|
|
157
|
+
bucket, prefix = _parse_gcs_url(remote_loc)
|
|
158
|
+
blobs = list(self.client.list_blobs(bucket_or_name=bucket, prefix=prefix))
|
|
159
|
+
|
|
160
|
+
if blobs:
|
|
161
|
+
uris = [f"gs://{bucket}/{b.name}" for b in blobs]
|
|
162
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
163
|
+
messages = uris
|
|
164
|
+
else:
|
|
165
|
+
messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]])
|
|
166
|
+
else:
|
|
167
|
+
return messages, None
|
|
168
|
+
|
|
169
|
+
try:
|
|
170
|
+
for key in sorted(uris):
|
|
171
|
+
blob = storage.Blob.from_string(key, self.client)
|
|
172
|
+
remote_log = blob.download_as_bytes().decode()
|
|
173
|
+
if remote_log:
|
|
174
|
+
logs.append(remote_log)
|
|
175
|
+
except Exception as e:
|
|
176
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
177
|
+
messages.append(f"Unable to read remote log {e}")
|
|
178
|
+
return messages, logs
|
|
179
|
+
|
|
180
|
+
|
|
52
181
|
class GCSTaskHandler(FileTaskHandler, LoggingMixin):
|
|
53
182
|
"""
|
|
54
183
|
GCSTaskHandler is a python log handler that handles and reads task instance logs.
|
|
@@ -84,49 +213,29 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
|
|
|
84
213
|
gcp_keyfile_dict: dict | None = None,
|
|
85
214
|
gcp_scopes: Collection[str] | None = _DEFAULT_SCOPESS,
|
|
86
215
|
project_id: str = PROVIDE_PROJECT_ID,
|
|
216
|
+
max_bytes: int = 0,
|
|
217
|
+
backup_count: int = 0,
|
|
218
|
+
delay: bool = False,
|
|
87
219
|
**kwargs,
|
|
88
|
-
):
|
|
89
|
-
|
|
220
|
+
) -> None:
|
|
221
|
+
# support log file size handling of FileTaskHandler
|
|
222
|
+
super().__init__(
|
|
223
|
+
base_log_folder=base_log_folder, max_bytes=max_bytes, backup_count=backup_count, delay=delay
|
|
224
|
+
)
|
|
90
225
|
self.handler: logging.FileHandler | None = None
|
|
91
|
-
self.remote_base = gcs_log_folder
|
|
92
226
|
self.log_relative_path = ""
|
|
93
227
|
self.closed = False
|
|
94
228
|
self.upload_on_close = True
|
|
95
|
-
self.
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
"""Returns GCSHook if remote_log_conn_id configured."""
|
|
106
|
-
conn_id = conf.get("logging", "remote_log_conn_id", fallback=None)
|
|
107
|
-
if conn_id:
|
|
108
|
-
try:
|
|
109
|
-
return GCSHook(gcp_conn_id=conn_id)
|
|
110
|
-
except AirflowNotFoundException:
|
|
111
|
-
pass
|
|
112
|
-
return None
|
|
113
|
-
|
|
114
|
-
@cached_property
|
|
115
|
-
def client(self) -> storage.Client:
|
|
116
|
-
"""Returns GCS Client."""
|
|
117
|
-
if self.hook:
|
|
118
|
-
credentials, project_id = self.hook.get_credentials_and_project_id()
|
|
119
|
-
else:
|
|
120
|
-
credentials, project_id = get_credentials_and_project_id(
|
|
121
|
-
key_path=self.gcp_key_path,
|
|
122
|
-
keyfile_dict=self.gcp_keyfile_dict,
|
|
123
|
-
scopes=self.scopes,
|
|
124
|
-
disable_logging=True,
|
|
125
|
-
)
|
|
126
|
-
return storage.Client(
|
|
127
|
-
credentials=credentials,
|
|
128
|
-
client_info=CLIENT_INFO,
|
|
129
|
-
project=self.project_id if self.project_id else project_id,
|
|
229
|
+
self.io = GCSRemoteLogIO(
|
|
230
|
+
base_log_folder=base_log_folder,
|
|
231
|
+
remote_base=gcs_log_folder,
|
|
232
|
+
delete_local_copy=kwargs.get(
|
|
233
|
+
"delete_local_copy", conf.getboolean("logging", "delete_local_logs")
|
|
234
|
+
),
|
|
235
|
+
gcp_key_path=gcp_key_path,
|
|
236
|
+
gcp_keyfile_dict=gcp_keyfile_dict,
|
|
237
|
+
scopes=gcp_scopes,
|
|
238
|
+
project_id=project_id,
|
|
130
239
|
)
|
|
131
240
|
|
|
132
241
|
def set_context(self, ti: TaskInstance, *, identifier: str | None = None) -> None:
|
|
@@ -137,6 +246,8 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
|
|
|
137
246
|
if TYPE_CHECKING:
|
|
138
247
|
assert self.handler is not None
|
|
139
248
|
|
|
249
|
+
self.ti = ti
|
|
250
|
+
|
|
140
251
|
full_path = self.handler.baseFilename
|
|
141
252
|
self.log_relative_path = Path(full_path).relative_to(self.local_base).as_posix()
|
|
142
253
|
is_trigger_log_context = getattr(ti, "is_trigger_log_context", False)
|
|
@@ -156,86 +267,23 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
|
|
|
156
267
|
if not self.upload_on_close:
|
|
157
268
|
return
|
|
158
269
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
if os.path.exists(local_loc):
|
|
162
|
-
# read log and remove old logs to get just the latest additions
|
|
163
|
-
with open(local_loc) as logfile:
|
|
164
|
-
log = logfile.read()
|
|
165
|
-
gcs_write = self.gcs_write(log, remote_loc)
|
|
166
|
-
if gcs_write and self.delete_local_copy:
|
|
167
|
-
shutil.rmtree(os.path.dirname(local_loc))
|
|
270
|
+
if hasattr(self, "ti"):
|
|
271
|
+
self.io.upload(self.log_relative_path, self.ti)
|
|
168
272
|
|
|
169
273
|
# Mark closed so we don't double write if close is called twice
|
|
170
274
|
self.closed = True
|
|
171
275
|
|
|
172
|
-
def
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[list[str], list[str]]:
|
|
178
|
-
# Explicitly getting log relative path is necessary because this method
|
|
179
|
-
# is called from webserver from TaskLogReader, where we don't call set_context
|
|
180
|
-
# and can read logs for different TIs in each request
|
|
181
|
-
messages = []
|
|
182
|
-
logs = []
|
|
183
|
-
worker_log_relative_path = self._render_filename(ti, try_number)
|
|
184
|
-
remote_loc = os.path.join(self.remote_base, worker_log_relative_path)
|
|
185
|
-
uris = []
|
|
186
|
-
bucket, prefix = _parse_gcs_url(remote_loc)
|
|
187
|
-
blobs = list(self.client.list_blobs(bucket_or_name=bucket, prefix=prefix))
|
|
188
|
-
|
|
189
|
-
if blobs:
|
|
190
|
-
uris = [f"gs://{bucket}/{b.name}" for b in blobs]
|
|
191
|
-
messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]])
|
|
192
|
-
else:
|
|
193
|
-
messages.append(f"No logs found in GCS; ti=%s {ti}")
|
|
194
|
-
try:
|
|
195
|
-
for key in sorted(uris):
|
|
196
|
-
blob = storage.Blob.from_string(key, self.client)
|
|
197
|
-
remote_log = blob.download_as_bytes().decode()
|
|
198
|
-
if remote_log:
|
|
199
|
-
logs.append(remote_log)
|
|
200
|
-
except Exception as e:
|
|
201
|
-
messages.append(f"Unable to read remote log {e}")
|
|
202
|
-
return messages, logs
|
|
276
|
+
def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[LogSourceInfo, LogMessages]:
|
|
277
|
+
# Explicitly getting log relative path is necessary as the given
|
|
278
|
+
# task instance might be different than task instance passed in
|
|
279
|
+
# in set_context method.
|
|
280
|
+
worker_log_rel_path = self._render_filename(ti, try_number)
|
|
203
281
|
|
|
204
|
-
|
|
205
|
-
"""
|
|
206
|
-
Write the log to the remote location and return `True`; fail silently and return `False` on error.
|
|
282
|
+
messages, logs = self.io.read(worker_log_rel_path, ti)
|
|
207
283
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
try:
|
|
213
|
-
blob = storage.Blob.from_string(remote_log_location, self.client)
|
|
214
|
-
old_log = blob.download_as_bytes().decode()
|
|
215
|
-
log = f"{old_log}\n{log}" if old_log else log
|
|
216
|
-
except Exception as e:
|
|
217
|
-
if not self.no_log_found(e):
|
|
218
|
-
log += self._add_message(
|
|
219
|
-
f"Error checking for previous log; if exists, may be overwritten: {e}"
|
|
220
|
-
)
|
|
221
|
-
self.log.warning("Error checking for previous log: %s", e)
|
|
222
|
-
try:
|
|
223
|
-
blob = storage.Blob.from_string(remote_log_location, self.client)
|
|
224
|
-
blob.upload_from_string(log, content_type="text/plain")
|
|
225
|
-
except Exception as e:
|
|
226
|
-
self.log.error("Could not write logs to %s: %s", remote_log_location, e)
|
|
227
|
-
return False
|
|
228
|
-
return True
|
|
229
|
-
|
|
230
|
-
@staticmethod
|
|
231
|
-
def no_log_found(exc):
|
|
232
|
-
"""
|
|
233
|
-
Given exception, determine whether it is result of log not found.
|
|
284
|
+
if logs is None:
|
|
285
|
+
logs = []
|
|
286
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
287
|
+
messages.append(f"No logs found in GCS; ti={ti}")
|
|
234
288
|
|
|
235
|
-
|
|
236
|
-
"""
|
|
237
|
-
if (exc.args and isinstance(exc.args[0], str) and "No such object" in exc.args[0]) or getattr(
|
|
238
|
-
exc, "resp", {}
|
|
239
|
-
).get("status") == "404":
|
|
240
|
-
return True
|
|
241
|
-
return False
|
|
289
|
+
return messages, logs
|
|
@@ -25,25 +25,30 @@ from functools import cached_property
|
|
|
25
25
|
from typing import TYPE_CHECKING
|
|
26
26
|
from urllib.parse import urlencode
|
|
27
27
|
|
|
28
|
-
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
29
|
-
from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
|
|
30
|
-
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
31
|
-
from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
|
|
32
|
-
from airflow.utils.types import NOTSET, ArgNotSet
|
|
33
28
|
from google.cloud import logging as gcp_logging
|
|
34
29
|
from google.cloud.logging import Resource
|
|
35
30
|
from google.cloud.logging.handlers.transports import BackgroundThreadTransport, Transport
|
|
36
31
|
from google.cloud.logging_v2.services.logging_service_v2 import LoggingServiceV2Client
|
|
37
32
|
from google.cloud.logging_v2.types import ListLogEntriesRequest, ListLogEntriesResponse
|
|
38
33
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
34
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
35
|
+
from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
|
|
36
|
+
from airflow.providers.google.common.consts import CLIENT_INFO
|
|
37
|
+
from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
|
|
42
38
|
|
|
39
|
+
try:
|
|
40
|
+
from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
|
|
41
|
+
except ImportError:
|
|
42
|
+
from airflow.utils.types import NOTSET, ArgNotSet # type: ignore[attr-defined,no-redef]
|
|
43
43
|
|
|
44
44
|
if not AIRFLOW_V_3_0_PLUS:
|
|
45
45
|
from airflow.utils.log.trigger_handler import ctx_indiv_trigger
|
|
46
46
|
|
|
47
|
+
if TYPE_CHECKING:
|
|
48
|
+
from google.auth.credentials import Credentials
|
|
49
|
+
|
|
50
|
+
from airflow.models import TaskInstance
|
|
51
|
+
|
|
47
52
|
DEFAULT_LOGGER_NAME = "airflow"
|
|
48
53
|
_GLOBAL_RESOURCE = Resource(type="global", labels={})
|
|
49
54
|
|
|
@@ -157,7 +162,7 @@ class StackdriverTaskHandler(logging.Handler):
|
|
|
157
162
|
"""Object responsible for sending data to Stackdriver."""
|
|
158
163
|
# The Transport object is badly defined (no init) but in the docs client/name as constructor
|
|
159
164
|
# arguments are a requirement for any class that derives from Transport class, hence ignore:
|
|
160
|
-
return self.transport_type(self._client, self.gcp_log_name)
|
|
165
|
+
return self.transport_type(self._client, self.gcp_log_name)
|
|
161
166
|
|
|
162
167
|
def _get_labels(self, task_instance=None):
|
|
163
168
|
if task_instance:
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"CloudStorageTransferJobFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"jobName": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Transfer job name assigned by GCP Storage Transfer Service."
|
|
15
|
+
},
|
|
16
|
+
"projectId": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "GCP project ID."
|
|
19
|
+
},
|
|
20
|
+
"description": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Optional description of the transfer job."
|
|
23
|
+
},
|
|
24
|
+
"status": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Status of the transfer job (ENABLED, DISABLED)."
|
|
27
|
+
},
|
|
28
|
+
"sourceBucket": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Source AWS S3 bucket."
|
|
31
|
+
},
|
|
32
|
+
"sourcePath": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "Prefix path inside the source bucket."
|
|
35
|
+
},
|
|
36
|
+
"targetBucket": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"description": "Target GCS bucket."
|
|
39
|
+
},
|
|
40
|
+
"targetPath": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "Prefix path inside the target bucket."
|
|
43
|
+
},
|
|
44
|
+
"objectConditions": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"description": "Filtering conditions for objects transferred."
|
|
47
|
+
},
|
|
48
|
+
"transferOptions": {
|
|
49
|
+
"type": "object",
|
|
50
|
+
"description": "Transfer options such as overwrite or delete."
|
|
51
|
+
},
|
|
52
|
+
"schedule": {
|
|
53
|
+
"type": "object",
|
|
54
|
+
"description": "Transfer schedule details."
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"type": "object"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"type": "object",
|
|
63
|
+
"properties": {
|
|
64
|
+
"cloudStorageTransferJob": {
|
|
65
|
+
"$ref": "#/$defs/CloudStorageTransferJobFacet"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"CloudStorageTransferRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"jobName": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Transfer job name associated with this run."
|
|
15
|
+
},
|
|
16
|
+
"operationName": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Transfer operation name if available."
|
|
19
|
+
},
|
|
20
|
+
"status": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Run status if available."
|
|
23
|
+
},
|
|
24
|
+
"startTime": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Start time of the transfer operation."
|
|
27
|
+
},
|
|
28
|
+
"endTime": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "End time of the transfer operation."
|
|
31
|
+
},
|
|
32
|
+
"wait": {
|
|
33
|
+
"type": "boolean",
|
|
34
|
+
"description": "Whether the operator waited for completion."
|
|
35
|
+
},
|
|
36
|
+
"timeout": {
|
|
37
|
+
"type": ["number", "null"],
|
|
38
|
+
"description": "Timeout in seconds."
|
|
39
|
+
},
|
|
40
|
+
"deferrable": {
|
|
41
|
+
"type": "boolean",
|
|
42
|
+
"description": "Whether the operator used deferrable mode."
|
|
43
|
+
},
|
|
44
|
+
"deleteJobAfterCompletion": {
|
|
45
|
+
"type": "boolean",
|
|
46
|
+
"description": "Whether the transfer job was deleted after completion."
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"type": "object"
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"type": "object",
|
|
55
|
+
"properties": {
|
|
56
|
+
"cloudStorageTransferRun": {
|
|
57
|
+
"$ref": "#/$defs/CloudStorageTransferRunFacet"
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"DataFusionRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"runId": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Pipeline run ID assigned by Cloud Data Fusion."
|
|
15
|
+
},
|
|
16
|
+
"runtimeArgs": {
|
|
17
|
+
"type": "object",
|
|
18
|
+
"description": "Runtime arguments provided when starting the pipeline."
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"type": "object"
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"type": "object",
|
|
27
|
+
"properties": {
|
|
28
|
+
"dataFusionRun": {
|
|
29
|
+
"$ref": "#/$defs/DataFusionRunFacet"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|