apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -5
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +166 -281
- airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
- airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
- airflow/providers/google/cloud/hooks/dataflow.py +71 -94
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +72 -71
- airflow/providers/google/cloud/hooks/gcs.py +111 -14
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/mlengine.py +3 -2
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +73 -8
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +2 -2
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +70 -55
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/operators/bigtable.py +36 -7
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
- airflow/providers/google/cloud/operators/cloud_build.py +75 -32
- airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +23 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
- airflow/providers/google/cloud/operators/compute.py +8 -40
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +41 -20
- airflow/providers/google/cloud/operators/dataplex.py +193 -109
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +78 -35
- airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +16 -7
- airflow/providers/google/cloud/operators/gcs.py +10 -8
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +60 -14
- airflow/providers/google/cloud/operators/spanner.py +25 -12
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +40 -16
- airflow/providers/google/cloud/operators/translate_speech.py +1 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/sensors/dataflow.py +26 -9
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +2 -2
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +4 -4
- airflow/providers/google/cloud/sensors/looker.py +2 -2
- airflow/providers/google/cloud/sensors/pubsub.py +4 -4
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +2 -2
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +122 -52
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -8
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -24,13 +24,17 @@ from attr import define, field
|
|
|
24
24
|
from airflow.providers.google import __version__ as provider_version
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
|
-
from openlineage.client.generated.base import RunFacet
|
|
27
|
+
from openlineage.client.generated.base import JobFacet, RunFacet
|
|
28
28
|
|
|
29
29
|
try:
|
|
30
30
|
try:
|
|
31
31
|
from openlineage.client.generated.base import RunFacet
|
|
32
32
|
except ImportError: # Old OpenLineage client is used
|
|
33
33
|
from openlineage.client.facet import BaseFacet as RunFacet # type: ignore[assignment]
|
|
34
|
+
try:
|
|
35
|
+
from openlineage.client.generated.base import JobFacet
|
|
36
|
+
except ImportError: # Old OpenLineage client is used
|
|
37
|
+
from openlineage.client.facet import BaseFacet as JobFacet # type: ignore[assignment]
|
|
34
38
|
|
|
35
39
|
@define
|
|
36
40
|
class BigQueryJobRunFacet(RunFacet):
|
|
@@ -53,6 +57,100 @@ try:
|
|
|
53
57
|
f"providers-google/{provider_version}/airflow/providers/google/"
|
|
54
58
|
"openlineage/BigQueryJobRunFacet.json"
|
|
55
59
|
)
|
|
60
|
+
|
|
61
|
+
@define
|
|
62
|
+
class CloudStorageTransferJobFacet(JobFacet):
|
|
63
|
+
"""
|
|
64
|
+
Facet representing a Cloud Storage Transfer Service job configuration.
|
|
65
|
+
|
|
66
|
+
:param jobName: Unique name of the transfer job.
|
|
67
|
+
:param projectId: GCP project where the transfer job is defined.
|
|
68
|
+
:param description: User-provided description of the transfer job.
|
|
69
|
+
:param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
|
|
70
|
+
:param sourceBucket: Name of the source bucket (e.g. AWS S3).
|
|
71
|
+
:param sourcePath: Prefix/path inside the source bucket.
|
|
72
|
+
:param targetBucket: Name of the destination bucket (e.g. GCS).
|
|
73
|
+
:param targetPath: Prefix/path inside the destination bucket.
|
|
74
|
+
:param objectConditions: Object selection rules (e.g. include/exclude prefixes).
|
|
75
|
+
:param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
|
|
76
|
+
from the source after transfer.
|
|
77
|
+
:param schedule: Schedule for the transfer job (if recurring).
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
jobName: str | None = field(default=None)
|
|
81
|
+
projectId: str | None = field(default=None)
|
|
82
|
+
description: str | None = field(default=None)
|
|
83
|
+
status: str | None = field(default=None)
|
|
84
|
+
sourceBucket: str | None = field(default=None)
|
|
85
|
+
sourcePath: str | None = field(default=None)
|
|
86
|
+
targetBucket: str | None = field(default=None)
|
|
87
|
+
targetPath: str | None = field(default=None)
|
|
88
|
+
objectConditions: dict | None = field(default=None)
|
|
89
|
+
transferOptions: dict | None = field(default=None)
|
|
90
|
+
schedule: dict | None = field(default=None)
|
|
91
|
+
|
|
92
|
+
@staticmethod
|
|
93
|
+
def _get_schema() -> str:
|
|
94
|
+
return (
|
|
95
|
+
"https://raw.githubusercontent.com/apache/airflow/"
|
|
96
|
+
f"providers-google/{provider_version}/airflow/providers/google/"
|
|
97
|
+
"openlineage/CloudStorageTransferJobFacet.json"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
@define
|
|
101
|
+
class CloudStorageTransferRunFacet(RunFacet):
|
|
102
|
+
"""
|
|
103
|
+
Facet representing a Cloud Storage Transfer Service job execution run.
|
|
104
|
+
|
|
105
|
+
:param jobName: Name of the transfer job being executed.
|
|
106
|
+
:param operationName: Name of the specific transfer operation instance.
|
|
107
|
+
:param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
|
|
108
|
+
:param startTime: Time when the transfer job execution started (ISO 8601 format).
|
|
109
|
+
:param endTime: Time when the transfer job execution finished (ISO 8601 format).
|
|
110
|
+
:param wait: Whether the operator waits for the job to complete before finishing.
|
|
111
|
+
:param timeout: Timeout (in seconds) for the transfer run to complete.
|
|
112
|
+
:param deferrable: Whether the operator defers execution until job completion.
|
|
113
|
+
:param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
jobName: str | None = field(default=None)
|
|
117
|
+
operationName: str | None = field(default=None)
|
|
118
|
+
status: str | None = field(default=None)
|
|
119
|
+
startTime: str | None = field(default=None)
|
|
120
|
+
endTime: str | None = field(default=None)
|
|
121
|
+
wait: bool = field(default=True)
|
|
122
|
+
timeout: float | None = field(default=None)
|
|
123
|
+
deferrable: bool = field(default=False)
|
|
124
|
+
deleteJobAfterCompletion: bool = field(default=False)
|
|
125
|
+
|
|
126
|
+
@staticmethod
|
|
127
|
+
def _get_schema() -> str:
|
|
128
|
+
return (
|
|
129
|
+
"https://raw.githubusercontent.com/apache/airflow/"
|
|
130
|
+
f"providers-google/{provider_version}/airflow/providers/google/"
|
|
131
|
+
"openlineage/CloudStorageTransferRunFacet.json"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
@define
|
|
135
|
+
class DataFusionRunFacet(RunFacet):
|
|
136
|
+
"""
|
|
137
|
+
Facet that represents relevant details of a Cloud Data Fusion pipeline run.
|
|
138
|
+
|
|
139
|
+
:param runId: The pipeline execution id.
|
|
140
|
+
:param runtimeArgs: Runtime arguments passed to the pipeline.
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
runId: str | None = field(default=None)
|
|
144
|
+
runtimeArgs: dict[str, str] | None = field(default=None)
|
|
145
|
+
|
|
146
|
+
@staticmethod
|
|
147
|
+
def _get_schema() -> str:
|
|
148
|
+
return (
|
|
149
|
+
"https://raw.githubusercontent.com/apache/airflow/"
|
|
150
|
+
f"providers-google/{provider_version}/airflow/providers/google/"
|
|
151
|
+
"openlineage/DataFusionRunFacet.json"
|
|
152
|
+
)
|
|
153
|
+
|
|
56
154
|
except ImportError: # OpenLineage is not available
|
|
57
155
|
|
|
58
156
|
def create_no_op(*_, **__) -> None:
|
|
@@ -65,3 +163,6 @@ except ImportError: # OpenLineage is not available
|
|
|
65
163
|
return None
|
|
66
164
|
|
|
67
165
|
BigQueryJobRunFacet = create_no_op # type: ignore[misc, assignment]
|
|
166
|
+
CloudStorageTransferJobFacet = create_no_op # type: ignore[misc, assignment]
|
|
167
|
+
CloudStorageTransferRunFacet = create_no_op # type: ignore[misc, assignment]
|
|
168
|
+
DataFusionRunFacet = create_no_op # type: ignore[misc, assignment]
|
|
@@ -80,7 +80,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
|
|
|
80
80
|
from airflow.providers.openlineage.sqlparser import SQLParser
|
|
81
81
|
|
|
82
82
|
if not self.job_id:
|
|
83
|
-
self.log.warning("No BigQuery job_id was found by OpenLineage.")
|
|
83
|
+
self.log.warning("No BigQuery job_id was found by OpenLineage.")
|
|
84
84
|
return OperatorLineage()
|
|
85
85
|
|
|
86
86
|
if not self.hook:
|
|
@@ -92,14 +92,16 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
|
|
|
92
92
|
impersonation_chain=self.impersonation_chain,
|
|
93
93
|
)
|
|
94
94
|
|
|
95
|
-
self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)
|
|
95
|
+
self.log.debug("Extracting data from bigquery job: `%s`", self.job_id)
|
|
96
96
|
inputs, outputs = [], []
|
|
97
97
|
run_facets: dict[str, RunFacet] = {
|
|
98
98
|
"externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
|
|
99
99
|
}
|
|
100
|
-
self._client = self.hook.get_client(
|
|
100
|
+
self._client = self.hook.get_client(
|
|
101
|
+
project_id=self.project_id or self.hook.project_id, location=self.location
|
|
102
|
+
)
|
|
101
103
|
try:
|
|
102
|
-
job_properties = self._client.get_job(job_id=self.job_id)._properties
|
|
104
|
+
job_properties = self._client.get_job(job_id=self.job_id)._properties
|
|
103
105
|
|
|
104
106
|
if get_from_nullable_chain(job_properties, ["status", "state"]) != "DONE":
|
|
105
107
|
raise ValueError(f"Trying to extract data from running bigquery job: `{self.job_id}`")
|
|
@@ -107,11 +109,11 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
|
|
|
107
109
|
run_facets["bigQueryJob"] = self._get_bigquery_job_run_facet(job_properties)
|
|
108
110
|
|
|
109
111
|
if get_from_nullable_chain(job_properties, ["statistics", "numChildJobs"]):
|
|
110
|
-
self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")
|
|
112
|
+
self.log.debug("Found SCRIPT job. Extracting lineage from child jobs instead.")
|
|
111
113
|
# SCRIPT job type has no input / output information but spawns child jobs that have one
|
|
112
114
|
# https://cloud.google.com/bigquery/docs/information-schema-jobs#multi-statement_query_job
|
|
113
115
|
for child_job_id in self._client.list_jobs(parent_job=self.job_id):
|
|
114
|
-
child_job_properties = self._client.get_job(job_id=child_job_id)._properties
|
|
116
|
+
child_job_properties = self._client.get_job(job_id=child_job_id)._properties
|
|
115
117
|
child_inputs, child_outputs = self._get_inputs_and_outputs(child_job_properties)
|
|
116
118
|
inputs.extend(child_inputs)
|
|
117
119
|
outputs.extend(child_outputs)
|
|
@@ -119,7 +121,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
|
|
|
119
121
|
inputs, outputs = self._get_inputs_and_outputs(job_properties)
|
|
120
122
|
|
|
121
123
|
except Exception as e:
|
|
122
|
-
self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)
|
|
124
|
+
self.log.warning("Cannot retrieve job details from BigQuery.Client. %s", e, exc_info=True)
|
|
123
125
|
exception_msg = traceback.format_exc()
|
|
124
126
|
run_facets.update(
|
|
125
127
|
{
|
|
@@ -173,7 +175,7 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
|
|
|
173
175
|
if (
|
|
174
176
|
single_output.facets
|
|
175
177
|
and final_outputs[key].facets
|
|
176
|
-
and "columnLineage" in single_output.facets
|
|
178
|
+
and "columnLineage" in single_output.facets
|
|
177
179
|
and "columnLineage" in final_outputs[key].facets # type: ignore
|
|
178
180
|
):
|
|
179
181
|
single_output.facets["columnLineage"] = merge_column_lineage_facets(
|
|
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
|
|
|
49
49
|
from google.cloud.bigquery.table import Table
|
|
50
50
|
|
|
51
51
|
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
52
|
-
from airflow.utils.context import Context
|
|
52
|
+
from airflow.providers.common.compat.sdk import Context
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
log = logging.getLogger(__name__)
|
|
@@ -214,7 +214,20 @@ def extract_ds_name_from_gcs_path(path: str) -> str:
|
|
|
214
214
|
|
|
215
215
|
def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
|
|
216
216
|
"""Get facets from BigQuery table object."""
|
|
217
|
+
return get_facets_from_bq_table_for_given_fields(table, selected_fields=None)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def get_facets_from_bq_table_for_given_fields(
|
|
221
|
+
table: Table, selected_fields: list[str] | None
|
|
222
|
+
) -> dict[str, DatasetFacet]:
|
|
223
|
+
"""
|
|
224
|
+
Get facets from BigQuery table object for selected fields only.
|
|
225
|
+
|
|
226
|
+
If selected_fields is None, include all fields.
|
|
227
|
+
"""
|
|
217
228
|
facets: dict[str, DatasetFacet] = {}
|
|
229
|
+
selected_fields_set = set(selected_fields) if selected_fields else None
|
|
230
|
+
|
|
218
231
|
if table.schema:
|
|
219
232
|
facets["schema"] = SchemaDatasetFacet(
|
|
220
233
|
fields=[
|
|
@@ -222,6 +235,7 @@ def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
|
|
|
222
235
|
name=schema_field.name, type=schema_field.field_type, description=schema_field.description
|
|
223
236
|
)
|
|
224
237
|
for schema_field in table.schema
|
|
238
|
+
if selected_fields_set is None or schema_field.name in selected_fields_set
|
|
225
239
|
]
|
|
226
240
|
)
|
|
227
241
|
if table.description:
|
|
@@ -21,7 +21,7 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
from collections.abc import Sequence
|
|
23
23
|
from functools import cached_property
|
|
24
|
-
from typing import TYPE_CHECKING
|
|
24
|
+
from typing import TYPE_CHECKING, Any
|
|
25
25
|
|
|
26
26
|
from google.api_core.exceptions import NotFound
|
|
27
27
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
@@ -42,7 +42,7 @@ if TYPE_CHECKING:
|
|
|
42
42
|
from google.api_core.retry import Retry
|
|
43
43
|
from google.protobuf.field_mask_pb2 import FieldMask
|
|
44
44
|
|
|
45
|
-
from airflow.utils.context import Context
|
|
45
|
+
from airflow.providers.common.compat.sdk import Context
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
class AlloyDBBaseOperator(GoogleCloudBaseOperator):
|
|
@@ -228,15 +228,16 @@ class AlloyDBCreateClusterOperator(AlloyDBWriteBaseOperator):
|
|
|
228
228
|
return result
|
|
229
229
|
return None
|
|
230
230
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
)
|
|
231
|
+
@property
|
|
232
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
233
|
+
return {
|
|
234
|
+
"location_id": self.location,
|
|
235
|
+
"cluster_id": self.cluster_id,
|
|
236
|
+
"project_id": self.project_id,
|
|
237
|
+
}
|
|
239
238
|
|
|
239
|
+
def execute(self, context: Context) -> dict | None:
|
|
240
|
+
AlloyDBClusterLink.persist(context=context)
|
|
240
241
|
if cluster := self._get_cluster():
|
|
241
242
|
return cluster
|
|
242
243
|
|
|
@@ -334,14 +335,16 @@ class AlloyDBUpdateClusterOperator(AlloyDBWriteBaseOperator):
|
|
|
334
335
|
self.update_mask = update_mask
|
|
335
336
|
self.allow_missing = allow_missing
|
|
336
337
|
|
|
338
|
+
@property
|
|
339
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
340
|
+
return {
|
|
341
|
+
"location_id": self.location,
|
|
342
|
+
"cluster_id": self.cluster_id,
|
|
343
|
+
"project_id": self.project_id,
|
|
344
|
+
}
|
|
345
|
+
|
|
337
346
|
def execute(self, context: Context) -> dict | None:
|
|
338
|
-
AlloyDBClusterLink.persist(
|
|
339
|
-
context=context,
|
|
340
|
-
task_instance=self,
|
|
341
|
-
location_id=self.location,
|
|
342
|
-
cluster_id=self.cluster_id,
|
|
343
|
-
project_id=self.project_id,
|
|
344
|
-
)
|
|
347
|
+
AlloyDBClusterLink.persist(context=context)
|
|
345
348
|
if self.validate_request:
|
|
346
349
|
self.log.info("Validating an Update AlloyDB cluster request.")
|
|
347
350
|
else:
|
|
@@ -545,14 +548,16 @@ class AlloyDBCreateInstanceOperator(AlloyDBWriteBaseOperator):
|
|
|
545
548
|
return result
|
|
546
549
|
return None
|
|
547
550
|
|
|
551
|
+
@property
|
|
552
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
553
|
+
return {
|
|
554
|
+
"location_id": self.location,
|
|
555
|
+
"cluster_id": self.cluster_id,
|
|
556
|
+
"project_id": self.project_id,
|
|
557
|
+
}
|
|
558
|
+
|
|
548
559
|
def execute(self, context: Context) -> dict | None:
|
|
549
|
-
AlloyDBClusterLink.persist(
|
|
550
|
-
context=context,
|
|
551
|
-
task_instance=self,
|
|
552
|
-
location_id=self.location,
|
|
553
|
-
cluster_id=self.cluster_id,
|
|
554
|
-
project_id=self.project_id,
|
|
555
|
-
)
|
|
560
|
+
AlloyDBClusterLink.persist(context=context)
|
|
556
561
|
if instance := self._get_instance():
|
|
557
562
|
return instance
|
|
558
563
|
|
|
@@ -654,14 +659,16 @@ class AlloyDBUpdateInstanceOperator(AlloyDBWriteBaseOperator):
|
|
|
654
659
|
self.update_mask = update_mask
|
|
655
660
|
self.allow_missing = allow_missing
|
|
656
661
|
|
|
662
|
+
@property
|
|
663
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
664
|
+
return {
|
|
665
|
+
"location_id": self.location,
|
|
666
|
+
"cluster_id": self.cluster_id,
|
|
667
|
+
"project_id": self.project_id,
|
|
668
|
+
}
|
|
669
|
+
|
|
657
670
|
def execute(self, context: Context) -> dict | None:
|
|
658
|
-
AlloyDBClusterLink.persist(
|
|
659
|
-
context=context,
|
|
660
|
-
task_instance=self,
|
|
661
|
-
location_id=self.location,
|
|
662
|
-
cluster_id=self.cluster_id,
|
|
663
|
-
project_id=self.project_id,
|
|
664
|
-
)
|
|
671
|
+
AlloyDBClusterLink.persist(context=context)
|
|
665
672
|
if self.validate_request:
|
|
666
673
|
self.log.info("Validating an Update AlloyDB instance request.")
|
|
667
674
|
else:
|
|
@@ -861,14 +868,16 @@ class AlloyDBCreateUserOperator(AlloyDBWriteBaseOperator):
|
|
|
861
868
|
return result
|
|
862
869
|
return None
|
|
863
870
|
|
|
871
|
+
@property
|
|
872
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
873
|
+
return {
|
|
874
|
+
"location_id": self.location,
|
|
875
|
+
"cluster_id": self.cluster_id,
|
|
876
|
+
"project_id": self.project_id,
|
|
877
|
+
}
|
|
878
|
+
|
|
864
879
|
def execute(self, context: Context) -> dict | None:
|
|
865
|
-
AlloyDBUsersLink.persist(
|
|
866
|
-
context=context,
|
|
867
|
-
task_instance=self,
|
|
868
|
-
location_id=self.location,
|
|
869
|
-
cluster_id=self.cluster_id,
|
|
870
|
-
project_id=self.project_id,
|
|
871
|
-
)
|
|
880
|
+
AlloyDBUsersLink.persist(context=context)
|
|
872
881
|
if (_user := self._get_user()) is not None:
|
|
873
882
|
return _user
|
|
874
883
|
|
|
@@ -968,14 +977,16 @@ class AlloyDBUpdateUserOperator(AlloyDBWriteBaseOperator):
|
|
|
968
977
|
self.update_mask = update_mask
|
|
969
978
|
self.allow_missing = allow_missing
|
|
970
979
|
|
|
980
|
+
@property
|
|
981
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
982
|
+
return {
|
|
983
|
+
"location_id": self.location,
|
|
984
|
+
"cluster_id": self.cluster_id,
|
|
985
|
+
"project_id": self.project_id,
|
|
986
|
+
}
|
|
987
|
+
|
|
971
988
|
def execute(self, context: Context) -> dict | None:
|
|
972
|
-
AlloyDBUsersLink.persist(
|
|
973
|
-
context=context,
|
|
974
|
-
task_instance=self,
|
|
975
|
-
location_id=self.location,
|
|
976
|
-
cluster_id=self.cluster_id,
|
|
977
|
-
project_id=self.project_id,
|
|
978
|
-
)
|
|
989
|
+
AlloyDBUsersLink.persist(context=context)
|
|
979
990
|
if self.validate_request:
|
|
980
991
|
self.log.info("Validating an Update AlloyDB user request.")
|
|
981
992
|
else:
|
|
@@ -1159,12 +1170,14 @@ class AlloyDBCreateBackupOperator(AlloyDBWriteBaseOperator):
|
|
|
1159
1170
|
return result
|
|
1160
1171
|
return None
|
|
1161
1172
|
|
|
1173
|
+
@property
|
|
1174
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
1175
|
+
return {
|
|
1176
|
+
"project_id": self.project_id,
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1162
1179
|
def execute(self, context: Context) -> dict | None:
|
|
1163
|
-
AlloyDBBackupsLink.persist(
|
|
1164
|
-
context=context,
|
|
1165
|
-
task_instance=self,
|
|
1166
|
-
project_id=self.project_id,
|
|
1167
|
-
)
|
|
1180
|
+
AlloyDBBackupsLink.persist(context=context)
|
|
1168
1181
|
if backup := self._get_backup():
|
|
1169
1182
|
return backup
|
|
1170
1183
|
|
|
@@ -1259,12 +1272,14 @@ class AlloyDBUpdateBackupOperator(AlloyDBWriteBaseOperator):
|
|
|
1259
1272
|
self.update_mask = update_mask
|
|
1260
1273
|
self.allow_missing = allow_missing
|
|
1261
1274
|
|
|
1275
|
+
@property
|
|
1276
|
+
def extra_links_params(self) -> dict[str, Any]:
|
|
1277
|
+
return {
|
|
1278
|
+
"project_id": self.project_id,
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1262
1281
|
def execute(self, context: Context) -> dict | None:
|
|
1263
|
-
AlloyDBBackupsLink.persist(
|
|
1264
|
-
context=context,
|
|
1265
|
-
task_instance=self,
|
|
1266
|
-
project_id=self.project_id,
|
|
1267
|
-
)
|
|
1282
|
+
AlloyDBBackupsLink.persist(context=context)
|
|
1268
1283
|
if self.validate_request:
|
|
1269
1284
|
self.log.info("Validating an Update AlloyDB backup request.")
|
|
1270
1285
|
else:
|