apache-airflow-providers-google 10.18.0rc1__py3-none-any.whl → 10.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +3 -6
- airflow/providers/google/cloud/hooks/automl.py +34 -0
- airflow/providers/google/cloud/hooks/bigquery.py +73 -18
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/automl.py +38 -0
- airflow/providers/google/cloud/links/translate.py +180 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -2
- airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +30 -0
- airflow/providers/google/cloud/openlineage/BigQueryJobRunFacet.json +37 -0
- airflow/providers/google/cloud/openlineage/__init__.py +16 -0
- airflow/providers/google/cloud/openlineage/utils.py +388 -0
- airflow/providers/google/cloud/operators/automl.py +271 -54
- airflow/providers/google/cloud/operators/bigquery.py +126 -99
- airflow/providers/google/cloud/operators/dataproc.py +1 -1
- airflow/providers/google/cloud/operators/kubernetes_engine.py +24 -37
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +5 -0
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -0
- airflow/providers/google/cloud/operators/workflows.py +2 -5
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -4
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -1
- airflow/providers/google/cloud/triggers/bigquery.py +64 -6
- airflow/providers/google/cloud/triggers/dataproc.py +82 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/pubsub.py +8 -11
- airflow/providers/google/cloud/utils/credentials_provider.py +41 -32
- airflow/providers/google/common/hooks/base_google.py +11 -5
- airflow/providers/google/get_provider_info.py +10 -3
- {apache_airflow_providers_google-10.18.0rc1.dist-info → apache_airflow_providers_google-10.19.0.dist-info}/METADATA +19 -19
- {apache_airflow_providers_google-10.18.0rc1.dist-info → apache_airflow_providers_google-10.19.0.dist-info}/RECORD +32 -27
- airflow/providers/google/cloud/utils/openlineage.py +0 -81
- {apache_airflow_providers_google-10.18.0rc1.dist-info → apache_airflow_providers_google-10.19.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.18.0rc1.dist-info → apache_airflow_providers_google-10.19.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py
@@ -25,14 +25,11 @@ from __future__ import annotations
 
 import packaging.version
 
-__all__ = ["__version__"]
+from airflow import __version__ as airflow_version
 
-__version__ = "10.18.0"
+__all__ = ["__version__"]
 
-try:
-    from airflow import __version__ as airflow_version
-except ImportError:
-    from airflow.version import version as airflow_version
+__version__ = "10.19.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"
airflow/providers/google/cloud/hooks/automl.py
@@ -640,3 +640,37 @@ class CloudAutoMLHook(GoogleBaseHook):
             metadata=metadata,
         )
         return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_dataset(
+        self,
+        dataset_id: str,
+        location: str,
+        project_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Dataset:
+        """
+        Retrieve the dataset for the given dataset_id.
+
+        :param dataset_id: ID of dataset to be retrieved.
+        :param location: The location of the project.
+        :param project_id: ID of the Google Cloud project where dataset is located if None then
+            default project_id is used.
+        :param retry: A retry object used to retry requests. If `None` is specified, requests will not be
+            retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
+            `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+
+        :return: `google.cloud.automl_v1beta1.types.dataset.Dataset` instance.
+        """
+        client = self.get_conn()
+        name = f"projects/{project_id}/locations/{location}/datasets/{dataset_id}"
+        return client.get_dataset(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
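For orientation, a minimal sketch of calling the new CloudAutoMLHook.get_dataset from task code; the connection id, project and dataset identifiers below are placeholders rather than values taken from this release:

    from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook

    # Placeholder identifiers; point these at a real legacy AutoML (Translation) dataset.
    hook = CloudAutoMLHook(gcp_conn_id="google_cloud_default")
    dataset = hook.get_dataset(
        dataset_id="TRL1234567890123456789",
        location="us-central1",
        project_id="my-project",
    )
    print(dataset.name)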
airflow/providers/google/cloud/hooks/bigquery.py
@@ -46,7 +46,14 @@ from google.cloud.bigquery import (
     UnknownJob,
 )
 from google.cloud.bigquery.dataset import AccessEntry, Dataset, DatasetListItem, DatasetReference
-from google.cloud.bigquery.table import EncryptionConfiguration, Row, RowIterator, Table, TableReference
+from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
+from google.cloud.bigquery.table import (
+    EncryptionConfiguration,
+    Row,
+    RowIterator,
+    Table,
+    TableReference,
+)
 from google.cloud.exceptions import NotFound
 from googleapiclient.discovery import Resource, build
 from pandas_gbq import read_gbq
@@ -65,12 +72,7 @@ from airflow.providers.google.common.hooks.base_google import (
     GoogleBaseHook,
     get_field,
 )
-
-try:
-    from airflow.utils.hashlib_wrapper import md5
-except ModuleNotFoundError:
-    # Remove when Airflow providers min Airflow version is "2.7.0"
-    from hashlib import md5
+from airflow.utils.hashlib_wrapper import md5
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin
 
@@ -1586,7 +1588,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         job_id: str,
         project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
-    ) ->
+    ) -> BigQueryJob | UnknownJob:
         """Retrieve a BigQuery job.
 
         .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
@@ -1594,8 +1596,8 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         :param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z),
             numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024
             characters.
-        :param project_id: Google Cloud Project where the job is running
-        :param location:
+        :param project_id: Google Cloud Project where the job is running.
+        :param location: Location where the job is running.
         """
         client = self.get_client(project_id=project_id, location=location)
         job = client.get_job(job_id=job_id, project=project_id, location=location)
@@ -2390,6 +2392,48 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
         return project_id, dataset_id, table_id
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_query_results(
+        self,
+        job_id: str,
+        location: str,
+        max_results: int | None = None,
+        selected_fields: list[str] | str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
+        retry: Retry = DEFAULT_RETRY,
+        job_retry: Retry = DEFAULT_JOB_RETRY,
+    ) -> list[dict[str, Any]]:
+        """
+        Get query results given a job_id.
+
+        :param job_id: The ID of the job.
+            The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
+            dashes (-). The maximum length is 1,024 characters.
+        :param location: The location used for the operation.
+        :param selected_fields: List of fields to return (comma-separated). If
+            unspecified, all fields are returned.
+        :param max_results: The maximum number of records (rows) to be fetched
+            from the table.
+        :param project_id: Google Cloud Project where the job ran.
+        :param retry: How to retry the RPC.
+        :param job_retry: How to retry failed jobs.
+
+        :return: List of rows where columns are filtered by selected fields, when given
+
+        :raises: AirflowException
+        """
+        if isinstance(selected_fields, str):
+            selected_fields = selected_fields.split(",")
+        job = self.get_job(job_id=job_id, project_id=project_id, location=location)
+        if not isinstance(job, QueryJob):
+            raise AirflowException(f"Job '{job_id}' is not a query job")
+
+        if job.state != "DONE":
+            raise AirflowException(f"Job '{job_id}' is not in DONE state")
+
+        rows = [dict(row) for row in job.result(max_results=max_results, retry=retry, job_retry=job_retry)]
+        return [{k: row[k] for k in row if k in selected_fields} for row in rows] if selected_fields else rows
+
     @property
     def scopes(self) -> Sequence[str]:
         """
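A minimal sketch of how the new BigQueryHook.get_query_results can be used once a query job has finished; the connection id, job id and field names are placeholders:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")
    # job_id of an already completed query job, e.g. one submitted via hook.insert_job(...)
    rows = hook.get_query_results(
        job_id="airflow_1234_abcdef",
        location="US",
        selected_fields="name,total",
        max_results=10,
    )
    for row in rows:
        print(row["name"], row["total"])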
@@ -2805,15 +2849,16 @@ class BigQueryCursor(BigQueryBaseCursor):
         return -1
 
     def execute(self, operation: str, parameters: dict | None = None) -> None:
-        """Execute a BigQuery query, and
+        """Execute a BigQuery query, and update the BigQueryCursor description.
 
         :param operation: The query to execute.
         :param parameters: Parameters to substitute into the query.
         """
         sql = _bind_parameters(operation, parameters) if parameters else operation
         self.flush_results()
+        job = self._run_query(sql)
+        self.job_id = job.job_id
+        self.location = self.location or job.location
         query_results = self._get_query_result()
         if "schema" in query_results:
             self.description = _format_schema_for_description(query_results["schema"])
@@ -2953,15 +2998,15 @@ class BigQueryCursor(BigQueryBaseCursor):
         self,
         sql,
         location: str | None = None,
-    ) ->
-        """Run job query."""
+    ) -> BigQueryJob:
+        """Run a job query and return the job instance."""
         if not self.project_id:
             raise ValueError("The project_id should be set")
 
         configuration = self._prepare_query_configuration(sql)
         job = self.hook.insert_job(configuration=configuration, project_id=self.project_id, location=location)
 
-        return job
+        return job
 
     def _prepare_query_configuration(
         self,
@@ -3313,7 +3358,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
 
     async def _get_job(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
-    ) ->
+    ) -> BigQueryJob | UnknownJob:
         """
         Get BigQuery job by its ID, project ID and location.
 
@@ -3421,15 +3466,25 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             self.log.error("Failed to cancel BigQuery job %s: %s", job_id, str(e))
             raise
 
-    def get_records(self, query_results: dict[str, Any], as_dict: bool = False) -> list[Any]:
+    # TODO: Convert get_records into an async method
+    def get_records(
+        self,
+        query_results: dict[str, Any],
+        as_dict: bool = False,
+        selected_fields: str | list[str] | None = None,
+    ) -> list[Any]:
         """Convert a response from BigQuery to records.
 
         :param query_results: the results from a SQL query
         :param as_dict: if True returns the result as a list of dictionaries, otherwise as list of lists.
+        :param selected_fields:
         """
+        if isinstance(selected_fields, str):
+            selected_fields = selected_fields.split(",")
         buffer: list[Any] = []
         if rows := query_results.get("rows"):
             fields = query_results["schema"]["fields"]
+            fields = [field for field in fields if not selected_fields or field["name"] in selected_fields]
             fields_names = [field["name"] for field in fields]
             col_types = [field["type"] for field in fields]
             for dict_row in rows:
airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py (new file)
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from google.api_core.client_options import ClientOptions
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.aiplatform_v1 import PredictionServiceClient
+
+from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
+
+if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.cloud.aiplatform_v1.types import PredictResponse
+
+
+class PredictionServiceHook(GoogleBaseHook):
+    """Hook for Google Cloud Vertex AI Prediction API."""
+
+    def get_prediction_service_client(self, region: str | None = None) -> PredictionServiceClient:
+        """
+        Return PredictionServiceClient object.
+
+        :param region: The ID of the Google Cloud region that the service belongs to. Default is None.
+
+        :return: `google.cloud.aiplatform_v1.services.prediction_service.client.PredictionServiceClient` instance.
+        """
+        if region and region != "global":
+            client_options = ClientOptions(api_endpoint=f"{region}-aiplatform.googleapis.com:443")
+        else:
+            client_options = ClientOptions()
+
+        return PredictionServiceClient(
+            credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
+        )
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def predict(
+        self,
+        endpoint_id: str,
+        instances: list[str],
+        location: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        parameters: dict[str, str] | None = None,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> PredictResponse:
+        """
+        Perform an online prediction and returns the prediction result in the response.
+
+        :param endpoint_id: Name of the endpoint_id requested to serve the prediction.
+        :param instances: Required. The instances that are the input to the prediction call. A DeployedModel
+            may have an upper limit on the number of instances it supports per request, and when it is
+            exceeded the prediction call errors in case of AutoML Models, or, in case of customer created
+            Models, the behaviour is as documented by that Model.
+        :param parameters: Additional domain-specific parameters, any string must be up to 25000 characters long.
+        :param project_id: ID of the Google Cloud project where model is located if None then
+            default project_id is used.
+        :param location: The location of the project.
+        :param retry: A retry object used to retry requests. If `None` is specified, requests will not be
+            retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
+            `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+        """
+        client = self.get_prediction_service_client(location)
+        endpoint = f"projects/{project_id}/locations/{location}/endpoints/{endpoint_id}"
+        return client.predict(
+            request={"endpoint": endpoint, "instances": instances, "parameters": parameters},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
airflow/providers/google/cloud/links/automl.py
@@ -21,6 +21,9 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
+from deprecated import deprecated
+
+from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.links.base import BaseGoogleLink
 
 if TYPE_CHECKING:
@@ -44,6 +47,13 @@ AUTOML_MODEL_PREDICT_LINK = (
 )
 
 
+@deprecated(
+    reason=(
+        "Class `AutoMLDatasetLink` has been deprecated and will be removed after 31.12.2024. "
+        "Please use `TranslationLegacyDatasetLink` from `airflow/providers/google/cloud/links/translate.py` instead."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class AutoMLDatasetLink(BaseGoogleLink):
     """Helper class for constructing AutoML Dataset link."""
 
@@ -65,6 +75,13 @@ class AutoMLDatasetLink(BaseGoogleLink):
         )
 
 
+@deprecated(
+    reason=(
+        "Class `AutoMLDatasetListLink` has been deprecated and will be removed after 31.12.2024. "
+        "Please use `TranslationDatasetListLink` from `airflow/providers/google/cloud/links/translate.py` instead."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class AutoMLDatasetListLink(BaseGoogleLink):
     """Helper class for constructing AutoML Dataset List link."""
 
@@ -87,6 +104,13 @@ class AutoMLDatasetListLink(BaseGoogleLink):
         )
 
 
+@deprecated(
+    reason=(
+        "Class `AutoMLModelLink` has been deprecated and will be removed after 31.12.2024. "
+        "Please use `TranslationLegacyModelLink` from `airflow/providers/google/cloud/links/translate.py` instead."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class AutoMLModelLink(BaseGoogleLink):
     """Helper class for constructing AutoML Model link."""
 
@@ -114,6 +138,13 @@ class AutoMLModelLink(BaseGoogleLink):
         )
 
 
+@deprecated(
+    reason=(
+        "Class `AutoMLModelTrainLink` has been deprecated and will be removed after 31.12.2024. "
+        "Please use `TranslationLegacyModelTrainLink` from `airflow/providers/google/cloud/links/translate.py` instead."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class AutoMLModelTrainLink(BaseGoogleLink):
     """Helper class for constructing AutoML Model Train link."""
 
@@ -138,6 +169,13 @@ class AutoMLModelTrainLink(BaseGoogleLink):
         )
 
 
+@deprecated(
+    reason=(
+        "Class `AutoMLModelPredictLink` has been deprecated and will be removed after 31.12.2024. "
+        "Please use `TranslationLegacyModelPredictLink` from `airflow/providers/google/cloud/links/translate.py` instead."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class AutoMLModelPredictLink(BaseGoogleLink):
     """Helper class for constructing AutoML Model Predict link."""
 
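The deprecation messages above point at the new airflow/providers/google/cloud/links/translate.py module shown in the next hunk; migrating an import is a one-line change (AutoMLDatasetLink is used as the example here, the other four classes follow the same pattern):

    # Before: still importable, but using the class now emits AirflowProviderDeprecationWarning.
    from airflow.providers.google.cloud.links.automl import AutoMLDatasetLink

    # After:
    from airflow.providers.google.cloud.links.translate import TranslationLegacyDatasetLink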
airflow/providers/google/cloud/links/translate.py (new file)
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""This module contains Google Translate links."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from airflow.providers.google.cloud.links.base import BASE_LINK, BaseGoogleLink
+
+if TYPE_CHECKING:
+    from airflow.utils.context import Context
+
+
+TRANSLATION_BASE_LINK = BASE_LINK + "/translation"
+TRANSLATION_LEGACY_DATASET_LINK = (
+    TRANSLATION_BASE_LINK + "/locations/{location}/datasets/{dataset_id}/sentences?project={project_id}"
+)
+TRANSLATION_DATASET_LIST_LINK = TRANSLATION_BASE_LINK + "/datasets?project={project_id}"
+TRANSLATION_LEGACY_MODEL_LINK = (
+    TRANSLATION_BASE_LINK
+    + "/locations/{location}/datasets/{dataset_id}/evaluate;modelId={model_id}?project={project_id}"
+)
+TRANSLATION_LEGACY_MODEL_TRAIN_LINK = (
+    TRANSLATION_BASE_LINK + "/locations/{location}/datasets/{dataset_id}/train?project={project_id}"
+)
+TRANSLATION_LEGACY_MODEL_PREDICT_LINK = (
+    TRANSLATION_BASE_LINK
+    + "/locations/{location}/datasets/{dataset_id}/predict;modelId={model_id}?project={project_id}"
+)
+
+
+class TranslationLegacyDatasetLink(BaseGoogleLink):
+    """
+    Helper class for constructing Legacy Translation Dataset link.
+
+    Legacy Datasets are created and managed by AutoML API.
+    """
+
+    name = "Translation Legacy Dataset"
+    key = "translation_legacy_dataset"
+    format_str = TRANSLATION_LEGACY_DATASET_LINK
+
+    @staticmethod
+    def persist(
+        context: Context,
+        task_instance,
+        dataset_id: str,
+        project_id: str,
+    ):
+        task_instance.xcom_push(
+            context,
+            key=TranslationLegacyDatasetLink.key,
+            value={"location": task_instance.location, "dataset_id": dataset_id, "project_id": project_id},
+        )
+
+
+class TranslationDatasetListLink(BaseGoogleLink):
+    """Helper class for constructing Translation Dataset List link."""
+
+    name = "Translation Dataset List"
+    key = "translation_dataset_list"
+    format_str = TRANSLATION_DATASET_LIST_LINK
+
+    @staticmethod
+    def persist(
+        context: Context,
+        task_instance,
+        project_id: str,
+    ):
+        task_instance.xcom_push(
+            context,
+            key=TranslationDatasetListLink.key,
+            value={
+                "project_id": project_id,
+            },
+        )
+
+
+class TranslationLegacyModelLink(BaseGoogleLink):
+    """
+    Helper class for constructing Translation Legacy Model link.
+
+    Legacy Models are created and managed by AutoML API.
+    """
+
+    name = "Translation Legacy Model"
+    key = "translation_legacy_model"
+    format_str = TRANSLATION_LEGACY_MODEL_LINK
+
+    @staticmethod
+    def persist(
+        context: Context,
+        task_instance,
+        dataset_id: str,
+        model_id: str,
+        project_id: str,
+    ):
+        task_instance.xcom_push(
+            context,
+            key=TranslationLegacyModelLink.key,
+            value={
+                "location": task_instance.location,
+                "dataset_id": dataset_id,
+                "model_id": model_id,
+                "project_id": project_id,
+            },
+        )
+
+
+class TranslationLegacyModelTrainLink(BaseGoogleLink):
+    """
+    Helper class for constructing Translation Legacy Model Train link.
+
+    Legacy Models are created and managed by AutoML API.
+    """
+
+    name = "Translation Legacy Model Train"
+    key = "translation_legacy_model_train"
+    format_str = TRANSLATION_LEGACY_MODEL_TRAIN_LINK
+
+    @staticmethod
+    def persist(
+        context: Context,
+        task_instance,
+        project_id: str,
+    ):
+        task_instance.xcom_push(
+            context,
+            key=TranslationLegacyModelTrainLink.key,
+            value={
+                "location": task_instance.location,
+                "dataset_id": task_instance.model["dataset_id"],
+                "project_id": project_id,
+            },
+        )
+
+
+class TranslationLegacyModelPredictLink(BaseGoogleLink):
+    """
+    Helper class for constructing Translation Legacy Model Predict link.
+
+    Legacy Models are created and managed by AutoML API.
+    """
+
+    name = "Translation Legacy Model Predict"
+    key = "translation_legacy_model_predict"
+    format_str = TRANSLATION_LEGACY_MODEL_PREDICT_LINK
+
+    @staticmethod
+    def persist(
+        context: Context,
+        task_instance,
+        model_id: str,
+        project_id: str,
+    ):
+        task_instance.xcom_push(
+            context,
+            key=TranslationLegacyModelPredictLink.key,
+            value={
+                "location": task_instance.location,
+                "dataset_id": task_instance.model.dataset_id,
+                "model_id": model_id,
+                "project_id": project_id,
+            },
+        )
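These link classes plug into operators the same way the old AutoML links did; a rough, hypothetical sketch of an operator exposing the legacy dataset link (the operator itself is illustrative and not part of this release):

    from __future__ import annotations

    from airflow.providers.google.cloud.links.translate import TranslationLegacyDatasetLink
    from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator


    class ExampleTranslationDatasetOperator(GoogleCloudBaseOperator):
        """Hypothetical operator showing how the new link is exposed and persisted."""

        operator_extra_links = (TranslationLegacyDatasetLink(),)

        def __init__(self, *, dataset_id: str, location: str, project_id: str, **kwargs) -> None:
            super().__init__(**kwargs)
            self.dataset_id = dataset_id
            self.location = location  # persist() reads task_instance.location
            self.project_id = project_id

        def execute(self, context):
            # A real operator would call the Translation/AutoML API here, then persist the link.
            TranslationLegacyDatasetLink.persist(
                context=context,
                task_instance=self,
                dataset_id=self.dataset_id,
                project_id=self.project_id,
            )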
airflow/providers/google/cloud/log/stackdriver_task_handler.py
@@ -180,8 +180,7 @@ class StackdriverTaskHandler(logging.Handler):
         """
         message = self.format(record)
         ti = None
-
-        if ctx_indiv_trigger is not None and getattr(record, ctx_indiv_trigger.name, None):
+        if getattr(record, ctx_indiv_trigger.name, None):
             ti = getattr(record, "task_instance", None)  # trigger context
         labels = self._get_labels(ti)
         self._transport.send(record, message, resource=self.resource, labels=labels)
airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json (new file)
@@ -0,0 +1,30 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "BigQueryErrorRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "clientError": {
+              "type": "string"
+            },
+            "parserError": {
+              "type": "string"
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "bigQuery_error": {
+      "$ref": "#/$defs/BigQueryErrorRunFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/BigQueryJobRunFacet.json (new file)
@@ -0,0 +1,37 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "BigQueryJobRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "cached": {
+              "type": "boolean"
+            },
+            "billedBytes": {
+              "type": "int",
+              "example": 321
+            },
+            "properties": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "cached"
+          ]
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "bigQueryJob": {
+      "$ref": "#/$defs/BigQueryJobRunFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/__init__.py (new file)
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.