apache-airflow-providers-google 11.0.0rc1__py3-none-any.whl → 12.0.0rc1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/assets/gcs.py +1 -7
- airflow/providers/google/cloud/hooks/alloy_db.py +289 -0
- airflow/providers/google/cloud/hooks/cloud_batch.py +13 -5
- airflow/providers/google/cloud/hooks/dataproc.py +7 -3
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +41 -22
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -38
- airflow/providers/google/cloud/hooks/translate.py +355 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +147 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +10 -0
- airflow/providers/google/cloud/links/alloy_db.py +55 -0
- airflow/providers/google/cloud/links/translate.py +98 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -5
- airflow/providers/google/cloud/openlineage/mixins.py +4 -12
- airflow/providers/google/cloud/openlineage/utils.py +200 -22
- airflow/providers/google/cloud/operators/alloy_db.py +459 -0
- airflow/providers/google/cloud/operators/automl.py +55 -44
- airflow/providers/google/cloud/operators/bigquery.py +60 -15
- airflow/providers/google/cloud/operators/dataproc.py +12 -0
- airflow/providers/google/cloud/operators/gcs.py +5 -14
- airflow/providers/google/cloud/operators/kubernetes_engine.py +377 -705
- airflow/providers/google/cloud/operators/mlengine.py +41 -31
- airflow/providers/google/cloud/operators/translate.py +586 -1
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +163 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +5 -0
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/__init__.py +16 -0
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +112 -0
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +6 -11
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +3 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +3 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -10
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +3 -15
- airflow/providers/google/cloud/transfers/gcs_to_local.py +9 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +41 -6
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +15 -0
- airflow/providers/google/get_provider_info.py +30 -18
- airflow/providers/google/version_compat.py +36 -0
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/METADATA +16 -18
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/RECORD +42 -37
- airflow/providers/google/cloud/hooks/datapipeline.py +0 -71
- airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +0 -30
- airflow/providers/google/cloud/operators/datapipeline.py +0 -63
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py
@@ -1365,7 +1365,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         try:
             self.log.info("Creating table")
-            table = bq_hook.create_empty_table(
+            self._table = bq_hook.create_empty_table(
                 project_id=self.project_id,
                 dataset_id=self.dataset_id,
                 table_id=self.table_id,
@@ -1382,12 +1382,15 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
             persist_kwargs = {
                 "context": context,
                 "task_instance": self,
-                "project_id": table.to_api_repr()["tableReference"]["projectId"],
-                "dataset_id": table.to_api_repr()["tableReference"]["datasetId"],
-                "table_id": table.to_api_repr()["tableReference"]["tableId"],
+                "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
             }
             self.log.info(
-                "Table %s.%s.%s created successfully", table.project, table.dataset_id, table.table_id
+                "Table %s.%s.%s created successfully",
+                self._table.project,
+                self._table.dataset_id,
+                self._table.table_id,
             )
         except Conflict:
             error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
@@ -1407,6 +1410,24 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         BigQueryTableLink.persist(**persist_kwargs)
 
+    def get_openlineage_facets_on_complete(self, task_instance):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table_info = self._table.to_api_repr()["tableReference"]
+        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=table_id,
+            facets=get_facets_from_bq_table(self._table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
+
 
 class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     """
@@ -1632,15 +1653,15 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
         if self.table_resource:
-            table = bq_hook.create_empty_table(
+            self._table = bq_hook.create_empty_table(
                 table_resource=self.table_resource,
             )
             BigQueryTableLink.persist(
                 context=context,
                 task_instance=self,
-                dataset_id=table.to_api_repr()["tableReference"]["datasetId"],
-                project_id=table.to_api_repr()["tableReference"]["projectId"],
-                table_id=table.to_api_repr()["tableReference"]["tableId"],
+                dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
+                project_id=self._table.to_api_repr()["tableReference"]["projectId"],
+                table_id=self._table.to_api_repr()["tableReference"]["tableId"],
             )
             return
 
@@ -1691,18 +1712,36 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             "encryptionConfiguration": self.encryption_configuration,
         }
 
-        table = bq_hook.create_empty_table(
+        self._table = bq_hook.create_empty_table(
             table_resource=table_resource,
         )
 
         BigQueryTableLink.persist(
             context=context,
             task_instance=self,
-            dataset_id=table.to_api_repr()["tableReference"]["datasetId"],
-            project_id=table.to_api_repr()["tableReference"]["projectId"],
-            table_id=table.to_api_repr()["tableReference"]["tableId"],
+            dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
+            project_id=self._table.to_api_repr()["tableReference"]["projectId"],
+            table_id=self._table.to_api_repr()["tableReference"]["tableId"],
+        )
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table_info = self._table.to_api_repr()["tableReference"]
+        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=table_id,
+            facets=get_facets_from_bq_table(self._table),
        )
 
+        return OperatorLineage(outputs=[output_dataset])
+
 
 class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
     """
@@ -2593,10 +2632,16 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
             nowait=True,
         )
 
-    @staticmethod
-    def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+    def _handle_job_error(self, job: BigQueryJob | UnknownJob) -> None:
+        self.log.info("Job %s is completed. Checking the job status", self.job_id)
+        # Log any transient errors encountered during the job execution
+        for error in job.errors or []:
+            self.log.error("BigQuery Job Error: %s", error)
         if job.error_result:
             raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")
+        # Check the final state.
+        if job.state != "DONE":
+            raise AirflowException(f"Job failed with state: {job.state}")
 
     def execute(self, context: Any):
         hook = BigQueryHook(
airflow/providers/google/cloud/operators/dataproc.py
@@ -54,6 +54,9 @@ from airflow.providers.google.cloud.links.dataproc import (
     DataprocWorkflowLink,
     DataprocWorkflowTemplateLink,
 )
+from airflow.providers.google.cloud.openlineage.utils import (
+    inject_openlineage_properties_into_dataproc_job,
+)
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocBatchTrigger,
@@ -1962,6 +1965,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         polling_interval_seconds: int = 10,
         cancel_on_kill: bool = True,
         wait_timeout: int | None = None,
+        openlineage_inject_parent_job_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_parent_job_info", fallback=False
+        ),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -1983,10 +1989,16 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         self.hook: DataprocHook | None = None
         self.job_id: str | None = None
         self.wait_timeout = wait_timeout
+        self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
 
     def execute(self, context: Context):
         self.log.info("Submitting job")
         self.hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
+        if self.openlineage_inject_parent_job_info:
+            self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
+            self.job = inject_openlineage_properties_into_dataproc_job(
+                job=self.job, context=context, inject_parent_job_info=self.openlineage_inject_parent_job_info
+            )
         job_object = self.hook.submit_job(
             project_id=self.project_id,
             region=self.region,
airflow/providers/google/cloud/operators/gcs.py
@@ -343,6 +343,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             LifecycleStateChangeDatasetFacet,
             PreviousIdentifier,
         )
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
         from airflow.providers.openlineage.extractors import OperatorLineage
 
         objects = []
@@ -350,12 +351,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             objects = self.objects
         elif self.prefix is not None:
             prefixes = [self.prefix] if isinstance(self.prefix, str) else self.prefix
-            for pref in prefixes:
-                # Use parent if not a file (dot not in name) and not a dir (ends with slash)
-                if "." not in pref.split("/")[-1] and not pref.endswith("/"):
-                    pref = Path(pref).parent.as_posix()
-                pref = "/" if pref in (".", "", "/") else pref.rstrip("/")
-                objects.append(pref)
+            objects = [extract_ds_name_from_gcs_path(pref) for pref in prefixes]
 
         bucket_url = f"gs://{self.bucket_name}"
         input_datasets = [
@@ -921,20 +917,15 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
     def get_openlineage_facets_on_complete(self, task_instance):
         """Implement on_complete as execute() resolves object prefixes."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        def _parse_prefix(pref):
-            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
-            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
-                pref = Path(pref).parent.as_posix()
-            return "/" if pref in (".", "/", "") else pref.rstrip("/")
-
         input_prefix, output_prefix = "/", "/"
         if self._source_prefix_interp is not None:
-            input_prefix = _parse_prefix(self._source_prefix_interp)
+            input_prefix = extract_ds_name_from_gcs_path(self._source_prefix_interp)
 
         if self._destination_prefix_interp is not None:
-            output_prefix = _parse_prefix(self._destination_prefix_interp)
+            output_prefix = extract_ds_name_from_gcs_path(self._destination_prefix_interp)
 
         return OperatorLineage(
             inputs=[