apache-airflow-providers-google 11.0.0rc1__py3-none-any.whl → 12.0.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (45)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/assets/gcs.py +1 -7
  3. airflow/providers/google/cloud/hooks/alloy_db.py +289 -0
  4. airflow/providers/google/cloud/hooks/cloud_batch.py +13 -5
  5. airflow/providers/google/cloud/hooks/dataproc.py +7 -3
  6. airflow/providers/google/cloud/hooks/dataproc_metastore.py +41 -22
  7. airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -38
  8. airflow/providers/google/cloud/hooks/translate.py +355 -0
  9. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +147 -0
  10. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +10 -0
  11. airflow/providers/google/cloud/links/alloy_db.py +55 -0
  12. airflow/providers/google/cloud/links/translate.py +98 -0
  13. airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -5
  14. airflow/providers/google/cloud/openlineage/mixins.py +4 -12
  15. airflow/providers/google/cloud/openlineage/utils.py +200 -22
  16. airflow/providers/google/cloud/operators/alloy_db.py +459 -0
  17. airflow/providers/google/cloud/operators/automl.py +55 -44
  18. airflow/providers/google/cloud/operators/bigquery.py +60 -15
  19. airflow/providers/google/cloud/operators/dataproc.py +12 -0
  20. airflow/providers/google/cloud/operators/gcs.py +5 -14
  21. airflow/providers/google/cloud/operators/kubernetes_engine.py +377 -705
  22. airflow/providers/google/cloud/operators/mlengine.py +41 -31
  23. airflow/providers/google/cloud/operators/translate.py +586 -1
  24. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +163 -0
  25. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +5 -0
  26. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  27. airflow/providers/google/cloud/sensors/vertex_ai/__init__.py +16 -0
  28. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +112 -0
  29. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +6 -11
  30. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +3 -0
  31. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +3 -0
  32. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -10
  33. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +3 -15
  34. airflow/providers/google/cloud/transfers/gcs_to_local.py +9 -0
  35. airflow/providers/google/cloud/transfers/local_to_gcs.py +41 -6
  36. airflow/providers/google/cloud/transfers/s3_to_gcs.py +15 -0
  37. airflow/providers/google/get_provider_info.py +30 -18
  38. airflow/providers/google/version_compat.py +36 -0
  39. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/METADATA +20 -22
  40. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/RECORD +42 -37
  41. airflow/providers/google/cloud/hooks/datapipeline.py +0 -71
  42. airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +0 -30
  43. airflow/providers/google/cloud/operators/datapipeline.py +0 -63
  44. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/WHEEL +0 -0
  45. {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -1365,7 +1365,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         try:
             self.log.info("Creating table")
-            table = bq_hook.create_empty_table(
+            self._table = bq_hook.create_empty_table(
                 project_id=self.project_id,
                 dataset_id=self.dataset_id,
                 table_id=self.table_id,
@@ -1382,12 +1382,15 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
             persist_kwargs = {
                 "context": context,
                 "task_instance": self,
-                "project_id": table.to_api_repr()["tableReference"]["projectId"],
-                "dataset_id": table.to_api_repr()["tableReference"]["datasetId"],
-                "table_id": table.to_api_repr()["tableReference"]["tableId"],
+                "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
             }
             self.log.info(
-                "Table %s.%s.%s created successfully", table.project, table.dataset_id, table.table_id
+                "Table %s.%s.%s created successfully",
+                self._table.project,
+                self._table.dataset_id,
+                self._table.table_id,
             )
         except Conflict:
             error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
@@ -1407,6 +1410,24 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         BigQueryTableLink.persist(**persist_kwargs)
 
+    def get_openlineage_facets_on_complete(self, task_instance):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table_info = self._table.to_api_repr()["tableReference"]
+        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=table_id,
+            facets=get_facets_from_bq_table(self._table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
+
 
 class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     """
@@ -1632,15 +1653,15 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
         if self.table_resource:
-            table = bq_hook.create_empty_table(
+            self._table = bq_hook.create_empty_table(
                 table_resource=self.table_resource,
             )
             BigQueryTableLink.persist(
                 context=context,
                 task_instance=self,
-                dataset_id=table.to_api_repr()["tableReference"]["datasetId"],
-                project_id=table.to_api_repr()["tableReference"]["projectId"],
-                table_id=table.to_api_repr()["tableReference"]["tableId"],
+                dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
+                project_id=self._table.to_api_repr()["tableReference"]["projectId"],
+                table_id=self._table.to_api_repr()["tableReference"]["tableId"],
             )
             return
 
@@ -1691,18 +1712,36 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             "encryptionConfiguration": self.encryption_configuration,
         }
 
-        table = bq_hook.create_empty_table(
+        self._table = bq_hook.create_empty_table(
             table_resource=table_resource,
         )
 
         BigQueryTableLink.persist(
             context=context,
             task_instance=self,
-            dataset_id=table.to_api_repr()["tableReference"]["datasetId"],
-            project_id=table.to_api_repr()["tableReference"]["projectId"],
-            table_id=table.to_api_repr()["tableReference"]["tableId"],
+            dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
+            project_id=self._table.to_api_repr()["tableReference"]["projectId"],
+            table_id=self._table.to_api_repr()["tableReference"]["tableId"],
+        )
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table_info = self._table.to_api_repr()["tableReference"]
+        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=table_id,
+            facets=get_facets_from_bq_table(self._table),
         )
 
+        return OperatorLineage(outputs=[output_dataset])
+
 
 class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
     """
@@ -2593,10 +2632,16 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
                 nowait=True,
             )
 
-    @staticmethod
-    def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+    def _handle_job_error(self, job: BigQueryJob | UnknownJob) -> None:
+        self.log.info("Job %s is completed. Checking the job status", self.job_id)
+        # Log any transient errors encountered during the job execution
+        for error in job.errors or []:
+            self.log.error("BigQuery Job Error: %s", error)
         if job.error_result:
             raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")
+        # Check the final state.
+        if job.state != "DONE":
+            raise AirflowException(f"Job failed with state: {job.state}")
 
     def execute(self, context: Any):
         hook = BigQueryHook(
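
`_handle_job_error` is no longer a staticmethod: it now logs every entry in `job.errors` and fails on any terminal state other than DONE, not only when `error_result` is set. A minimal sketch of the new behavior, using a fabricated stand-in object rather than a real `BigQueryJob`:

    from types import SimpleNamespace

    # Stand-in for a finished BigQuery job; the field names (errors,
    # error_result, state) mirror google-cloud-bigquery's job API, but the
    # object and its values are invented for this example.
    job = SimpleNamespace(
        job_id="job_123",
        errors=[{"reason": "rateLimitExceeded", "message": "transient error, retried"}],
        error_result=None,
        state="DONE",
    )

    # With error_result unset and state == "DONE", the new _handle_job_error
    # only logs the transient errors and returns; any other state now raises
    # AirflowException("Job failed with state: ...").
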
airflow/providers/google/cloud/operators/dataproc.py

@@ -54,6 +54,9 @@ from airflow.providers.google.cloud.links.dataproc import (
     DataprocWorkflowLink,
     DataprocWorkflowTemplateLink,
 )
+from airflow.providers.google.cloud.openlineage.utils import (
+    inject_openlineage_properties_into_dataproc_job,
+)
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocBatchTrigger,
@@ -1962,6 +1965,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         polling_interval_seconds: int = 10,
         cancel_on_kill: bool = True,
         wait_timeout: int | None = None,
+        openlineage_inject_parent_job_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_parent_job_info", fallback=False
+        ),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -1983,10 +1989,16 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         self.hook: DataprocHook | None = None
         self.job_id: str | None = None
         self.wait_timeout = wait_timeout
+        self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
 
     def execute(self, context: Context):
         self.log.info("Submitting job")
         self.hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
+        if self.openlineage_inject_parent_job_info:
+            self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
+            self.job = inject_openlineage_properties_into_dataproc_job(
+                job=self.job, context=context, inject_parent_job_info=self.openlineage_inject_parent_job_info
+            )
         job_object = self.hook.submit_job(
             project_id=self.project_id,
             region=self.region,
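
The new flag defaults to the `[openlineage] spark_inject_parent_job_info` Airflow config option (per the `conf.getboolean` fallback above) and can also be set per task. A usage sketch; the project, region, cluster, and job payload below are placeholders:

    from airflow.providers.google.cloud.operators.dataproc import DataprocSubmitJobOperator

    submit_job = DataprocSubmitJobOperator(
        task_id="submit_pyspark",
        project_id="example-project",  # placeholder
        region="europe-west1",  # placeholder
        job={
            "placement": {"cluster_name": "example-cluster"},
            "pyspark_job": {"main_python_file_uri": "gs://example-bucket/job.py"},
        },
        # New in 12.0.0: inject parent-job OpenLineage info into the Spark
        # properties of the job before it is submitted to Dataproc.
        openlineage_inject_parent_job_info=True,
    )
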
airflow/providers/google/cloud/operators/gcs.py

@@ -343,6 +343,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             LifecycleStateChangeDatasetFacet,
             PreviousIdentifier,
         )
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
         from airflow.providers.openlineage.extractors import OperatorLineage
 
         objects = []
@@ -350,12 +351,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             objects = self.objects
         elif self.prefix is not None:
             prefixes = [self.prefix] if isinstance(self.prefix, str) else self.prefix
-            for pref in prefixes:
-                # Use parent if not a file (dot not in name) and not a dir (ends with slash)
-                if "." not in pref.split("/")[-1] and not pref.endswith("/"):
-                    pref = Path(pref).parent.as_posix()
-                pref = "/" if pref in (".", "", "/") else pref.rstrip("/")
-                objects.append(pref)
+            objects = [extract_ds_name_from_gcs_path(pref) for pref in prefixes]
 
         bucket_url = f"gs://{self.bucket_name}"
         input_datasets = [
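
The inlined prefix handling is consolidated into `extract_ds_name_from_gcs_path`. Judging from the removed code, it normalizes a GCS object prefix to a dataset name roughly as sketched below; treat this as an approximation of the helper, not its exact body:

    from pathlib import Path

    def _extract_ds_name_from_gcs_path_sketch(path: str) -> str:
        # Use the parent directory if the last segment is neither a file
        # (no dot in its name) nor an explicit directory (trailing slash).
        if "." not in path.split("/")[-1] and not path.endswith("/"):
            path = Path(path).parent.as_posix()
        # Collapse empty/root-like prefixes to "/"; otherwise drop the trailing slash.
        return "/" if path in (".", "", "/") else path.rstrip("/")

    assert _extract_ds_name_from_gcs_path_sketch("data/2024/part") == "data/2024"
    assert _extract_ds_name_from_gcs_path_sketch("data/2024/") == "data/2024"
    assert _extract_ds_name_from_gcs_path_sketch("file.csv") == "file.csv"
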
@@ -921,20 +917,15 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
     def get_openlineage_facets_on_complete(self, task_instance):
         """Implement on_complete as execute() resolves object prefixes."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        def _parse_prefix(pref):
-            # Use parent if not a file (dot not in name) and not a dir (ends with slash)
-            if "." not in pref.split("/")[-1] and not pref.endswith("/"):
-                pref = Path(pref).parent.as_posix()
-            return "/" if pref in (".", "/", "") else pref.rstrip("/")
-
         input_prefix, output_prefix = "/", "/"
         if self._source_prefix_interp is not None:
-            input_prefix = _parse_prefix(self._source_prefix_interp)
+            input_prefix = extract_ds_name_from_gcs_path(self._source_prefix_interp)
 
         if self._destination_prefix_interp is not None:
-            output_prefix = _parse_prefix(self._destination_prefix_interp)
+            output_prefix = extract_ds_name_from_gcs_path(self._destination_prefix_interp)
 
         return OperatorLineage(
             inputs=[
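
For both GCS operators the extracted names end up as `Dataset` entries whose namespace is the bucket URL (see `bucket_url = f"gs://{self.bucket_name}"` in the `GCSDeleteObjectsOperator` hunk above). A hedged sketch of the resulting lineage shape, with an invented bucket and prefix and assuming the helper maps `data/2024/part` to `data/2024`:

    from airflow.providers.common.compat.openlineage.facet import Dataset
    from airflow.providers.openlineage.extractors import OperatorLineage

    # Invented example: a time-span transform reading and writing under the
    # same bucket; real runs use the operator's source/destination buckets.
    lineage = OperatorLineage(
        inputs=[Dataset(namespace="gs://example-bucket", name="data/2024")],
        outputs=[Dataset(namespace="gs://example-bucket", name="data/2024")],
    )
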