apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
- airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/_vendor/__init__.py +0 -0
- airflow/providers/google/_vendor/json_merge_patch.py +91 -0
- airflow/providers/google/ads/hooks/ads.py +52 -43
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
- airflow/providers/google/cloud/hooks/bigquery.py +195 -318
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +3 -2
- airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
- airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
- airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
- airflow/providers/google/cloud/hooks/compute.py +7 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
- airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
- airflow/providers/google/cloud/hooks/dataflow.py +87 -242
- airflow/providers/google/cloud/hooks/dataform.py +9 -14
- airflow/providers/google/cloud/hooks/datafusion.py +7 -9
- airflow/providers/google/cloud/hooks/dataplex.py +13 -12
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +76 -74
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/hooks/dlp.py +5 -4
- airflow/providers/google/cloud/hooks/gcs.py +144 -33
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kms.py +3 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
- airflow/providers/google/cloud/hooks/mlengine.py +7 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -2
- airflow/providers/google/cloud/hooks/os_login.py +3 -2
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
- airflow/providers/google/cloud/hooks/spanner.py +75 -10
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
- airflow/providers/google/cloud/hooks/tasks.py +4 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
- airflow/providers/google/cloud/hooks/translate.py +8 -17
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
- airflow/providers/google/cloud/hooks/vision.py +7 -7
- airflow/providers/google/cloud/hooks/workflows.py +4 -3
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -7
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -90
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -89
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
- airflow/providers/google/cloud/links/managed_kafka.py +11 -51
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +141 -40
- airflow/providers/google/cloud/openlineage/mixins.py +14 -13
- airflow/providers/google/cloud/openlineage/utils.py +19 -3
- airflow/providers/google/cloud/operators/alloy_db.py +76 -61
- airflow/providers/google/cloud/operators/bigquery.py +104 -667
- airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
- airflow/providers/google/cloud/operators/bigtable.py +38 -7
- airflow/providers/google/cloud/operators/cloud_base.py +22 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
- airflow/providers/google/cloud/operators/cloud_build.py +80 -36
- airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
- airflow/providers/google/cloud/operators/cloud_run.py +39 -20
- airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
- airflow/providers/google/cloud/operators/compute.py +18 -50
- airflow/providers/google/cloud/operators/datacatalog.py +167 -29
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +19 -7
- airflow/providers/google/cloud/operators/datafusion.py +43 -43
- airflow/providers/google/cloud/operators/dataplex.py +212 -126
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +134 -207
- airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +24 -45
- airflow/providers/google/cloud/operators/functions.py +21 -14
- airflow/providers/google/cloud/operators/gcs.py +15 -12
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
- airflow/providers/google/cloud/operators/natural_language.py +5 -3
- airflow/providers/google/cloud/operators/pubsub.py +69 -21
- airflow/providers/google/cloud/operators/spanner.py +53 -45
- airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
- airflow/providers/google/cloud/operators/stackdriver.py +5 -11
- airflow/providers/google/cloud/operators/tasks.py +6 -15
- airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
- airflow/providers/google/cloud/operators/translate.py +46 -20
- airflow/providers/google/cloud/operators/translate_speech.py +4 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
- airflow/providers/google/cloud/operators/vision.py +7 -5
- airflow/providers/google/cloud/operators/workflows.py +24 -19
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
- airflow/providers/google/cloud/sensors/bigtable.py +14 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
- airflow/providers/google/cloud/sensors/dataflow.py +27 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +7 -5
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +10 -9
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/sensors/gcs.py +22 -21
- airflow/providers/google/cloud/sensors/looker.py +5 -5
- airflow/providers/google/cloud/sensors/pubsub.py +20 -20
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +6 -4
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
- airflow/providers/google/cloud/triggers/dataflow.py +125 -2
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +16 -3
- airflow/providers/google/cloud/triggers/dataproc.py +124 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +17 -20
- airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
- airflow/providers/google/cloud/utils/bigquery.py +5 -7
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/cloud/utils/validators.py +43 -0
- airflow/providers/google/common/auth_backend/google_openid.py +26 -9
- airflow/providers/google/common/consts.py +2 -1
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +40 -43
- airflow/providers/google/common/hooks/operation_helpers.py +78 -0
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +4 -5
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +61 -216
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
- airflow/providers/google/cloud/hooks/automl.py +0 -679
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1360
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -1515
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
- /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -22,21 +22,23 @@ from __future__ import annotations
|
|
|
22
22
|
from collections.abc import Sequence
|
|
23
23
|
from typing import TYPE_CHECKING, Any
|
|
24
24
|
|
|
25
|
+
from google.api_core.exceptions import Conflict
|
|
26
|
+
from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
|
|
27
|
+
|
|
25
28
|
from airflow.configuration import conf
|
|
26
29
|
from airflow.exceptions import AirflowException
|
|
27
|
-
from airflow.models import BaseOperator
|
|
28
30
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
|
|
29
31
|
from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
|
|
30
32
|
from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
|
|
31
33
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
34
|
+
from airflow.providers.google.version_compat import BaseOperator
|
|
32
35
|
from airflow.utils.helpers import merge_dicts
|
|
33
|
-
from google.api_core.exceptions import Conflict
|
|
34
|
-
from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
|
|
35
36
|
|
|
36
37
|
if TYPE_CHECKING:
|
|
37
|
-
from airflow.utils.context import Context
|
|
38
38
|
from google.api_core.retry import Retry
|
|
39
39
|
|
|
40
|
+
from airflow.providers.common.compat.sdk import Context
|
|
41
|
+
|
|
40
42
|
|
|
41
43
|
class BigQueryToGCSOperator(BaseOperator):
|
|
42
44
|
"""
|
|
@@ -213,8 +215,9 @@ class BigQueryToGCSOperator(BaseOperator):
|
|
|
213
215
|
job_id=self.job_id,
|
|
214
216
|
dag_id=self.dag_id,
|
|
215
217
|
task_id=self.task_id,
|
|
216
|
-
logical_date=
|
|
218
|
+
logical_date=None,
|
|
217
219
|
configuration=configuration,
|
|
220
|
+
run_after=hook.get_run_after_or_logical_date(context),
|
|
218
221
|
force_rerun=self.force_rerun,
|
|
219
222
|
)
|
|
220
223
|
|
|
@@ -239,21 +242,19 @@ class BigQueryToGCSOperator(BaseOperator):
|
|
|
239
242
|
f"want to force rerun it consider setting `force_rerun=True`."
|
|
240
243
|
f"Or, if you want to reattach in this scenario add {job.state} to `reattach_states`"
|
|
241
244
|
)
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
raise AirflowException("Job is already in state DONE. Can not reattach to this job.")
|
|
245
|
+
# Job already reached state DONE
|
|
246
|
+
if job.state == "DONE":
|
|
247
|
+
raise AirflowException("Job is already in state DONE. Can not reattach to this job.")
|
|
246
248
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
249
|
+
# We are reattaching to a job
|
|
250
|
+
self.log.info("Reattaching to existing Job in state %s", job.state)
|
|
251
|
+
self._handle_job_error(job)
|
|
250
252
|
|
|
251
253
|
self.job_id = job.job_id
|
|
252
254
|
conf = job.to_api_repr()["configuration"]["extract"]["sourceTable"]
|
|
253
255
|
dataset_id, project_id, table_id = conf["datasetId"], conf["projectId"], conf["tableId"]
|
|
254
256
|
BigQueryTableLink.persist(
|
|
255
257
|
context=context,
|
|
256
|
-
task_instance=self,
|
|
257
258
|
dataset_id=dataset_id,
|
|
258
259
|
project_id=project_id,
|
|
259
260
|
table_id=table_id,
|
|
@@ -21,6 +21,7 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import warnings
|
|
23
23
|
from collections.abc import Sequence
|
|
24
|
+
from functools import cached_property
|
|
24
25
|
from typing import TYPE_CHECKING
|
|
25
26
|
|
|
26
27
|
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
@@ -29,7 +30,7 @@ from airflow.providers.google.cloud.transfers.bigquery_to_sql import BigQueryToS
|
|
|
29
30
|
from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook
|
|
30
31
|
|
|
31
32
|
if TYPE_CHECKING:
|
|
32
|
-
from airflow.
|
|
33
|
+
from airflow.providers.common.compat.sdk import Context
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
class BigQueryToMsSqlOperator(BigQueryToSqlBaseOperator):
|
|
@@ -94,14 +95,17 @@ class BigQueryToMsSqlOperator(BigQueryToSqlBaseOperator):
|
|
|
94
95
|
self.mssql_conn_id = mssql_conn_id
|
|
95
96
|
self.source_project_dataset_table = source_project_dataset_table
|
|
96
97
|
|
|
97
|
-
|
|
98
|
+
@cached_property
|
|
99
|
+
def mssql_hook(self) -> MsSqlHook:
|
|
98
100
|
return MsSqlHook(schema=self.database, mssql_conn_id=self.mssql_conn_id)
|
|
99
101
|
|
|
102
|
+
def get_sql_hook(self) -> MsSqlHook:
|
|
103
|
+
return self.mssql_hook
|
|
104
|
+
|
|
100
105
|
def persist_links(self, context: Context) -> None:
|
|
101
106
|
project_id, dataset_id, table_id = self.source_project_dataset_table.split(".")
|
|
102
107
|
BigQueryTableLink.persist(
|
|
103
108
|
context=context,
|
|
104
|
-
task_instance=self,
|
|
105
109
|
dataset_id=dataset_id,
|
|
106
110
|
project_id=project_id,
|
|
107
111
|
table_id=table_id,
|
|
@@ -21,6 +21,7 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import warnings
|
|
23
23
|
from collections.abc import Sequence
|
|
24
|
+
from functools import cached_property
|
|
24
25
|
|
|
25
26
|
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
26
27
|
from airflow.providers.google.cloud.transfers.bigquery_to_sql import BigQueryToSqlBaseOperator
|
|
@@ -76,5 +77,15 @@ class BigQueryToMySqlOperator(BigQueryToSqlBaseOperator):
|
|
|
76
77
|
)
|
|
77
78
|
self.mysql_conn_id = mysql_conn_id
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
@cached_property
|
|
81
|
+
def mysql_hook(self) -> MySqlHook:
|
|
80
82
|
return MySqlHook(schema=self.database, mysql_conn_id=self.mysql_conn_id)
|
|
83
|
+
|
|
84
|
+
def get_sql_hook(self) -> MySqlHook:
|
|
85
|
+
return self.mysql_hook
|
|
86
|
+
|
|
87
|
+
def execute(self, context):
|
|
88
|
+
# Set source_project_dataset_table here, after hooks are initialized and project_id is available
|
|
89
|
+
project_id = self.bigquery_hook.project_id
|
|
90
|
+
self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
|
|
91
|
+
return super().execute(context)
|
|
@@ -19,15 +19,19 @@
|
|
|
19
19
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
+
from functools import cached_property
|
|
22
23
|
from typing import TYPE_CHECKING
|
|
23
24
|
|
|
25
|
+
from psycopg2.extensions import register_adapter
|
|
26
|
+
from psycopg2.extras import Json
|
|
27
|
+
|
|
24
28
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
|
|
25
29
|
from airflow.providers.google.cloud.transfers.bigquery_to_sql import BigQueryToSqlBaseOperator
|
|
26
30
|
from airflow.providers.google.cloud.utils.bigquery_get_data import bigquery_get_data
|
|
27
31
|
from airflow.providers.postgres.hooks.postgres import PostgresHook
|
|
28
32
|
|
|
29
33
|
if TYPE_CHECKING:
|
|
30
|
-
from airflow.
|
|
34
|
+
from airflow.providers.common.compat.sdk import Context
|
|
31
35
|
|
|
32
36
|
|
|
33
37
|
class BigQueryToPostgresOperator(BigQueryToSqlBaseOperator):
|
|
@@ -75,26 +79,36 @@ class BigQueryToPostgresOperator(BigQueryToSqlBaseOperator):
|
|
|
75
79
|
self.postgres_conn_id = postgres_conn_id
|
|
76
80
|
self.replace_index = replace_index
|
|
77
81
|
|
|
78
|
-
|
|
82
|
+
@cached_property
|
|
83
|
+
def postgres_hook(self) -> PostgresHook:
|
|
84
|
+
register_adapter(list, Json)
|
|
85
|
+
register_adapter(dict, Json)
|
|
79
86
|
return PostgresHook(database=self.database, postgres_conn_id=self.postgres_conn_id)
|
|
80
87
|
|
|
88
|
+
def get_sql_hook(self) -> PostgresHook:
|
|
89
|
+
return self.postgres_hook
|
|
90
|
+
|
|
81
91
|
def execute(self, context: Context) -> None:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
92
|
+
if not self.bigquery_hook:
|
|
93
|
+
self.bigquery_hook = BigQueryHook(
|
|
94
|
+
gcp_conn_id=self.gcp_conn_id,
|
|
95
|
+
location=self.location,
|
|
96
|
+
impersonation_chain=self.impersonation_chain,
|
|
97
|
+
)
|
|
98
|
+
# Set source_project_dataset_table here, after hooks are initialized and project_id is available
|
|
99
|
+
project_id = self.bigquery_hook.project_id
|
|
100
|
+
self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
|
|
101
|
+
|
|
87
102
|
self.persist_links(context)
|
|
88
|
-
sql_hook: PostgresHook = self.get_sql_hook()
|
|
89
103
|
for rows in bigquery_get_data(
|
|
90
104
|
self.log,
|
|
91
105
|
self.dataset_id,
|
|
92
106
|
self.table_id,
|
|
93
|
-
|
|
107
|
+
self.bigquery_hook,
|
|
94
108
|
self.batch_size,
|
|
95
109
|
self.selected_fields,
|
|
96
110
|
):
|
|
97
|
-
|
|
111
|
+
self.postgres_hook.insert_rows(
|
|
98
112
|
table=self.target_table_name,
|
|
99
113
|
rows=rows,
|
|
100
114
|
target_fields=self.selected_fields,
|
|
@@ -21,15 +21,17 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import abc
|
|
23
23
|
from collections.abc import Sequence
|
|
24
|
+
from functools import cached_property
|
|
24
25
|
from typing import TYPE_CHECKING
|
|
25
26
|
|
|
26
|
-
from airflow.models import BaseOperator
|
|
27
27
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
|
|
28
28
|
from airflow.providers.google.cloud.utils.bigquery_get_data import bigquery_get_data
|
|
29
|
+
from airflow.providers.google.version_compat import BaseOperator
|
|
29
30
|
|
|
30
31
|
if TYPE_CHECKING:
|
|
32
|
+
from airflow.providers.common.compat.sdk import Context
|
|
31
33
|
from airflow.providers.common.sql.hooks.sql import DbApiHook
|
|
32
|
-
from airflow.
|
|
34
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
class BigQueryToSqlBaseOperator(BaseOperator):
|
|
@@ -113,19 +115,22 @@ class BigQueryToSqlBaseOperator(BaseOperator):
|
|
|
113
115
|
def persist_links(self, context: Context) -> None:
|
|
114
116
|
"""Persist the connection to the SQL provider."""
|
|
115
117
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
+
@cached_property
|
|
119
|
+
def bigquery_hook(self) -> BigQueryHook:
|
|
120
|
+
return BigQueryHook(
|
|
118
121
|
gcp_conn_id=self.gcp_conn_id,
|
|
119
122
|
location=self.location,
|
|
120
123
|
impersonation_chain=self.impersonation_chain,
|
|
121
124
|
)
|
|
125
|
+
|
|
126
|
+
def execute(self, context: Context) -> None:
|
|
122
127
|
self.persist_links(context)
|
|
123
128
|
sql_hook = self.get_sql_hook()
|
|
124
129
|
for rows in bigquery_get_data(
|
|
125
130
|
self.log,
|
|
126
131
|
self.dataset_id,
|
|
127
132
|
self.table_id,
|
|
128
|
-
|
|
133
|
+
self.bigquery_hook,
|
|
129
134
|
self.batch_size,
|
|
130
135
|
self.selected_fields,
|
|
131
136
|
):
|
|
@@ -136,3 +141,97 @@ class BigQueryToSqlBaseOperator(BaseOperator):
|
|
|
136
141
|
replace=self.replace,
|
|
137
142
|
commit_every=self.batch_size,
|
|
138
143
|
)
|
|
144
|
+
|
|
145
|
+
def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
146
|
+
"""
|
|
147
|
+
Build a generic OpenLineage facet for BigQuery -> SQL transfers.
|
|
148
|
+
|
|
149
|
+
This consolidates nearly identical implementations from child
|
|
150
|
+
operators. Children still provide a concrete SQL hook via
|
|
151
|
+
``get_sql_hook()`` and may override behavior if needed.
|
|
152
|
+
"""
|
|
153
|
+
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
154
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
|
155
|
+
BIGQUERY_NAMESPACE,
|
|
156
|
+
get_facets_from_bq_table_for_given_fields,
|
|
157
|
+
get_identity_column_lineage_facet,
|
|
158
|
+
)
|
|
159
|
+
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
160
|
+
|
|
161
|
+
if not self.bigquery_hook:
|
|
162
|
+
self.bigquery_hook = BigQueryHook(
|
|
163
|
+
gcp_conn_id=self.gcp_conn_id,
|
|
164
|
+
location=self.location,
|
|
165
|
+
impersonation_chain=self.impersonation_chain,
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
try:
|
|
169
|
+
if not getattr(self, "source_project_dataset_table", None):
|
|
170
|
+
project_id = self.bigquery_hook.project_id
|
|
171
|
+
self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
|
|
172
|
+
|
|
173
|
+
table_obj = self.bigquery_hook.get_client().get_table(self.source_project_dataset_table)
|
|
174
|
+
except Exception:
|
|
175
|
+
self.log.debug(
|
|
176
|
+
"OpenLineage: could not fetch BigQuery table %s",
|
|
177
|
+
getattr(self, "source_project_dataset_table", None),
|
|
178
|
+
exc_info=True,
|
|
179
|
+
)
|
|
180
|
+
return OperatorLineage()
|
|
181
|
+
|
|
182
|
+
if self.selected_fields:
|
|
183
|
+
if isinstance(self.selected_fields, str):
|
|
184
|
+
bigquery_field_names = list(self.selected_fields)
|
|
185
|
+
else:
|
|
186
|
+
bigquery_field_names = self.selected_fields
|
|
187
|
+
else:
|
|
188
|
+
bigquery_field_names = [f.name for f in getattr(table_obj, "schema", [])]
|
|
189
|
+
|
|
190
|
+
input_dataset = Dataset(
|
|
191
|
+
namespace=BIGQUERY_NAMESPACE,
|
|
192
|
+
name=self.source_project_dataset_table,
|
|
193
|
+
facets=get_facets_from_bq_table_for_given_fields(table_obj, bigquery_field_names),
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
sql_hook = self.get_sql_hook()
|
|
197
|
+
db_info = sql_hook.get_openlineage_database_info(sql_hook.get_conn())
|
|
198
|
+
if db_info is None:
|
|
199
|
+
self.log.debug("OpenLineage: could not get database info from SQL hook %s", type(sql_hook))
|
|
200
|
+
return OperatorLineage()
|
|
201
|
+
namespace = f"{db_info.scheme}://{db_info.authority}"
|
|
202
|
+
|
|
203
|
+
schema_name = None
|
|
204
|
+
if hasattr(sql_hook, "get_openlineage_default_schema"):
|
|
205
|
+
try:
|
|
206
|
+
schema_name = sql_hook.get_openlineage_default_schema()
|
|
207
|
+
except Exception:
|
|
208
|
+
schema_name = None
|
|
209
|
+
|
|
210
|
+
if self.target_table_name and "." in self.target_table_name:
|
|
211
|
+
schema_part, table_part = self.target_table_name.split(".", 1)
|
|
212
|
+
else:
|
|
213
|
+
schema_part = schema_name or ""
|
|
214
|
+
table_part = self.target_table_name or ""
|
|
215
|
+
|
|
216
|
+
if db_info and db_info.scheme == "mysql":
|
|
217
|
+
output_name = f"{self.database}.{table_part}" if self.database else f"{table_part}"
|
|
218
|
+
else:
|
|
219
|
+
if self.database:
|
|
220
|
+
if schema_part:
|
|
221
|
+
output_name = f"{self.database}.{schema_part}.{table_part}"
|
|
222
|
+
else:
|
|
223
|
+
output_name = f"{self.database}.{table_part}"
|
|
224
|
+
else:
|
|
225
|
+
if schema_part:
|
|
226
|
+
output_name = f"{schema_part}.{table_part}"
|
|
227
|
+
else:
|
|
228
|
+
output_name = f"{table_part}"
|
|
229
|
+
|
|
230
|
+
column_lineage_facet = get_identity_column_lineage_facet(
|
|
231
|
+
bigquery_field_names, input_datasets=[input_dataset]
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
output_facets = column_lineage_facet or {}
|
|
235
|
+
output_dataset = Dataset(namespace=namespace, name=output_name, facets=output_facets)
|
|
236
|
+
|
|
237
|
+
return OperatorLineage(inputs=[input_dataset], outputs=[output_dataset])
|
|
@@ -21,9 +21,9 @@ from collections.abc import Sequence
|
|
|
21
21
|
from tempfile import NamedTemporaryFile
|
|
22
22
|
from typing import TYPE_CHECKING, Any
|
|
23
23
|
|
|
24
|
-
from airflow.models import BaseOperator
|
|
25
24
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
|
26
25
|
from airflow.providers.google.suite.hooks.calendar import GoogleCalendarHook
|
|
26
|
+
from airflow.providers.google.version_compat import BaseOperator
|
|
27
27
|
|
|
28
28
|
if TYPE_CHECKING:
|
|
29
29
|
from datetime import datetime
|
|
@@ -31,12 +31,12 @@ from uuid import UUID
|
|
|
31
31
|
from cassandra.util import Date, OrderedMapSerializedKey, SortedSet, Time
|
|
32
32
|
|
|
33
33
|
from airflow.exceptions import AirflowException
|
|
34
|
-
from airflow.models import BaseOperator
|
|
35
34
|
from airflow.providers.apache.cassandra.hooks.cassandra import CassandraHook
|
|
36
35
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
|
36
|
+
from airflow.providers.google.version_compat import BaseOperator
|
|
37
37
|
|
|
38
38
|
if TYPE_CHECKING:
|
|
39
|
-
from airflow.utils.context import Context
|
|
39
|
+
from airflow.providers.common.compat.sdk import Context
|
|
40
40
|
|
|
41
41
|
NotSetType = NewType("NotSetType", object)
|
|
42
42
|
NOT_SET = NotSetType(object())
|
|
@@ -261,29 +261,27 @@ class CassandraToGCSOperator(BaseOperator):
|
|
|
261
261
|
"""Convert value to BQ type."""
|
|
262
262
|
if not value or isinstance(value, (str, int, float, bool, dict)):
|
|
263
263
|
return value
|
|
264
|
-
|
|
264
|
+
if isinstance(value, bytes):
|
|
265
265
|
return b64encode(value).decode("ascii")
|
|
266
|
-
|
|
266
|
+
if isinstance(value, UUID):
|
|
267
267
|
if self.encode_uuid:
|
|
268
268
|
return b64encode(value.bytes).decode("ascii")
|
|
269
|
-
else:
|
|
270
|
-
return str(value)
|
|
271
|
-
elif isinstance(value, (datetime, Date)):
|
|
272
269
|
return str(value)
|
|
273
|
-
|
|
270
|
+
if isinstance(value, (datetime, Date)):
|
|
271
|
+
return str(value)
|
|
272
|
+
if isinstance(value, Decimal):
|
|
274
273
|
return float(value)
|
|
275
|
-
|
|
274
|
+
if isinstance(value, Time):
|
|
276
275
|
return str(value).split(".")[0]
|
|
277
|
-
|
|
276
|
+
if isinstance(value, (list, SortedSet)):
|
|
278
277
|
return self.convert_array_types(value)
|
|
279
|
-
|
|
278
|
+
if hasattr(value, "_fields"):
|
|
280
279
|
return self.convert_user_type(value)
|
|
281
|
-
|
|
280
|
+
if isinstance(value, tuple):
|
|
282
281
|
return self.convert_tuple_type(value)
|
|
283
|
-
|
|
282
|
+
if isinstance(value, OrderedMapSerializedKey):
|
|
284
283
|
return self.convert_map_type(value)
|
|
285
|
-
|
|
286
|
-
raise AirflowException(f"Unexpected value: {value}")
|
|
284
|
+
raise AirflowException(f"Unexpected value: {value}")
|
|
287
285
|
|
|
288
286
|
def convert_array_types(self, value: list[Any] | SortedSet) -> list[Any]:
|
|
289
287
|
"""Map convert_value over array."""
|
|
@@ -376,19 +374,17 @@ class CassandraToGCSOperator(BaseOperator):
|
|
|
376
374
|
"""Convert type to equivalent BQ type."""
|
|
377
375
|
if cls.is_simple_type(type_):
|
|
378
376
|
return CassandraToGCSOperator.CQL_TYPE_MAP[type_.cassname]
|
|
379
|
-
|
|
377
|
+
if cls.is_record_type(type_):
|
|
380
378
|
return "RECORD"
|
|
381
|
-
|
|
379
|
+
if cls.is_array_type(type_):
|
|
382
380
|
return cls.get_bq_type(type_.subtypes[0])
|
|
383
|
-
|
|
384
|
-
raise AirflowException("Not a supported type_: " + type_.cassname)
|
|
381
|
+
raise AirflowException("Not a supported type_: " + type_.cassname)
|
|
385
382
|
|
|
386
383
|
@classmethod
|
|
387
384
|
def get_bq_mode(cls, type_: Any) -> str:
|
|
388
385
|
"""Convert type to equivalent BQ mode."""
|
|
389
386
|
if cls.is_array_type(type_) or type_.cassname == "MapType":
|
|
390
387
|
return "REPEATED"
|
|
391
|
-
|
|
388
|
+
if cls.is_record_type(type_) or cls.is_simple_type(type_):
|
|
392
389
|
return "NULLABLE"
|
|
393
|
-
|
|
394
|
-
raise AirflowException("Not a supported type_: " + type_.cassname)
|
|
390
|
+
raise AirflowException("Not a supported type_: " + type_.cassname)
|
|
@@ -26,14 +26,14 @@ from enum import Enum
|
|
|
26
26
|
from typing import TYPE_CHECKING, Any
|
|
27
27
|
|
|
28
28
|
from airflow.exceptions import AirflowException
|
|
29
|
-
from airflow.models import BaseOperator
|
|
30
29
|
from airflow.providers.facebook.ads.hooks.ads import FacebookAdsReportingHook
|
|
31
30
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
|
31
|
+
from airflow.providers.google.version_compat import BaseOperator
|
|
32
32
|
|
|
33
33
|
if TYPE_CHECKING:
|
|
34
34
|
from facebook_business.adobjects.adsinsights import AdsInsights
|
|
35
35
|
|
|
36
|
-
from airflow.utils.context import Context
|
|
36
|
+
from airflow.providers.common.compat.sdk import Context
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class FlushAction(Enum):
|
|
@@ -157,8 +157,7 @@ class FacebookAdsReportToGcsOperator(BaseOperator):
|
|
|
157
157
|
def _generate_rows_with_action(self, type_check: bool):
|
|
158
158
|
if type_check and self.upload_as_account:
|
|
159
159
|
return {FlushAction.EXPORT_EVERY_ACCOUNT: []}
|
|
160
|
-
|
|
161
|
-
return {FlushAction.EXPORT_ONCE: []}
|
|
160
|
+
return {FlushAction.EXPORT_ONCE: []}
|
|
162
161
|
|
|
163
162
|
def _prepare_rows_for_upload(
|
|
164
163
|
self,
|
|
@@ -209,7 +208,7 @@ class FacebookAdsReportToGcsOperator(BaseOperator):
|
|
|
209
208
|
|
|
210
209
|
def _flush_rows(self, converted_rows: list[Any] | None, object_name: str):
|
|
211
210
|
if converted_rows:
|
|
212
|
-
headers = converted_rows[0].keys()
|
|
211
|
+
headers = self.fields
|
|
213
212
|
with tempfile.NamedTemporaryFile("w", suffix=".csv") as csvfile:
|
|
214
213
|
writer = csv.DictWriter(csvfile, fieldnames=headers)
|
|
215
214
|
writer.writeheader()
|