apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the changes exactly as they appear in the public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
airflow/providers/google/cloud/transfers/local_to_gcs.py

```diff
@@ -24,11 +24,11 @@ from collections.abc import Sequence
 from glob import glob
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 class LocalFilesystemToGCSOperator(BaseOperator):
```
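This and the following transfer modules all make the same import swap: `BaseOperator` now comes from `airflow.providers.google.version_compat` and `Context` from `airflow.providers.common.compat.sdk`, so a single import site resolves correctly on both Airflow 2 and Airflow 3. A version_compat-style shim typically looks like the sketch below; this is an illustration of the pattern, not the provider's shipped module.

```python
# version_compat.py -- illustrative sketch of a provider compatibility shim.
# The exported names mirror the ones used in the diffs; the body is an
# assumption, not the packaged implementation.
from __future__ import annotations

from packaging.version import Version


def get_base_airflow_version_tuple() -> tuple[int, int, int]:
    # Parse the installed Airflow version so callers can branch on it.
    from airflow import __version__

    v = Version(Version(__version__).base_version)
    return v.major, v.minor, v.micro


AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)

if AIRFLOW_V_3_0_PLUS:
    # Airflow 3: operators come from the task SDK.
    from airflow.sdk import BaseOperator
else:
    # Airflow 2: the classic import path.
    from airflow.models import BaseOperator  # type: ignore[no-redef]

__all__ = ["AIRFLOW_V_3_0_PLUS", "BaseOperator"]
```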
airflow/providers/google/cloud/transfers/oracle_to_gcs.py

```diff
@@ -21,12 +21,17 @@ import base64
 import calendar
 from datetime import date, datetime, timedelta
 from decimal import Decimal
+from functools import cached_property
+from typing import TYPE_CHECKING
 
 import oracledb
 
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.oracle.hooks.oracle import OracleHook
 
+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+
 
 class OracleToGCSOperator(BaseSQLToGCSOperator):
     """
@@ -46,15 +51,15 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
     ui_color = "#a0e08c"
 
     type_map = {
-        oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL",
-        oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL",
-        oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER",
-        oracledb.DB_TYPE_BOOLEAN: "BOOLEAN",
-        oracledb.DB_TYPE_DATE: "TIMESTAMP",
-        oracledb.DB_TYPE_NUMBER: "NUMERIC",
-        oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP",
-        oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP",
-        oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP",
+        oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL",
+        oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL",
+        oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER",
+        oracledb.DB_TYPE_BOOLEAN: "BOOLEAN",
+        oracledb.DB_TYPE_DATE: "TIMESTAMP",
+        oracledb.DB_TYPE_NUMBER: "NUMERIC",
+        oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP",
+        oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP",
+        oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP",
     }
 
     def __init__(self, *, oracle_conn_id="oracle_default", ensure_utc=False, **kwargs):
@@ -62,10 +67,13 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
         self.ensure_utc = ensure_utc
         self.oracle_conn_id = oracle_conn_id
 
+    @cached_property
+    def db_hook(self) -> OracleHook:
+        return OracleHook(oracle_conn_id=self.oracle_conn_id)
+
     def query(self):
         """Query Oracle and returns a cursor to the results."""
-        oracle = OracleHook(oracle_conn_id=self.oracle_conn_id)
-        conn = oracle.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         if self.ensure_utc:
             # Ensure TIMESTAMP results are in UTC
@@ -121,3 +129,20 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
         else:
             value = base64.standard_b64encode(value).decode("ascii")
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.oracle_conn_id,
+            database=self.db_hook.service_name or self.db_hook.sid,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
```
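Two details in the OracleToGCSOperator hunks above are worth calling out: the hook moves behind `functools.cached_property` so `query()` and the new `get_openlineage_facets_on_start()` share a single `OracleHook`, and the OpenLineage imports stay inside the method so the module still imports when the openlineage provider is absent. The cached-hook half of that, reduced to a sketch (`DummyHook` is a hypothetical stand-in):

```python
# Sketch of the cached-hook pattern used above; DummyHook stands in for
# OracleHook and is hypothetical.
from functools import cached_property


class DummyHook:
    def __init__(self, conn_id: str) -> None:
        self.conn_id = conn_id


class MyTransferOperator:
    def __init__(self, conn_id: str = "oracle_default") -> None:
        self.conn_id = conn_id

    @cached_property
    def db_hook(self) -> DummyHook:
        # Built once on first access, then reused by every method, so the
        # query path and lineage collection see the same hook instance.
        return DummyHook(conn_id=self.conn_id)


op = MyTransferOperator()
assert op.db_hook is op.db_hook  # same cached instance on every access
```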
airflow/providers/google/cloud/transfers/postgres_to_gcs.py

```diff
@@ -31,7 +31,7 @@ import pendulum
 from slugify import slugify
 
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
-from airflow.providers.postgres.hooks.postgres import PostgresHook
+from airflow.providers.postgres.hooks.postgres import USE_PSYCOPG3, PostgresHook
 
 if TYPE_CHECKING:
     from airflow.providers.openlineage.extractors import OperatorLineage
@@ -52,9 +52,20 @@ class _PostgresServerSideCursorDecorator:
         self.initialized = False
 
     def __iter__(self):
+        """Make the cursor iterable."""
         return self
 
     def __next__(self):
+        """Fetch next row from the cursor."""
+        if USE_PSYCOPG3:
+            if self.rows:
+                return self.rows.pop()
+            self.initialized = True
+            row = self.cursor.fetchone()
+            if row is None:
+                raise StopIteration
+            return row
+        # psycopg2
         if self.rows:
             return self.rows.pop()
         self.initialized = True
@@ -141,13 +152,29 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         return PostgresHook(postgres_conn_id=self.postgres_conn_id)
 
     def query(self):
-        """Query Postgres and returns a cursor to the results."""
+        """Execute the query and return a cursor."""
         conn = self.db_hook.get_conn()
-        cursor = conn.cursor(name=self._unique_name())
-        cursor.execute(self.sql, self.parameters)
-        if self.use_server_side_cursor:
-            cursor.itersize = self.cursor_itersize
-            return _PostgresServerSideCursorDecorator(cursor)
+
+        if USE_PSYCOPG3:
+            from psycopg.types.json import register_default_adapters
+
+            # Register JSON handlers for this connection if not already done
+            register_default_adapters(conn)
+
+            if self.use_server_side_cursor:
+                cursor_name = f"airflow_{self.task_id.replace('-', '_')}_{uuid.uuid4().hex}"[:63]
+                cursor = conn.cursor(name=cursor_name)
+                cursor.itersize = self.cursor_itersize
+                cursor.execute(self.sql, self.parameters)
+                return _PostgresServerSideCursorDecorator(cursor)
+            cursor = conn.cursor()
+            cursor.execute(self.sql, self.parameters)
+        else:
+            cursor = conn.cursor(name=self._unique_name())
+            cursor.execute(self.sql, self.parameters)
+            if self.use_server_side_cursor:
+                cursor.itersize = self.cursor_itersize
+                return _PostgresServerSideCursorDecorator(cursor)
         return cursor
 
     def field_to_bigquery(self, field) -> dict[str, str]:
@@ -182,8 +209,14 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
                 hours=formatted_time.tm_hour, minutes=formatted_time.tm_min, seconds=formatted_time.tm_sec
             )
             return str(time_delta)
-        if stringify_dict and isinstance(value, dict):
-            return json.dumps(value)
+        if stringify_dict:
+            if USE_PSYCOPG3:
+                from psycopg.types.json import Json
+
+                if isinstance(value, (dict, Json)):
+                    return json.dumps(value)
+            elif isinstance(value, dict):
+                return json.dumps(value)
         if isinstance(value, Decimal):
             return float(value)
         return value
```
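The PostgresToGCSOperator changes branch on `USE_PSYCOPG3`, a flag exported by the postgres provider, because the two drivers end iteration differently: a psycopg 3 `fetchone()` returns `None` when the result set is exhausted, while a psycopg2 cursor is itself an iterator and raises `StopIteration` on its own. A reduced sketch of that split, with the flag stubbed and the row-peeking bookkeeping of the real decorator dropped:

```python
# Minimal sketch of the dual-driver __next__ logic shown above.
# USE_PSYCOPG3 is stubbed here; the real flag is imported from
# airflow.providers.postgres.hooks.postgres.
USE_PSYCOPG3 = False  # assumption: set by which driver is installed


class ServerSideCursorIterator:
    """Wraps a DB cursor so it can be consumed as a plain iterator."""

    def __init__(self, cursor):
        self.cursor = cursor

    def __iter__(self):
        return self

    def __next__(self):
        if USE_PSYCOPG3:
            # psycopg 3: fetchone() returns None once the result set is
            # exhausted, so the end must be translated to StopIteration.
            row = self.cursor.fetchone()
            if row is None:
                raise StopIteration
            return row
        # psycopg2: the cursor is itself an iterator and raises
        # StopIteration on its own.
        return next(self.cursor)
```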
airflow/providers/google/cloud/transfers/s3_to_gcs.py

```diff
@@ -23,8 +23,8 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     ACCESS_KEY_ID,
     AWS_ACCESS_KEY,
@@ -57,7 +57,7 @@ except ImportError:
     from airflow.providers.amazon.aws.operators.s3_list import S3ListOperator  # type: ignore[no-redef]
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 class S3ToGCSOperator(S3ListOperator):
@@ -181,21 +181,27 @@ class S3ToGCSOperator(S3ListOperator):
                 'The destination Google Cloud Storage path must end with a slash "/" or be empty.'
             )
 
-    def execute(self, context: Context):
-        self._check_inputs()
+    def _get_files(self, context: Context, gcs_hook: GCSHook) -> list[str]:
         # use the super method to list all the files in an S3 bucket/key
         s3_objects = super().execute(context)
 
+        if not self.replace:
+            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
+
+        return s3_objects
+
+    def execute(self, context: Context):
+        self._check_inputs()
         gcs_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.google_impersonation_chain,
         )
-        if not self.replace:
-            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
-
         s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
+
+        s3_objects = self._get_files(context, gcs_hook)
         if not s3_objects:
             self.log.info("In sync, no files needed to be uploaded to Google Cloud Storage")
+
         elif self.deferrable:
             self.transfer_files_async(s3_objects, gcs_hook, s3_hook)
         else:
```
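Factoring the listing-and-filtering step into `_get_files()`, as above, separates "which objects still need copying" from the copy itself, which also makes the sync and deferrable branches of `execute()` testable in isolation. A hypothetical unit-test sketch (all collaborators mocked; task id and bucket names are placeholders):

```python
# Hypothetical test sketch: with _get_files() factored out, execute() can
# be driven without real AWS or GCS credentials.
from unittest import mock

from airflow.providers.google.cloud.transfers.s3_to_gcs import S3ToGCSOperator

op = S3ToGCSOperator(
    task_id="s3_to_gcs",              # hypothetical task id
    bucket="example-s3-bucket",       # hypothetical source bucket
    dest_gcs="gs://example-gcs-bucket/",
    replace=False,
)

with (
    mock.patch.object(S3ToGCSOperator, "_get_files", return_value=[]) as get_files,
    mock.patch("airflow.providers.google.cloud.transfers.s3_to_gcs.GCSHook"),
    mock.patch("airflow.providers.google.cloud.transfers.s3_to_gcs.S3Hook"),
):
    # With no pending objects, execute() should take the "In sync" branch
    # and never start a transfer.
    op.execute(context={})
    get_files.assert_called_once()
```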
airflow/providers/google/cloud/transfers/salesforce_to_gcs.py

```diff
@@ -21,12 +21,12 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.salesforce.hooks.salesforce import SalesforceHook
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 class SalesforceToGcsOperator(BaseOperator):
```
airflow/providers/google/cloud/transfers/sftp_to_gcs.py

```diff
@@ -25,13 +25,13 @@ from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.sftp.hooks.sftp import SFTPHook
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 WILDCARD = "*"
@@ -78,6 +78,8 @@ class SFTPToGCSOperator(BaseOperator):
         then uploads (may require significant disk space).
         When ``True``, the file streams directly without using local disk.
         Defaults to ``False``.
+    :param fail_on_file_not_exist: If True, operator fails when file does not exist,
+        if False, operator will not fail and skips transfer. Default is True.
     """
 
     template_fields: Sequence[str] = (
@@ -101,6 +103,7 @@ class SFTPToGCSOperator(BaseOperator):
         impersonation_chain: str | Sequence[str] | None = None,
         sftp_prefetch: bool = True,
         use_stream: bool = False,
+        fail_on_file_not_exist: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -116,6 +119,7 @@ class SFTPToGCSOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.sftp_prefetch = sftp_prefetch
         self.use_stream = use_stream
+        self.fail_on_file_not_exist = fail_on_file_not_exist
 
     @cached_property
     def sftp_hook(self):
@@ -156,7 +160,13 @@ class SFTPToGCSOperator(BaseOperator):
             destination_object = (
                 self.destination_path if self.destination_path else self.source_path.rsplit("/", 1)[1]
             )
-            self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)
+            try:
+                self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)
+            except FileNotFoundError as e:
+                if self.fail_on_file_not_exist:
+                    raise e
+                self.log.info("File %s not found on SFTP server. Skipping transfer.", self.source_path)
+                return
 
     def _copy_single_object(
         self,
@@ -172,7 +182,6 @@ class SFTPToGCSOperator(BaseOperator):
             self.destination_bucket,
             destination_object,
         )
-
         if self.use_stream:
             dest_bucket = gcs_hook.get_bucket(self.destination_bucket)
             dest_blob = dest_bucket.blob(destination_object)
```
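The new `fail_on_file_not_exist` flag turns a missing source file from a hard failure into a logged skip. A hypothetical usage snippet; connection ids, bucket, and paths are placeholders:

```python
# Hypothetical usage of the new flag added in this release.
from airflow.providers.google.cloud.transfers.sftp_to_gcs import SFTPToGCSOperator

copy_if_present = SFTPToGCSOperator(
    task_id="copy_report_if_present",
    source_path="/outbox/daily-report.csv",
    destination_bucket="example-bucket",
    destination_path="reports/daily-report.csv",
    sftp_conn_id="sftp_default",
    gcp_conn_id="google_cloud_default",
    # When the remote file is missing, the task logs "Skipping transfer."
    # and succeeds instead of raising FileNotFoundError.
    fail_on_file_not_exist=False,
)
```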
airflow/providers/google/cloud/transfers/sheets_to_gcs.py

```diff
@@ -21,12 +21,12 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any
 
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.suite.hooks.sheets import GSheetsHook
+from airflow.providers.google.version_compat import BaseOperator
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 class GoogleSheetsToGCSOperator(BaseOperator):
@@ -130,5 +130,5 @@ class GoogleSheetsToGCSOperator(BaseOperator):
             gcs_path_to_file = self._upload_data(gcs_hook, sheet_hook, sheet_range, data)
             destination_array.append(gcs_path_to_file)
 
-        self.xcom_push(context, "destination_objects", destination_array)
+        context["ti"].xcom_push(key="destination_objects", value=destination_array)
         return destination_array
```
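The GoogleSheetsToGCSOperator hunk replaces the removed operator-level `self.xcom_push()` helper with an explicit push through the task instance in the execution context, which works on both Airflow 2 and Airflow 3. The same pattern in a generic custom operator (a sketch; the operator and key are hypothetical):

```python
# Sketch of the XCom pattern the diff above switches to: push through the
# task instance found in the execution context. Operator and key names
# are hypothetical.
from airflow.providers.google.version_compat import BaseOperator


class ReportPathsOperator(BaseOperator):
    def execute(self, context):
        paths = ["gs://example-bucket/a.csv", "gs://example-bucket/b.csv"]
        # Explicit, key-based push; the returned value is additionally
        # stored under the implicit "return_value" key.
        context["ti"].xcom_push(key="destination_objects", value=paths)
        return paths
```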
airflow/providers/google/cloud/transfers/sql_to_gcs.py

```diff
@@ -30,12 +30,12 @@ from typing import TYPE_CHECKING, Any
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.openlineage.facet import OutputDataset
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context
 
 
 class BaseSQLToGCSOperator(BaseOperator):
@@ -295,21 +295,21 @@ class BaseSQLToGCSOperator(BaseOperator):
 
             # Proceed to write the row to the localfile
             if self.export_format == "csv":
-                row = self.convert_types(schema, col_type_dict, row)
+                row2 = self.convert_types(schema, col_type_dict, row)
                 if self.null_marker is not None:
-                    row = [value or self.null_marker for value in row]
-                csv_writer.writerow(row)
+                    row2 = [value or self.null_marker for value in row2]
+                csv_writer.writerow(row2)
             elif self.export_format == "parquet":
-                row = self.convert_types(schema, col_type_dict, row)
+                row2 = self.convert_types(schema, col_type_dict, row)
                 if self.null_marker is not None:
-                    row = [value or self.null_marker for value in row]
-                rows_buffer.append(row)
+                    row2 = [value or self.null_marker for value in row2]
+                rows_buffer.append(row2)
                 if len(rows_buffer) >= self.parquet_row_group_size:
                     self._write_rows_to_parquet(parquet_writer, rows_buffer)
                     rows_buffer = []
             else:
-                row = self.convert_types(schema, col_type_dict, row)
-                row_dict = dict(zip(schema, row))
+                row2 = self.convert_types(schema, col_type_dict, row)
+                row_dict = dict(zip(schema, row2))
 
                 json.dump(row_dict, tmp_file_handle, sort_keys=True, ensure_ascii=False)
 
```
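The `row` to `row2` rename in BaseSQLToGCSOperator looks cosmetic, but it removes a classic shadowing hazard: binding the converted row to a new name keeps the raw row from the cursor intact for the rest of the loop body. In miniature:

```python
# Miniature of the pattern: bind the converted row to a new name so the
# raw row from the cursor stays available for the rest of the loop body.
raw_rows = [(1, None), (2, "x")]


def convert_types(row):
    # Stand-in for BaseSQLToGCSOperator.convert_types(): stringify values.
    return [str(v) for v in row]


for row in raw_rows:
    row2 = convert_types(row)
    # `row` is still the untouched database tuple here; writing
    # `row = convert_types(row)` instead would have shadowed it.
    print(row, "->", row2)
```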
airflow/providers/google/cloud/triggers/bigquery.py

```diff
@@ -22,17 +22,21 @@ from typing import TYPE_CHECKING, Any, SupportsAbs
 
 from aiohttp import ClientSession
 from aiohttp.client_exceptions import ClientResponseError
+from asgiref.sync import sync_to_async
 
-from airflow.exceptions import AirflowException
-from airflow.models.taskinstance import TaskInstance
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook, BigQueryTableAsyncHook
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.triggers.base import BaseTrigger, TriggerEvent
-from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState
 
 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
 
+if not AIRFLOW_V_3_0_PLUS:
+    from airflow.models.taskinstance import TaskInstance
+    from airflow.utils.session import provide_session
+
 
 class BigQueryInsertJobTrigger(BaseTrigger):
     """
@@ -97,16 +101,39 @@ class BigQueryInsertJobTrigger(BaseTrigger):
             },
         )
 
-    @provide_session
-    def get_task_instance(self, session: Session) -> TaskInstance:
-        query = session.query(TaskInstance).filter(
-            TaskInstance.dag_id == self.task_instance.dag_id,
-            TaskInstance.task_id == self.task_instance.task_id,
-            TaskInstance.run_id == self.task_instance.run_id,
-            TaskInstance.map_index == self.task_instance.map_index,
+    if not AIRFLOW_V_3_0_PLUS:
+
+        @provide_session
+        def get_task_instance(self, session: Session) -> TaskInstance:
+            query = session.query(TaskInstance).filter(
+                TaskInstance.dag_id == self.task_instance.dag_id,
+                TaskInstance.task_id == self.task_instance.task_id,
+                TaskInstance.run_id == self.task_instance.run_id,
+                TaskInstance.map_index == self.task_instance.map_index,
+            )
+            task_instance = query.one_or_none()
+            if task_instance is None:
+                raise AirflowException(
+                    "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                    self.task_instance.dag_id,
+                    self.task_instance.task_id,
+                    self.task_instance.run_id,
+                    self.task_instance.map_index,
+                )
+            return task_instance
+
+    async def get_task_state(self):
+        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
+
+        task_states_response = await sync_to_async(RuntimeTaskInstance.get_task_states)(
+            dag_id=self.task_instance.dag_id,
+            task_ids=[self.task_instance.task_id],
+            run_ids=[self.task_instance.run_id],
+            map_index=self.task_instance.map_index,
         )
-        task_instance = query.one_or_none()
-        if task_instance is None:
+        try:
+            task_state = task_states_response[self.task_instance.run_id][self.task_instance.task_id]
+        except Exception:
             raise AirflowException(
                 "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
                 self.task_instance.dag_id,
@@ -114,20 +141,24 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 self.task_instance.run_id,
                 self.task_instance.map_index,
             )
-        return task_instance
+        return task_state
 
-    def safe_to_cancel(self) -> bool:
+    async def safe_to_cancel(self) -> bool:
         """
         Whether it is safe to cancel the external job which is being executed by this trigger.
 
         This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
         Because in those cases, we should NOT cancel the external job.
         """
-        # Database query is needed to get the latest state of the task instance.
-        task_instance = self.get_task_instance()  # type: ignore[call-arg]
-        return task_instance.state != TaskInstanceState.DEFERRED
-
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+        if AIRFLOW_V_3_0_PLUS:
+            task_state = await self.get_task_state()
+        else:
+            # Database query is needed to get the latest state of the task instance.
+            task_instance = self.get_task_instance()  # type: ignore[call-arg]
+            task_state = task_instance.state
+        return task_state != TaskInstanceState.DEFERRED
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -136,6 +167,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     job_id=self.job_id, project_id=self.project_id, location=self.location
                 )
                 if job_status["status"] == "success":
+                    self.log.info("BigQuery Job succeeded")
                     yield TriggerEvent(
                         {
                             "job_id": self.job_id,
@@ -145,7 +177,13 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     )
                     return
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    self.log.info("BigQuery Job failed: %s", job_status)
+                    yield TriggerEvent(
+                        {
+                            "status": job_status["status"],
+                            "message": job_status["message"],
+                        }
+                    )
                     return
                 else:
                     self.log.info(
@@ -155,7 +193,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     )
                     await asyncio.sleep(self.poll_interval)
         except asyncio.CancelledError:
-            if self.job_id and self.cancel_on_kill and self.safe_to_cancel():
+            if self.job_id and self.cancel_on_kill and await self.safe_to_cancel():
                 self.log.info(
                     "The job is safe to cancel the as airflow TaskInstance is not in deferred state."
                 )
@@ -165,9 +203,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     self.location,
                     self.job_id,
                 )
-                await hook.cancel_job(
-                    job_id=self.job_id, project_id=self.project_id, location=self.location
-                )
+                await hook.cancel_job(job_id=self.job_id, project_id=self.project_id, location=self.location)
             else:
                 self.log.info(
                     "Trigger may have shutdown. Skipping to cancel job because the airflow "
@@ -204,7 +240,7 @@ class BigQueryCheckTrigger(BigQueryInsertJobTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -281,7 +317,7 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent with response data."""
         hook = self._get_async_hook()
         try:
@@ -305,7 +341,12 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
                     )
                     return
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    yield TriggerEvent(
+                        {
+                            "status": job_status["status"],
+                            "message": job_status["message"],
+                        }
+                    )
                     return
                 else:
                     self.log.info(
@@ -406,7 +447,7 @@ class BigQueryIntervalCheckTrigger(BigQueryInsertJobTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -554,7 +595,7 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -564,9 +605,9 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
                 if response_from_hook["status"] == "success":
                     query_results = await hook.get_job_output(job_id=self.job_id, project_id=self.project_id)
                     records = hook.get_records(query_results)
-                    records = records.pop(0) if records else None
-                    hook.value_check(self.sql, self.pass_value, records, self.tolerance)
-                    yield TriggerEvent({"status": "success", "message": "Job completed", "records": records})
+                    _records = records.pop(0) if records else None
+                    hook.value_check(self.sql, self.pass_value, _records, self.tolerance)
+                    yield TriggerEvent({"status": "success", "message": "Job completed", "records": _records})
                     return
                 elif response_from_hook["status"] == "pending":
                     self.log.info("Query is still running...")
@@ -640,7 +681,7 @@ class BigQueryTableExistenceTrigger(BaseTrigger):
             gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Will run until the table exists in the Google Big Query."""
         try:
             while True:
@@ -723,7 +764,7 @@ class BigQueryTablePartitionExistenceTrigger(BigQueryTableExistenceTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Will run until the table exists in the Google Big Query."""
         hook = BigQueryAsyncHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
         job_id = None
@@ -744,7 +785,7 @@ class BigQueryTablePartitionExistenceTrigger(BigQueryTableExistenceTrigger):
                         return
                     job_id = None
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    yield TriggerEvent({"status": job_status["status"]})
                     return
                 self.log.info("Sleeping for %s seconds.", self.poll_interval)
                 await asyncio.sleep(self.poll_interval)
```
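Two reading notes on the trigger hunks above. The `-`/`+` pairs with identical text (the `async def run` lines) are whitespace-level changes that this registry view does not preserve. The substantive change is `safe_to_cancel()` becoming async: on Airflow 3 the task state is fetched through the task SDK's `RuntimeTaskInstance.get_task_states`, a blocking call, so the triggers wrap it with asgiref's `sync_to_async` instead of blocking the triggerer's shared event loop. The wrapper pattern in isolation (a minimal sketch; `slow_lookup` is a hypothetical stand-in):

```python
# Minimal sketch of the sync_to_async pattern used by the triggers above:
# run a blocking function in a worker thread and await its result.
import asyncio
import time

from asgiref.sync import sync_to_async


def slow_lookup(run_id: str) -> str:
    # Hypothetical stand-in for RuntimeTaskInstance.get_task_states():
    # a synchronous call that would otherwise stall the event loop.
    time.sleep(0.1)
    return "deferred"


async def safe_to_cancel(run_id: str) -> bool:
    state = await sync_to_async(slow_lookup)(run_id)
    return state != "deferred"


print(asyncio.run(safe_to_cancel("manual__2024-01-01")))  # False
```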
airflow/providers/google/cloud/triggers/cloud_build.py

```diff
@@ -76,7 +76,7 @@ class CloudBuildCreateBuildTrigger(BaseTrigger):
             },
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current build execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
```