apache-airflow-providers-google 12.0.0rc2__py3-none-any.whl → 13.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/LICENSE +0 -52
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +27 -13
- airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
- airflow/providers/google/assets/bigquery.py +17 -0
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
- airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
- airflow/providers/google/cloud/hooks/automl.py +10 -4
- airflow/providers/google/cloud/hooks/bigquery.py +125 -22
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +2 -3
- airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
- airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -3
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
- airflow/providers/google/cloud/hooks/dataflow.py +12 -12
- airflow/providers/google/cloud/hooks/dataform.py +2 -3
- airflow/providers/google/cloud/hooks/datafusion.py +2 -2
- airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
- airflow/providers/google/cloud/hooks/dataproc.py +4 -5
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
- airflow/providers/google/cloud/hooks/dlp.py +3 -4
- airflow/providers/google/cloud/hooks/gcs.py +7 -6
- airflow/providers/google/cloud/hooks/kms.py +2 -3
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
- airflow/providers/google/cloud/hooks/natural_language.py +2 -3
- airflow/providers/google/cloud/hooks/os_login.py +2 -3
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
- airflow/providers/google/cloud/hooks/spanner.py +2 -2
- airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
- airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
- airflow/providers/google/cloud/hooks/tasks.py +3 -4
- airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
- airflow/providers/google/cloud/hooks/translate.py +236 -5
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
- airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
- airflow/providers/google/cloud/hooks/vision.py +3 -4
- airflow/providers/google/cloud/hooks/workflows.py +2 -3
- airflow/providers/google/cloud/links/alloy_db.py +46 -0
- airflow/providers/google/cloud/links/bigquery.py +25 -0
- airflow/providers/google/cloud/links/dataplex.py +172 -2
- airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
- airflow/providers/google/cloud/links/managed_kafka.py +104 -0
- airflow/providers/google/cloud/links/translate.py +28 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
- airflow/providers/google/cloud/openlineage/facets.py +67 -0
- airflow/providers/google/cloud/openlineage/mixins.py +438 -173
- airflow/providers/google/cloud/openlineage/utils.py +394 -61
- airflow/providers/google/cloud/operators/alloy_db.py +980 -69
- airflow/providers/google/cloud/operators/automl.py +83 -245
- airflow/providers/google/cloud/operators/bigquery.py +377 -74
- airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
- airflow/providers/google/cloud/operators/bigtable.py +1 -3
- airflow/providers/google/cloud/operators/cloud_base.py +1 -2
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
- airflow/providers/google/cloud/operators/cloud_build.py +3 -5
- airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
- airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
- airflow/providers/google/cloud/operators/cloud_run.py +6 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
- airflow/providers/google/cloud/operators/compute.py +3 -4
- airflow/providers/google/cloud/operators/datacatalog.py +9 -11
- airflow/providers/google/cloud/operators/dataflow.py +1 -112
- airflow/providers/google/cloud/operators/dataform.py +3 -5
- airflow/providers/google/cloud/operators/datafusion.py +1 -1
- airflow/providers/google/cloud/operators/dataplex.py +2046 -7
- airflow/providers/google/cloud/operators/dataproc.py +102 -17
- airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
- airflow/providers/google/cloud/operators/dlp.py +17 -19
- airflow/providers/google/cloud/operators/gcs.py +14 -17
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
- airflow/providers/google/cloud/operators/natural_language.py +3 -5
- airflow/providers/google/cloud/operators/pubsub.py +39 -7
- airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
- airflow/providers/google/cloud/operators/stackdriver.py +3 -5
- airflow/providers/google/cloud/operators/tasks.py +4 -6
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
- airflow/providers/google/cloud/operators/translate.py +414 -5
- airflow/providers/google/cloud/operators/translate_speech.py +2 -4
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
- airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
- airflow/providers/google/cloud/operators/vision.py +4 -6
- airflow/providers/google/cloud/operators/workflows.py +5 -7
- airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/sensors/bigtable.py +2 -3
- airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
- airflow/providers/google/cloud/sensors/dataplex.py +4 -6
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
- airflow/providers/google/cloud/sensors/gcs.py +2 -4
- airflow/providers/google/cloud/sensors/pubsub.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +3 -5
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
- airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
- airflow/providers/google/cloud/triggers/dataflow.py +2 -3
- airflow/providers/google/cloud/triggers/dataplex.py +1 -2
- airflow/providers/google/cloud/triggers/dataproc.py +2 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +1 -2
- airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
- airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
- airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/consts.py +1 -2
- airflow/providers/google/common/hooks/base_google.py +8 -7
- airflow/providers/google/get_provider_info.py +186 -134
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
- airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
- airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
- airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
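The largest pure additions in 13.0.0 are the new Managed Service for Apache Kafka integration (hooks, operators, and links under managed_kafka.py) and the expanded Dataplex, AlloyDB, and Translate modules, while the Vertex AI generative-model operators and several long-deprecated example DAGs are removed (the full contents of the deleted example DAGs follow below). As a rough orientation, a minimal DAG using the new Managed Kafka integration might look like the sketch below; the operator name ManagedKafkaCreateClusterOperator and its parameters are inferred from the new managed_kafka module's naming pattern, so treat the exact signature as an assumption and verify it against the released provider documentation.

# Minimal sketch of the new Managed Kafka integration in 13.0.0.
# The operator name and parameters are assumptions inferred from the new
# airflow/providers/google/cloud/operators/managed_kafka.py module; verify
# against the provider documentation before use.
from __future__ import annotations

import os
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.google.cloud.operators.managed_kafka import (
    ManagedKafkaCreateClusterOperator,  # assumed operator name
)

GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")

with DAG(
    dag_id="example_managed_kafka",
    start_date=datetime(2025, 1, 1),
    catchup=False,
) as dag:
    # The cluster body is assumed to follow the shape of
    # google.cloud.managedkafka_v1.types.Cluster.
    create_cluster = ManagedKafkaCreateClusterOperator(
        task_id="create_cluster",
        project_id=GCP_PROJECT_ID,
        location="us-central1",
        cluster_id="example-cluster",
        cluster={
            "gcp_config": {
                "access_config": {
                    "network_configs": [
                        {"subnet": f"projects/{GCP_PROJECT_ID}/regions/us-central1/subnetworks/default"}
                    ]
                }
            },
            "capacity_config": {"vcpu_count": 3, "memory_bytes": 3 * 1024**3},
        },
    )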
--- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that shows how to use FacebookAdsReportToGcsOperator.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from facebook_business.adobjects.adsinsights import AdsInsights
-
-from airflow.models.baseoperator import chain
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.bigquery import (
-    BigQueryCreateEmptyDatasetOperator,
-    BigQueryCreateEmptyTableOperator,
-    BigQueryDeleteDatasetOperator,
-    BigQueryInsertJobOperator,
-)
-from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
-from airflow.providers.google.cloud.transfers.facebook_ads_to_gcs import FacebookAdsReportToGcsOperator
-from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
-
-# [START howto_GCS_env_variables]
-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "free-tier-1997")
-GCS_BUCKET = os.environ.get("GCS_BUCKET", "airflow_bucket_fb")
-GCS_OBJ_PATH = os.environ.get("GCS_OBJ_PATH", "Temp/this_is_my_report_csv.csv")
-GCS_CONN_ID = os.environ.get("GCS_CONN_ID", "google_cloud_default")
-DATASET_NAME = os.environ.get("DATASET_NAME", "airflow_test_dataset")
-TABLE_NAME = os.environ.get("FB_TABLE_NAME", "airflow_test_datatable")
-# [END howto_GCS_env_variables]
-
-# [START howto_FB_ADS_variables]
-FIELDS = [
-    AdsInsights.Field.campaign_name,
-    AdsInsights.Field.campaign_id,
-    AdsInsights.Field.ad_id,
-    AdsInsights.Field.clicks,
-    AdsInsights.Field.impressions,
-]
-PARAMETERS = {"level": "ad", "date_preset": "yesterday"}
-# [END howto_FB_ADS_variables]
-
-with DAG(
-    "example_facebook_ads_to_gcs",
-    start_date=datetime(2021, 1, 1),
-    catchup=False,
-) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=GCS_BUCKET,
-        project_id=GCP_PROJECT_ID,
-    )
-
-    create_dataset = BigQueryCreateEmptyDatasetOperator(
-        task_id="create_dataset",
-        dataset_id=DATASET_NAME,
-    )
-
-    create_table = BigQueryCreateEmptyTableOperator(
-        task_id="create_table",
-        dataset_id=DATASET_NAME,
-        table_id=TABLE_NAME,
-        schema_fields=[
-            {"name": "campaign_name", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "campaign_id", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "ad_id", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "clicks", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "impressions", "type": "STRING", "mode": "NULLABLE"},
-        ],
-    )
-
-    # [START howto_operator_facebook_ads_to_gcs]
-    run_operator = FacebookAdsReportToGcsOperator(
-        task_id="run_fetch_data",
-        owner="airflow",
-        bucket_name=GCS_BUCKET,
-        parameters=PARAMETERS,
-        fields=FIELDS,
-        gcp_conn_id=GCS_CONN_ID,
-        object_name=GCS_OBJ_PATH,
-    )
-    # [END howto_operator_facebook_ads_to_gcs]
-
-    load_csv = GCSToBigQueryOperator(
-        task_id="gcs_to_bq_example",
-        bucket=GCS_BUCKET,
-        source_objects=[GCS_OBJ_PATH],
-        destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
-        write_disposition="WRITE_TRUNCATE",
-    )
-
-    read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
-        task_id="read_data_from_gcs_many_chunks",
-        configuration={
-            "query": {
-                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
-                "useLegacySql": False,
-            }
-        },
-    )
-
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket",
-        bucket_name=GCS_BUCKET,
-    )
-
-    delete_dataset = BigQueryDeleteDatasetOperator(
-        task_id="delete_dataset",
-        project_id=GCP_PROJECT_ID,
-        dataset_id=DATASET_NAME,
-        delete_contents=True,
-    )
-
-    chain(
-        create_bucket,
-        create_dataset,
-        create_table,
-        run_operator,
-        load_csv,
-        read_data_from_gcs_many_chunks,
-        delete_bucket,
-        delete_dataset,
-    )
--- airflow/providers/google/cloud/example_dags/example_looker.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that show how to use various Looker
-operators to submit PDT materialization job and manage it.
-"""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.looker import LookerStartPdtBuildOperator
-from airflow.providers.google.cloud.sensors.looker import LookerCheckPdtBuildSensor
-
-with DAG(
-    dag_id="example_gcp_looker",
-    start_date=datetime(2021, 1, 1),
-    catchup=False,
-) as dag:
-    # [START cloud_looker_async_start_pdt_sensor]
-    start_pdt_task_async = LookerStartPdtBuildOperator(
-        task_id="start_pdt_task_async",
-        looker_conn_id="your_airflow_connection_for_looker",
-        model="your_lookml_model",
-        view="your_lookml_view",
-        asynchronous=True,
-    )
-
-    check_pdt_task_async_sensor = LookerCheckPdtBuildSensor(
-        task_id="check_pdt_task_async_sensor",
-        looker_conn_id="your_airflow_connection_for_looker",
-        materialization_id=start_pdt_task_async.output,
-        poke_interval=10,
-    )
-    # [END cloud_looker_async_start_pdt_sensor]
-
-    # [START how_to_cloud_looker_start_pdt_build_operator]
-    build_pdt_task = LookerStartPdtBuildOperator(
-        task_id="build_pdt_task",
-        looker_conn_id="your_airflow_connection_for_looker",
-        model="your_lookml_model",
-        view="your_lookml_view",
-    )
-    # [END how_to_cloud_looker_start_pdt_build_operator]
-
-    start_pdt_task_async >> check_pdt_task_async_sensor
-
-    build_pdt_task
--- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py
+++ /dev/null
@@ -1,194 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example DAG using PrestoToGCSOperator.
-"""
-
-from __future__ import annotations
-
-import os
-import re
-from datetime import datetime
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.bigquery import (
-    BigQueryCreateEmptyDatasetOperator,
-    BigQueryCreateExternalTableOperator,
-    BigQueryDeleteDatasetOperator,
-    BigQueryInsertJobOperator,
-)
-from airflow.providers.google.cloud.transfers.presto_to_gcs import PrestoToGCSOperator
-
-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
-GCS_BUCKET = os.environ.get("GCP_PRESTO_TO_GCS_BUCKET_NAME", "INVALID BUCKET NAME")
-DATASET_NAME = os.environ.get("GCP_PRESTO_TO_GCS_DATASET_NAME", "test_presto_to_gcs_dataset")
-
-SOURCE_MULTIPLE_TYPES = "memory.default.test_multiple_types"
-SOURCE_CUSTOMER_TABLE = "tpch.sf1.customer"
-
-
-def safe_name(s: str) -> str:
-    """
-    Remove invalid characters for filename
-    """
-    return re.sub("[^0-9a-zA-Z_]+", "_", s)
-
-
-with DAG(
-    dag_id="example_presto_to_gcs",
-    start_date=datetime(2021, 1, 1),
-    catchup=False,
-    tags=["example"],
-) as dag:
-    create_dataset = BigQueryCreateEmptyDatasetOperator(task_id="create-dataset", dataset_id=DATASET_NAME)
-
-    delete_dataset = BigQueryDeleteDatasetOperator(
-        task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
-    )
-
-    # [START howto_operator_presto_to_gcs_basic]
-    presto_to_gcs_basic = PrestoToGCSOperator(
-        task_id="presto_to_gcs_basic",
-        sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
-        bucket=GCS_BUCKET,
-        filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.json",
-    )
-    # [END howto_operator_presto_to_gcs_basic]
-
-    # [START howto_operator_presto_to_gcs_multiple_types]
-    presto_to_gcs_multiple_types = PrestoToGCSOperator(
-        task_id="presto_to_gcs_multiple_types",
-        sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
-        bucket=GCS_BUCKET,
-        filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.json",
-        schema_filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
-        gzip=False,
-    )
-    # [END howto_operator_presto_to_gcs_multiple_types]
-
-    # [START howto_operator_create_external_table_multiple_types]
-    create_external_table_multiple_types = BigQueryCreateExternalTableOperator(
-        task_id="create_external_table_multiple_types",
-        bucket=GCS_BUCKET,
-        source_objects=[f"{safe_name(SOURCE_MULTIPLE_TYPES)}.*.json"],
-        table_resource={
-            "tableReference": {
-                "projectId": GCP_PROJECT_ID,
-                "datasetId": DATASET_NAME,
-                "tableId": f"{safe_name(SOURCE_MULTIPLE_TYPES)}",
-            },
-            "schema": {
-                "fields": [
-                    {"name": "name", "type": "STRING"},
-                    {"name": "post_abbr", "type": "STRING"},
-                ]
-            },
-            "externalDataConfiguration": {
-                "sourceFormat": "NEWLINE_DELIMITED_JSON",
-                "compression": "NONE",
-                "csvOptions": {"skipLeadingRows": 1},
-            },
-        },
-        schema_object=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
-    )
-    # [END howto_operator_create_external_table_multiple_types]
-
-    read_data_from_gcs_multiple_types = BigQueryInsertJobOperator(
-        task_id="read_data_from_gcs_multiple_types",
-        configuration={
-            "query": {
-                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}."
-                f"{safe_name(SOURCE_MULTIPLE_TYPES)}`",
-                "useLegacySql": False,
-            }
-        },
-    )
-
-    # [START howto_operator_presto_to_gcs_many_chunks]
-    presto_to_gcs_many_chunks = PrestoToGCSOperator(
-        task_id="presto_to_gcs_many_chunks",
-        sql=f"select * from {SOURCE_CUSTOMER_TABLE}",
-        bucket=GCS_BUCKET,
-        filename=f"{safe_name(SOURCE_CUSTOMER_TABLE)}.{{}}.json",
-        schema_filename=f"{safe_name(SOURCE_CUSTOMER_TABLE)}-schema.json",
-        approx_max_file_size_bytes=10_000_000,
-        gzip=False,
-    )
-    # [END howto_operator_presto_to_gcs_many_chunks]
-
-    create_external_table_many_chunks = BigQueryCreateExternalTableOperator(
-        task_id="create_external_table_many_chunks",
-        bucket=GCS_BUCKET,
-        table_resource={
-            "tableReference": {
-                "projectId": GCP_PROJECT_ID,
-                "datasetId": DATASET_NAME,
-                "tableId": f"{safe_name(SOURCE_CUSTOMER_TABLE)}",
-            },
-            "schema": {
-                "fields": [
-                    {"name": "name", "type": "STRING"},
-                    {"name": "post_abbr", "type": "STRING"},
-                ]
-            },
-            "externalDataConfiguration": {
-                "sourceFormat": "NEWLINE_DELIMITED_JSON",
-                "compression": "NONE",
-                "csvOptions": {"skipLeadingRows": 1},
-            },
-        },
-        source_objects=[f"{safe_name(SOURCE_CUSTOMER_TABLE)}.*.json"],
-        schema_object=f"{safe_name(SOURCE_CUSTOMER_TABLE)}-schema.json",
-    )
-
-    # [START howto_operator_read_data_from_gcs_many_chunks]
-    read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
-        task_id="read_data_from_gcs_many_chunks",
-        configuration={
-            "query": {
-                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}."
-                f"{safe_name(SOURCE_CUSTOMER_TABLE)}`",
-                "useLegacySql": False,
-            }
-        },
-    )
-    # [END howto_operator_read_data_from_gcs_many_chunks]
-
-    # [START howto_operator_presto_to_gcs_csv]
-    presto_to_gcs_csv = PrestoToGCSOperator(
-        task_id="presto_to_gcs_csv",
-        sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
-        bucket=GCS_BUCKET,
-        filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.csv",
-        schema_filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
-        export_format="csv",
-    )
-    # [END howto_operator_presto_to_gcs_csv]
-
-    create_dataset >> presto_to_gcs_basic
-    create_dataset >> presto_to_gcs_multiple_types
-    create_dataset >> presto_to_gcs_many_chunks
-    create_dataset >> presto_to_gcs_csv
-
-    presto_to_gcs_multiple_types >> create_external_table_multiple_types >> read_data_from_gcs_multiple_types
-    presto_to_gcs_many_chunks >> create_external_table_many_chunks >> read_data_from_gcs_many_chunks
-
-    presto_to_gcs_basic >> delete_dataset
-    presto_to_gcs_csv >> delete_dataset
-    read_data_from_gcs_multiple_types >> delete_dataset
-    read_data_from_gcs_many_chunks >> delete_dataset
--- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py
+++ /dev/null
@@ -1,129 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Example Airflow DAG that shows how to use SalesforceToGcsOperator.
-"""
-
-from __future__ import annotations
-
-import os
-from datetime import datetime
-
-from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.bigquery import (
-    BigQueryCreateEmptyDatasetOperator,
-    BigQueryCreateEmptyTableOperator,
-    BigQueryDeleteDatasetOperator,
-    BigQueryInsertJobOperator,
-)
-from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
-from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
-from airflow.providers.google.cloud.transfers.salesforce_to_gcs import SalesforceToGcsOperator
-
-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
-GCS_BUCKET = os.environ.get("GCS_BUCKET", "airflow-salesforce-bucket")
-DATASET_NAME = os.environ.get("SALESFORCE_DATASET_NAME", "salesforce_test_dataset")
-TABLE_NAME = os.environ.get("SALESFORCE_TABLE_NAME", "salesforce_test_datatable")
-GCS_OBJ_PATH = os.environ.get("GCS_OBJ_PATH", "results.csv")
-QUERY = "SELECT Id, Name, Company, Phone, Email, CreatedDate, LastModifiedDate, IsDeleted FROM Lead"
-GCS_CONN_ID = os.environ.get("GCS_CONN_ID", "google_cloud_default")
-SALESFORCE_CONN_ID = os.environ.get("SALESFORCE_CONN_ID", "salesforce_default")
-
-
-with DAG(
-    "example_salesforce_to_gcs",
-    start_date=datetime(2021, 1, 1),
-    catchup=False,
-) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=GCS_BUCKET,
-        project_id=GCP_PROJECT_ID,
-        gcp_conn_id=GCS_CONN_ID,
-    )
-
-    # [START howto_operator_salesforce_to_gcs]
-    gcs_upload_task = SalesforceToGcsOperator(
-        query=QUERY,
-        include_deleted=True,
-        bucket_name=GCS_BUCKET,
-        object_name=GCS_OBJ_PATH,
-        salesforce_conn_id=SALESFORCE_CONN_ID,
-        export_format="csv",
-        coerce_to_timestamp=False,
-        record_time_added=False,
-        gcp_conn_id=GCS_CONN_ID,
-        task_id="upload_to_gcs",
-        dag=dag,
-    )
-    # [END howto_operator_salesforce_to_gcs]
-
-    create_dataset = BigQueryCreateEmptyDatasetOperator(
-        task_id="create_dataset", dataset_id=DATASET_NAME, project_id=GCP_PROJECT_ID, gcp_conn_id=GCS_CONN_ID
-    )
-
-    create_table = BigQueryCreateEmptyTableOperator(
-        task_id="create_table",
-        dataset_id=DATASET_NAME,
-        table_id=TABLE_NAME,
-        schema_fields=[
-            {"name": "id", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "name", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "company", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "phone", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "email", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "createddate", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "lastmodifieddate", "type": "STRING", "mode": "NULLABLE"},
-            {"name": "isdeleted", "type": "BOOL", "mode": "NULLABLE"},
-        ],
-    )
-
-    load_csv = GCSToBigQueryOperator(
-        task_id="gcs_to_bq",
-        bucket=GCS_BUCKET,
-        source_objects=[GCS_OBJ_PATH],
-        destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
-        write_disposition="WRITE_TRUNCATE",
-    )
-
-    read_data_from_gcs = BigQueryInsertJobOperator(
-        task_id="read_data_from_gcs",
-        configuration={
-            "query": {
-                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
-                "useLegacySql": False,
-            }
-        },
-    )
-
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket",
-        bucket_name=GCS_BUCKET,
-    )
-
-    delete_dataset = BigQueryDeleteDatasetOperator(
-        task_id="delete_dataset",
-        project_id=GCP_PROJECT_ID,
-        dataset_id=DATASET_NAME,
-        delete_contents=True,
-    )
-
-    create_bucket >> gcs_upload_task >> load_csv
-    create_dataset >> create_table >> load_csv
-    load_csv >> read_data_from_gcs
-    read_data_from_gcs >> delete_bucket
-    read_data_from_gcs >> delete_dataset
--- airflow/providers/google/marketing_platform/example_dags/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.