apache-airflow-providers-google 12.0.0rc1__py3-none-any.whl → 13.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
  155. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,141 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- """
19
- Example Airflow DAG that shows how to use FacebookAdsReportToGcsOperator.
20
- """
21
-
22
- from __future__ import annotations
23
-
24
- import os
25
- from datetime import datetime
26
-
27
- from facebook_business.adobjects.adsinsights import AdsInsights
28
-
29
- from airflow.models.baseoperator import chain
30
- from airflow.models.dag import DAG
31
- from airflow.providers.google.cloud.operators.bigquery import (
32
- BigQueryCreateEmptyDatasetOperator,
33
- BigQueryCreateEmptyTableOperator,
34
- BigQueryDeleteDatasetOperator,
35
- BigQueryInsertJobOperator,
36
- )
37
- from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
38
- from airflow.providers.google.cloud.transfers.facebook_ads_to_gcs import FacebookAdsReportToGcsOperator
39
- from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
40
-
41
- # [START howto_GCS_env_variables]
42
- GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "free-tier-1997")
43
- GCS_BUCKET = os.environ.get("GCS_BUCKET", "airflow_bucket_fb")
44
- GCS_OBJ_PATH = os.environ.get("GCS_OBJ_PATH", "Temp/this_is_my_report_csv.csv")
45
- GCS_CONN_ID = os.environ.get("GCS_CONN_ID", "google_cloud_default")
46
- DATASET_NAME = os.environ.get("DATASET_NAME", "airflow_test_dataset")
47
- TABLE_NAME = os.environ.get("FB_TABLE_NAME", "airflow_test_datatable")
48
- # [END howto_GCS_env_variables]
49
-
50
- # [START howto_FB_ADS_variables]
51
- FIELDS = [
52
- AdsInsights.Field.campaign_name,
53
- AdsInsights.Field.campaign_id,
54
- AdsInsights.Field.ad_id,
55
- AdsInsights.Field.clicks,
56
- AdsInsights.Field.impressions,
57
- ]
58
- PARAMETERS = {"level": "ad", "date_preset": "yesterday"}
59
- # [END howto_FB_ADS_variables]
60
-
61
- with DAG(
62
- "example_facebook_ads_to_gcs",
63
- start_date=datetime(2021, 1, 1),
64
- catchup=False,
65
- ) as dag:
66
- create_bucket = GCSCreateBucketOperator(
67
- task_id="create_bucket",
68
- bucket_name=GCS_BUCKET,
69
- project_id=GCP_PROJECT_ID,
70
- )
71
-
72
- create_dataset = BigQueryCreateEmptyDatasetOperator(
73
- task_id="create_dataset",
74
- dataset_id=DATASET_NAME,
75
- )
76
-
77
- create_table = BigQueryCreateEmptyTableOperator(
78
- task_id="create_table",
79
- dataset_id=DATASET_NAME,
80
- table_id=TABLE_NAME,
81
- schema_fields=[
82
- {"name": "campaign_name", "type": "STRING", "mode": "NULLABLE"},
83
- {"name": "campaign_id", "type": "STRING", "mode": "NULLABLE"},
84
- {"name": "ad_id", "type": "STRING", "mode": "NULLABLE"},
85
- {"name": "clicks", "type": "STRING", "mode": "NULLABLE"},
86
- {"name": "impressions", "type": "STRING", "mode": "NULLABLE"},
87
- ],
88
- )
89
-
90
- # [START howto_operator_facebook_ads_to_gcs]
91
- run_operator = FacebookAdsReportToGcsOperator(
92
- task_id="run_fetch_data",
93
- owner="airflow",
94
- bucket_name=GCS_BUCKET,
95
- parameters=PARAMETERS,
96
- fields=FIELDS,
97
- gcp_conn_id=GCS_CONN_ID,
98
- object_name=GCS_OBJ_PATH,
99
- )
100
- # [END howto_operator_facebook_ads_to_gcs]
101
-
102
- load_csv = GCSToBigQueryOperator(
103
- task_id="gcs_to_bq_example",
104
- bucket=GCS_BUCKET,
105
- source_objects=[GCS_OBJ_PATH],
106
- destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
107
- write_disposition="WRITE_TRUNCATE",
108
- )
109
-
110
- read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
111
- task_id="read_data_from_gcs_many_chunks",
112
- configuration={
113
- "query": {
114
- "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
115
- "useLegacySql": False,
116
- }
117
- },
118
- )
119
-
120
- delete_bucket = GCSDeleteBucketOperator(
121
- task_id="delete_bucket",
122
- bucket_name=GCS_BUCKET,
123
- )
124
-
125
- delete_dataset = BigQueryDeleteDatasetOperator(
126
- task_id="delete_dataset",
127
- project_id=GCP_PROJECT_ID,
128
- dataset_id=DATASET_NAME,
129
- delete_contents=True,
130
- )
131
-
132
- chain(
133
- create_bucket,
134
- create_dataset,
135
- create_table,
136
- run_operator,
137
- load_csv,
138
- read_data_from_gcs_many_chunks,
139
- delete_bucket,
140
- delete_dataset,
141
- )
@@ -1,64 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- """
19
- Example Airflow DAG that shows how to use various Looker
20
- operators to submit PDT materialization job and manage it.
21
- """
22
-
23
- from __future__ import annotations
24
-
25
- from datetime import datetime
26
-
27
- from airflow.models.dag import DAG
28
- from airflow.providers.google.cloud.operators.looker import LookerStartPdtBuildOperator
29
- from airflow.providers.google.cloud.sensors.looker import LookerCheckPdtBuildSensor
30
-
31
- with DAG(
32
- dag_id="example_gcp_looker",
33
- start_date=datetime(2021, 1, 1),
34
- catchup=False,
35
- ) as dag:
36
- # [START cloud_looker_async_start_pdt_sensor]
37
- start_pdt_task_async = LookerStartPdtBuildOperator(
38
- task_id="start_pdt_task_async",
39
- looker_conn_id="your_airflow_connection_for_looker",
40
- model="your_lookml_model",
41
- view="your_lookml_view",
42
- asynchronous=True,
43
- )
44
-
45
- check_pdt_task_async_sensor = LookerCheckPdtBuildSensor(
46
- task_id="check_pdt_task_async_sensor",
47
- looker_conn_id="your_airflow_connection_for_looker",
48
- materialization_id=start_pdt_task_async.output,
49
- poke_interval=10,
50
- )
51
- # [END cloud_looker_async_start_pdt_sensor]
52
-
53
- # [START how_to_cloud_looker_start_pdt_build_operator]
54
- build_pdt_task = LookerStartPdtBuildOperator(
55
- task_id="build_pdt_task",
56
- looker_conn_id="your_airflow_connection_for_looker",
57
- model="your_lookml_model",
58
- view="your_lookml_view",
59
- )
60
- # [END how_to_cloud_looker_start_pdt_build_operator]
61
-
62
- start_pdt_task_async >> check_pdt_task_async_sensor
63
-
64
- build_pdt_task
@@ -1,194 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- """
19
- Example DAG using PrestoToGCSOperator.
20
- """
21
-
22
- from __future__ import annotations
23
-
24
- import os
25
- import re
26
- from datetime import datetime
27
-
28
- from airflow.models.dag import DAG
29
- from airflow.providers.google.cloud.operators.bigquery import (
30
- BigQueryCreateEmptyDatasetOperator,
31
- BigQueryCreateExternalTableOperator,
32
- BigQueryDeleteDatasetOperator,
33
- BigQueryInsertJobOperator,
34
- )
35
- from airflow.providers.google.cloud.transfers.presto_to_gcs import PrestoToGCSOperator
36
-
37
- GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
38
- GCS_BUCKET = os.environ.get("GCP_PRESTO_TO_GCS_BUCKET_NAME", "INVALID BUCKET NAME")
39
- DATASET_NAME = os.environ.get("GCP_PRESTO_TO_GCS_DATASET_NAME", "test_presto_to_gcs_dataset")
40
-
41
- SOURCE_MULTIPLE_TYPES = "memory.default.test_multiple_types"
42
- SOURCE_CUSTOMER_TABLE = "tpch.sf1.customer"
43
-
44
-
45
- def safe_name(s: str) -> str:
46
- """
47
- Remove invalid characters for filename
48
- """
49
- return re.sub("[^0-9a-zA-Z_]+", "_", s)
50
-
51
-
52
- with DAG(
53
- dag_id="example_presto_to_gcs",
54
- start_date=datetime(2021, 1, 1),
55
- catchup=False,
56
- tags=["example"],
57
- ) as dag:
58
- create_dataset = BigQueryCreateEmptyDatasetOperator(task_id="create-dataset", dataset_id=DATASET_NAME)
59
-
60
- delete_dataset = BigQueryDeleteDatasetOperator(
61
- task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
62
- )
63
-
64
- # [START howto_operator_presto_to_gcs_basic]
65
- presto_to_gcs_basic = PrestoToGCSOperator(
66
- task_id="presto_to_gcs_basic",
67
- sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
68
- bucket=GCS_BUCKET,
69
- filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.json",
70
- )
71
- # [END howto_operator_presto_to_gcs_basic]
72
-
73
- # [START howto_operator_presto_to_gcs_multiple_types]
74
- presto_to_gcs_multiple_types = PrestoToGCSOperator(
75
- task_id="presto_to_gcs_multiple_types",
76
- sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
77
- bucket=GCS_BUCKET,
78
- filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.json",
79
- schema_filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
80
- gzip=False,
81
- )
82
- # [END howto_operator_presto_to_gcs_multiple_types]
83
-
84
- # [START howto_operator_create_external_table_multiple_types]
85
- create_external_table_multiple_types = BigQueryCreateExternalTableOperator(
86
- task_id="create_external_table_multiple_types",
87
- bucket=GCS_BUCKET,
88
- source_objects=[f"{safe_name(SOURCE_MULTIPLE_TYPES)}.*.json"],
89
- table_resource={
90
- "tableReference": {
91
- "projectId": GCP_PROJECT_ID,
92
- "datasetId": DATASET_NAME,
93
- "tableId": f"{safe_name(SOURCE_MULTIPLE_TYPES)}",
94
- },
95
- "schema": {
96
- "fields": [
97
- {"name": "name", "type": "STRING"},
98
- {"name": "post_abbr", "type": "STRING"},
99
- ]
100
- },
101
- "externalDataConfiguration": {
102
- "sourceFormat": "NEWLINE_DELIMITED_JSON",
103
- "compression": "NONE",
104
- "csvOptions": {"skipLeadingRows": 1},
105
- },
106
- },
107
- schema_object=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
108
- )
109
- # [END howto_operator_create_external_table_multiple_types]
110
-
111
- read_data_from_gcs_multiple_types = BigQueryInsertJobOperator(
112
- task_id="read_data_from_gcs_multiple_types",
113
- configuration={
114
- "query": {
115
- "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}."
116
- f"{safe_name(SOURCE_MULTIPLE_TYPES)}`",
117
- "useLegacySql": False,
118
- }
119
- },
120
- )
121
-
122
- # [START howto_operator_presto_to_gcs_many_chunks]
123
- presto_to_gcs_many_chunks = PrestoToGCSOperator(
124
- task_id="presto_to_gcs_many_chunks",
125
- sql=f"select * from {SOURCE_CUSTOMER_TABLE}",
126
- bucket=GCS_BUCKET,
127
- filename=f"{safe_name(SOURCE_CUSTOMER_TABLE)}.{{}}.json",
128
- schema_filename=f"{safe_name(SOURCE_CUSTOMER_TABLE)}-schema.json",
129
- approx_max_file_size_bytes=10_000_000,
130
- gzip=False,
131
- )
132
- # [END howto_operator_presto_to_gcs_many_chunks]
133
-
134
- create_external_table_many_chunks = BigQueryCreateExternalTableOperator(
135
- task_id="create_external_table_many_chunks",
136
- bucket=GCS_BUCKET,
137
- table_resource={
138
- "tableReference": {
139
- "projectId": GCP_PROJECT_ID,
140
- "datasetId": DATASET_NAME,
141
- "tableId": f"{safe_name(SOURCE_CUSTOMER_TABLE)}",
142
- },
143
- "schema": {
144
- "fields": [
145
- {"name": "name", "type": "STRING"},
146
- {"name": "post_abbr", "type": "STRING"},
147
- ]
148
- },
149
- "externalDataConfiguration": {
150
- "sourceFormat": "NEWLINE_DELIMITED_JSON",
151
- "compression": "NONE",
152
- "csvOptions": {"skipLeadingRows": 1},
153
- },
154
- },
155
- source_objects=[f"{safe_name(SOURCE_CUSTOMER_TABLE)}.*.json"],
156
- schema_object=f"{safe_name(SOURCE_CUSTOMER_TABLE)}-schema.json",
157
- )
158
-
159
- # [START howto_operator_read_data_from_gcs_many_chunks]
160
- read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
161
- task_id="read_data_from_gcs_many_chunks",
162
- configuration={
163
- "query": {
164
- "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}."
165
- f"{safe_name(SOURCE_CUSTOMER_TABLE)}`",
166
- "useLegacySql": False,
167
- }
168
- },
169
- )
170
- # [END howto_operator_read_data_from_gcs_many_chunks]
171
-
172
- # [START howto_operator_presto_to_gcs_csv]
173
- presto_to_gcs_csv = PrestoToGCSOperator(
174
- task_id="presto_to_gcs_csv",
175
- sql=f"select * from {SOURCE_MULTIPLE_TYPES}",
176
- bucket=GCS_BUCKET,
177
- filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}.{{}}.csv",
178
- schema_filename=f"{safe_name(SOURCE_MULTIPLE_TYPES)}-schema.json",
179
- export_format="csv",
180
- )
181
- # [END howto_operator_presto_to_gcs_csv]
182
-
183
- create_dataset >> presto_to_gcs_basic
184
- create_dataset >> presto_to_gcs_multiple_types
185
- create_dataset >> presto_to_gcs_many_chunks
186
- create_dataset >> presto_to_gcs_csv
187
-
188
- presto_to_gcs_multiple_types >> create_external_table_multiple_types >> read_data_from_gcs_multiple_types
189
- presto_to_gcs_many_chunks >> create_external_table_many_chunks >> read_data_from_gcs_many_chunks
190
-
191
- presto_to_gcs_basic >> delete_dataset
192
- presto_to_gcs_csv >> delete_dataset
193
- read_data_from_gcs_multiple_types >> delete_dataset
194
- read_data_from_gcs_many_chunks >> delete_dataset
@@ -1,129 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
- """
18
- Example Airflow DAG that shows how to use SalesforceToGcsOperator.
19
- """
20
-
21
- from __future__ import annotations
22
-
23
- import os
24
- from datetime import datetime
25
-
26
- from airflow.models.dag import DAG
27
- from airflow.providers.google.cloud.operators.bigquery import (
28
- BigQueryCreateEmptyDatasetOperator,
29
- BigQueryCreateEmptyTableOperator,
30
- BigQueryDeleteDatasetOperator,
31
- BigQueryInsertJobOperator,
32
- )
33
- from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
34
- from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
35
- from airflow.providers.google.cloud.transfers.salesforce_to_gcs import SalesforceToGcsOperator
36
-
37
- GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
38
- GCS_BUCKET = os.environ.get("GCS_BUCKET", "airflow-salesforce-bucket")
39
- DATASET_NAME = os.environ.get("SALESFORCE_DATASET_NAME", "salesforce_test_dataset")
40
- TABLE_NAME = os.environ.get("SALESFORCE_TABLE_NAME", "salesforce_test_datatable")
41
- GCS_OBJ_PATH = os.environ.get("GCS_OBJ_PATH", "results.csv")
42
- QUERY = "SELECT Id, Name, Company, Phone, Email, CreatedDate, LastModifiedDate, IsDeleted FROM Lead"
43
- GCS_CONN_ID = os.environ.get("GCS_CONN_ID", "google_cloud_default")
44
- SALESFORCE_CONN_ID = os.environ.get("SALESFORCE_CONN_ID", "salesforce_default")
45
-
46
-
47
- with DAG(
48
- "example_salesforce_to_gcs",
49
- start_date=datetime(2021, 1, 1),
50
- catchup=False,
51
- ) as dag:
52
- create_bucket = GCSCreateBucketOperator(
53
- task_id="create_bucket",
54
- bucket_name=GCS_BUCKET,
55
- project_id=GCP_PROJECT_ID,
56
- gcp_conn_id=GCS_CONN_ID,
57
- )
58
-
59
- # [START howto_operator_salesforce_to_gcs]
60
- gcs_upload_task = SalesforceToGcsOperator(
61
- query=QUERY,
62
- include_deleted=True,
63
- bucket_name=GCS_BUCKET,
64
- object_name=GCS_OBJ_PATH,
65
- salesforce_conn_id=SALESFORCE_CONN_ID,
66
- export_format="csv",
67
- coerce_to_timestamp=False,
68
- record_time_added=False,
69
- gcp_conn_id=GCS_CONN_ID,
70
- task_id="upload_to_gcs",
71
- dag=dag,
72
- )
73
- # [END howto_operator_salesforce_to_gcs]
74
-
75
- create_dataset = BigQueryCreateEmptyDatasetOperator(
76
- task_id="create_dataset", dataset_id=DATASET_NAME, project_id=GCP_PROJECT_ID, gcp_conn_id=GCS_CONN_ID
77
- )
78
-
79
- create_table = BigQueryCreateEmptyTableOperator(
80
- task_id="create_table",
81
- dataset_id=DATASET_NAME,
82
- table_id=TABLE_NAME,
83
- schema_fields=[
84
- {"name": "id", "type": "STRING", "mode": "NULLABLE"},
85
- {"name": "name", "type": "STRING", "mode": "NULLABLE"},
86
- {"name": "company", "type": "STRING", "mode": "NULLABLE"},
87
- {"name": "phone", "type": "STRING", "mode": "NULLABLE"},
88
- {"name": "email", "type": "STRING", "mode": "NULLABLE"},
89
- {"name": "createddate", "type": "STRING", "mode": "NULLABLE"},
90
- {"name": "lastmodifieddate", "type": "STRING", "mode": "NULLABLE"},
91
- {"name": "isdeleted", "type": "BOOL", "mode": "NULLABLE"},
92
- ],
93
- )
94
-
95
- load_csv = GCSToBigQueryOperator(
96
- task_id="gcs_to_bq",
97
- bucket=GCS_BUCKET,
98
- source_objects=[GCS_OBJ_PATH],
99
- destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
100
- write_disposition="WRITE_TRUNCATE",
101
- )
102
-
103
- read_data_from_gcs = BigQueryInsertJobOperator(
104
- task_id="read_data_from_gcs",
105
- configuration={
106
- "query": {
107
- "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
108
- "useLegacySql": False,
109
- }
110
- },
111
- )
112
-
113
- delete_bucket = GCSDeleteBucketOperator(
114
- task_id="delete_bucket",
115
- bucket_name=GCS_BUCKET,
116
- )
117
-
118
- delete_dataset = BigQueryDeleteDatasetOperator(
119
- task_id="delete_dataset",
120
- project_id=GCP_PROJECT_ID,
121
- dataset_id=DATASET_NAME,
122
- delete_contents=True,
123
- )
124
-
125
- create_bucket >> gcs_upload_task >> load_csv
126
- create_dataset >> create_table >> load_csv
127
- load_csv >> read_data_from_gcs
128
- read_data_from_gcs >> delete_bucket
129
- read_data_from_gcs >> delete_dataset
@@ -1,16 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.