apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -23,15 +23,6 @@ import json
23
23
  from collections.abc import Sequence
24
24
  from typing import TYPE_CHECKING, Any
25
25
 
26
- from airflow.configuration import conf
27
- from airflow.exceptions import AirflowException
28
- from airflow.models import BaseOperator
29
- from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
30
- from airflow.providers.google.cloud.hooks.gcs import GCSHook
31
- from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
32
- from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
33
- from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
34
- from airflow.utils.helpers import merge_dicts
35
26
  from google.api_core.exceptions import BadRequest, Conflict
36
27
  from google.cloud.bigquery import (
37
28
  DEFAULT_RETRY,
@@ -45,10 +36,21 @@ from google.cloud.bigquery import (
45
36
  )
46
37
  from google.cloud.bigquery.table import EncryptionConfiguration, Table, TableReference
47
38
 
39
+ from airflow.configuration import conf
40
+ from airflow.exceptions import AirflowException
41
+ from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
42
+ from airflow.providers.google.cloud.hooks.gcs import GCSHook
43
+ from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
44
+ from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
45
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
46
+ from airflow.providers.google.version_compat import BaseOperator
47
+ from airflow.utils.helpers import merge_dicts
48
+
48
49
  if TYPE_CHECKING:
49
- from airflow.utils.context import Context
50
50
  from google.api_core.retry import Retry
51
51
 
52
+ from airflow.providers.common.compat.sdk import Context
53
+
52
54
  ALLOWED_FORMATS = [
53
55
  "CSV",
54
56
  "NEWLINE_DELIMITED_JSON",
@@ -56,6 +58,7 @@ ALLOWED_FORMATS = [
56
58
  "GOOGLE_SHEETS",
57
59
  "DATASTORE_BACKUP",
58
60
  "PARQUET",
61
+ "ORC",
59
62
  ]
60
63
 
61
64
 
@@ -141,6 +144,9 @@ class GCSToBigQueryOperator(BaseOperator):
141
144
  partition by field, type and expiration as per API specifications.
142
145
  Note that 'field' is not available in concurrency with
143
146
  dataset.table$partition.
147
+ Ignored if 'range_partitioning' is set.
148
+ :param range_partitioning: configure optional range partitioning fields i.e.
149
+ partition by field and integer interval as per API specifications.
144
150
  :param cluster_fields: Request that the result of this load be stored sorted
145
151
  by one or more columns. BigQuery supports clustering for both partitioned and
146
152
  non-partitioned tables. The order of columns given determines the sort order.
@@ -216,6 +222,7 @@ class GCSToBigQueryOperator(BaseOperator):
216
222
  src_fmt_configs=None,
217
223
  external_table=False,
218
224
  time_partitioning=None,
225
+ range_partitioning=None,
219
226
  cluster_fields=None,
220
227
  autodetect=True,
221
228
  encryption_configuration=None,
@@ -243,6 +250,10 @@ class GCSToBigQueryOperator(BaseOperator):
243
250
  src_fmt_configs = {}
244
251
  if time_partitioning is None:
245
252
  time_partitioning = {}
253
+ if range_partitioning is None:
254
+ range_partitioning = {}
255
+ if range_partitioning and time_partitioning:
256
+ raise ValueError("Only one of time_partitioning or range_partitioning can be set.")
246
257
  self.bucket = bucket
247
258
  self.source_objects = source_objects
248
259
  self.schema_object = schema_object
@@ -260,8 +271,7 @@ class GCSToBigQueryOperator(BaseOperator):
260
271
  f"{source_format} is not a valid source format. "
261
272
  f"Please use one of the following types: {ALLOWED_FORMATS}."
262
273
  )
263
- else:
264
- self.source_format = source_format.upper()
274
+ self.source_format = source_format.upper()
265
275
  self.compression = compression
266
276
  self.create_disposition = create_disposition
267
277
  self.skip_leading_rows = skip_leading_rows
@@ -281,6 +291,7 @@ class GCSToBigQueryOperator(BaseOperator):
281
291
  self.schema_update_options = schema_update_options
282
292
  self.src_fmt_configs = src_fmt_configs
283
293
  self.time_partitioning = time_partitioning
294
+ self.range_partitioning = range_partitioning
284
295
  self.cluster_fields = cluster_fields
285
296
  self.autodetect = autodetect
286
297
  self.encryption_configuration = encryption_configuration
@@ -335,8 +346,9 @@ class GCSToBigQueryOperator(BaseOperator):
335
346
  job_id=self.job_id,
336
347
  dag_id=self.dag_id,
337
348
  task_id=self.task_id,
338
- logical_date=context["logical_date"],
349
+ logical_date=None,
339
350
  configuration=self.configuration,
351
+ run_after=hook.get_run_after_or_logical_date(context),
340
352
  force_rerun=self.force_rerun,
341
353
  )
342
354
 
@@ -371,7 +383,6 @@ class GCSToBigQueryOperator(BaseOperator):
371
383
 
372
384
  BigQueryTableLink.persist(
373
385
  context=context,
374
- task_instance=self,
375
386
  dataset_id=table_obj_api_repr["tableReference"]["datasetId"],
376
387
  project_id=table_obj_api_repr["tableReference"]["projectId"],
377
388
  table_id=table_obj_api_repr["tableReference"]["tableId"],
@@ -404,14 +415,13 @@ class GCSToBigQueryOperator(BaseOperator):
404
415
  f"want to force rerun it consider setting `force_rerun=True`."
405
416
  f"Or, if you want to reattach in this scenario add {job.state} to `reattach_states`"
406
417
  )
407
- else:
408
- # Job already reached state DONE
409
- if job.state == "DONE":
410
- raise AirflowException("Job is already in state DONE. Can not reattach to this job.")
418
+ # Job already reached state DONE
419
+ if job.state == "DONE":
420
+ raise AirflowException("Job is already in state DONE. Can not reattach to this job.")
411
421
 
412
- # We are reattaching to a job
413
- self.log.info("Reattaching to existing Job in state %s", job.state)
414
- self._handle_job_error(job)
422
+ # We are reattaching to a job
423
+ self.log.info("Reattaching to existing Job in state %s", job.state)
424
+ self._handle_job_error(job)
415
425
 
416
426
  job_types = {
417
427
  LoadJob._JOB_TYPE: ["sourceTable", "destinationTable"],
@@ -429,7 +439,6 @@ class GCSToBigQueryOperator(BaseOperator):
429
439
  table = job_configuration[job_type][table_prop]
430
440
  persist_kwargs = {
431
441
  "context": context,
432
- "task_instance": self,
433
442
  "table_id": table,
434
443
  }
435
444
  if not isinstance(table, str):
@@ -484,8 +493,7 @@ class GCSToBigQueryOperator(BaseOperator):
484
493
  if self.max_id_key:
485
494
  self.log.info("Selecting the MAX value from BigQuery column %r...", self.max_id_key)
486
495
  select_command = (
487
- f"SELECT MAX({self.max_id_key}) AS max_value "
488
- f"FROM {self.destination_project_dataset_table}"
496
+ f"SELECT MAX({self.max_id_key}) AS max_value FROM {self.destination_project_dataset_table}"
489
497
  )
490
498
  self.configuration = {
491
499
  "query": {
@@ -505,8 +513,7 @@ class GCSToBigQueryOperator(BaseOperator):
505
513
  f"Could not determine MAX value in column {self.max_id_key} "
506
514
  f"since the default value of 'string_field_n' was set by BQ"
507
515
  )
508
- else:
509
- raise AirflowException(e.message)
516
+ raise AirflowException(e.message)
510
517
  if rows:
511
518
  for row in rows:
512
519
  max_id = row[0] if row[0] else 0
@@ -548,6 +555,7 @@ class GCSToBigQueryOperator(BaseOperator):
548
555
  "quote",
549
556
  "encoding",
550
557
  "preserveAsciiControlCharacters",
558
+ "columnNameCharacterMap",
551
559
  ],
552
560
  "googleSheetsOptions": ["skipLeadingRows"],
553
561
  }
@@ -582,9 +590,11 @@ class GCSToBigQueryOperator(BaseOperator):
582
590
  table_obj_api_repr = table.to_api_repr()
583
591
 
584
592
  self.log.info("Creating external table: %s", self.destination_project_dataset_table)
585
- self.hook.create_empty_table(
593
+ self.hook.create_table(
586
594
  table_resource=table_obj_api_repr,
587
595
  project_id=self.project_id or self.hook.project_id,
596
+ dataset_id=table.dataset_id,
597
+ table_id=table.table_id,
588
598
  location=self.location,
589
599
  exists_ok=True,
590
600
  )
@@ -629,6 +639,8 @@ class GCSToBigQueryOperator(BaseOperator):
629
639
  )
630
640
  if self.time_partitioning:
631
641
  self.configuration["load"].update({"timePartitioning": self.time_partitioning})
642
+ if self.range_partitioning:
643
+ self.configuration["load"].update({"rangePartitioning": self.range_partitioning})
632
644
 
633
645
  if self.cluster_fields:
634
646
  self.configuration["load"].update({"clustering": {"fields": self.cluster_fields}})
@@ -637,17 +649,10 @@ class GCSToBigQueryOperator(BaseOperator):
637
649
  self.configuration["load"]["schema"] = {"fields": self.schema_fields}
638
650
 
639
651
  if self.schema_update_options:
640
- if self.write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
641
- raise ValueError(
642
- "schema_update_options is only "
643
- "allowed if write_disposition is "
644
- "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
645
- )
646
- else:
647
- # To provide backward compatibility
648
- self.schema_update_options = list(self.schema_update_options or [])
649
- self.log.info("Adding experimental 'schemaUpdateOptions': %s", self.schema_update_options)
650
- self.configuration["load"]["schemaUpdateOptions"] = self.schema_update_options
652
+ # To provide backward compatibility
653
+ self.schema_update_options = list(self.schema_update_options or [])
654
+ self.log.info("Adding experimental 'schemaUpdateOptions': %s", self.schema_update_options)
655
+ self.configuration["load"]["schemaUpdateOptions"] = self.schema_update_options
651
656
 
652
657
  if self.max_bad_records:
653
658
  self.configuration["load"]["maxBadRecords"] = self.max_bad_records
@@ -674,11 +679,13 @@ class GCSToBigQueryOperator(BaseOperator):
674
679
  "quote",
675
680
  "encoding",
676
681
  "preserveAsciiControlCharacters",
682
+ "columnNameCharacterMap",
677
683
  ],
678
684
  "DATASTORE_BACKUP": ["projectionFields"],
679
685
  "NEWLINE_DELIMITED_JSON": ["autodetect", "ignoreUnknownValues"],
680
686
  "PARQUET": ["autodetect", "ignoreUnknownValues"],
681
687
  "AVRO": ["useAvroLogicalTypes"],
688
+ "ORC": ["autodetect"],
682
689
  }
683
690
 
684
691
  valid_configs = src_fmt_to_configs_mapping[self.source_format]
@@ -24,13 +24,13 @@ from collections.abc import Sequence
24
24
  from typing import TYPE_CHECKING
25
25
 
26
26
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
27
- from airflow.models import BaseOperator
28
27
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
28
+ from airflow.providers.google.version_compat import BaseOperator
29
29
 
30
30
  WILDCARD = "*"
31
31
 
32
32
  if TYPE_CHECKING:
33
- from airflow.utils.context import Context
33
+ from airflow.providers.common.compat.sdk import Context
34
34
 
35
35
 
36
36
  class GCSToGCSOperator(BaseOperator):
@@ -20,12 +20,14 @@ from collections.abc import Sequence
20
20
  from typing import TYPE_CHECKING
21
21
 
22
22
  from airflow.exceptions import AirflowException
23
- from airflow.models import BaseOperator
24
- from airflow.models.xcom import MAX_XCOM_SIZE
25
23
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
24
+ from airflow.providers.google.version_compat import BaseOperator
26
25
 
27
26
  if TYPE_CHECKING:
28
- from airflow.utils.context import Context
27
+ from airflow.providers.common.compat.sdk import Context
28
+
29
+ # MAX XCOM Size is 48KB, check discussion: https://github.com/apache/airflow/pull/1618#discussion_r68249677
30
+ MAX_XCOM_SIZE = 49344
29
31
 
30
32
 
31
33
  class GCSToLocalFilesystemOperator(BaseOperator):
@@ -26,14 +26,14 @@ from tempfile import NamedTemporaryFile
26
26
  from typing import TYPE_CHECKING
27
27
 
28
28
  from airflow.exceptions import AirflowException
29
- from airflow.models import BaseOperator
30
29
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
30
+ from airflow.providers.google.version_compat import BaseOperator
31
31
  from airflow.providers.sftp.hooks.sftp import SFTPHook
32
32
 
33
33
  WILDCARD = "*"
34
34
 
35
35
  if TYPE_CHECKING:
36
- from airflow.utils.context import Context
36
+ from airflow.providers.common.compat.sdk import Context
37
37
 
38
38
 
39
39
  class GCSToSFTPOperator(BaseOperator):
@@ -80,7 +80,9 @@ class GCSToSFTPOperator(BaseOperator):
80
80
  :param destination_path: The sftp remote path. This is the specified directory path for
81
81
  uploading to the SFTP server.
82
82
  :param keep_directory_structure: (Optional) When set to False the path of the file
83
- on the bucket is recreated within path passed in destination_path.
83
+ on the bucket is recreated within path passed in destination_path.
84
+ :param create_intermediate_dirs: (Optional) When set to True the intermediate directories
85
+ in the specified file path will be created.
84
86
  :param move_object: When move object is True, the object is moved instead
85
87
  of copied to the new location. This is the equivalent of a mv command
86
88
  as opposed to a cp command.
@@ -112,6 +114,7 @@ class GCSToSFTPOperator(BaseOperator):
112
114
  source_object: str,
113
115
  destination_path: str,
114
116
  keep_directory_structure: bool = True,
117
+ create_intermediate_dirs: bool = True,
115
118
  move_object: bool = False,
116
119
  gcp_conn_id: str = "google_cloud_default",
117
120
  sftp_conn_id: str = "ssh_default",
@@ -124,6 +127,7 @@ class GCSToSFTPOperator(BaseOperator):
124
127
  self.source_object = source_object
125
128
  self.destination_path = destination_path
126
129
  self.keep_directory_structure = keep_directory_structure
130
+ self.create_intermediate_dirs = create_intermediate_dirs
127
131
  self.move_object = move_object
128
132
  self.gcp_conn_id = gcp_conn_id
129
133
  self.sftp_conn_id = sftp_conn_id
@@ -190,7 +194,9 @@ class GCSToSFTPOperator(BaseOperator):
190
194
  )
191
195
 
192
196
  dir_path = os.path.dirname(destination_path)
193
- sftp_hook.create_directory(dir_path)
197
+
198
+ if self.create_intermediate_dirs:
199
+ sftp_hook.create_directory(dir_path)
194
200
 
195
201
  with NamedTemporaryFile("w") as tmp:
196
202
  gcs_hook.download(
@@ -19,12 +19,12 @@ from __future__ import annotations
19
19
  from collections.abc import Sequence
20
20
  from typing import TYPE_CHECKING
21
21
 
22
- from airflow.models import BaseOperator
23
22
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
24
23
  from airflow.providers.google.suite.hooks.drive import GoogleDriveHook
24
+ from airflow.providers.google.version_compat import BaseOperator
25
25
 
26
26
  if TYPE_CHECKING:
27
- from airflow.utils.context import Context
27
+ from airflow.providers.common.compat.sdk import Context
28
28
 
29
29
 
30
30
  class GoogleDriveToGCSOperator(BaseOperator):
@@ -99,3 +99,7 @@ class GoogleDriveToGCSOperator(BaseOperator):
99
99
  bucket_name=self.bucket_name, object_name=self.object_name
100
100
  ) as file:
101
101
  gdrive_hook.download_file(file_id=file_metadata["id"], file_handle=file)
102
+
103
+ def dry_run(self):
104
+ """Perform a dry run of the operator."""
105
+ return None
@@ -19,11 +19,11 @@ from __future__ import annotations
19
19
  from collections.abc import Sequence
20
20
  from typing import TYPE_CHECKING
21
21
 
22
- from airflow.models import BaseOperator
23
22
  from airflow.providers.google.suite.hooks.drive import GoogleDriveHook
23
+ from airflow.providers.google.version_compat import BaseOperator
24
24
 
25
25
  if TYPE_CHECKING:
26
- from airflow.utils.context import Context
26
+ from airflow.providers.common.compat.sdk import Context
27
27
 
28
28
 
29
29
  class GoogleDriveToLocalOperator(BaseOperator):
@@ -0,0 +1,193 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ """This module contains operator to move data from HTTP endpoint to GCS."""
19
+
20
+ from __future__ import annotations
21
+
22
+ from functools import cached_property
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ from airflow.providers.google.cloud.hooks.gcs import GCSHook
26
+ from airflow.providers.google.version_compat import BaseOperator
27
+ from airflow.providers.http.hooks.http import HttpHook
28
+
29
+ if TYPE_CHECKING:
30
+ from collections.abc import Sequence
31
+
32
+ from requests.auth import AuthBase
33
+
34
+ from airflow.providers.common.compat.sdk import Context
35
+
36
+
37
+ class HttpToGCSOperator(BaseOperator):
38
+ """
39
+ Calls an endpoint on an HTTP system to execute an action and store the result in GCS.
40
+
41
+ :param http_conn_id: The :ref:`http connection<howto/connection:http>` to run
42
+ the operator against
43
+ :param endpoint: The relative part of the full url. (templated)
44
+ :param method: The HTTP method to use, default = "POST"
45
+ :param data: The data to pass. POST-data in POST/PUT and params
46
+ in the URL for a GET request. (templated)
47
+ :param headers: The HTTP headers to be added to the GET request
48
+ :param response_check: A check against the 'requests' response object.
49
+ The callable takes the response object as the first positional argument
50
+ and optionally any number of keyword arguments available in the context dictionary.
51
+ It should return True for 'pass' and False otherwise.
52
+ :param response_filter: A function allowing you to manipulate the response
53
+ text. e.g. response_filter=lambda response: json.loads(response.text).
54
+ The callable takes the response object as the first positional argument
55
+ and optionally any number of keyword arguments available in the context dictionary.
56
+ :param extra_options: Extra options for the 'requests' library, see the
57
+ 'requests' documentation (options to modify timeout, ssl, etc.)
58
+ :param log_response: Log the response (default: False)
59
+ :param auth_type: The auth type for the service
60
+ :param tcp_keep_alive: Enable TCP Keep Alive for the connection.
61
+ :param tcp_keep_alive_idle: The TCP Keep Alive Idle parameter (corresponds to ``socket.TCP_KEEPIDLE``).
62
+ :param tcp_keep_alive_count: The TCP Keep Alive count parameter (corresponds to ``socket.TCP_KEEPCNT``)
63
+ :param tcp_keep_alive_interval: The TCP Keep Alive interval parameter (corresponds to
64
+ ``socket.TCP_KEEPINTVL``)
65
+ :param gcp_conn_id: The connection ID to use when fetching connection info.
66
+ :param impersonation_chain: Optional service account to impersonate using short-term credentials,
67
+ or chained list of accounts required to get the access_token of the last account in the list,
68
+ which will be impersonated in the request. If set as a string,
69
+ the account must grant the originating account the Service Account Token Creator IAM role.
70
+ If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity,
71
+ with first account from the list granting this role to the originating account.
72
+ :param bucket_name: The bucket to upload to.
73
+ :param object_name: The object name to set when uploading the file.
74
+ :param mime_type: The file mime type set when uploading the file.
75
+ :param gzip: Option to compress local file or file data for upload
76
+ :param encoding: bytes encoding for file data if provided as string
77
+ :param chunk_size: Blob chunk size.
78
+ :param timeout: Request timeout in seconds.
79
+ :param num_max_attempts: Number of attempts to try to upload the file.
80
+ :param metadata: The metadata to be uploaded with the file.
81
+ :param cache_control: Cache-Control metadata field.
82
+ :param user_project: The identifier of the Google Cloud project to bill for the request. Required for Requester Pays buckets.
83
+ """
84
+
85
+ template_fields: Sequence[str] = (
86
+ "http_conn_id",
87
+ "endpoint",
88
+ "data",
89
+ "headers",
90
+ "gcp_conn_id",
91
+ "bucket_name",
92
+ "object_name",
93
+ )
94
+ template_fields_renderers = {"headers": "json", "data": "py"}
95
+ template_ext: Sequence[str] = ()
96
+ ui_color = "#f4a460"
97
+
98
+ def __init__(
99
+ self,
100
+ *,
101
+ endpoint: str | None = None,
102
+ method: str = "GET",
103
+ data: Any = None,
104
+ headers: dict[str, str] | None = None,
105
+ extra_options: dict[str, Any] | None = None,
106
+ http_conn_id: str = "http_default",
107
+ log_response: bool = False,
108
+ auth_type: type[AuthBase] | None = None,
109
+ tcp_keep_alive: bool = True,
110
+ tcp_keep_alive_idle: int = 120,
111
+ tcp_keep_alive_count: int = 20,
112
+ tcp_keep_alive_interval: int = 30,
113
+ gcp_conn_id: str = "google_cloud_default",
114
+ impersonation_chain: str | Sequence[str] | None = None,
115
+ bucket_name: str,
116
+ object_name: str,
117
+ mime_type: str | None = None,
118
+ gzip: bool = False,
119
+ encoding: str | None = None,
120
+ chunk_size: int | None = None,
121
+ timeout: int | None = None,
122
+ num_max_attempts: int = 3,
123
+ metadata: dict | None = None,
124
+ cache_control: str | None = None,
125
+ user_project: str | None = None,
126
+ **kwargs,
127
+ ):
128
+ super().__init__(**kwargs)
129
+ self.http_conn_id = http_conn_id
130
+ self.method = method
131
+ self.endpoint = endpoint
132
+ self.headers = headers or {}
133
+ self.data = data or {}
134
+ self.extra_options = extra_options or {}
135
+ self.log_response = log_response
136
+ self.auth_type = auth_type
137
+ self.tcp_keep_alive = tcp_keep_alive
138
+ self.tcp_keep_alive_idle = tcp_keep_alive_idle
139
+ self.tcp_keep_alive_count = tcp_keep_alive_count
140
+ self.tcp_keep_alive_interval = tcp_keep_alive_interval
141
+ self.gcp_conn_id = gcp_conn_id
142
+ self.impersonation_chain = impersonation_chain
143
+ self.bucket_name = bucket_name
144
+ self.object_name = object_name
145
+ self.mime_type = mime_type
146
+ self.gzip = gzip
147
+ self.encoding = encoding
148
+ self.chunk_size = chunk_size
149
+ self.timeout = timeout
150
+ self.num_max_attempts = num_max_attempts
151
+ self.metadata = metadata
152
+ self.cache_control = cache_control
153
+ self.user_project = user_project
154
+
155
+ @cached_property
156
+ def http_hook(self) -> HttpHook:
157
+ """Create and return an HttpHook."""
158
+ return HttpHook(
159
+ self.method,
160
+ http_conn_id=self.http_conn_id,
161
+ auth_type=self.auth_type,
162
+ tcp_keep_alive=self.tcp_keep_alive,
163
+ tcp_keep_alive_idle=self.tcp_keep_alive_idle,
164
+ tcp_keep_alive_count=self.tcp_keep_alive_count,
165
+ tcp_keep_alive_interval=self.tcp_keep_alive_interval,
166
+ )
167
+
168
+ @cached_property
169
+ def gcs_hook(self) -> GCSHook:
170
+ """Create and return an GCSHook."""
171
+ return GCSHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
172
+
173
+ def execute(self, context: Context):
174
+ self.log.info("Calling HTTP method")
175
+ response = self.http_hook.run(
176
+ endpoint=self.endpoint, data=self.data, headers=self.headers, extra_options=self.extra_options
177
+ )
178
+
179
+ self.log.info("Uploading to GCS")
180
+ self.gcs_hook.upload(
181
+ data=response.content,
182
+ bucket_name=self.bucket_name,
183
+ object_name=self.object_name,
184
+ mime_type=self.mime_type,
185
+ gzip=self.gzip,
186
+ encoding=self.encoding or response.encoding,
187
+ chunk_size=self.chunk_size,
188
+ timeout=self.timeout,
189
+ num_max_attempts=self.num_max_attempts,
190
+ metadata=self.metadata,
191
+ cache_control=self.cache_control,
192
+ user_project=self.user_project,
193
+ )
@@ -24,11 +24,11 @@ from collections.abc import Sequence
24
24
  from glob import glob
25
25
  from typing import TYPE_CHECKING
26
26
 
27
- from airflow.models import BaseOperator
28
27
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
28
+ from airflow.providers.google.version_compat import BaseOperator
29
29
 
30
30
  if TYPE_CHECKING:
31
- from airflow.utils.context import Context
31
+ from airflow.providers.common.compat.sdk import Context
32
32
 
33
33
 
34
34
  class LocalFilesystemToGCSOperator(BaseOperator):
@@ -67,7 +67,7 @@ class MSSQLToGCSOperator(BaseSQLToGCSOperator):
67
67
 
68
68
  ui_color = "#e0a98c"
69
69
 
70
- type_map = {2: "BOOLEAN", 3: "INTEGER", 4: "TIMESTAMP", 5: "NUMERIC"}
70
+ type_map = {2: "BOOL", 3: "INTEGER", 4: "TIMESTAMP", 5: "NUMERIC"}
71
71
 
72
72
  def __init__(
73
73
  self,
@@ -21,12 +21,17 @@ import base64
21
21
  import calendar
22
22
  from datetime import date, datetime, timedelta
23
23
  from decimal import Decimal
24
+ from functools import cached_property
25
+ from typing import TYPE_CHECKING
24
26
 
25
27
  import oracledb
26
28
 
27
29
  from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
28
30
  from airflow.providers.oracle.hooks.oracle import OracleHook
29
31
 
32
+ if TYPE_CHECKING:
33
+ from airflow.providers.openlineage.extractors import OperatorLineage
34
+
30
35
 
31
36
  class OracleToGCSOperator(BaseSQLToGCSOperator):
32
37
  """
@@ -46,15 +51,15 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
46
51
  ui_color = "#a0e08c"
47
52
 
48
53
  type_map = {
49
- oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL", # type: ignore
50
- oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL", # type: ignore
51
- oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER", # type: ignore
52
- oracledb.DB_TYPE_BOOLEAN: "BOOLEAN", # type: ignore
53
- oracledb.DB_TYPE_DATE: "TIMESTAMP", # type: ignore
54
- oracledb.DB_TYPE_NUMBER: "NUMERIC", # type: ignore
55
- oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP", # type: ignore
56
- oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP", # type: ignore
57
- oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP", # type: ignore
54
+ oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL",
55
+ oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL",
56
+ oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER",
57
+ oracledb.DB_TYPE_BOOLEAN: "BOOLEAN",
58
+ oracledb.DB_TYPE_DATE: "TIMESTAMP",
59
+ oracledb.DB_TYPE_NUMBER: "NUMERIC",
60
+ oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP",
61
+ oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP",
62
+ oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP",
58
63
  }
59
64
 
60
65
  def __init__(self, *, oracle_conn_id="oracle_default", ensure_utc=False, **kwargs):
@@ -62,10 +67,13 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
62
67
  self.ensure_utc = ensure_utc
63
68
  self.oracle_conn_id = oracle_conn_id
64
69
 
70
+ @cached_property
71
+ def db_hook(self) -> OracleHook:
72
+ return OracleHook(oracle_conn_id=self.oracle_conn_id)
73
+
65
74
  def query(self):
66
75
  """Query Oracle and returns a cursor to the results."""
67
- oracle = OracleHook(oracle_conn_id=self.oracle_conn_id)
68
- conn = oracle.get_conn()
76
+ conn = self.db_hook.get_conn()
69
77
  cursor = conn.cursor()
70
78
  if self.ensure_utc:
71
79
  # Ensure TIMESTAMP results are in UTC
@@ -121,3 +129,20 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
121
129
  else:
122
130
  value = base64.standard_b64encode(value).decode("ascii")
123
131
  return value
132
+
133
+ def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
134
+ from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
135
+ from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
136
+ from airflow.providers.openlineage.extractors import OperatorLineage
137
+
138
+ sql_parsing_result = get_openlineage_facets_with_sql(
139
+ hook=self.db_hook,
140
+ sql=self.sql,
141
+ conn_id=self.oracle_conn_id,
142
+ database=self.db_hook.service_name or self.db_hook.sid,
143
+ )
144
+ gcs_output_datasets = self._get_openlineage_output_datasets()
145
+ if sql_parsing_result:
146
+ sql_parsing_result.outputs = gcs_output_datasets
147
+ return sql_parsing_result
148
+ return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})