apache-airflow-providers-google: 15.1.0rc1 → 19.1.0rc1 (py3-none-any.whl)

This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
Files changed (234)
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -5
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/bigquery.py +166 -281
  10. airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
  11. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  12. airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
  13. airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
  14. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
  15. airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
  16. airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
  17. airflow/providers/google/cloud/hooks/dataflow.py +71 -94
  18. airflow/providers/google/cloud/hooks/datafusion.py +1 -1
  19. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  20. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  21. airflow/providers/google/cloud/hooks/dataproc.py +72 -71
  22. airflow/providers/google/cloud/hooks/gcs.py +111 -14
  23. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  24. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  25. airflow/providers/google/cloud/hooks/looker.py +6 -1
  26. airflow/providers/google/cloud/hooks/mlengine.py +3 -2
  27. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  28. airflow/providers/google/cloud/hooks/spanner.py +73 -8
  29. airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
  30. airflow/providers/google/cloud/hooks/translate.py +1 -1
  31. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
  32. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
  33. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
  34. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  35. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
  36. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  37. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  38. airflow/providers/google/cloud/hooks/vision.py +2 -2
  39. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  40. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  41. airflow/providers/google/cloud/links/base.py +77 -13
  42. airflow/providers/google/cloud/links/bigquery.py +0 -47
  43. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  44. airflow/providers/google/cloud/links/bigtable.py +0 -48
  45. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  46. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  47. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  48. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  49. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  50. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  51. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  52. airflow/providers/google/cloud/links/compute.py +0 -58
  53. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  54. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  55. airflow/providers/google/cloud/links/dataflow.py +0 -34
  56. airflow/providers/google/cloud/links/dataform.py +0 -64
  57. airflow/providers/google/cloud/links/datafusion.py +1 -96
  58. airflow/providers/google/cloud/links/dataplex.py +0 -154
  59. airflow/providers/google/cloud/links/dataprep.py +0 -24
  60. airflow/providers/google/cloud/links/dataproc.py +11 -95
  61. airflow/providers/google/cloud/links/datastore.py +0 -31
  62. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  63. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  64. airflow/providers/google/cloud/links/mlengine.py +0 -70
  65. airflow/providers/google/cloud/links/pubsub.py +0 -32
  66. airflow/providers/google/cloud/links/spanner.py +0 -33
  67. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  68. airflow/providers/google/cloud/links/translate.py +17 -187
  69. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  70. airflow/providers/google/cloud/links/workflows.py +0 -52
  71. airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
  72. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  73. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  74. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  75. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  76. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  77. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  78. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  79. airflow/providers/google/cloud/operators/alloy_db.py +70 -55
  80. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  81. airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
  82. airflow/providers/google/cloud/operators/bigtable.py +36 -7
  83. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  84. airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
  85. airflow/providers/google/cloud/operators/cloud_build.py +75 -32
  86. airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
  87. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  88. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  89. airflow/providers/google/cloud/operators/cloud_run.py +23 -5
  90. airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
  91. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
  92. airflow/providers/google/cloud/operators/compute.py +8 -40
  93. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  94. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  95. airflow/providers/google/cloud/operators/dataform.py +15 -5
  96. airflow/providers/google/cloud/operators/datafusion.py +41 -20
  97. airflow/providers/google/cloud/operators/dataplex.py +193 -109
  98. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  99. airflow/providers/google/cloud/operators/dataproc.py +78 -35
  100. airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
  101. airflow/providers/google/cloud/operators/datastore.py +22 -6
  102. airflow/providers/google/cloud/operators/dlp.py +6 -29
  103. airflow/providers/google/cloud/operators/functions.py +16 -7
  104. airflow/providers/google/cloud/operators/gcs.py +10 -8
  105. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  106. airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
  107. airflow/providers/google/cloud/operators/looker.py +1 -1
  108. airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
  109. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  110. airflow/providers/google/cloud/operators/pubsub.py +60 -14
  111. airflow/providers/google/cloud/operators/spanner.py +25 -12
  112. airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
  113. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  114. airflow/providers/google/cloud/operators/tasks.py +1 -12
  115. airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
  116. airflow/providers/google/cloud/operators/translate.py +40 -16
  117. airflow/providers/google/cloud/operators/translate_speech.py +1 -2
  118. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  119. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
  120. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
  121. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  122. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  123. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  124. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  125. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  126. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
  127. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  128. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
  129. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  130. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  131. airflow/providers/google/cloud/operators/vision.py +2 -2
  132. airflow/providers/google/cloud/operators/workflows.py +18 -15
  133. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  134. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
  135. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  136. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
  137. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
  138. airflow/providers/google/cloud/sensors/dataflow.py +26 -9
  139. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  140. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  141. airflow/providers/google/cloud/sensors/dataplex.py +2 -2
  142. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  143. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  144. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
  145. airflow/providers/google/cloud/sensors/gcs.py +4 -4
  146. airflow/providers/google/cloud/sensors/looker.py +2 -2
  147. airflow/providers/google/cloud/sensors/pubsub.py +4 -4
  148. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  149. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  150. airflow/providers/google/cloud/sensors/workflows.py +2 -2
  151. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  152. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  153. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  154. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  155. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
  156. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  157. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  158. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  159. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  160. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  161. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
  162. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
  163. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
  164. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  165. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  166. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  167. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  168. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  169. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  170. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  171. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  172. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  173. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  174. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  175. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  176. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
  177. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  178. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  179. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  180. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  181. airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
  182. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  183. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
  184. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  185. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  186. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  187. airflow/providers/google/cloud/triggers/dataproc.py +122 -52
  188. airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
  189. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  190. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  191. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  192. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  193. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  194. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  195. airflow/providers/google/common/deprecated.py +2 -1
  196. airflow/providers/google/common/hooks/base_google.py +27 -8
  197. airflow/providers/google/common/links/storage.py +0 -22
  198. airflow/providers/google/common/utils/get_secret.py +31 -0
  199. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  200. airflow/providers/google/firebase/operators/firestore.py +2 -2
  201. airflow/providers/google/get_provider_info.py +56 -52
  202. airflow/providers/google/go_module_utils.py +35 -3
  203. airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
  204. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  205. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  206. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  207. airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
  208. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  209. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  210. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  211. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  212. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
  213. airflow/providers/google/suite/hooks/calendar.py +1 -1
  214. airflow/providers/google/suite/hooks/sheets.py +15 -1
  215. airflow/providers/google/suite/operators/sheets.py +8 -3
  216. airflow/providers/google/suite/sensors/drive.py +2 -2
  217. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  218. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  219. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  220. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  221. airflow/providers/google/version_compat.py +15 -1
  222. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
  223. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  224. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  225. airflow/providers/google/cloud/hooks/automl.py +0 -673
  226. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  227. airflow/providers/google/cloud/links/automl.py +0 -193
  228. airflow/providers/google/cloud/operators/automl.py +0 -1362
  229. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  230. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  231. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  232. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
  233. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  234. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0

airflow/providers/google/cloud/transfers/oracle_to_gcs.py
@@ -21,12 +21,17 @@ import base64
 import calendar
 from datetime import date, datetime, timedelta
 from decimal import Decimal
+from functools import cached_property
+from typing import TYPE_CHECKING

 import oracledb

 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.oracle.hooks.oracle import OracleHook

+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+

 class OracleToGCSOperator(BaseSQLToGCSOperator):
     """
@@ -46,15 +51,15 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
     ui_color = "#a0e08c"

     type_map = {
-        oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL",  # type: ignore
-        oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL",  # type: ignore
-        oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER",  # type: ignore
-        oracledb.DB_TYPE_BOOLEAN: "BOOLEAN",  # type: ignore
-        oracledb.DB_TYPE_DATE: "TIMESTAMP",  # type: ignore
-        oracledb.DB_TYPE_NUMBER: "NUMERIC",  # type: ignore
-        oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP",  # type: ignore
-        oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP",  # type: ignore
-        oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP",  # type: ignore
+        oracledb.DB_TYPE_BINARY_DOUBLE: "DECIMAL",
+        oracledb.DB_TYPE_BINARY_FLOAT: "DECIMAL",
+        oracledb.DB_TYPE_BINARY_INTEGER: "INTEGER",
+        oracledb.DB_TYPE_BOOLEAN: "BOOLEAN",
+        oracledb.DB_TYPE_DATE: "TIMESTAMP",
+        oracledb.DB_TYPE_NUMBER: "NUMERIC",
+        oracledb.DB_TYPE_TIMESTAMP: "TIMESTAMP",
+        oracledb.DB_TYPE_TIMESTAMP_LTZ: "TIMESTAMP",
+        oracledb.DB_TYPE_TIMESTAMP_TZ: "TIMESTAMP",
     }

     def __init__(self, *, oracle_conn_id="oracle_default", ensure_utc=False, **kwargs):
@@ -62,10 +67,13 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
         self.ensure_utc = ensure_utc
         self.oracle_conn_id = oracle_conn_id

+    @cached_property
+    def db_hook(self) -> OracleHook:
+        return OracleHook(oracle_conn_id=self.oracle_conn_id)
+
     def query(self):
         """Query Oracle and returns a cursor to the results."""
-        oracle = OracleHook(oracle_conn_id=self.oracle_conn_id)
-        conn = oracle.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         if self.ensure_utc:
             # Ensure TIMESTAMP results are in UTC
@@ -121,3 +129,20 @@ class OracleToGCSOperator(BaseSQLToGCSOperator):
         else:
             value = base64.standard_b64encode(value).decode("ascii")
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.oracle_conn_id,
+            database=self.db_hook.service_name or self.db_hook.sid,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
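
OracleToGCSOperator now exposes OpenLineage facets: the SQL is parsed via get_openlineage_facets_with_sql and the uploaded GCS file becomes the output dataset. A minimal usage sketch (task id, connection id, bucket, and query are illustrative, not taken from this release):

from airflow.providers.google.cloud.transfers.oracle_to_gcs import OracleToGCSOperator

export_orders = OracleToGCSOperator(
    task_id="export_orders",  # hypothetical task id
    oracle_conn_id="oracle_default",
    sql="SELECT order_id, amount FROM orders",  # hypothetical query
    bucket="example-bucket",  # hypothetical bucket
    filename="exports/orders_{}.json",
    ensure_utc=True,
)

Lineage collection is automatic when the openlineage provider is installed; no extra operator arguments are required.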

airflow/providers/google/cloud/transfers/postgres_to_gcs.py
@@ -31,7 +31,7 @@ import pendulum
 from slugify import slugify

 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
-from airflow.providers.postgres.hooks.postgres import PostgresHook
+from airflow.providers.postgres.hooks.postgres import USE_PSYCOPG3, PostgresHook

 if TYPE_CHECKING:
     from airflow.providers.openlineage.extractors import OperatorLineage
@@ -52,9 +52,20 @@ class _PostgresServerSideCursorDecorator:
         self.initialized = False

     def __iter__(self):
+        """Make the cursor iterable."""
         return self

     def __next__(self):
+        """Fetch next row from the cursor."""
+        if USE_PSYCOPG3:
+            if self.rows:
+                return self.rows.pop()
+            self.initialized = True
+            row = self.cursor.fetchone()
+            if row is None:
+                raise StopIteration
+            return row
+        # psycopg2
         if self.rows:
             return self.rows.pop()
         self.initialized = True
@@ -141,13 +152,29 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         return PostgresHook(postgres_conn_id=self.postgres_conn_id)

     def query(self):
-        """Query Postgres and returns a cursor to the results."""
+        """Execute the query and return a cursor."""
         conn = self.db_hook.get_conn()
-        cursor = conn.cursor(name=self._unique_name())
-        cursor.execute(self.sql, self.parameters)
-        if self.use_server_side_cursor:
-            cursor.itersize = self.cursor_itersize
-            return _PostgresServerSideCursorDecorator(cursor)
+
+        if USE_PSYCOPG3:
+            from psycopg.types.json import register_default_adapters
+
+            # Register JSON handlers for this connection if not already done
+            register_default_adapters(conn)
+
+            if self.use_server_side_cursor:
+                cursor_name = f"airflow_{self.task_id.replace('-', '_')}_{uuid.uuid4().hex}"[:63]
+                cursor = conn.cursor(name=cursor_name)
+                cursor.itersize = self.cursor_itersize
+                cursor.execute(self.sql, self.parameters)
+                return _PostgresServerSideCursorDecorator(cursor)
+            cursor = conn.cursor()
+            cursor.execute(self.sql, self.parameters)
+        else:
+            cursor = conn.cursor(name=self._unique_name())
+            cursor.execute(self.sql, self.parameters)
+            if self.use_server_side_cursor:
+                cursor.itersize = self.cursor_itersize
+                return _PostgresServerSideCursorDecorator(cursor)
         return cursor

     def field_to_bigquery(self, field) -> dict[str, str]:
@@ -182,8 +209,14 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
                 hours=formatted_time.tm_hour, minutes=formatted_time.tm_min, seconds=formatted_time.tm_sec
             )
             return str(time_delta)
-        if stringify_dict and isinstance(value, dict):
-            return json.dumps(value)
+        if stringify_dict:
+            if USE_PSYCOPG3:
+                from psycopg.types.json import Json
+
+                if isinstance(value, (dict, Json)):
+                    return json.dumps(value)
+            elif isinstance(value, dict):
+                return json.dumps(value)
         if isinstance(value, Decimal):
             return float(value)
         return value
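
Because query() now branches on USE_PSYCOPG3, server-side cursors work with both psycopg2 and psycopg 3 connections. Invocation is unchanged; a sketch with illustrative ids and names:

from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator

export_events = PostgresToGCSOperator(
    task_id="export_events",  # hypothetical task id
    postgres_conn_id="postgres_default",
    sql="SELECT * FROM events",  # hypothetical query
    bucket="example-bucket",  # hypothetical bucket
    filename="exports/events_{}.csv",
    export_format="csv",
    use_server_side_cursor=True,  # exercises the named-cursor branches above
    cursor_itersize=2000,
)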

airflow/providers/google/cloud/transfers/s3_to_gcs.py
@@ -57,7 +57,7 @@ except ImportError:
     from airflow.providers.amazon.aws.operators.s3_list import S3ListOperator  # type: ignore[no-redef]

 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context


 class S3ToGCSOperator(S3ListOperator):
@@ -181,21 +181,27 @@ class S3ToGCSOperator(S3ListOperator):
                 'The destination Google Cloud Storage path must end with a slash "/" or be empty.'
             )

-    def execute(self, context: Context):
-        self._check_inputs()
+    def _get_files(self, context: Context, gcs_hook: GCSHook) -> list[str]:
         # use the super method to list all the files in an S3 bucket/key
         s3_objects = super().execute(context)

+        if not self.replace:
+            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
+
+        return s3_objects
+
+    def execute(self, context: Context):
+        self._check_inputs()
         gcs_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.google_impersonation_chain,
         )
-        if not self.replace:
-            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
-
         s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
+
+        s3_objects = self._get_files(context, gcs_hook)
         if not s3_objects:
             self.log.info("In sync, no files needed to be uploaded to Google Cloud Storage")
+
         elif self.deferrable:
             self.transfer_files_async(s3_objects, gcs_hook, s3_hook)
         else:
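
The refactor isolates listing in _get_files, so execute() now reads: validate inputs, build hooks, list (optionally excluding objects already present in GCS), then upload synchronously or defer. Usage is unchanged; an illustrative sketch (bucket names and ids are made up):

from airflow.providers.google.cloud.transfers.s3_to_gcs import S3ToGCSOperator

sync_s3_to_gcs = S3ToGCSOperator(
    task_id="sync_s3_to_gcs",  # hypothetical task id
    bucket="example-s3-bucket",  # hypothetical source bucket
    prefix="data/",
    dest_gcs="gs://example-gcs-bucket/data/",  # must end with "/" per _check_inputs
    replace=False,  # routes through exclude_existing_objects via _get_files
    deferrable=True,  # hands the copy to transfer_files_async
)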

airflow/providers/google/cloud/transfers/salesforce_to_gcs.py
@@ -21,12 +21,12 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.salesforce.hooks.salesforce import SalesforceHook

 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context


 class SalesforceToGcsOperator(BaseOperator):

airflow/providers/google/cloud/transfers/sftp_to_gcs.py
@@ -26,12 +26,12 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING

 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.sftp.hooks.sftp import SFTPHook

 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context


 WILDCARD = "*"
@@ -78,6 +78,8 @@ class SFTPToGCSOperator(BaseOperator):
         then uploads (may require significant disk space).
         When ``True``, the file streams directly without using local disk.
         Defaults to ``False``.
+    :param fail_on_file_not_exist: If True, operator fails when file does not exist,
+        if False, operator will not fail and skips transfer. Default is True.
     """

     template_fields: Sequence[str] = (
@@ -101,6 +103,7 @@
         impersonation_chain: str | Sequence[str] | None = None,
         sftp_prefetch: bool = True,
         use_stream: bool = False,
+        fail_on_file_not_exist: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -116,6 +119,7 @@
         self.impersonation_chain = impersonation_chain
         self.sftp_prefetch = sftp_prefetch
         self.use_stream = use_stream
+        self.fail_on_file_not_exist = fail_on_file_not_exist

     @cached_property
     def sftp_hook(self):
@@ -156,7 +160,13 @@
             destination_object = (
                 self.destination_path if self.destination_path else self.source_path.rsplit("/", 1)[1]
             )
-            self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)
+            try:
+                self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)
+            except FileNotFoundError as e:
+                if self.fail_on_file_not_exist:
+                    raise e
+                self.log.info("File %s not found on SFTP server. Skipping transfer.", self.source_path)
+                return

     def _copy_single_object(
         self,
@@ -172,7 +182,6 @@
             self.destination_bucket,
             destination_object,
         )
-
         if self.use_stream:
             dest_bucket = gcs_hook.get_bucket(self.destination_bucket)
             dest_blob = dest_bucket.blob(destination_object)
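
With the new fail_on_file_not_exist flag, a missing source file can be logged and skipped instead of failing the task. A sketch (paths and ids are illustrative):

from airflow.providers.google.cloud.transfers.sftp_to_gcs import SFTPToGCSOperator

copy_report = SFTPToGCSOperator(
    task_id="copy_report",  # hypothetical task id
    source_path="/reports/daily.csv",  # hypothetical single file, no wildcard
    destination_bucket="example-bucket",  # hypothetical bucket
    destination_path="reports/daily.csv",
    fail_on_file_not_exist=False,  # log and skip instead of raising FileNotFoundError
)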

airflow/providers/google/suite/transfers/sheets_to_gcs.py
@@ -21,12 +21,12 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.suite.hooks.sheets import GSheetsHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context


 class GoogleSheetsToGCSOperator(BaseOperator):
@@ -130,5 +130,5 @@ class GoogleSheetsToGCSOperator(BaseOperator):
             gcs_path_to_file = self._upload_data(gcs_hook, sheet_hook, sheet_range, data)
             destination_array.append(gcs_path_to_file)

-        self.xcom_push(context, "destination_objects", destination_array)
+        context["ti"].xcom_push(key="destination_objects", value=destination_array)
         return destination_array
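
The move from the removed self.xcom_push helper to context["ti"].xcom_push keeps the XCom key stable, so downstream tasks read it exactly as before. A sketch of a consumer (the upstream task id is illustrative):

from airflow.decorators import task

@task
def report_uploaded_files(ti=None):
    # Same "destination_objects" key the operator pushes above;
    # "sheets_to_gcs" is a hypothetical upstream task id.
    paths = ti.xcom_pull(task_ids="sheets_to_gcs", key="destination_objects")
    print(f"Uploaded {len(paths)} sheet range(s): {paths}")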

airflow/providers/google/cloud/transfers/sql_to_gcs.py
@@ -30,12 +30,12 @@ from typing import TYPE_CHECKING, Any
 import pyarrow as pa
 import pyarrow.parquet as pq

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.providers.common.compat.openlineage.facet import OutputDataset
-    from airflow.utils.context import Context
+    from airflow.providers.common.compat.sdk import Context


 class BaseSQLToGCSOperator(BaseOperator):
@@ -295,21 +295,21 @@ class BaseSQLToGCSOperator(BaseOperator):

             # Proceed to write the row to the localfile
             if self.export_format == "csv":
-                row = self.convert_types(schema, col_type_dict, row)
+                row2 = self.convert_types(schema, col_type_dict, row)
                 if self.null_marker is not None:
-                    row = [value or self.null_marker for value in row]
-                csv_writer.writerow(row)
+                    row2 = [value or self.null_marker for value in row2]
+                csv_writer.writerow(row2)
             elif self.export_format == "parquet":
-                row = self.convert_types(schema, col_type_dict, row)
+                row2 = self.convert_types(schema, col_type_dict, row)
                 if self.null_marker is not None:
-                    row = [value or self.null_marker for value in row]
-                rows_buffer.append(row)
+                    row2 = [value or self.null_marker for value in row2]
+                rows_buffer.append(row2)
                 if len(rows_buffer) >= self.parquet_row_group_size:
                     self._write_rows_to_parquet(parquet_writer, rows_buffer)
                     rows_buffer = []
             else:
-                row = self.convert_types(schema, col_type_dict, row)
-                row_dict = dict(zip(schema, row))
+                row2 = self.convert_types(schema, col_type_dict, row)
+                row_dict = dict(zip(schema, row2))

                 json.dump(row_dict, tmp_file_handle, sort_keys=True, ensure_ascii=False)

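All of the transfer operators in this diff share the BaseSQLToGCSOperator contract: a subclass supplies a cursor, a schema-field mapper, and a per-value converter, while the base class handles chunking, schema files, and the GCS upload. A minimal illustrative subclass (the class and its type mapping are invented for this sketch, not part of the provider):

import sqlite3

from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator


class SqliteToGCSOperator(BaseSQLToGCSOperator):  # hypothetical subclass
    def query(self):
        # Return a DB-API cursor positioned over the results of self.sql.
        conn = sqlite3.connect(":memory:")
        cursor = conn.cursor()
        cursor.execute(self.sql)
        return cursor

    def field_to_bigquery(self, field) -> dict[str, str]:
        # Map one cursor.description entry to a BigQuery schema field.
        return {"name": field[0], "type": "STRING", "mode": "NULLABLE"}

    def convert_type(self, value, schema_type, **kwargs):
        # Coerce driver-specific values into JSON/CSV/Parquet-safe ones.
        return value if value is None else str(value)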

airflow/providers/google/cloud/triggers/bigquery.py
@@ -22,17 +22,21 @@ from typing import TYPE_CHECKING, Any, SupportsAbs

 from aiohttp import ClientSession
 from aiohttp.client_exceptions import ClientResponseError
+from asgiref.sync import sync_to_async

 from airflow.exceptions import AirflowException
-from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook, BigQueryTableAsyncHook
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.triggers.base import BaseTrigger, TriggerEvent
-from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState

 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session

+if not AIRFLOW_V_3_0_PLUS:
+    from airflow.models.taskinstance import TaskInstance
+    from airflow.utils.session import provide_session
+


 class BigQueryInsertJobTrigger(BaseTrigger):
@@ -97,16 +101,39 @@ class BigQueryInsertJobTrigger(BaseTrigger):
             },
         )

-    @provide_session
-    def get_task_instance(self, session: Session) -> TaskInstance:
-        query = session.query(TaskInstance).filter(
-            TaskInstance.dag_id == self.task_instance.dag_id,
-            TaskInstance.task_id == self.task_instance.task_id,
-            TaskInstance.run_id == self.task_instance.run_id,
-            TaskInstance.map_index == self.task_instance.map_index,
+    if not AIRFLOW_V_3_0_PLUS:
+
+        @provide_session
+        def get_task_instance(self, session: Session) -> TaskInstance:
+            query = session.query(TaskInstance).filter(
+                TaskInstance.dag_id == self.task_instance.dag_id,
+                TaskInstance.task_id == self.task_instance.task_id,
+                TaskInstance.run_id == self.task_instance.run_id,
+                TaskInstance.map_index == self.task_instance.map_index,
+            )
+            task_instance = query.one_or_none()
+            if task_instance is None:
+                raise AirflowException(
+                    "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                    self.task_instance.dag_id,
+                    self.task_instance.task_id,
+                    self.task_instance.run_id,
+                    self.task_instance.map_index,
+                )
+            return task_instance
+
+    async def get_task_state(self):
+        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
+
+        task_states_response = await sync_to_async(RuntimeTaskInstance.get_task_states)(
+            dag_id=self.task_instance.dag_id,
+            task_ids=[self.task_instance.task_id],
+            run_ids=[self.task_instance.run_id],
+            map_index=self.task_instance.map_index,
         )
-        task_instance = query.one_or_none()
-        if task_instance is None:
+        try:
+            task_state = task_states_response[self.task_instance.run_id][self.task_instance.task_id]
+        except Exception:
             raise AirflowException(
                 "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
                 self.task_instance.dag_id,
@@ -114,20 +141,24 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 self.task_instance.run_id,
                 self.task_instance.map_index,
             )
-        return task_instance
+        return task_state

-    def safe_to_cancel(self) -> bool:
+    async def safe_to_cancel(self) -> bool:
         """
         Whether it is safe to cancel the external job which is being executed by this trigger.

         This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
         Because in those cases, we should NOT cancel the external job.
         """
-        # Database query is needed to get the latest state of the task instance.
-        task_instance = self.get_task_instance()  # type: ignore[call-arg]
-        return task_instance.state != TaskInstanceState.DEFERRED
-
-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+        if AIRFLOW_V_3_0_PLUS:
+            task_state = await self.get_task_state()
+        else:
+            # Database query is needed to get the latest state of the task instance.
+            task_instance = self.get_task_instance()  # type: ignore[call-arg]
+            task_state = task_instance.state
+        return task_state != TaskInstanceState.DEFERRED
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -136,6 +167,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     job_id=self.job_id, project_id=self.project_id, location=self.location
                 )
                 if job_status["status"] == "success":
+                    self.log.info("BigQuery Job succeeded")
                     yield TriggerEvent(
                         {
                             "job_id": self.job_id,
@@ -145,7 +177,13 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     )
                     return
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    self.log.info("BigQuery Job failed: %s", job_status)
+                    yield TriggerEvent(
+                        {
+                            "status": job_status["status"],
+                            "message": job_status["message"],
+                        }
+                    )
                     return
                 else:
                     self.log.info(
@@ -155,7 +193,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     )
                     await asyncio.sleep(self.poll_interval)
         except asyncio.CancelledError:
-            if self.job_id and self.cancel_on_kill and self.safe_to_cancel():
+            if self.job_id and self.cancel_on_kill and await self.safe_to_cancel():
                 self.log.info(
                     "The job is safe to cancel the as airflow TaskInstance is not in deferred state."
                 )
@@ -165,9 +203,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                     self.location,
                     self.job_id,
                 )
-                await hook.cancel_job(  # type: ignore[union-attr]
-                    job_id=self.job_id, project_id=self.project_id, location=self.location
-                )
+                await hook.cancel_job(job_id=self.job_id, project_id=self.project_id, location=self.location)
             else:
                 self.log.info(
                     "Trigger may have shutdown. Skipping to cancel job because the airflow "
@@ -204,7 +240,7 @@ class BigQueryCheckTrigger(BigQueryInsertJobTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -281,7 +317,7 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent with response data."""
         hook = self._get_async_hook()
         try:
@@ -305,7 +341,12 @@ class BigQueryGetDataTrigger(BigQueryInsertJobTrigger):
                     )
                     return
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    yield TriggerEvent(
+                        {
+                            "status": job_status["status"],
+                            "message": job_status["message"],
+                        }
+                    )
                     return
                 else:
                     self.log.info(
@@ -406,7 +447,7 @@ class BigQueryIntervalCheckTrigger(BigQueryInsertJobTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -554,7 +595,7 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current job execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try:
@@ -564,9 +605,9 @@ class BigQueryValueCheckTrigger(BigQueryInsertJobTrigger):
                 if response_from_hook["status"] == "success":
                     query_results = await hook.get_job_output(job_id=self.job_id, project_id=self.project_id)
                     records = hook.get_records(query_results)
-                    records = records.pop(0) if records else None
-                    hook.value_check(self.sql, self.pass_value, records, self.tolerance)
-                    yield TriggerEvent({"status": "success", "message": "Job completed", "records": records})
+                    _records = records.pop(0) if records else None
+                    hook.value_check(self.sql, self.pass_value, _records, self.tolerance)
+                    yield TriggerEvent({"status": "success", "message": "Job completed", "records": _records})
                     return
                 elif response_from_hook["status"] == "pending":
                     self.log.info("Query is still running...")
@@ -640,7 +681,7 @@ class BigQueryTableExistenceTrigger(BaseTrigger):
             gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Will run until the table exists in the Google Big Query."""
         try:
             while True:
@@ -723,7 +764,7 @@ class BigQueryTablePartitionExistenceTrigger(BigQueryTableExistenceTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Will run until the table exists in the Google Big Query."""
         hook = BigQueryAsyncHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
         job_id = None
@@ -744,7 +785,7 @@ class BigQueryTablePartitionExistenceTrigger(BigQueryTableExistenceTrigger):
                         return
                     job_id = None
                 elif job_status["status"] == "error":
-                    yield TriggerEvent(job_status)
+                    yield TriggerEvent({"status": job_status["status"]})
                     return
                 self.log.info("Sleeping for %s seconds.", self.poll_interval)
                 await asyncio.sleep(self.poll_interval)
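
The now-async safe_to_cancel guard is what cancel_on_kill hinges on: when the trigger receives asyncio.CancelledError, the BigQuery job is cancelled only if the task instance is not merely re-deferred (state is fetched via the task SDK on Airflow 3, via the metadata database before that). From the operator side this is opted into as before; an illustrative sketch (ids and SQL are made up):

from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

run_query = BigQueryInsertJobOperator(
    task_id="run_query",  # hypothetical task id
    configuration={
        "query": {
            "query": "SELECT 1",  # hypothetical query
            "useLegacySql": False,
        }
    },
    deferrable=True,  # polling is handed to BigQueryInsertJobTrigger
    cancel_on_kill=True,  # cancellation gated by await self.safe_to_cancel()
)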

airflow/providers/google/cloud/triggers/cloud_build.py
@@ -76,7 +76,7 @@ class CloudBuildCreateBuildTrigger(BaseTrigger):
             },
         )

-    async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
+    async def run(self) -> AsyncIterator[TriggerEvent]:
         """Get current build execution status and yields a TriggerEvent."""
         hook = self._get_async_hook()
         try: